aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c22
-rw-r--r--fs/adfs/super.c9
-rw-r--r--fs/affs/super.c9
-rw-r--r--fs/afs/super.c19
-rw-r--r--fs/anon_inodes.c10
-rw-r--r--fs/autofs4/init.c8
-rw-r--r--fs/befs/linuxvfs.c11
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/bio.c23
-rw-r--r--fs/block_dev.c8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c57
-rw-r--r--fs/btrfs/ctree.h100
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/extent-tree.c694
-rw-r--r--fs/btrfs/extent_io.c168
-rw-r--r--fs/btrfs/extent_io.h4
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/free-space-cache.c751
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/inode.c202
-rw-r--r--fs/btrfs/ioctl.c398
-rw-r--r--fs/btrfs/ioctl.h13
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c109
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c57
-rw-r--r--fs/btrfs/transaction.c234
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c5
-rw-r--r--fs/ceph/super.c50
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/TODO2
-rw-r--r--fs/cifs/cifs_fs_sb.h6
-rw-r--r--fs/cifs/cifsencrypt.c427
-rw-r--r--fs/cifs/cifsfs.c21
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h58
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h15
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c246
-rw-r--r--fs/cifs/file.c129
-rw-r--r--fs/cifs/inode.c16
-rw-r--r--fs/cifs/ioctl.c16
-rw-r--r--fs/cifs/misc.c27
-rw-r--r--fs/cifs/sess.c166
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/compat.c13
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/cramfs/inode.c9
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/devpts/inode.c32
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/ecryptfs/keystore.c45
-rw-r--r--fs/ecryptfs/main.c20
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/super.c8
-rw-r--r--fs/exofs/super.c10
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/inode.c17
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/page-io.c97
-rw-r--r--fs/ext4/super.c118
-rw-r--r--fs/fat/namei_msdos.c9
-rw-r--r--fs/fat/namei_vfat.c9
-rw-r--r--fs/freevxfs/vxfs_super.c9
-rw-r--r--fs/fs-writeback.c68
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/inode.c17
-rw-r--r--fs/gfs2/ops_fstype.c51
-rw-r--r--fs/hfs/super.c9
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs_kern.c8
-rw-r--r--fs/hpfs/buffer.c4
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/super.c11
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c11
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/jbd2/journal.c8
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/compr.c6
-rw-r--r--fs/jffs2/compr.h4
-rw-r--r--fs/jffs2/compr_lzo.c4
-rw-r--r--fs/jffs2/compr_rtime.c6
-rw-r--r--fs/jffs2/compr_rubin.c11
-rw-r--r--fs/jffs2/compr_zlib.c6
-rw-r--r--fs/jffs2/dir.c3
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/jffs2/fs.c22
-rw-r--r--fs/jffs2/gc.c7
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.c8
-rw-r--r--fs/jffs2/nodelist.h3
-rw-r--r--fs/jffs2/scan.c12
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c14
-rw-r--r--fs/locks.c90
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/dev_mtd.c18
-rw-r--r--fs/logfs/logfs.h22
-rw-r--r--fs/logfs/super.c77
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/super.c96
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfsd/nfs4state.c36
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nilfs2/super.c16
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/fanotify/fanotify.c27
-rw-r--r--fs/notify/fanotify/fanotify_user.c98
-rw-r--r--fs/notify/fsnotify.c35
-rw-r--r--fs/notify/inode_mark.c9
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/notify/vfsmount_mark.c6
-rw-r--r--fs/ntfs/super.c9
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c8
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c6
-rw-r--r--fs/openpromfs/inode.c8
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/root.c16
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/ramfs/inode.c17
-rw-r--r--fs/read_write.c62
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/romfs/super.c17
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xattr.c9
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/super.c111
-rw-r--r--fs/sysfs/mount.c32
-rw-r--r--fs/sysv/super.c17
-rw-r--r--fs/ubifs/super.c13
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c1
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_quota.h20
168 files changed, 4247 insertions, 1876 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 48d4215c60a8..c55c614500ad 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -68,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
68 * v9fs_fill_super - populate superblock with info 68 * v9fs_fill_super - populate superblock with info
69 * @sb: superblock 69 * @sb: superblock
70 * @v9ses: session information 70 * @v9ses: session information
71 * @flags: flags propagated from v9fs_get_sb() 71 * @flags: flags propagated from v9fs_mount()
72 * 72 *
73 */ 73 */
74 74
@@ -99,18 +99,16 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
99} 99}
100 100
101/** 101/**
102 * v9fs_get_sb - mount a superblock 102 * v9fs_mount - mount a superblock
103 * @fs_type: file system type 103 * @fs_type: file system type
104 * @flags: mount flags 104 * @flags: mount flags
105 * @dev_name: device name that was mounted 105 * @dev_name: device name that was mounted
106 * @data: mount options 106 * @data: mount options
107 * @mnt: mountpoint record to be instantiated
108 * 107 *
109 */ 108 */
110 109
111static int v9fs_get_sb(struct file_system_type *fs_type, int flags, 110static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
112 const char *dev_name, void *data, 111 const char *dev_name, void *data)
113 struct vfsmount *mnt)
114{ 112{
115 struct super_block *sb = NULL; 113 struct super_block *sb = NULL;
116 struct inode *inode = NULL; 114 struct inode *inode = NULL;
@@ -124,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
124 122
125 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 123 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
126 if (!v9ses) 124 if (!v9ses)
127 return -ENOMEM; 125 return ERR_PTR(-ENOMEM);
128 126
129 fid = v9fs_session_init(v9ses, dev_name, data); 127 fid = v9fs_session_init(v9ses, dev_name, data);
130 if (IS_ERR(fid)) { 128 if (IS_ERR(fid)) {
@@ -186,15 +184,15 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
186 v9fs_fid_add(root, fid); 184 v9fs_fid_add(root, fid);
187 185
188 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 186 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
189 simple_set_mnt(mnt, sb); 187 return dget(sb->s_root);
190 return 0;
191 188
192clunk_fid: 189clunk_fid:
193 p9_client_clunk(fid); 190 p9_client_clunk(fid);
194close_session: 191close_session:
195 v9fs_session_close(v9ses); 192 v9fs_session_close(v9ses);
196 kfree(v9ses); 193 kfree(v9ses);
197 return retval; 194 return ERR_PTR(retval);
195
198release_sb: 196release_sb:
199 /* 197 /*
200 * we will do the session_close and root dentry release 198 * we will do the session_close and root dentry release
@@ -204,7 +202,7 @@ release_sb:
204 */ 202 */
205 p9_client_clunk(fid); 203 p9_client_clunk(fid);
206 deactivate_locked_super(sb); 204 deactivate_locked_super(sb);
207 return retval; 205 return ERR_PTR(retval);
208} 206}
209 207
210/** 208/**
@@ -300,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
300 298
301struct file_system_type v9fs_fs_type = { 299struct file_system_type v9fs_fs_type = {
302 .name = "9p", 300 .name = "9p",
303 .get_sb = v9fs_get_sb, 301 .mount = v9fs_mount,
304 .kill_sb = v9fs_kill_super, 302 .kill_sb = v9fs_kill_super,
305 .owner = THIS_MODULE, 303 .owner = THIS_MODULE,
306 .fs_flags = FS_RENAME_DOES_D_MOVE, 304 .fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236f..959dbff2d42d 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
490 return -EINVAL; 490 return -EINVAL;
491} 491}
492 492
493static int adfs_get_sb(struct file_system_type *fs_type, 493static struct dentry *adfs_mount(struct file_system_type *fs_type,
494 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 494 int flags, const char *dev_name, void *data)
495{ 495{
496 return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, 496 return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
497 mnt);
498} 497}
499 498
500static struct file_system_type adfs_fs_type = { 499static struct file_system_type adfs_fs_type = {
501 .owner = THIS_MODULE, 500 .owner = THIS_MODULE,
502 .name = "adfs", 501 .name = "adfs",
503 .get_sb = adfs_get_sb, 502 .mount = adfs_mount,
504 .kill_sb = kill_block_super, 503 .kill_sb = kill_block_super,
505 .fs_flags = FS_REQUIRES_DEV, 504 .fs_flags = FS_REQUIRES_DEV,
506}; 505};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238a..0cf7f4384cbd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
573 return 0; 573 return 0;
574} 574}
575 575
576static int affs_get_sb(struct file_system_type *fs_type, 576static struct dentry *affs_mount(struct file_system_type *fs_type,
577 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 577 int flags, const char *dev_name, void *data)
578{ 578{
579 return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, 579 return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
580 mnt);
581} 580}
582 581
583static struct file_system_type affs_fs_type = { 582static struct file_system_type affs_fs_type = {
584 .owner = THIS_MODULE, 583 .owner = THIS_MODULE,
585 .name = "affs", 584 .name = "affs",
586 .get_sb = affs_get_sb, 585 .mount = affs_mount,
587 .kill_sb = kill_block_super, 586 .kill_sb = kill_block_super,
588 .fs_flags = FS_REQUIRES_DEV, 587 .fs_flags = FS_REQUIRES_DEV,
589}; 588};
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae0..27201cffece4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
30 30
31static void afs_i_init_once(void *foo); 31static void afs_i_init_once(void *foo);
32static int afs_get_sb(struct file_system_type *fs_type, 32static struct dentry *afs_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name, 33 int flags, const char *dev_name, void *data);
34 void *data, struct vfsmount *mnt);
35static struct inode *afs_alloc_inode(struct super_block *sb); 34static struct inode *afs_alloc_inode(struct super_block *sb);
36static void afs_put_super(struct super_block *sb); 35static void afs_put_super(struct super_block *sb);
37static void afs_destroy_inode(struct inode *inode); 36static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
40struct file_system_type afs_fs_type = { 39struct file_system_type afs_fs_type = {
41 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
42 .name = "afs", 41 .name = "afs",
43 .get_sb = afs_get_sb, 42 .mount = afs_mount,
44 .kill_sb = kill_anon_super, 43 .kill_sb = kill_anon_super,
45 .fs_flags = 0, 44 .fs_flags = 0,
46}; 45};
@@ -359,11 +358,8 @@ error:
359/* 358/*
360 * get an AFS superblock 359 * get an AFS superblock
361 */ 360 */
362static int afs_get_sb(struct file_system_type *fs_type, 361static struct dentry *afs_mount(struct file_system_type *fs_type,
363 int flags, 362 int flags, const char *dev_name, void *options)
364 const char *dev_name,
365 void *options,
366 struct vfsmount *mnt)
367{ 363{
368 struct afs_mount_params params; 364 struct afs_mount_params params;
369 struct super_block *sb; 365 struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
427 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 423 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
428 } 424 }
429 425
430 simple_set_mnt(mnt, sb);
431 afs_put_volume(params.volume); 426 afs_put_volume(params.volume);
432 afs_put_cell(params.cell); 427 afs_put_cell(params.cell);
433 kfree(new_opts); 428 kfree(new_opts);
434 _leave(" = 0 [%p]", sb); 429 _leave(" = 0 [%p]", sb);
435 return 0; 430 return dget(sb->s_root);
436 431
437error: 432error:
438 afs_put_volume(params.volume); 433 afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
440 key_put(params.key); 435 key_put(params.key);
441 kfree(new_opts); 436 kfree(new_opts);
442 _leave(" = %d", ret); 437 _leave(" = %d", ret);
443 return ret; 438 return ERR_PTR(ret);
444} 439}
445 440
446/* 441/*
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 5365527ca43f..57ce55b2564c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
26static struct inode *anon_inode_inode; 26static struct inode *anon_inode_inode;
27static const struct file_operations anon_inode_fops; 27static const struct file_operations anon_inode_fops;
28 28
29static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, 29static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
30 const char *dev_name, void *data, 30 int flags, const char *dev_name, void *data)
31 struct vfsmount *mnt)
32{ 31{
33 return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, 32 return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
34 mnt);
35} 33}
36 34
37/* 35/*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
45 43
46static struct file_system_type anon_inode_fs_type = { 44static struct file_system_type anon_inode_fs_type = {
47 .name = "anon_inodefs", 45 .name = "anon_inodefs",
48 .get_sb = anon_inodefs_get_sb, 46 .mount = anon_inodefs_mount,
49 .kill_sb = kill_anon_super, 47 .kill_sb = kill_anon_super,
50}; 48};
51static const struct dentry_operations anon_inodefs_dentry_operations = { 49static const struct dentry_operations anon_inodefs_dentry_operations = {
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd8957..c038727b4050 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include "autofs_i.h" 15#include "autofs_i.h"
16 16
17static int autofs_get_sb(struct file_system_type *fs_type, 17static struct dentry *autofs_mount(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 18 int flags, const char *dev_name, void *data)
19{ 19{
20 return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); 20 return mount_nodev(fs_type, flags, data, autofs4_fill_super);
21} 21}
22 22
23static struct file_system_type autofs_fs_type = { 23static struct file_system_type autofs_fs_type = {
24 .owner = THIS_MODULE, 24 .owner = THIS_MODULE,
25 .name = "autofs", 25 .name = "autofs",
26 .get_sb = autofs_get_sb, 26 .mount = autofs_mount,
27 .kill_sb = autofs4_kill_sb, 27 .kill_sb = autofs4_kill_sb,
28}; 28};
29 29
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d2824885..aa4e7c7ae3c6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
913 return 0; 913 return 0;
914} 914}
915 915
916static int 916static struct dentry *
917befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, 917befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
918 void *data, struct vfsmount *mnt) 918 void *data)
919{ 919{
920 return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, 920 return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
921 mnt);
922} 921}
923 922
924static struct file_system_type befs_fs_type = { 923static struct file_system_type befs_fs_type = {
925 .owner = THIS_MODULE, 924 .owner = THIS_MODULE,
926 .name = "befs", 925 .name = "befs",
927 .get_sb = befs_get_sb, 926 .mount = befs_mount,
928 .kill_sb = kill_block_super, 927 .kill_sb = kill_block_super,
929 .fs_flags = FS_REQUIRES_DEV, 928 .fs_flags = FS_REQUIRES_DEV,
930}; 929};
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a8..76db6d7d49bb 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
450 return ret; 450 return ret;
451} 451}
452 452
453static int bfs_get_sb(struct file_system_type *fs_type, 453static struct dentry *bfs_mount(struct file_system_type *fs_type,
454 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 454 int flags, const char *dev_name, void *data)
455{ 455{
456 return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); 456 return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
457} 457}
458 458
459static struct file_system_type bfs_fs_type = { 459static struct file_system_type bfs_fs_type = {
460 .owner = THIS_MODULE, 460 .owner = THIS_MODULE,
461 .name = "bfs", 461 .name = "bfs",
462 .get_sb = bfs_get_sb, 462 .mount = bfs_mount,
463 .kill_sb = kill_block_super, 463 .kill_sb = kill_block_super,
464 .fs_flags = FS_REQUIRES_DEV, 464 .fs_flags = FS_REQUIRES_DEV,
465}; 465};
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 29990f0eee0c..1befe2ec8186 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
706 return err; 706 return err;
707} 707}
708 708
709static int bm_get_sb(struct file_system_type *fs_type, 709static struct dentry *bm_mount(struct file_system_type *fs_type,
710 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 710 int flags, const char *dev_name, void *data)
711{ 711{
712 return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); 712 return mount_single(fs_type, flags, data, bm_fill_super);
713} 713}
714 714
715static struct linux_binfmt misc_format = { 715static struct linux_binfmt misc_format = {
@@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = {
720static struct file_system_type bm_fs_type = { 720static struct file_system_type bm_fs_type = {
721 .owner = THIS_MODULE, 721 .owner = THIS_MODULE,
722 .name = "binfmt_misc", 722 .name = "binfmt_misc",
723 .get_sb = bm_get_sb, 723 .mount = bm_mount,
724 .kill_sb = kill_litter_super, 724 .kill_sb = kill_litter_super,
725}; 725};
726 726
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
370{ 370{
371 struct bio *bio; 371 struct bio *bio;
372 372
373 if (nr_iovecs > UIO_MAXIOV)
374 return NULL;
375
373 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), 376 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
374 gfp_mask); 377 gfp_mask);
375 if (unlikely(!bio)) 378 if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
697static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, 700static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
698 gfp_t gfp_mask) 701 gfp_t gfp_mask)
699{ 702{
700 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); 703 struct bio_map_data *bmd;
701 704
705 if (iov_count > UIO_MAXIOV)
706 return NULL;
707
708 bmd = kmalloc(sizeof(*bmd), gfp_mask);
702 if (!bmd) 709 if (!bmd)
703 return NULL; 710 return NULL;
704 711
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
827 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 834 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
828 start = uaddr >> PAGE_SHIFT; 835 start = uaddr >> PAGE_SHIFT;
829 836
837 /*
838 * Overflow, abort
839 */
840 if (end < start)
841 return ERR_PTR(-EINVAL);
842
830 nr_pages += end - start; 843 nr_pages += end - start;
831 len += iov[i].iov_len; 844 len += iov[i].iov_len;
832 } 845 }
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
955 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 968 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
956 unsigned long start = uaddr >> PAGE_SHIFT; 969 unsigned long start = uaddr >> PAGE_SHIFT;
957 970
971 /*
972 * Overflow, abort
973 */
974 if (end < start)
975 return ERR_PTR(-EINVAL);
976
958 nr_pages += end - start; 977 nr_pages += end - start;
959 /* 978 /*
960 * buffer must be aligned to at least hardsector size for now 979 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
982 unsigned long start = uaddr >> PAGE_SHIFT; 1001 unsigned long start = uaddr >> PAGE_SHIFT;
983 const int local_nr_pages = end - start; 1002 const int local_nr_pages = end - start;
984 const int page_limit = cur_page + local_nr_pages; 1003 const int page_limit = cur_page + local_nr_pages;
985 1004
986 ret = get_user_pages_fast(uaddr, local_nr_pages, 1005 ret = get_user_pages_fast(uaddr, local_nr_pages,
987 write_to_vm, &pages[cur_page]); 1006 write_to_vm, &pages[cur_page]);
988 if (ret < local_nr_pages) { 1007 if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b628a6ce..06e8ff12b97c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = {
464 .evict_inode = bdev_evict_inode, 464 .evict_inode = bdev_evict_inode,
465}; 465};
466 466
467static int bd_get_sb(struct file_system_type *fs_type, 467static struct dentry *bd_mount(struct file_system_type *fs_type,
468 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 468 int flags, const char *dev_name, void *data)
469{ 469{
470 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); 470 return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
471} 471}
472 472
473static struct file_system_type bd_type = { 473static struct file_system_type bd_type = {
474 .name = "bdev", 474 .name = "bdev",
475 .get_sb = bd_get_sb, 475 .mount = bd_mount,
476 .kill_sb = kill_anon_super, 476 .kill_sb = kill_anon_super,
477}; 477};
478 478
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..7845d1f7d1d9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
163 */ 163 */
164static void end_compressed_bio_read(struct bio *bio, int err) 164static void end_compressed_bio_read(struct bio *bio, int err)
165{ 165{
166 struct extent_io_tree *tree;
167 struct compressed_bio *cb = bio->bi_private; 166 struct compressed_bio *cb = bio->bi_private;
168 struct inode *inode; 167 struct inode *inode;
169 struct page *page; 168 struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
187 /* ok, we're the last bio for this extent, lets start 186 /* ok, we're the last bio for this extent, lets start
188 * the decompression. 187 * the decompression.
189 */ 188 */
190 tree = &BTRFS_I(inode)->io_tree;
191 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 189 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
192 cb->start, 190 cb->start,
193 cb->orig_bio->bi_io_vec, 191 cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..9ac171599258 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
200 struct extent_buffer **cow_ret, u64 new_root_objectid) 200 struct extent_buffer **cow_ret, u64 new_root_objectid)
201{ 201{
202 struct extent_buffer *cow; 202 struct extent_buffer *cow;
203 u32 nritems;
204 int ret = 0; 203 int ret = 0;
205 int level; 204 int level;
206 struct btrfs_disk_key disk_key; 205 struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
210 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 209 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
211 210
212 level = btrfs_header_level(buf); 211 level = btrfs_header_level(buf);
213 nritems = btrfs_header_nritems(buf);
214 if (level == 0) 212 if (level == 0)
215 btrfs_item_key(buf, &disk_key, 0); 213 btrfs_item_key(buf, &disk_key, 0);
216 else 214 else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1008 int wret; 1006 int wret;
1009 int pslot; 1007 int pslot;
1010 int orig_slot = path->slots[level]; 1008 int orig_slot = path->slots[level];
1011 int err_on_enospc = 0;
1012 u64 orig_ptr; 1009 u64 orig_ptr;
1013 1010
1014 if (level == 0) 1011 if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1071 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1068 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1072 return 0; 1069 return 0;
1073 1070
1074 if (btrfs_header_nritems(mid) < 2) 1071 btrfs_header_nritems(mid);
1075 err_on_enospc = 1;
1076 1072
1077 left = read_node_slot(root, parent, pslot - 1); 1073 left = read_node_slot(root, parent, pslot - 1);
1078 if (left) { 1074 if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1103 wret = push_node_left(trans, root, left, mid, 1); 1099 wret = push_node_left(trans, root, left, mid, 1);
1104 if (wret < 0) 1100 if (wret < 0)
1105 ret = wret; 1101 ret = wret;
1106 if (btrfs_header_nritems(mid) < 2) 1102 btrfs_header_nritems(mid);
1107 err_on_enospc = 1;
1108 } 1103 }
1109 1104
1110 /* 1105 /*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1224 int wret; 1219 int wret;
1225 int pslot; 1220 int pslot;
1226 int orig_slot = path->slots[level]; 1221 int orig_slot = path->slots[level];
1227 u64 orig_ptr;
1228 1222
1229 if (level == 0) 1223 if (level == 0)
1230 return 1; 1224 return 1;
1231 1225
1232 mid = path->nodes[level]; 1226 mid = path->nodes[level];
1233 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1227 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1234 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1235 1228
1236 if (level < BTRFS_MAX_LEVEL - 1) 1229 if (level < BTRFS_MAX_LEVEL - 1)
1237 parent = path->nodes[level + 1]; 1230 parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1577 blocksize = btrfs_level_size(root, level - 1); 1570 blocksize = btrfs_level_size(root, level - 1);
1578 1571
1579 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1572 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1580 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1573 if (tmp) {
1581 /* 1574 if (btrfs_buffer_uptodate(tmp, 0)) {
1582 * we found an up to date block without sleeping, return 1575 if (btrfs_buffer_uptodate(tmp, gen)) {
1583 * right away 1576 /*
1584 */ 1577 * we found an up to date block without
1585 *eb_ret = tmp; 1578 * sleeping, return
1586 return 0; 1579 * right away
1580 */
1581 *eb_ret = tmp;
1582 return 0;
1583 }
1584 /* the pages were up to date, but we failed
1585 * the generation number check. Do a full
1586 * read for the generation number that is correct.
1587 * We must do this without dropping locks so
1588 * we can trust our generation number
1589 */
1590 free_extent_buffer(tmp);
1591 tmp = read_tree_block(root, blocknr, blocksize, gen);
1592 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1593 *eb_ret = tmp;
1594 return 0;
1595 }
1596 free_extent_buffer(tmp);
1597 btrfs_release_path(NULL, p);
1598 return -EIO;
1599 }
1587 } 1600 }
1588 1601
1589 /* 1602 /*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1596 btrfs_unlock_up_safe(p, level + 1); 1609 btrfs_unlock_up_safe(p, level + 1);
1597 btrfs_set_path_blocking(p); 1610 btrfs_set_path_blocking(p);
1598 1611
1599 if (tmp) 1612 free_extent_buffer(tmp);
1600 free_extent_buffer(tmp);
1601 if (p->reada) 1613 if (p->reada)
1602 reada_for_search(root, p, level, slot, key->objectid); 1614 reada_for_search(root, p, level, slot, key->objectid);
1603 1615
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2548{ 2560{
2549 struct btrfs_disk_key disk_key; 2561 struct btrfs_disk_key disk_key;
2550 struct extent_buffer *right = path->nodes[0]; 2562 struct extent_buffer *right = path->nodes[0];
2551 int slot;
2552 int i; 2563 int i;
2553 int push_space = 0; 2564 int push_space = 0;
2554 int push_items = 0; 2565 int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2560 u32 this_item_size; 2571 u32 this_item_size;
2561 u32 old_left_item_size; 2572 u32 old_left_item_size;
2562 2573
2563 slot = path->slots[1];
2564
2565 if (empty) 2574 if (empty)
2566 nr = min(right_nritems, max_slot); 2575 nr = min(right_nritems, max_slot);
2567 else 2576 else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3330{ 3339{
3331 int ret = 0; 3340 int ret = 0;
3332 int slot; 3341 int slot;
3333 int slot_orig;
3334 struct extent_buffer *leaf; 3342 struct extent_buffer *leaf;
3335 struct btrfs_item *item; 3343 struct btrfs_item *item;
3336 u32 nritems; 3344 u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3340 unsigned int size_diff; 3348 unsigned int size_diff;
3341 int i; 3349 int i;
3342 3350
3343 slot_orig = path->slots[0];
3344 leaf = path->nodes[0]; 3351 leaf = path->nodes[0];
3345 slot = path->slots[0]; 3352 slot = path->slots[0];
3346 3353
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3445{ 3452{
3446 int ret = 0; 3453 int ret = 0;
3447 int slot; 3454 int slot;
3448 int slot_orig;
3449 struct extent_buffer *leaf; 3455 struct extent_buffer *leaf;
3450 struct btrfs_item *item; 3456 struct btrfs_item *item;
3451 u32 nritems; 3457 u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3454 unsigned int old_size; 3460 unsigned int old_size;
3455 int i; 3461 int i;
3456 3462
3457 slot_orig = path->slots[0];
3458 leaf = path->nodes[0]; 3463 leaf = path->nodes[0];
3459 3464
3460 nritems = btrfs_header_nritems(leaf); 3465 nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3787 struct btrfs_key *cpu_key, u32 *data_size, 3792 struct btrfs_key *cpu_key, u32 *data_size,
3788 int nr) 3793 int nr)
3789{ 3794{
3790 struct extent_buffer *leaf;
3791 int ret = 0; 3795 int ret = 0;
3792 int slot; 3796 int slot;
3793 int i; 3797 int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3804 if (ret < 0) 3808 if (ret < 0)
3805 goto out; 3809 goto out;
3806 3810
3807 leaf = path->nodes[0];
3808 slot = path->slots[0]; 3811 slot = path->slots[0];
3809 BUG_ON(slot < 0); 3812 BUG_ON(slot < 0);
3810 3813
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..8db9234f6b41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
99 */ 99 */
100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
101 101
102/* For storing free space cache */
103#define BTRFS_FREE_SPACE_OBJECTID -11ULL
104
102/* dummy objectid represents multiple objectids */ 105/* dummy objectid represents multiple objectids */
103#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 106#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
104 107
@@ -265,6 +268,22 @@ struct btrfs_chunk {
265 /* additional stripes go here */ 268 /* additional stripes go here */
266} __attribute__ ((__packed__)); 269} __attribute__ ((__packed__));
267 270
271#define BTRFS_FREE_SPACE_EXTENT 1
272#define BTRFS_FREE_SPACE_BITMAP 2
273
274struct btrfs_free_space_entry {
275 __le64 offset;
276 __le64 bytes;
277 u8 type;
278} __attribute__ ((__packed__));
279
280struct btrfs_free_space_header {
281 struct btrfs_disk_key location;
282 __le64 generation;
283 __le64 num_entries;
284 __le64 num_bitmaps;
285} __attribute__ ((__packed__));
286
268static inline unsigned long btrfs_chunk_item_size(int num_stripes) 287static inline unsigned long btrfs_chunk_item_size(int num_stripes)
269{ 288{
270 BUG_ON(num_stripes == 0); 289 BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
365 384
366 char label[BTRFS_LABEL_SIZE]; 385 char label[BTRFS_LABEL_SIZE];
367 386
387 __le64 cache_generation;
388
368 /* future expansion */ 389 /* future expansion */
369 __le64 reserved[32]; 390 __le64 reserved[31];
370 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 391 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
371} __attribute__ ((__packed__)); 392} __attribute__ ((__packed__));
372 393
@@ -375,13 +396,15 @@ struct btrfs_super_block {
375 * ones specified below then we will fail to mount 396 * ones specified below then we will fail to mount
376 */ 397 */
377#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
378#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) 399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
400#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
379 401
380#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 402#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
381#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 403#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
382#define BTRFS_FEATURE_INCOMPAT_SUPP \ 404#define BTRFS_FEATURE_INCOMPAT_SUPP \
383 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 405 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
384 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) 406 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
407 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
385 408
386/* 409/*
387 * A leaf is full of items. offset and size tell us where to find 410 * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
675struct btrfs_space_info { 698struct btrfs_space_info {
676 u64 flags; 699 u64 flags;
677 700
678 u64 total_bytes; /* total bytes in the space */ 701 u64 total_bytes; /* total bytes in the space,
702 this doesn't take mirrors into account */
679 u64 bytes_used; /* total bytes used, 703 u64 bytes_used; /* total bytes used,
680 this doesn't take mirrors into account */ 704 this doesn't take mirrors into account */
681 u64 bytes_pinned; /* total bytes pinned, will be freed when the 705 u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
687 u64 bytes_may_use; /* number of bytes that may be used for 711 u64 bytes_may_use; /* number of bytes that may be used for
688 delalloc/allocations */ 712 delalloc/allocations */
689 u64 disk_used; /* total bytes used on disk */ 713 u64 disk_used; /* total bytes used on disk */
714 u64 disk_total; /* total bytes on disk, takes mirrors into
715 account */
690 716
691 int full; /* indicates that we cannot allocate any more 717 int full; /* indicates that we cannot allocate any more
692 chunks for this space */ 718 chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
750 BTRFS_CACHE_FINISHED = 2, 776 BTRFS_CACHE_FINISHED = 2,
751}; 777};
752 778
779enum btrfs_disk_cache_state {
780 BTRFS_DC_WRITTEN = 0,
781 BTRFS_DC_ERROR = 1,
782 BTRFS_DC_CLEAR = 2,
783 BTRFS_DC_SETUP = 3,
784 BTRFS_DC_NEED_WRITE = 4,
785};
786
753struct btrfs_caching_control { 787struct btrfs_caching_control {
754 struct list_head list; 788 struct list_head list;
755 struct mutex mutex; 789 struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
763 struct btrfs_key key; 797 struct btrfs_key key;
764 struct btrfs_block_group_item item; 798 struct btrfs_block_group_item item;
765 struct btrfs_fs_info *fs_info; 799 struct btrfs_fs_info *fs_info;
800 struct inode *inode;
766 spinlock_t lock; 801 spinlock_t lock;
767 u64 pinned; 802 u64 pinned;
768 u64 reserved; 803 u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
773 int extents_thresh; 808 int extents_thresh;
774 int free_extents; 809 int free_extents;
775 int total_bitmaps; 810 int total_bitmaps;
776 int ro; 811 int ro:1;
777 int dirty; 812 int dirty:1;
813 int iref:1;
814
815 int disk_cache_state;
778 816
779 /* cache tracking stuff */ 817 /* cache tracking stuff */
780 int cached; 818 int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
863 struct btrfs_transaction *running_transaction; 901 struct btrfs_transaction *running_transaction;
864 wait_queue_head_t transaction_throttle; 902 wait_queue_head_t transaction_throttle;
865 wait_queue_head_t transaction_wait; 903 wait_queue_head_t transaction_wait;
904 wait_queue_head_t transaction_blocked_wait;
866 wait_queue_head_t async_submit_wait; 905 wait_queue_head_t async_submit_wait;
867 906
868 struct btrfs_super_block super_copy; 907 struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
949 struct btrfs_workers endio_meta_workers; 988 struct btrfs_workers endio_meta_workers;
950 struct btrfs_workers endio_meta_write_workers; 989 struct btrfs_workers endio_meta_write_workers;
951 struct btrfs_workers endio_write_workers; 990 struct btrfs_workers endio_write_workers;
991 struct btrfs_workers endio_freespace_worker;
952 struct btrfs_workers submit_workers; 992 struct btrfs_workers submit_workers;
953 /* 993 /*
954 * fixup workers take dirty pages that didn't properly go through 994 * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
1192#define BTRFS_MOUNT_NOSSD (1 << 9) 1232#define BTRFS_MOUNT_NOSSD (1 << 9)
1193#define BTRFS_MOUNT_DISCARD (1 << 10) 1233#define BTRFS_MOUNT_DISCARD (1 << 10)
1194#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1234#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1235#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1236#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1237#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1195 1238
1196#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1239#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1197#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1240#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
1665 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1708 write_eb_member(eb, item, struct btrfs_dir_item, location, key);
1666} 1709}
1667 1710
1711BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
1712 num_entries, 64);
1713BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
1714 num_bitmaps, 64);
1715BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
1716 generation, 64);
1717
1718static inline void btrfs_free_space_key(struct extent_buffer *eb,
1719 struct btrfs_free_space_header *h,
1720 struct btrfs_disk_key *key)
1721{
1722 read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1723}
1724
1725static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
1726 struct btrfs_free_space_header *h,
1727 struct btrfs_disk_key *key)
1728{
1729 write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1730}
1731
1668/* struct btrfs_disk_key */ 1732/* struct btrfs_disk_key */
1669BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1733BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
1670 objectid, 64); 1734 objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1876 incompat_flags, 64); 1940 incompat_flags, 64);
1877BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1941BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
1878 csum_type, 16); 1942 csum_type, 16);
1943BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
1944 cache_generation, 64);
1879 1945
1880static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1946static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
1881{ 1947{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
1988 return file->f_path.dentry; 2054 return file->f_path.dentry;
1989} 2055}
1990 2056
2057static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2058{
2059 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
2060 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
2061}
2062
1991/* extent-tree.c */ 2063/* extent-tree.c */
1992void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2064void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1993int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2065int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
2079void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 2151void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
2080int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2152int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
2081 struct btrfs_root *root, 2153 struct btrfs_root *root,
2082 int num_items, int *retries); 2154 int num_items);
2083void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 2155void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
2084 struct btrfs_root *root); 2156 struct btrfs_root *root);
2085int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 2157int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
2100int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 2172int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
2101 struct btrfs_root *root, 2173 struct btrfs_root *root,
2102 struct btrfs_block_rsv *block_rsv, 2174 struct btrfs_block_rsv *block_rsv,
2103 u64 num_bytes, int *retries); 2175 u64 num_bytes);
2104int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 2176int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
2105 struct btrfs_root *root, 2177 struct btrfs_root *root,
2106 struct btrfs_block_rsv *block_rsv, 2178 struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
2115 struct btrfs_block_group_cache *cache); 2187 struct btrfs_block_group_cache *cache);
2116int btrfs_set_block_group_rw(struct btrfs_root *root, 2188int btrfs_set_block_group_rw(struct btrfs_root *root,
2117 struct btrfs_block_group_cache *cache); 2189 struct btrfs_block_group_cache *cache);
2190void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2118/* ctree.c */ 2191/* ctree.c */
2119int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2192int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2120 int level, int *slot); 2193 int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2373 u32 min_type); 2446 u32 min_type);
2374 2447
2375int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2448int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2376int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); 2449int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
2450 int sync);
2377int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2451int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2378 struct extent_state **cached_state); 2452 struct extent_state **cached_state);
2379int btrfs_writepages(struct address_space *mapping, 2453int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
2426int btrfs_prealloc_file_range(struct inode *inode, int mode, 2500int btrfs_prealloc_file_range(struct inode *inode, int mode,
2427 u64 start, u64 num_bytes, u64 min_size, 2501 u64 start, u64 num_bytes, u64 min_size,
2428 loff_t actual_len, u64 *alloc_hint); 2502 loff_t actual_len, u64 *alloc_hint);
2503int btrfs_prealloc_file_range_trans(struct inode *inode,
2504 struct btrfs_trans_handle *trans, int mode,
2505 u64 start, u64 num_bytes, u64 min_size,
2506 loff_t actual_len, u64 *alloc_hint);
2429extern const struct dentry_operations btrfs_dentry_operations; 2507extern const struct dentry_operations btrfs_dentry_operations;
2430 2508
2431/* ioctl.c */ 2509/* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa49..f0cad5ae5be7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
427 ret = btrfs_truncate_item(trans, root, path, 427 ret = btrfs_truncate_item(trans, root, path,
428 item_len - sub_item_len, 1); 428 item_len - sub_item_len, 1);
429 } 429 }
430 return 0; 430 return ret;
431} 431}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed0..fb827d0d7181 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
338 struct extent_io_tree *tree; 338 struct extent_io_tree *tree;
339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
340 u64 found_start; 340 u64 found_start;
341 int found_level;
342 unsigned long len; 341 unsigned long len;
343 struct extent_buffer *eb; 342 struct extent_buffer *eb;
344 int ret; 343 int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
369 WARN_ON(1); 368 WARN_ON(1);
370 goto err; 369 goto err;
371 } 370 }
372 found_level = btrfs_header_level(eb);
373
374 csum_tree_block(root, eb, 0); 371 csum_tree_block(root, eb, 0);
375err: 372err:
376 free_extent_buffer(eb); 373 free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
481 end_io_wq->work.flags = 0; 478 end_io_wq->work.flags = 0;
482 479
483 if (bio->bi_rw & REQ_WRITE) { 480 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 481 if (end_io_wq->metadata == 1)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 482 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 483 &end_io_wq->work);
484 else if (end_io_wq->metadata == 2)
485 btrfs_queue_worker(&fs_info->endio_freespace_worker,
486 &end_io_wq->work);
487 else 487 else
488 btrfs_queue_worker(&fs_info->endio_write_workers, 488 btrfs_queue_worker(&fs_info->endio_write_workers,
489 &end_io_wq->work); 489 &end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
497 } 497 }
498} 498}
499 499
500/*
501 * For the metadata arg you want
502 *
503 * 0 - if data
504 * 1 - if normal metadata
505 * 2 - if writing to the free space cache area
506 */
500int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 507int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
501 int metadata) 508 int metadata)
502{ 509{
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
533 540
534static void run_one_async_start(struct btrfs_work *work) 541static void run_one_async_start(struct btrfs_work *work)
535{ 542{
536 struct btrfs_fs_info *fs_info;
537 struct async_submit_bio *async; 543 struct async_submit_bio *async;
538 544
539 async = container_of(work, struct async_submit_bio, work); 545 async = container_of(work, struct async_submit_bio, work);
540 fs_info = BTRFS_I(async->inode)->root->fs_info;
541 async->submit_bio_start(async->inode, async->rw, async->bio, 546 async->submit_bio_start(async->inode, async->rw, async->bio,
542 async->mirror_num, async->bio_flags, 547 async->mirror_num, async->bio_flags,
543 async->bio_offset); 548 async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
850 u32 blocksize, u64 parent_transid) 855 u32 blocksize, u64 parent_transid)
851{ 856{
852 struct extent_buffer *buf = NULL; 857 struct extent_buffer *buf = NULL;
853 struct inode *btree_inode = root->fs_info->btree_inode;
854 struct extent_io_tree *io_tree;
855 int ret; 858 int ret;
856 859
857 io_tree = &BTRFS_I(btree_inode)->io_tree;
858
859 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 860 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
860 if (!buf) 861 if (!buf)
861 return NULL; 862 return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
1377 u64 start = 0; 1378 u64 start = 0;
1378 struct page *page; 1379 struct page *page;
1379 struct extent_io_tree *io_tree = NULL; 1380 struct extent_io_tree *io_tree = NULL;
1380 struct btrfs_fs_info *info = NULL;
1381 struct bio_vec *bvec; 1381 struct bio_vec *bvec;
1382 int i; 1382 int i;
1383 int ret; 1383 int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
1396 buf_len = page->private >> 2; 1396 buf_len = page->private >> 2;
1397 start = page_offset(page) + bvec->bv_offset; 1397 start = page_offset(page) + bvec->bv_offset;
1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1399 info = BTRFS_I(page->mapping->host)->root->fs_info;
1400 } 1399 }
1401 /* are we fully contained in this bio? */ 1400 /* are we fully contained in this bio? */
1402 if (buf_len <= length) 1401 if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1680 1679
1681 init_waitqueue_head(&fs_info->transaction_throttle); 1680 init_waitqueue_head(&fs_info->transaction_throttle);
1682 init_waitqueue_head(&fs_info->transaction_wait); 1681 init_waitqueue_head(&fs_info->transaction_wait);
1682 init_waitqueue_head(&fs_info->transaction_blocked_wait);
1683 init_waitqueue_head(&fs_info->async_submit_wait); 1683 init_waitqueue_head(&fs_info->async_submit_wait);
1684 1684
1685 __setup_root(4096, 4096, 4096, 4096, tree_root, 1685 __setup_root(4096, 4096, 4096, 4096, tree_root,
1686 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1686 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1687 1687
1688
1689 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1688 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1690 if (!bh) 1689 if (!bh)
1691 goto fail_iput; 1690 goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1775 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1776 fs_info->thread_pool_size, 1775 fs_info->thread_pool_size,
1777 &fs_info->generic_worker); 1776 &fs_info->generic_worker);
1777 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1778 1, &fs_info->generic_worker);
1778 1779
1779 /* 1780 /*
1780 * endios are largely parallel and should have a very 1781 * endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1795 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1796 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1796 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1797 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1797 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1798 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1799 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1798 1800
1799 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1801 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1800 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1802 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1993 if (!(sb->s_flags & MS_RDONLY)) { 1995 if (!(sb->s_flags & MS_RDONLY)) {
1994 down_read(&fs_info->cleanup_work_sem); 1996 down_read(&fs_info->cleanup_work_sem);
1995 btrfs_orphan_cleanup(fs_info->fs_root); 1997 btrfs_orphan_cleanup(fs_info->fs_root);
1998 btrfs_orphan_cleanup(fs_info->tree_root);
1996 up_read(&fs_info->cleanup_work_sem); 1999 up_read(&fs_info->cleanup_work_sem);
1997 } 2000 }
1998 2001
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2038 btrfs_stop_workers(&fs_info->endio_meta_workers);
2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2039 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2040 btrfs_stop_workers(&fs_info->endio_write_workers);
2041 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2038 btrfs_stop_workers(&fs_info->submit_workers); 2042 btrfs_stop_workers(&fs_info->submit_workers);
2039fail_iput: 2043fail_iput:
2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2044 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
2410 fs_info->closing = 1; 2414 fs_info->closing = 1;
2411 smp_mb(); 2415 smp_mb();
2412 2416
2417 btrfs_put_block_group_cache(fs_info);
2413 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2418 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2414 ret = btrfs_commit_super(root); 2419 ret = btrfs_commit_super(root);
2415 if (ret) 2420 if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
2456 btrfs_stop_workers(&fs_info->endio_meta_workers); 2461 btrfs_stop_workers(&fs_info->endio_meta_workers);
2457 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2462 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2458 btrfs_stop_workers(&fs_info->endio_write_workers); 2463 btrfs_stop_workers(&fs_info->endio_write_workers);
2464 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2459 btrfs_stop_workers(&fs_info->submit_workers); 2465 btrfs_stop_workers(&fs_info->submit_workers);
2460 2466
2461 btrfs_close_devices(fs_info->fs_devices); 2467 btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
242 return NULL; 242 return NULL;
243 } 243 }
244 244
245 /* We're loading it the fast way, so we don't have a caching_ctl. */
246 if (!cache->caching_ctl) {
247 spin_unlock(&cache->lock);
248 return NULL;
249 }
250
245 ctl = cache->caching_ctl; 251 ctl = cache->caching_ctl;
246 atomic_inc(&ctl->count); 252 atomic_inc(&ctl->count);
247 spin_unlock(&cache->lock); 253 spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
421 return 0; 427 return 0;
422} 428}
423 429
424static int cache_block_group(struct btrfs_block_group_cache *cache) 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans,
432 int load_cache_only)
425{ 433{
426 struct btrfs_fs_info *fs_info = cache->fs_info; 434 struct btrfs_fs_info *fs_info = cache->fs_info;
427 struct btrfs_caching_control *caching_ctl; 435 struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
432 if (cache->cached != BTRFS_CACHE_NO) 440 if (cache->cached != BTRFS_CACHE_NO)
433 return 0; 441 return 0;
434 442
443 /*
444 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking.
446 */
447 if (!trans->transaction->in_commit) {
448 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock);
451 return 0;
452 }
453 cache->cached = BTRFS_CACHE_STARTED;
454 spin_unlock(&cache->lock);
455
456 ret = load_free_space_cache(fs_info, cache);
457
458 spin_lock(&cache->lock);
459 if (ret == 1) {
460 cache->cached = BTRFS_CACHE_FINISHED;
461 cache->last_byte_to_unpin = (u64)-1;
462 } else {
463 cache->cached = BTRFS_CACHE_NO;
464 }
465 spin_unlock(&cache->lock);
466 if (ret == 1)
467 return 0;
468 }
469
470 if (load_cache_only)
471 return 0;
472
435 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 473 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
436 BUG_ON(!caching_ctl); 474 BUG_ON(!caching_ctl);
437 475
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
509 547
510 rcu_read_lock(); 548 rcu_read_lock();
511 list_for_each_entry_rcu(found, head, list) { 549 list_for_each_entry_rcu(found, head, list) {
512 if (found->flags == flags) { 550 if (found->flags & flags) {
513 rcu_read_unlock(); 551 rcu_read_unlock();
514 return found; 552 return found;
515 } 553 }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
542 return num; 580 return num;
543} 581}
544 582
583static u64 div_factor_fine(u64 num, int factor)
584{
585 if (factor == 100)
586 return num;
587 num *= factor;
588 do_div(num, 100);
589 return num;
590}
591
545u64 btrfs_find_block_group(struct btrfs_root *root, 592u64 btrfs_find_block_group(struct btrfs_root *root,
546 u64 search_start, u64 search_hint, int owner) 593 u64 search_start, u64 search_hint, int owner)
547{ 594{
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
2687 return cache; 2734 return cache;
2688} 2735}
2689 2736
2737static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2738 struct btrfs_trans_handle *trans,
2739 struct btrfs_path *path)
2740{
2741 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL;
2743 u64 alloc_hint = 0;
2744 int num_pages = 0;
2745 int retries = 0;
2746 int ret = 0;
2747
2748 /*
2749 * If this block group is smaller than 100 megs don't bother caching the
2750 * block group.
2751 */
2752 if (block_group->key.offset < (100 * 1024 * 1024)) {
2753 spin_lock(&block_group->lock);
2754 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2755 spin_unlock(&block_group->lock);
2756 return 0;
2757 }
2758
2759again:
2760 inode = lookup_free_space_inode(root, block_group, path);
2761 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2762 ret = PTR_ERR(inode);
2763 btrfs_release_path(root, path);
2764 goto out;
2765 }
2766
2767 if (IS_ERR(inode)) {
2768 BUG_ON(retries);
2769 retries++;
2770
2771 if (block_group->ro)
2772 goto out_free;
2773
2774 ret = create_free_space_inode(root, trans, block_group, path);
2775 if (ret)
2776 goto out_free;
2777 goto again;
2778 }
2779
2780 /*
2781 * We want to set the generation to 0, that way if anything goes wrong
2782 * from here on out we know not to trust this cache when we load up next
2783 * time.
2784 */
2785 BTRFS_I(inode)->generation = 0;
2786 ret = btrfs_update_inode(trans, root, inode);
2787 WARN_ON(ret);
2788
2789 if (i_size_read(inode) > 0) {
2790 ret = btrfs_truncate_free_space_cache(root, trans, path,
2791 inode);
2792 if (ret)
2793 goto out_put;
2794 }
2795
2796 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2798 spin_unlock(&block_group->lock);
2799 goto out_put;
2800 }
2801 spin_unlock(&block_group->lock);
2802
2803 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2804 if (!num_pages)
2805 num_pages = 1;
2806
2807 /*
2808 * Just to make absolutely sure we have enough space, we're going to
2809 * preallocate 16 pages worth of space per GiB of block group. In
2810 * practice we ought to use at most 8, but we need extra space so we can
2811 * add our header and have a terminator between the extents and the
2812 * bitmaps.
2813 */
2814 num_pages *= 16;
2815 num_pages *= PAGE_CACHE_SIZE;
2816
2817 ret = btrfs_check_data_free_space(inode, num_pages);
2818 if (ret)
2819 goto out_put;
2820
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages,
2823 &alloc_hint);
2824 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put:
2826 iput(inode);
2827out_free:
2828 btrfs_release_path(root, path);
2829out:
2830 spin_lock(&block_group->lock);
2831 if (ret)
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock);
2836
2837 return ret;
2838}
2839
2690int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2840int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2691 struct btrfs_root *root) 2841 struct btrfs_root *root)
2692{ 2842{
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2699 if (!path) 2849 if (!path)
2700 return -ENOMEM; 2850 return -ENOMEM;
2701 2851
2852again:
2853 while (1) {
2854 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2855 while (cache) {
2856 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2857 break;
2858 cache = next_block_group(root, cache);
2859 }
2860 if (!cache) {
2861 if (last == 0)
2862 break;
2863 last = 0;
2864 continue;
2865 }
2866 err = cache_save_setup(cache, trans, path);
2867 last = cache->key.objectid + cache->key.offset;
2868 btrfs_put_block_group(cache);
2869 }
2870
2702 while (1) { 2871 while (1) {
2703 if (last == 0) { 2872 if (last == 0) {
2704 err = btrfs_run_delayed_refs(trans, root, 2873 err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2708 2877
2709 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2878 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2710 while (cache) { 2879 while (cache) {
2880 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2881 btrfs_put_block_group(cache);
2882 goto again;
2883 }
2884
2711 if (cache->dirty) 2885 if (cache->dirty)
2712 break; 2886 break;
2713 cache = next_block_group(root, cache); 2887 cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2719 continue; 2893 continue;
2720 } 2894 }
2721 2895
2896 if (cache->disk_cache_state == BTRFS_DC_SETUP)
2897 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2722 cache->dirty = 0; 2898 cache->dirty = 0;
2723 last = cache->key.objectid + cache->key.offset; 2899 last = cache->key.objectid + cache->key.offset;
2724 2900
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2727 btrfs_put_block_group(cache); 2903 btrfs_put_block_group(cache);
2728 } 2904 }
2729 2905
2906 while (1) {
2907 /*
2908 * I don't think this is needed since we're just marking our
2909 * preallocated extent as written, but just in case it can't
2910 * hurt.
2911 */
2912 if (last == 0) {
2913 err = btrfs_run_delayed_refs(trans, root,
2914 (unsigned long)-1);
2915 BUG_ON(err);
2916 }
2917
2918 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2919 while (cache) {
2920 /*
2921 * Really this shouldn't happen, but it could if we
2922 * couldn't write the entire preallocated extent and
2923 * splitting the extent resulted in a new block.
2924 */
2925 if (cache->dirty) {
2926 btrfs_put_block_group(cache);
2927 goto again;
2928 }
2929 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2930 break;
2931 cache = next_block_group(root, cache);
2932 }
2933 if (!cache) {
2934 if (last == 0)
2935 break;
2936 last = 0;
2937 continue;
2938 }
2939
2940 btrfs_write_out_cache(root, trans, cache, path);
2941
2942 /*
2943 * If we didn't have an error then the cache state is still
2944 * NEED_WRITE, so we can set it to WRITTEN.
2945 */
2946 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2947 cache->disk_cache_state = BTRFS_DC_WRITTEN;
2948 last = cache->key.objectid + cache->key.offset;
2949 btrfs_put_block_group(cache);
2950 }
2951
2730 btrfs_free_path(path); 2952 btrfs_free_path(path);
2731 return 0; 2953 return 0;
2732} 2954}
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2762 if (found) { 2984 if (found) {
2763 spin_lock(&found->lock); 2985 spin_lock(&found->lock);
2764 found->total_bytes += total_bytes; 2986 found->total_bytes += total_bytes;
2987 found->disk_total += total_bytes * factor;
2765 found->bytes_used += bytes_used; 2988 found->bytes_used += bytes_used;
2766 found->disk_used += bytes_used * factor; 2989 found->disk_used += bytes_used * factor;
2767 found->full = 0; 2990 found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2781 BTRFS_BLOCK_GROUP_SYSTEM | 3004 BTRFS_BLOCK_GROUP_SYSTEM |
2782 BTRFS_BLOCK_GROUP_METADATA); 3005 BTRFS_BLOCK_GROUP_METADATA);
2783 found->total_bytes = total_bytes; 3006 found->total_bytes = total_bytes;
3007 found->disk_total = total_bytes * factor;
2784 found->bytes_used = bytes_used; 3008 found->bytes_used = bytes_used;
2785 found->disk_used = bytes_used * factor; 3009 found->disk_used = bytes_used * factor;
2786 found->bytes_pinned = 0; 3010 found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
2882 struct btrfs_space_info *data_sinfo; 3106 struct btrfs_space_info *data_sinfo;
2883 struct btrfs_root *root = BTRFS_I(inode)->root; 3107 struct btrfs_root *root = BTRFS_I(inode)->root;
2884 u64 used; 3108 u64 used;
2885 int ret = 0, committed = 0; 3109 int ret = 0, committed = 0, alloc_chunk = 1;
2886 3110
2887 /* make sure bytes are sectorsize aligned */ 3111 /* make sure bytes are sectorsize aligned */
2888 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3112 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2889 3113
3114 if (root == root->fs_info->tree_root) {
3115 alloc_chunk = 0;
3116 committed = 1;
3117 }
3118
2890 data_sinfo = BTRFS_I(inode)->space_info; 3119 data_sinfo = BTRFS_I(inode)->space_info;
2891 if (!data_sinfo) 3120 if (!data_sinfo)
2892 goto alloc; 3121 goto alloc;
@@ -2905,7 +3134,7 @@ again:
2905 * if we don't have enough free bytes in this space then we need 3134 * if we don't have enough free bytes in this space then we need
2906 * to alloc a new chunk. 3135 * to alloc a new chunk.
2907 */ 3136 */
2908 if (!data_sinfo->full) { 3137 if (!data_sinfo->full && alloc_chunk) {
2909 u64 alloc_target; 3138 u64 alloc_target;
2910 3139
2911 data_sinfo->force_alloc = 1; 3140 data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
2997 rcu_read_unlock(); 3226 rcu_read_unlock();
2998} 3227}
2999 3228
3000static int should_alloc_chunk(struct btrfs_space_info *sinfo, 3229static int should_alloc_chunk(struct btrfs_root *root,
3001 u64 alloc_bytes) 3230 struct btrfs_space_info *sinfo, u64 alloc_bytes)
3002{ 3231{
3003 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3232 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3233 u64 thresh;
3004 3234
3005 if (sinfo->bytes_used + sinfo->bytes_reserved + 3235 if (sinfo->bytes_used + sinfo->bytes_reserved +
3006 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3236 alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
3010 alloc_bytes < div_factor(num_bytes, 8)) 3240 alloc_bytes < div_factor(num_bytes, 8))
3011 return 0; 3241 return 0;
3012 3242
3243 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3244 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3245
3246 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3247 return 0;
3248
3013 return 1; 3249 return 1;
3014} 3250}
3015 3251
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3041 goto out; 3277 goto out;
3042 } 3278 }
3043 3279
3044 if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { 3280 if (!force && !should_alloc_chunk(extent_root, space_info,
3281 alloc_bytes)) {
3045 spin_unlock(&space_info->lock); 3282 spin_unlock(&space_info->lock);
3046 goto out; 3283 goto out;
3047 } 3284 }
3048 spin_unlock(&space_info->lock); 3285 spin_unlock(&space_info->lock);
3049 3286
3050 /* 3287 /*
3288 * If we have mixed data/metadata chunks we want to make sure we keep
3289 * allocating mixed chunks instead of individual chunks.
3290 */
3291 if (btrfs_mixed_space_info(space_info))
3292 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3293
3294 /*
3051 * if we're doing a data chunk, go ahead and make sure that 3295 * if we're doing a data chunk, go ahead and make sure that
3052 * we keep a reasonable number of metadata chunks allocated in the 3296 * we keep a reasonable number of metadata chunks allocated in the
3053 * FS as well. 3297 * FS as well.
@@ -3072,55 +3316,25 @@ out:
3072 return ret; 3316 return ret;
3073} 3317}
3074 3318
3075static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3076 struct btrfs_root *root,
3077 struct btrfs_space_info *sinfo, u64 num_bytes)
3078{
3079 int ret;
3080 int end_trans = 0;
3081
3082 if (sinfo->full)
3083 return 0;
3084
3085 spin_lock(&sinfo->lock);
3086 ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
3087 spin_unlock(&sinfo->lock);
3088 if (!ret)
3089 return 0;
3090
3091 if (!trans) {
3092 trans = btrfs_join_transaction(root, 1);
3093 BUG_ON(IS_ERR(trans));
3094 end_trans = 1;
3095 }
3096
3097 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3098 num_bytes + 2 * 1024 * 1024,
3099 get_alloc_profile(root, sinfo->flags), 0);
3100
3101 if (end_trans)
3102 btrfs_end_transaction(trans, root);
3103
3104 return ret == 1 ? 1 : 0;
3105}
3106
3107/* 3319/*
3108 * shrink metadata reservation for delalloc 3320 * shrink metadata reservation for delalloc
3109 */ 3321 */
3110static int shrink_delalloc(struct btrfs_trans_handle *trans, 3322static int shrink_delalloc(struct btrfs_trans_handle *trans,
3111 struct btrfs_root *root, u64 to_reclaim) 3323 struct btrfs_root *root, u64 to_reclaim, int sync)
3112{ 3324{
3113 struct btrfs_block_rsv *block_rsv; 3325 struct btrfs_block_rsv *block_rsv;
3326 struct btrfs_space_info *space_info;
3114 u64 reserved; 3327 u64 reserved;
3115 u64 max_reclaim; 3328 u64 max_reclaim;
3116 u64 reclaimed = 0; 3329 u64 reclaimed = 0;
3117 int pause = 1; 3330 int pause = 1;
3118 int ret; 3331 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3119 3332
3120 block_rsv = &root->fs_info->delalloc_block_rsv; 3333 block_rsv = &root->fs_info->delalloc_block_rsv;
3121 spin_lock(&block_rsv->lock); 3334 space_info = block_rsv->space_info;
3122 reserved = block_rsv->reserved; 3335
3123 spin_unlock(&block_rsv->lock); 3336 smp_mb();
3337 reserved = space_info->bytes_reserved;
3124 3338
3125 if (reserved == 0) 3339 if (reserved == 0)
3126 return 0; 3340 return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3128 max_reclaim = min(reserved, to_reclaim); 3342 max_reclaim = min(reserved, to_reclaim);
3129 3343
3130 while (1) { 3344 while (1) {
3131 ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); 3345 /* have the flusher threads jump in and do some IO */
3132 if (!ret) { 3346 smp_mb();
3133 __set_current_state(TASK_INTERRUPTIBLE); 3347 nr_pages = min_t(unsigned long, nr_pages,
3134 schedule_timeout(pause); 3348 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3135 pause <<= 1; 3349 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3136 if (pause > HZ / 10)
3137 pause = HZ / 10;
3138 } else {
3139 pause = 1;
3140 }
3141 3350
3142 spin_lock(&block_rsv->lock); 3351 spin_lock(&space_info->lock);
3143 if (reserved > block_rsv->reserved) 3352 if (reserved > space_info->bytes_reserved)
3144 reclaimed = reserved - block_rsv->reserved; 3353 reclaimed += reserved - space_info->bytes_reserved;
3145 reserved = block_rsv->reserved; 3354 reserved = space_info->bytes_reserved;
3146 spin_unlock(&block_rsv->lock); 3355 spin_unlock(&space_info->lock);
3147 3356
3148 if (reserved == 0 || reclaimed >= max_reclaim) 3357 if (reserved == 0 || reclaimed >= max_reclaim)
3149 break; 3358 break;
3150 3359
3151 if (trans && trans->transaction->blocked) 3360 if (trans && trans->transaction->blocked)
3152 return -EAGAIN; 3361 return -EAGAIN;
3362
3363 __set_current_state(TASK_INTERRUPTIBLE);
3364 schedule_timeout(pause);
3365 pause <<= 1;
3366 if (pause > HZ / 10)
3367 pause = HZ / 10;
3368
3153 } 3369 }
3154 return reclaimed >= to_reclaim; 3370 return reclaimed >= to_reclaim;
3155} 3371}
3156 3372
3157static int should_retry_reserve(struct btrfs_trans_handle *trans, 3373/*
3158 struct btrfs_root *root, 3374 * Retries tells us how many times we've called reserve_metadata_bytes. The
3159 struct btrfs_block_rsv *block_rsv, 3375 * idea is if this is the first call (retries == 0) then we will add to our
3160 u64 num_bytes, int *retries) 3376 * reserved count if we can't make the allocation in order to hold our place
3377 * while we go and try and free up space. That way for retries > 1 we don't try
3378 * and add space, we just check to see if the amount of unused space is >= the
3379 * total space, meaning that our reservation is valid.
3380 *
3381 * However if we don't intend to retry this reservation, pass 0 as flush so
3382 * that it short circuits this logic.
3383 */
3384static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3385 struct btrfs_root *root,
3386 struct btrfs_block_rsv *block_rsv,
3387 u64 orig_bytes, int flush)
3161{ 3388{
3162 struct btrfs_space_info *space_info = block_rsv->space_info; 3389 struct btrfs_space_info *space_info = block_rsv->space_info;
3163 int ret; 3390 u64 unused;
3391 u64 num_bytes = orig_bytes;
3392 int retries = 0;
3393 int ret = 0;
3394 bool reserved = false;
3395 bool committed = false;
3164 3396
3165 if ((*retries) > 2) 3397again:
3166 return -ENOSPC; 3398 ret = -ENOSPC;
3399 if (reserved)
3400 num_bytes = 0;
3167 3401
3168 ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); 3402 spin_lock(&space_info->lock);
3169 if (ret) 3403 unused = space_info->bytes_used + space_info->bytes_reserved +
3170 return 1; 3404 space_info->bytes_pinned + space_info->bytes_readonly +
3405 space_info->bytes_may_use;
3171 3406
3172 if (trans && trans->transaction->in_commit) 3407 /*
3173 return -ENOSPC; 3408 * The idea here is that if we've not already over-reserved the block group
3409 * then we can go ahead and save our reservation first and then start
3410 * flushing if we need to. Otherwise if we've already overcommitted
3411 * let's start flushing stuff first and then come back and try to make
3412 * our reservation.
3413 */
3414 if (unused <= space_info->total_bytes) {
3415 unused -= space_info->total_bytes;
3416 if (unused >= num_bytes) {
3417 if (!reserved)
3418 space_info->bytes_reserved += orig_bytes;
3419 ret = 0;
3420 } else {
3421 /*
3422 * Ok set num_bytes to orig_bytes since we aren't
3423 * overocmmitted, this way we only try and reclaim what
3424 * we need.
3425 */
3426 num_bytes = orig_bytes;
3427 }
3428 } else {
3429 /*
3430 * Ok we're over committed, set num_bytes to the overcommitted
3431 * amount plus the amount of bytes that we need for this
3432 * reservation.
3433 */
3434 num_bytes = unused - space_info->total_bytes +
3435 (orig_bytes * (retries + 1));
3436 }
3174 3437
3175 ret = shrink_delalloc(trans, root, num_bytes); 3438 /*
3176 if (ret) 3439 * Couldn't make our reservation, save our place so while we're trying
3177 return ret; 3440 * to reclaim space we can actually use it instead of somebody else
3441 * stealing it from us.
3442 */
3443 if (ret && !reserved) {
3444 space_info->bytes_reserved += orig_bytes;
3445 reserved = true;
3446 }
3178 3447
3179 spin_lock(&space_info->lock);
3180 if (space_info->bytes_pinned < num_bytes)
3181 ret = 1;
3182 spin_unlock(&space_info->lock); 3448 spin_unlock(&space_info->lock);
3183 if (ret)
3184 return -ENOSPC;
3185
3186 (*retries)++;
3187 3449
3188 if (trans) 3450 if (!ret)
3189 return -EAGAIN; 3451 return 0;
3190 3452
3191 trans = btrfs_join_transaction(root, 1); 3453 if (!flush)
3192 BUG_ON(IS_ERR(trans)); 3454 goto out;
3193 ret = btrfs_commit_transaction(trans, root);
3194 BUG_ON(ret);
3195 3455
3196 return 1; 3456 /*
3197} 3457 * We do synchronous shrinking since we don't actually unreserve
3458 * metadata until after the IO is completed.
3459 */
3460 ret = shrink_delalloc(trans, root, num_bytes, 1);
3461 if (ret > 0)
3462 return 0;
3463 else if (ret < 0)
3464 goto out;
3198 3465
3199static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, 3466 /*
3200 u64 num_bytes) 3467 * So if we were overcommitted it's possible that somebody else flushed
3201{ 3468 * out enough space and we simply didn't have enough space to reclaim,
3202 struct btrfs_space_info *space_info = block_rsv->space_info; 3469 * so go back around and try again.
3203 u64 unused; 3470 */
3204 int ret = -ENOSPC; 3471 if (retries < 2) {
3472 retries++;
3473 goto again;
3474 }
3205 3475
3206 spin_lock(&space_info->lock); 3476 spin_lock(&space_info->lock);
3207 unused = space_info->bytes_used + space_info->bytes_reserved + 3477 /*
3208 space_info->bytes_pinned + space_info->bytes_readonly; 3478 * Not enough space to be reclaimed, don't bother committing the
3479 * transaction.
3480 */
3481 if (space_info->bytes_pinned < orig_bytes)
3482 ret = -ENOSPC;
3483 spin_unlock(&space_info->lock);
3484 if (ret)
3485 goto out;
3209 3486
3210 if (unused < space_info->total_bytes) 3487 ret = -EAGAIN;
3211 unused = space_info->total_bytes - unused; 3488 if (trans || committed)
3212 else 3489 goto out;
3213 unused = 0;
3214 3490
3215 if (unused >= num_bytes) { 3491 ret = -ENOSPC;
3216 if (block_rsv->priority >= 10) { 3492 trans = btrfs_join_transaction(root, 1);
3217 space_info->bytes_reserved += num_bytes; 3493 if (IS_ERR(trans))
3218 ret = 0; 3494 goto out;
3219 } else { 3495 ret = btrfs_commit_transaction(trans, root);
3220 if ((unused + block_rsv->reserved) * 3496 if (!ret) {
3221 block_rsv->priority >= 3497 trans = NULL;
3222 (num_bytes + block_rsv->reserved) * 10) { 3498 committed = true;
3223 space_info->bytes_reserved += num_bytes; 3499 goto again;
3224 ret = 0; 3500 }
3225 } 3501
3226 } 3502out:
3503 if (reserved) {
3504 spin_lock(&space_info->lock);
3505 space_info->bytes_reserved -= orig_bytes;
3506 spin_unlock(&space_info->lock);
3227 } 3507 }
3228 spin_unlock(&space_info->lock);
3229 3508
3230 return ret; 3509 return ret;
3231} 3510}
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3327{ 3606{
3328 struct btrfs_block_rsv *block_rsv; 3607 struct btrfs_block_rsv *block_rsv;
3329 struct btrfs_fs_info *fs_info = root->fs_info; 3608 struct btrfs_fs_info *fs_info = root->fs_info;
3330 u64 alloc_target;
3331 3609
3332 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3610 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3333 if (!block_rsv) 3611 if (!block_rsv)
3334 return NULL; 3612 return NULL;
3335 3613
3336 btrfs_init_block_rsv(block_rsv); 3614 btrfs_init_block_rsv(block_rsv);
3337
3338 alloc_target = btrfs_get_alloc_profile(root, 0);
3339 block_rsv->space_info = __find_space_info(fs_info, 3615 block_rsv->space_info = __find_space_info(fs_info,
3340 BTRFS_BLOCK_GROUP_METADATA); 3616 BTRFS_BLOCK_GROUP_METADATA);
3341
3342 return block_rsv; 3617 return block_rsv;
3343} 3618}
3344 3619
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3369int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3644int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3370 struct btrfs_root *root, 3645 struct btrfs_root *root,
3371 struct btrfs_block_rsv *block_rsv, 3646 struct btrfs_block_rsv *block_rsv,
3372 u64 num_bytes, int *retries) 3647 u64 num_bytes)
3373{ 3648{
3374 int ret; 3649 int ret;
3375 3650
3376 if (num_bytes == 0) 3651 if (num_bytes == 0)
3377 return 0; 3652 return 0;
3378again: 3653
3379 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3654 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
3380 if (!ret) { 3655 if (!ret) {
3381 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3656 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3382 return 0; 3657 return 0;
3383 } 3658 }
3384 3659
3385 ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
3386 if (ret > 0)
3387 goto again;
3388
3389 return ret; 3660 return ret;
3390} 3661}
3391 3662
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3420 return 0; 3691 return 0;
3421 3692
3422 if (block_rsv->refill_used) { 3693 if (block_rsv->refill_used) {
3423 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3694 ret = reserve_metadata_bytes(trans, root, block_rsv,
3695 num_bytes, 0);
3424 if (!ret) { 3696 if (!ret) {
3425 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3697 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3426 return 0; 3698 return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3499 3771
3500 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3772 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3501 spin_lock(&sinfo->lock); 3773 spin_lock(&sinfo->lock);
3774 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
3775 data_used = 0;
3502 meta_used = sinfo->bytes_used; 3776 meta_used = sinfo->bytes_used;
3503 spin_unlock(&sinfo->lock); 3777 spin_unlock(&sinfo->lock);
3504 3778
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3526 block_rsv->size = num_bytes; 3800 block_rsv->size = num_bytes;
3527 3801
3528 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3802 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3529 sinfo->bytes_reserved + sinfo->bytes_readonly; 3803 sinfo->bytes_reserved + sinfo->bytes_readonly +
3804 sinfo->bytes_may_use;
3530 3805
3531 if (sinfo->total_bytes > num_bytes) { 3806 if (sinfo->total_bytes > num_bytes) {
3532 num_bytes = sinfo->total_bytes - num_bytes; 3807 num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3597 3872
3598int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3873int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3599 struct btrfs_root *root, 3874 struct btrfs_root *root,
3600 int num_items, int *retries) 3875 int num_items)
3601{ 3876{
3602 u64 num_bytes; 3877 u64 num_bytes;
3603 int ret; 3878 int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3607 3882
3608 num_bytes = calc_trans_metadata_size(root, num_items); 3883 num_bytes = calc_trans_metadata_size(root, num_items);
3609 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3884 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3610 num_bytes, retries); 3885 num_bytes);
3611 if (!ret) { 3886 if (!ret) {
3612 trans->bytes_reserved += num_bytes; 3887 trans->bytes_reserved += num_bytes;
3613 trans->block_rsv = &root->fs_info->trans_block_rsv; 3888 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3681 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3956 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3682 u64 to_reserve; 3957 u64 to_reserve;
3683 int nr_extents; 3958 int nr_extents;
3684 int retries = 0;
3685 int ret; 3959 int ret;
3686 3960
3687 if (btrfs_transaction_in_commit(root->fs_info)) 3961 if (btrfs_transaction_in_commit(root->fs_info))
3688 schedule_timeout(1); 3962 schedule_timeout(1);
3689 3963
3690 num_bytes = ALIGN(num_bytes, root->sectorsize); 3964 num_bytes = ALIGN(num_bytes, root->sectorsize);
3691again: 3965
3692 spin_lock(&BTRFS_I(inode)->accounting_lock); 3966 spin_lock(&BTRFS_I(inode)->accounting_lock);
3693 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3967 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3694 if (nr_extents > BTRFS_I(inode)->reserved_extents) { 3968 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
3698 nr_extents = 0; 3972 nr_extents = 0;
3699 to_reserve = 0; 3973 to_reserve = 0;
3700 } 3974 }
3975 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3701 3976
3702 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3977 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3703 ret = reserve_metadata_bytes(block_rsv, to_reserve); 3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3704 if (ret) { 3979 if (ret)
3705 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3706 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3707 &retries);
3708 if (ret > 0)
3709 goto again;
3710 return ret; 3980 return ret;
3711 }
3712 3981
3982 spin_lock(&BTRFS_I(inode)->accounting_lock);
3713 BTRFS_I(inode)->reserved_extents += nr_extents; 3983 BTRFS_I(inode)->reserved_extents += nr_extents;
3714 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3984 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3715 spin_unlock(&BTRFS_I(inode)->accounting_lock); 3985 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
3717 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3987 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3718 3988
3719 if (block_rsv->size > 512 * 1024 * 1024) 3989 if (block_rsv->size > 512 * 1024 * 1024)
3720 shrink_delalloc(NULL, root, to_reserve); 3990 shrink_delalloc(NULL, root, to_reserve, 0);
3721 3991
3722 return 0; 3992 return 0;
3723} 3993}
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3776 struct btrfs_root *root, 4046 struct btrfs_root *root,
3777 u64 bytenr, u64 num_bytes, int alloc) 4047 u64 bytenr, u64 num_bytes, int alloc)
3778{ 4048{
3779 struct btrfs_block_group_cache *cache; 4049 struct btrfs_block_group_cache *cache = NULL;
3780 struct btrfs_fs_info *info = root->fs_info; 4050 struct btrfs_fs_info *info = root->fs_info;
3781 int factor;
3782 u64 total = num_bytes; 4051 u64 total = num_bytes;
3783 u64 old_val; 4052 u64 old_val;
3784 u64 byte_in_group; 4053 u64 byte_in_group;
4054 int factor;
3785 4055
3786 /* block accounting for super block */ 4056 /* block accounting for super block */
3787 spin_lock(&info->delalloc_lock); 4057 spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3803 factor = 2; 4073 factor = 2;
3804 else 4074 else
3805 factor = 1; 4075 factor = 1;
4076 /*
4077 * If this block group has free space cache written out, we
4078 * need to make sure to load it if we are removing space. This
4079 * is because we need the unpinning stage to actually add the
4080 * space back to the block group, otherwise we will leak space.
4081 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1);
4084
3806 byte_in_group = bytenr - cache->key.objectid; 4085 byte_in_group = bytenr - cache->key.objectid;
3807 WARN_ON(byte_in_group > cache->key.offset); 4086 WARN_ON(byte_in_group > cache->key.offset);
3808 4087
3809 spin_lock(&cache->space_info->lock); 4088 spin_lock(&cache->space_info->lock);
3810 spin_lock(&cache->lock); 4089 spin_lock(&cache->lock);
4090
4091 if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
4092 cache->disk_cache_state < BTRFS_DC_CLEAR)
4093 cache->disk_cache_state = BTRFS_DC_CLEAR;
4094
3811 cache->dirty = 1; 4095 cache->dirty = 1;
3812 old_val = btrfs_block_group_used(&cache->item); 4096 old_val = btrfs_block_group_used(&cache->item);
3813 num_bytes = min(total, cache->key.offset - byte_in_group); 4097 num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4554 bool found_uncached_bg = false; 4838 bool found_uncached_bg = false;
4555 bool failed_cluster_refill = false; 4839 bool failed_cluster_refill = false;
4556 bool failed_alloc = false; 4840 bool failed_alloc = false;
4841 bool use_cluster = true;
4557 u64 ideal_cache_percent = 0; 4842 u64 ideal_cache_percent = 0;
4558 u64 ideal_cache_offset = 0; 4843 u64 ideal_cache_offset = 0;
4559 4844
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4568 return -ENOSPC; 4853 return -ENOSPC;
4569 } 4854 }
4570 4855
4856 /*
4857 * If the space info is for both data and metadata it means we have a
4858 * small filesystem and we can't use the clustering stuff.
4859 */
4860 if (btrfs_mixed_space_info(space_info))
4861 use_cluster = false;
4862
4571 if (orig_root->ref_cows || empty_size) 4863 if (orig_root->ref_cows || empty_size)
4572 allowed_chunk_alloc = 1; 4864 allowed_chunk_alloc = 1;
4573 4865
4574 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4866 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
4575 last_ptr = &root->fs_info->meta_alloc_cluster; 4867 last_ptr = &root->fs_info->meta_alloc_cluster;
4576 if (!btrfs_test_opt(root, SSD)) 4868 if (!btrfs_test_opt(root, SSD))
4577 empty_cluster = 64 * 1024; 4869 empty_cluster = 64 * 1024;
4578 } 4870 }
4579 4871
4580 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4872 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
4873 btrfs_test_opt(root, SSD)) {
4581 last_ptr = &root->fs_info->data_alloc_cluster; 4874 last_ptr = &root->fs_info->data_alloc_cluster;
4582 } 4875 }
4583 4876
@@ -4641,6 +4934,10 @@ have_block_group:
4641 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4642 u64 free_percent; 4935 u64 free_percent;
4643 4936
4937 ret = cache_block_group(block_group, trans, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group;
4940
4644 free_percent = btrfs_block_group_used(&block_group->item); 4941 free_percent = btrfs_block_group_used(&block_group->item);
4645 free_percent *= 100; 4942 free_percent *= 100;
4646 free_percent = div64_u64(free_percent, 4943 free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
4661 if (loop > LOOP_CACHING_NOWAIT || 4958 if (loop > LOOP_CACHING_NOWAIT ||
4662 (loop > LOOP_FIND_IDEAL && 4959 (loop > LOOP_FIND_IDEAL &&
4663 atomic_read(&space_info->caching_threads) < 2)) { 4960 atomic_read(&space_info->caching_threads) < 2)) {
4664 ret = cache_block_group(block_group); 4961 ret = cache_block_group(block_group, trans, 0);
4665 BUG_ON(ret); 4962 BUG_ON(ret);
4666 } 4963 }
4667 found_uncached_bg = true; 4964 found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5218 u64 num_bytes = ins->offset; 5515 u64 num_bytes = ins->offset;
5219 5516
5220 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5221 cache_block_group(block_group); 5518 cache_block_group(block_group, trans, 0);
5222 caching_ctl = get_caching_control(block_group); 5519 caching_ctl = get_caching_control(block_group);
5223 5520
5224 if (!caching_ctl) { 5521 if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5308 block_rsv = get_block_rsv(trans, root); 5605 block_rsv = get_block_rsv(trans, root);
5309 5606
5310 if (block_rsv->size == 0) { 5607 if (block_rsv->size == 0) {
5311 ret = reserve_metadata_bytes(block_rsv, blocksize); 5608 ret = reserve_metadata_bytes(trans, root, block_rsv,
5609 blocksize, 0);
5312 if (ret) 5610 if (ret)
5313 return ERR_PTR(ret); 5611 return ERR_PTR(ret);
5314 return block_rsv; 5612 return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5318 if (!ret) 5616 if (!ret)
5319 return block_rsv; 5617 return block_rsv;
5320 5618
5321 WARN_ON(1);
5322 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
5323 block_rsv->size, block_rsv->reserved,
5324 block_rsv->freed[0], block_rsv->freed[1]);
5325
5326 return ERR_PTR(-ENOSPC); 5619 return ERR_PTR(-ENOSPC);
5327} 5620}
5328 5621
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5421 u64 generation; 5714 u64 generation;
5422 u64 refs; 5715 u64 refs;
5423 u64 flags; 5716 u64 flags;
5424 u64 last = 0;
5425 u32 nritems; 5717 u32 nritems;
5426 u32 blocksize; 5718 u32 blocksize;
5427 struct btrfs_key key; 5719 struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
5489 generation); 5781 generation);
5490 if (ret) 5782 if (ret)
5491 break; 5783 break;
5492 last = bytenr + blocksize;
5493 nread++; 5784 nread++;
5494 } 5785 }
5495 wc->reada_slot = slot; 5786 wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
7813 return ret; 8104 return ret;
7814} 8105}
7815 8106
8107void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
8108{
8109 struct btrfs_block_group_cache *block_group;
8110 u64 last = 0;
8111
8112 while (1) {
8113 struct inode *inode;
8114
8115 block_group = btrfs_lookup_first_block_group(info, last);
8116 while (block_group) {
8117 spin_lock(&block_group->lock);
8118 if (block_group->iref)
8119 break;
8120 spin_unlock(&block_group->lock);
8121 block_group = next_block_group(info->tree_root,
8122 block_group);
8123 }
8124 if (!block_group) {
8125 if (last == 0)
8126 break;
8127 last = 0;
8128 continue;
8129 }
8130
8131 inode = block_group->inode;
8132 block_group->iref = 0;
8133 block_group->inode = NULL;
8134 spin_unlock(&block_group->lock);
8135 iput(inode);
8136 last = block_group->key.objectid + block_group->key.offset;
8137 btrfs_put_block_group(block_group);
8138 }
8139}
8140
7816int btrfs_free_block_groups(struct btrfs_fs_info *info) 8141int btrfs_free_block_groups(struct btrfs_fs_info *info)
7817{ 8142{
7818 struct btrfs_block_group_cache *block_group; 8143 struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7896 struct btrfs_key key; 8221 struct btrfs_key key;
7897 struct btrfs_key found_key; 8222 struct btrfs_key found_key;
7898 struct extent_buffer *leaf; 8223 struct extent_buffer *leaf;
8224 int need_clear = 0;
8225 u64 cache_gen;
7899 8226
7900 root = info->extent_root; 8227 root = info->extent_root;
7901 key.objectid = 0; 8228 key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7905 if (!path) 8232 if (!path)
7906 return -ENOMEM; 8233 return -ENOMEM;
7907 8234
8235 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
8236 if (cache_gen != 0 &&
8237 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
8238 need_clear = 1;
8239 if (btrfs_test_opt(root, CLEAR_CACHE))
8240 need_clear = 1;
8241 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
8242 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
8243
7908 while (1) { 8244 while (1) {
7909 ret = find_first_block_group(root, path, &key); 8245 ret = find_first_block_group(root, path, &key);
7910 if (ret > 0) 8246 if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7927 INIT_LIST_HEAD(&cache->list); 8263 INIT_LIST_HEAD(&cache->list);
7928 INIT_LIST_HEAD(&cache->cluster_list); 8264 INIT_LIST_HEAD(&cache->cluster_list);
7929 8265
8266 if (need_clear)
8267 cache->disk_cache_state = BTRFS_DC_CLEAR;
8268
7930 /* 8269 /*
7931 * we only want to have 32k of ram per block group for keeping 8270 * we only want to have 32k of ram per block group for keeping
7932 * track of free space, and if we pass 1/2 of that we want to 8271 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8031 cache->key.offset = size; 8370 cache->key.offset = size;
8032 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8371 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8033 cache->sectorsize = root->sectorsize; 8372 cache->sectorsize = root->sectorsize;
8373 cache->fs_info = root->fs_info;
8034 8374
8035 /* 8375 /*
8036 * we only want to have 32k of ram per block group for keeping track 8376 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8087 struct btrfs_path *path; 8427 struct btrfs_path *path;
8088 struct btrfs_block_group_cache *block_group; 8428 struct btrfs_block_group_cache *block_group;
8089 struct btrfs_free_cluster *cluster; 8429 struct btrfs_free_cluster *cluster;
8430 struct btrfs_root *tree_root = root->fs_info->tree_root;
8090 struct btrfs_key key; 8431 struct btrfs_key key;
8432 struct inode *inode;
8091 int ret; 8433 int ret;
8434 int factor;
8092 8435
8093 root = root->fs_info->extent_root; 8436 root = root->fs_info->extent_root;
8094 8437
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8097 BUG_ON(!block_group->ro); 8440 BUG_ON(!block_group->ro);
8098 8441
8099 memcpy(&key, &block_group->key, sizeof(key)); 8442 memcpy(&key, &block_group->key, sizeof(key));
8443 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8444 BTRFS_BLOCK_GROUP_RAID1 |
8445 BTRFS_BLOCK_GROUP_RAID10))
8446 factor = 2;
8447 else
8448 factor = 1;
8100 8449
8101 /* make sure this block group isn't part of an allocation cluster */ 8450 /* make sure this block group isn't part of an allocation cluster */
8102 cluster = &root->fs_info->data_alloc_cluster; 8451 cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8116 path = btrfs_alloc_path(); 8465 path = btrfs_alloc_path();
8117 BUG_ON(!path); 8466 BUG_ON(!path);
8118 8467
8468 inode = lookup_free_space_inode(root, block_group, path);
8469 if (!IS_ERR(inode)) {
8470 btrfs_orphan_add(trans, inode);
8471 clear_nlink(inode);
8472 /* One for the block groups ref */
8473 spin_lock(&block_group->lock);
8474 if (block_group->iref) {
8475 block_group->iref = 0;
8476 block_group->inode = NULL;
8477 spin_unlock(&block_group->lock);
8478 iput(inode);
8479 } else {
8480 spin_unlock(&block_group->lock);
8481 }
8482 /* One for our lookup ref */
8483 iput(inode);
8484 }
8485
8486 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
8487 key.offset = block_group->key.objectid;
8488 key.type = 0;
8489
8490 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
8491 if (ret < 0)
8492 goto out;
8493 if (ret > 0)
8494 btrfs_release_path(tree_root, path);
8495 if (ret == 0) {
8496 ret = btrfs_del_item(trans, tree_root, path);
8497 if (ret)
8498 goto out;
8499 btrfs_release_path(tree_root, path);
8500 }
8501
8119 spin_lock(&root->fs_info->block_group_cache_lock); 8502 spin_lock(&root->fs_info->block_group_cache_lock);
8120 rb_erase(&block_group->cache_node, 8503 rb_erase(&block_group->cache_node,
8121 &root->fs_info->block_group_cache_tree); 8504 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8137 spin_lock(&block_group->space_info->lock); 8520 spin_lock(&block_group->space_info->lock);
8138 block_group->space_info->total_bytes -= block_group->key.offset; 8521 block_group->space_info->total_bytes -= block_group->key.offset;
8139 block_group->space_info->bytes_readonly -= block_group->key.offset; 8522 block_group->space_info->bytes_readonly -= block_group->key.offset;
8523 block_group->space_info->disk_total -= block_group->key.offset * factor;
8140 spin_unlock(&block_group->space_info->lock); 8524 spin_unlock(&block_group->space_info->lock);
8141 8525
8526 memcpy(&key, &block_group->key, sizeof(key));
8527
8142 btrfs_clear_space_info_full(root->fs_info); 8528 btrfs_clear_space_info_full(root->fs_info);
8143 8529
8144 btrfs_put_block_group(block_group); 8530 btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..eac10e3260a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
104 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping, gfp_t mask)
105{ 105{
106 tree->state = RB_ROOT; 106 tree->state = RB_ROOT;
107 tree->buffer = RB_ROOT; 107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
108 tree->ops = NULL; 108 tree->ops = NULL;
109 tree->dirty_bytes = 0; 109 tree->dirty_bytes = 0;
110 spin_lock_init(&tree->lock); 110 spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
235 return ret; 235 return ret;
236} 236}
237 237
238static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
239 u64 offset, struct rb_node *node)
240{
241 struct rb_root *root = &tree->buffer;
242 struct rb_node **p = &root->rb_node;
243 struct rb_node *parent = NULL;
244 struct extent_buffer *eb;
245
246 while (*p) {
247 parent = *p;
248 eb = rb_entry(parent, struct extent_buffer, rb_node);
249
250 if (offset < eb->start)
251 p = &(*p)->rb_left;
252 else if (offset > eb->start)
253 p = &(*p)->rb_right;
254 else
255 return eb;
256 }
257
258 rb_link_node(node, parent, p);
259 rb_insert_color(node, root);
260 return NULL;
261}
262
263static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
264 u64 offset)
265{
266 struct rb_root *root = &tree->buffer;
267 struct rb_node *n = root->rb_node;
268 struct extent_buffer *eb;
269
270 while (n) {
271 eb = rb_entry(n, struct extent_buffer, rb_node);
272 if (offset < eb->start)
273 n = n->rb_left;
274 else if (offset > eb->start)
275 n = n->rb_right;
276 else
277 return eb;
278 }
279 return NULL;
280}
281
282static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, 238static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
283 struct extent_state *other) 239 struct extent_state *other)
284{ 240{
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1901 struct page *page = bvec->bv_page; 1857 struct page *page = bvec->bv_page;
1902 struct extent_io_tree *tree = bio->bi_private; 1858 struct extent_io_tree *tree = bio->bi_private;
1903 u64 start; 1859 u64 start;
1904 u64 end;
1905 1860
1906 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1861 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1907 end = start + bvec->bv_len - 1;
1908 1862
1909 bio->bi_private = NULL; 1863 bio->bi_private = NULL;
1910 1864
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2204 u64 last_byte = i_size_read(inode); 2158 u64 last_byte = i_size_read(inode);
2205 u64 block_start; 2159 u64 block_start;
2206 u64 iosize; 2160 u64 iosize;
2207 u64 unlock_start;
2208 sector_t sector; 2161 sector_t sector;
2209 struct extent_state *cached_state = NULL; 2162 struct extent_state *cached_state = NULL;
2210 struct extent_map *em; 2163 struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2329 if (tree->ops && tree->ops->writepage_end_io_hook) 2282 if (tree->ops && tree->ops->writepage_end_io_hook)
2330 tree->ops->writepage_end_io_hook(page, start, 2283 tree->ops->writepage_end_io_hook(page, start,
2331 page_end, NULL, 1); 2284 page_end, NULL, 1);
2332 unlock_start = page_end + 1;
2333 goto done; 2285 goto done;
2334 } 2286 }
2335 2287
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2340 if (tree->ops && tree->ops->writepage_end_io_hook) 2292 if (tree->ops && tree->ops->writepage_end_io_hook)
2341 tree->ops->writepage_end_io_hook(page, cur, 2293 tree->ops->writepage_end_io_hook(page, cur,
2342 page_end, NULL, 1); 2294 page_end, NULL, 1);
2343 unlock_start = page_end + 1;
2344 break; 2295 break;
2345 } 2296 }
2346 em = epd->get_extent(inode, page, pg_offset, cur, 2297 em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2387 2338
2388 cur += iosize; 2339 cur += iosize;
2389 pg_offset += iosize; 2340 pg_offset += iosize;
2390 unlock_start = cur;
2391 continue; 2341 continue;
2392 } 2342 }
2393 /* leave this out until we have a page_mkwrite call */ 2343 /* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2473 pgoff_t index; 2423 pgoff_t index;
2474 pgoff_t end; /* Inclusive */ 2424 pgoff_t end; /* Inclusive */
2475 int scanned = 0; 2425 int scanned = 0;
2476 int range_whole = 0;
2477 2426
2478 pagevec_init(&pvec, 0); 2427 pagevec_init(&pvec, 0);
2479 if (wbc->range_cyclic) { 2428 if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2482 } else { 2431 } else {
2483 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2432 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2484 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2433 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2485 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2486 range_whole = 1;
2487 scanned = 1; 2434 scanned = 1;
2488 } 2435 }
2489retry: 2436retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
2823 NULL, 1, 2770 NULL, 1,
2824 end_bio_extent_preparewrite, 0, 2771 end_bio_extent_preparewrite, 0,
2825 0, 0); 2772 0, 0);
2773 if (ret && !err)
2774 err = ret;
2826 iocount++; 2775 iocount++;
2827 block_start = block_start + iosize; 2776 block_start = block_start + iosize;
2828 } else { 2777 } else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
3104 kmem_cache_free(extent_buffer_cache, eb); 3053 kmem_cache_free(extent_buffer_cache, eb);
3105} 3054}
3106 3055
3056/*
3057 * Helper for releasing extent buffer page.
3058 */
3059static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3060 unsigned long start_idx)
3061{
3062 unsigned long index;
3063 struct page *page;
3064
3065 if (!eb->first_page)
3066 return;
3067
3068 index = num_extent_pages(eb->start, eb->len);
3069 if (start_idx >= index)
3070 return;
3071
3072 do {
3073 index--;
3074 page = extent_buffer_page(eb, index);
3075 if (page)
3076 page_cache_release(page);
3077 } while (index != start_idx);
3078}
3079
3080/*
3081 * Helper for releasing the extent buffer.
3082 */
3083static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3084{
3085 btrfs_release_extent_buffer_page(eb, 0);
3086 __free_extent_buffer(eb);
3087}
3088
3107struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3089struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3108 u64 start, unsigned long len, 3090 u64 start, unsigned long len,
3109 struct page *page0, 3091 struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3117 struct page *p; 3099 struct page *p;
3118 struct address_space *mapping = tree->mapping; 3100 struct address_space *mapping = tree->mapping;
3119 int uptodate = 1; 3101 int uptodate = 1;
3102 int ret;
3120 3103
3121 spin_lock(&tree->buffer_lock); 3104 rcu_read_lock();
3122 eb = buffer_search(tree, start); 3105 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3123 if (eb) { 3106 if (eb && atomic_inc_not_zero(&eb->refs)) {
3124 atomic_inc(&eb->refs); 3107 rcu_read_unlock();
3125 spin_unlock(&tree->buffer_lock);
3126 mark_page_accessed(eb->first_page); 3108 mark_page_accessed(eb->first_page);
3127 return eb; 3109 return eb;
3128 } 3110 }
3129 spin_unlock(&tree->buffer_lock); 3111 rcu_read_unlock();
3130 3112
3131 eb = __alloc_extent_buffer(tree, start, len, mask); 3113 eb = __alloc_extent_buffer(tree, start, len, mask);
3132 if (!eb) 3114 if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 if (uptodate) 3147 if (uptodate)
3166 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3148 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3167 3149
3150 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3151 if (ret)
3152 goto free_eb;
3153
3168 spin_lock(&tree->buffer_lock); 3154 spin_lock(&tree->buffer_lock);
3169 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3155 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
3170 if (exists) { 3156 if (ret == -EEXIST) {
3157 exists = radix_tree_lookup(&tree->buffer,
3158 start >> PAGE_CACHE_SHIFT);
3171 /* add one reference for the caller */ 3159 /* add one reference for the caller */
3172 atomic_inc(&exists->refs); 3160 atomic_inc(&exists->refs);
3173 spin_unlock(&tree->buffer_lock); 3161 spin_unlock(&tree->buffer_lock);
3162 radix_tree_preload_end();
3174 goto free_eb; 3163 goto free_eb;
3175 } 3164 }
3176 /* add one reference for the tree */ 3165 /* add one reference for the tree */
3177 atomic_inc(&eb->refs); 3166 atomic_inc(&eb->refs);
3178 spin_unlock(&tree->buffer_lock); 3167 spin_unlock(&tree->buffer_lock);
3168 radix_tree_preload_end();
3179 return eb; 3169 return eb;
3180 3170
3181free_eb: 3171free_eb:
3182 if (!atomic_dec_and_test(&eb->refs)) 3172 if (!atomic_dec_and_test(&eb->refs))
3183 return exists; 3173 return exists;
3184 for (index = 1; index < i; index++) 3174 btrfs_release_extent_buffer(eb);
3185 page_cache_release(extent_buffer_page(eb, index));
3186 page_cache_release(extent_buffer_page(eb, 0));
3187 __free_extent_buffer(eb);
3188 return exists; 3175 return exists;
3189} 3176}
3190 3177
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3194{ 3181{
3195 struct extent_buffer *eb; 3182 struct extent_buffer *eb;
3196 3183
3197 spin_lock(&tree->buffer_lock); 3184 rcu_read_lock();
3198 eb = buffer_search(tree, start); 3185 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3199 if (eb) 3186 if (eb && atomic_inc_not_zero(&eb->refs)) {
3200 atomic_inc(&eb->refs); 3187 rcu_read_unlock();
3201 spin_unlock(&tree->buffer_lock);
3202
3203 if (eb)
3204 mark_page_accessed(eb->first_page); 3188 mark_page_accessed(eb->first_page);
3189 return eb;
3190 }
3191 rcu_read_unlock();
3205 3192
3206 return eb; 3193 return NULL;
3207} 3194}
3208 3195
3209void free_extent_buffer(struct extent_buffer *eb) 3196void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3833 } 3820 }
3834} 3821}
3835 3822
3823static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
3824{
3825 struct extent_buffer *eb =
3826 container_of(head, struct extent_buffer, rcu_head);
3827
3828 btrfs_release_extent_buffer(eb);
3829}
3830
3836int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3831int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3837{ 3832{
3838 u64 start = page_offset(page); 3833 u64 start = page_offset(page);
3839 struct extent_buffer *eb; 3834 struct extent_buffer *eb;
3840 int ret = 1; 3835 int ret = 1;
3841 unsigned long i;
3842 unsigned long num_pages;
3843 3836
3844 spin_lock(&tree->buffer_lock); 3837 spin_lock(&tree->buffer_lock);
3845 eb = buffer_search(tree, start); 3838 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3846 if (!eb) 3839 if (!eb)
3847 goto out; 3840 goto out;
3848 3841
3849 if (atomic_read(&eb->refs) > 1) { 3842 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3850 ret = 0; 3843 ret = 0;
3851 goto out; 3844 goto out;
3852 } 3845 }
3853 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3846
3847 /*
3848 * set @eb->refs to 0 if it is already 1, and then release the @eb.
3849 * Or go back.
3850 */
3851 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
3854 ret = 0; 3852 ret = 0;
3855 goto out; 3853 goto out;
3856 } 3854 }
3857 /* at this point we can safely release the extent buffer */ 3855
3858 num_pages = num_extent_pages(eb->start, eb->len); 3856 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3859 for (i = 0; i < num_pages; i++)
3860 page_cache_release(extent_buffer_page(eb, i));
3861 rb_erase(&eb->rb_node, &tree->buffer);
3862 __free_extent_buffer(eb);
3863out: 3857out:
3864 spin_unlock(&tree->buffer_lock); 3858 spin_unlock(&tree->buffer_lock);
3859
3860 /* at this point we can safely release the extent buffer */
3861 if (atomic_read(&eb->refs) == 0)
3862 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
3865 return ret; 3863 return ret;
3866} 3864}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
85 85
86struct extent_io_tree { 86struct extent_io_tree {
87 struct rb_root state; 87 struct rb_root state;
88 struct rb_root buffer; 88 struct radix_tree_root buffer;
89 struct address_space *mapping; 89 struct address_space *mapping;
90 u64 dirty_bytes; 90 u64 dirty_bytes;
91 spinlock_t lock; 91 spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
123 unsigned long bflags; 123 unsigned long bflags;
124 atomic_t refs; 124 atomic_t refs;
125 struct list_head leak_list; 125 struct list_head leak_list;
126 struct rb_node rb_node; 126 struct rcu_head rcu_head;
127 127
128 /* the spinlock is used to protect most operations */ 128 /* the spinlock is used to protect most operations */
129 spinlock_t lock; 129 spinlock_t lock;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d6451..23cb8da3ff66 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
335 goto out; 335 goto out;
336 } 336 }
337 if (IS_ERR(rb_node)) { 337 if (IS_ERR(rb_node)) {
338 em = ERR_PTR(PTR_ERR(rb_node)); 338 em = ERR_CAST(rb_node);
339 goto out; 339 goto out;
340 } 340 }
341 em = rb_entry(rb_node, struct extent_map, rb_node); 341 em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
384 goto out; 384 goto out;
385 } 385 }
386 if (IS_ERR(rb_node)) { 386 if (IS_ERR(rb_node)) {
387 em = ERR_PTR(PTR_ERR(rb_node)); 387 em = ERR_CAST(rb_node);
388 goto out; 388 goto out;
389 } 389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node); 390 em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..22ee0dc2e6b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h"
26 27
27#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
28#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
29 30
31static void recalculate_thresholds(struct btrfs_block_group_cache
32 *block_group);
33static int link_free_space(struct btrfs_block_group_cache *block_group,
34 struct btrfs_free_space *info);
35
36struct inode *lookup_free_space_inode(struct btrfs_root *root,
37 struct btrfs_block_group_cache
38 *block_group, struct btrfs_path *path)
39{
40 struct btrfs_key key;
41 struct btrfs_key location;
42 struct btrfs_disk_key disk_key;
43 struct btrfs_free_space_header *header;
44 struct extent_buffer *leaf;
45 struct inode *inode = NULL;
46 int ret;
47
48 spin_lock(&block_group->lock);
49 if (block_group->inode)
50 inode = igrab(block_group->inode);
51 spin_unlock(&block_group->lock);
52 if (inode)
53 return inode;
54
55 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
56 key.offset = block_group->key.objectid;
57 key.type = 0;
58
59 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
60 if (ret < 0)
61 return ERR_PTR(ret);
62 if (ret > 0) {
63 btrfs_release_path(root, path);
64 return ERR_PTR(-ENOENT);
65 }
66
67 leaf = path->nodes[0];
68 header = btrfs_item_ptr(leaf, path->slots[0],
69 struct btrfs_free_space_header);
70 btrfs_free_space_key(leaf, header, &disk_key);
71 btrfs_disk_key_to_cpu(&location, &disk_key);
72 btrfs_release_path(root, path);
73
74 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
75 if (!inode)
76 return ERR_PTR(-ENOENT);
77 if (IS_ERR(inode))
78 return inode;
79 if (is_bad_inode(inode)) {
80 iput(inode);
81 return ERR_PTR(-ENOENT);
82 }
83
84 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode);
87 block_group->iref = 1;
88 }
89 spin_unlock(&block_group->lock);
90
91 return inode;
92}
93
94int create_free_space_inode(struct btrfs_root *root,
95 struct btrfs_trans_handle *trans,
96 struct btrfs_block_group_cache *block_group,
97 struct btrfs_path *path)
98{
99 struct btrfs_key key;
100 struct btrfs_disk_key disk_key;
101 struct btrfs_free_space_header *header;
102 struct btrfs_inode_item *inode_item;
103 struct extent_buffer *leaf;
104 u64 objectid;
105 int ret;
106
107 ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
108 if (ret < 0)
109 return ret;
110
111 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
112 if (ret)
113 return ret;
114
115 leaf = path->nodes[0];
116 inode_item = btrfs_item_ptr(leaf, path->slots[0],
117 struct btrfs_inode_item);
118 btrfs_item_key(leaf, &disk_key, path->slots[0]);
119 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
120 sizeof(*inode_item));
121 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
122 btrfs_set_inode_size(leaf, inode_item, 0);
123 btrfs_set_inode_nbytes(leaf, inode_item, 0);
124 btrfs_set_inode_uid(leaf, inode_item, 0);
125 btrfs_set_inode_gid(leaf, inode_item, 0);
126 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
127 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
128 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
129 btrfs_set_inode_nlink(leaf, inode_item, 1);
130 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
131 btrfs_set_inode_block_group(leaf, inode_item,
132 block_group->key.objectid);
133 btrfs_mark_buffer_dirty(leaf);
134 btrfs_release_path(root, path);
135
136 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
137 key.offset = block_group->key.objectid;
138 key.type = 0;
139
140 ret = btrfs_insert_empty_item(trans, root, path, &key,
141 sizeof(struct btrfs_free_space_header));
142 if (ret < 0) {
143 btrfs_release_path(root, path);
144 return ret;
145 }
146 leaf = path->nodes[0];
147 header = btrfs_item_ptr(leaf, path->slots[0],
148 struct btrfs_free_space_header);
149 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
150 btrfs_set_free_space_key(leaf, header, &disk_key);
151 btrfs_mark_buffer_dirty(leaf);
152 btrfs_release_path(root, path);
153
154 return 0;
155}
156
157int btrfs_truncate_free_space_cache(struct btrfs_root *root,
158 struct btrfs_trans_handle *trans,
159 struct btrfs_path *path,
160 struct inode *inode)
161{
162 loff_t oldsize;
163 int ret = 0;
164
165 trans->block_rsv = root->orphan_block_rsv;
166 ret = btrfs_block_rsv_check(trans, root,
167 root->orphan_block_rsv,
168 0, 5);
169 if (ret)
170 return ret;
171
172 oldsize = i_size_read(inode);
173 btrfs_i_size_write(inode, 0);
174 truncate_pagecache(inode, oldsize, 0);
175
176 /*
177 * We don't need an orphan item because truncating the free space cache
178 * will never be split across transactions.
179 */
180 ret = btrfs_truncate_inode_items(trans, root, inode,
181 0, BTRFS_EXTENT_DATA_KEY);
182 if (ret) {
183 WARN_ON(1);
184 return ret;
185 }
186
187 return btrfs_update_inode(trans, root, inode);
188}
189
190static int readahead_cache(struct inode *inode)
191{
192 struct file_ra_state *ra;
193 unsigned long last_index;
194
195 ra = kzalloc(sizeof(*ra), GFP_NOFS);
196 if (!ra)
197 return -ENOMEM;
198
199 file_ra_state_init(ra, inode->i_mapping);
200 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
201
202 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
203
204 kfree(ra);
205
206 return 0;
207}
208
209int load_free_space_cache(struct btrfs_fs_info *fs_info,
210 struct btrfs_block_group_cache *block_group)
211{
212 struct btrfs_root *root = fs_info->tree_root;
213 struct inode *inode;
214 struct btrfs_free_space_header *header;
215 struct extent_buffer *leaf;
216 struct page *page;
217 struct btrfs_path *path;
218 u32 *checksums = NULL, *crc;
219 char *disk_crcs = NULL;
220 struct btrfs_key key;
221 struct list_head bitmaps;
222 u64 num_entries;
223 u64 num_bitmaps;
224 u64 generation;
225 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0;
227 unsigned long first_page_offset;
228 int num_checksums;
229 int ret = 0;
230
231 /*
232 * If we're unmounting then just return, since this does a search on the
233 * normal root and not the commit root and we could deadlock.
234 */
235 smp_mb();
236 if (fs_info->closing)
237 return 0;
238
239 /*
240 * If this block group has been marked to be cleared for one reason or
241 * another then we can't trust the on disk cache, so just return.
242 */
243 spin_lock(&block_group->lock);
244 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
245 spin_unlock(&block_group->lock);
246 return 0;
247 }
248 spin_unlock(&block_group->lock);
249
250 INIT_LIST_HEAD(&bitmaps);
251
252 path = btrfs_alloc_path();
253 if (!path)
254 return 0;
255
256 inode = lookup_free_space_inode(root, block_group, path);
257 if (IS_ERR(inode)) {
258 btrfs_free_path(path);
259 return 0;
260 }
261
262 /* Nothing in the space cache, goodbye */
263 if (!i_size_read(inode)) {
264 btrfs_free_path(path);
265 goto out;
266 }
267
268 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
269 key.offset = block_group->key.objectid;
270 key.type = 0;
271
272 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
273 if (ret) {
274 btrfs_free_path(path);
275 goto out;
276 }
277
278 leaf = path->nodes[0];
279 header = btrfs_item_ptr(leaf, path->slots[0],
280 struct btrfs_free_space_header);
281 num_entries = btrfs_free_space_entries(leaf, header);
282 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
283 generation = btrfs_free_space_generation(leaf, header);
284 btrfs_free_path(path);
285
286 if (BTRFS_I(inode)->generation != generation) {
287 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
288 " not match free space cache generation (%llu) for "
289 "block group %llu\n",
290 (unsigned long long)BTRFS_I(inode)->generation,
291 (unsigned long long)generation,
292 (unsigned long long)block_group->key.objectid);
293 goto out;
294 }
295
296 if (!num_entries)
297 goto out;
298
299 /* Setup everything for doing checksumming */
300 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
301 checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
302 if (!checksums)
303 goto out;
304 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
305 disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
306 if (!disk_crcs)
307 goto out;
308
309 ret = readahead_cache(inode);
310 if (ret) {
311 ret = 0;
312 goto out;
313 }
314
315 while (1) {
316 struct btrfs_free_space_entry *entry;
317 struct btrfs_free_space *e;
318 void *addr;
319 unsigned long offset = 0;
320 unsigned long start_offset = 0;
321 int need_loop = 0;
322
323 if (!num_entries && !num_bitmaps)
324 break;
325
326 if (index == 0) {
327 start_offset = first_page_offset;
328 offset = start_offset;
329 }
330
331 page = grab_cache_page(inode->i_mapping, index);
332 if (!page) {
333 ret = 0;
334 goto free_cache;
335 }
336
337 if (!PageUptodate(page)) {
338 btrfs_readpage(NULL, page);
339 lock_page(page);
340 if (!PageUptodate(page)) {
341 unlock_page(page);
342 page_cache_release(page);
343 printk(KERN_ERR "btrfs: error reading free "
344 "space cache: %llu\n",
345 (unsigned long long)
346 block_group->key.objectid);
347 goto free_cache;
348 }
349 }
350 addr = kmap(page);
351
352 if (index == 0) {
353 u64 *gen;
354
355 memcpy(disk_crcs, addr, first_page_offset);
356 gen = addr + (sizeof(u32) * num_checksums);
357 if (*gen != BTRFS_I(inode)->generation) {
358 printk(KERN_ERR "btrfs: space cache generation"
359 " (%llu) does not match inode (%llu) "
360 "for block group %llu\n",
361 (unsigned long long)*gen,
362 (unsigned long long)
363 BTRFS_I(inode)->generation,
364 (unsigned long long)
365 block_group->key.objectid);
366 kunmap(page);
367 unlock_page(page);
368 page_cache_release(page);
369 goto free_cache;
370 }
371 crc = (u32 *)disk_crcs;
372 }
373 entry = addr + start_offset;
374
375 /* First lets check our crc before we do anything fun */
376 cur_crc = ~(u32)0;
377 cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
378 PAGE_CACHE_SIZE - start_offset);
379 btrfs_csum_final(cur_crc, (char *)&cur_crc);
380 if (cur_crc != *crc) {
381 printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
382 "block group %llu\n", index,
383 (unsigned long long)block_group->key.objectid);
384 kunmap(page);
385 unlock_page(page);
386 page_cache_release(page);
387 goto free_cache;
388 }
389 crc++;
390
391 while (1) {
392 if (!num_entries)
393 break;
394
395 need_loop = 1;
396 e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
397 if (!e) {
398 kunmap(page);
399 unlock_page(page);
400 page_cache_release(page);
401 goto free_cache;
402 }
403
404 e->offset = le64_to_cpu(entry->offset);
405 e->bytes = le64_to_cpu(entry->bytes);
406 if (!e->bytes) {
407 kunmap(page);
408 kfree(e);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
413
414 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
415 spin_lock(&block_group->tree_lock);
416 ret = link_free_space(block_group, e);
417 spin_unlock(&block_group->tree_lock);
418 BUG_ON(ret);
419 } else {
420 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
421 if (!e->bitmap) {
422 kunmap(page);
423 kfree(e);
424 unlock_page(page);
425 page_cache_release(page);
426 goto free_cache;
427 }
428 spin_lock(&block_group->tree_lock);
429 ret = link_free_space(block_group, e);
430 block_group->total_bitmaps++;
431 recalculate_thresholds(block_group);
432 spin_unlock(&block_group->tree_lock);
433 list_add_tail(&e->list, &bitmaps);
434 }
435
436 num_entries--;
437 offset += sizeof(struct btrfs_free_space_entry);
438 if (offset + sizeof(struct btrfs_free_space_entry) >=
439 PAGE_CACHE_SIZE)
440 break;
441 entry++;
442 }
443
444 /*
445 * We read an entry out of this page, we need to move on to the
446 * next page.
447 */
448 if (need_loop) {
449 kunmap(page);
450 goto next;
451 }
452
453 /*
454 * We add the bitmaps at the end of the entries in order that
455 * the bitmap entries are added to the cache.
456 */
457 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
458 list_del_init(&e->list);
459 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
460 kunmap(page);
461 num_bitmaps--;
462next:
463 unlock_page(page);
464 page_cache_release(page);
465 index++;
466 }
467
468 ret = 1;
469out:
470 kfree(checksums);
471 kfree(disk_crcs);
472 iput(inode);
473 return ret;
474
475free_cache:
476 /* This cache is bogus, make sure it gets cleared */
477 spin_lock(&block_group->lock);
478 block_group->disk_cache_state = BTRFS_DC_CLEAR;
479 spin_unlock(&block_group->lock);
480 btrfs_remove_free_space_cache(block_group);
481 goto out;
482}
483
/*
 * btrfs_write_out_cache - write a block group's in-memory free space tree
 * into the pages of its free space cache inode and update the on-disk
 * free space header item.
 *
 * @root:        only used to reach fs_info; immediately replaced by
 *               root->fs_info->tree_root
 * @trans:       running transaction; trans->transid is stamped into the first
 *               page, the inode generation and the header item
 * @block_group: block group whose free_space_offset rb-tree is written out
 * @path:        scratch path handed to lookup_free_space_inode() and
 *               btrfs_search_slot()
 *
 * File layout produced here:
 *   page 0:      one u32 checksum slot per page, then a u64 generation, then
 *                free space entries
 *   next pages:  further free space entries (one struct
 *                btrfs_free_space_entry per rb-tree node, bitmaps included)
 *   next pages:  one full page per bitmap, in the order the bitmap entries
 *                were met in the tree
 *   remainder:   zero filled up to last_index
 *
 * Returns 1 when the header item was updated.  Returns 0 otherwise:
 *   - early, without touching any state, when the block group is below
 *     BTRFS_DC_SETUP, the inode lookup fails, the inode has size 0 or the
 *     checksum array cannot be allocated;
 *   - through out_free, which additionally invalidates the pages, sets
 *     disk_cache_state to BTRFS_DC_ERROR and resets the inode generation.
 */
484int btrfs_write_out_cache(struct btrfs_root *root,
485 struct btrfs_trans_handle *trans,
486 struct btrfs_block_group_cache *block_group,
487 struct btrfs_path *path)
488{
489 struct btrfs_free_space_header *header;
490 struct extent_buffer *leaf;
491 struct inode *inode;
492 struct rb_node *node;
493 struct list_head *pos, *n;
494 struct page *page;
495 struct extent_state *cached_state = NULL;
496 struct list_head bitmap_list;
497 struct btrfs_key key;
498 u64 bytes = 0;
499 u32 *crc, *checksums;
500 pgoff_t index = 0, last_index = 0;
501 unsigned long first_page_offset;
502 int num_checksums;
503 int entries = 0;
504 int bitmaps = 0;
505 int ret = 0;
506
	/* All lookups and the header update below go through the tree root. */
507 root = root->fs_info->tree_root;
508
509 INIT_LIST_HEAD(&bitmap_list);
510
	/* Nothing to do unless the cache state has reached BTRFS_DC_SETUP. */
511 spin_lock(&block_group->lock);
512 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
513 spin_unlock(&block_group->lock);
514 return 0;
515 }
516 spin_unlock(&block_group->lock);
517
518 inode = lookup_free_space_inode(root, block_group, path);
519 if (IS_ERR(inode))
520 return 0;
521
522 if (!i_size_read(inode)) {
523 iput(inode);
524 return 0;
525 }
526
	/*
	 * last_index rounds the file size up to whole pages.  Flush whatever
	 * is still dirty or in flight for this inode before reusing its pages.
	 * NOTE(review): the return value of filemap_write_and_wait() is
	 * ignored here.
	 */
527 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
528 filemap_write_and_wait(inode->i_mapping);
529 btrfs_wait_ordered_range(inode, inode->i_size &
530 ~(root->sectorsize - 1), (u64)-1);
531
532 /* We need a checksum per page. */
	/*
	 * NOTE(review): this division rounds down while last_index above
	 * rounds up, so a size that is not a multiple of PAGE_CACHE_SIZE
	 * yields one page more than there are checksum slots (and zero slots
	 * for a file smaller than one page).  Presumably i_size is always
	 * page aligned for this inode - confirm against the code that sizes
	 * the cache file.
	 */
533 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
534 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
535 if (!crc) {
536 iput(inode);
537 return 0;
538 }
539
540 /* Since the first page has all of our checksums and our generation we
541 * need to calculate the offset into the page that we can start writing
542 * our entries.
543 */
	/*
	 * NOTE(review): nothing below verifies that first_page_offset plus
	 * one entry still fits inside PAGE_CACHE_SIZE - confirm the cache
	 * file can never have enough pages to overflow page 0.
	 */
544 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
545
	/*
	 * An empty free space tree leaves through out_free with ret still 0,
	 * i.e. it is handled like a failure (BTRFS_DC_ERROR).
	 */
546 node = rb_first(&block_group->free_space_offset);
547 if (!node)
548 goto out_free;
549
550 /*
551 * Lock all pages first so we can lock the extent safely.
552 *
553 * NOTE: Because we hold the ref the entire time we're going to write to
554 * the page find_get_page should never fail, so we don't do a check
555 * after find_get_page at this point. Just putting this here so people
556 * know and don't freak out.
557 */
558 while (index <= last_index) {
559 page = grab_cache_page(inode->i_mapping, index);
560 if (!page) {
561 pgoff_t i = 0;
562
	/*
	 * Unwind pages 0..index-1: unlock each one and drop two
	 * references, the one find_get_page() just took and the one
	 * held since grab_cache_page().
	 */
563 while (i < index) {
564 page = find_get_page(inode->i_mapping, i);
565 unlock_page(page);
566 page_cache_release(page);
567 page_cache_release(page);
568 i++;
569 }
570 goto out_free;
571 }
572 index++;
573 }
574
575 index = 0;
576 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
577 0, &cached_state, GFP_NOFS);
578
579 /* Write out the extent entries */
	/*
	 * One iteration fills one page.  Bitmap nodes also get an entry here
	 * and are queued on bitmap_list so their payload is written after
	 * all entry pages.
	 * NOTE(review): index is never compared with last_index (or
	 * num_checksums) in this loop or in the bitmap loop below; if the
	 * tree needs more pages than the file has, find_get_page() would
	 * return NULL and crc would run past the checksums array.  Confirm
	 * the file is always sized generously enough.
	 */
580 do {
581 struct btrfs_free_space_entry *entry;
582 void *addr;
583 unsigned long offset = 0;
584 unsigned long start_offset = 0;
585
	/* Page 0 keeps room for the checksum array and the generation. */
586 if (index == 0) {
587 start_offset = first_page_offset;
588 offset = start_offset;
589 }
590
591 page = find_get_page(inode->i_mapping, index);
592
593 addr = kmap(page);
594 entry = addr + start_offset;
595
596 memset(addr, 0, PAGE_CACHE_SIZE);
597 while (1) {
598 struct btrfs_free_space *e;
599
600 e = rb_entry(node, struct btrfs_free_space, offset_index);
	/* entries counts every tree node, bitmap nodes included. */
601 entries++;
602
603 entry->offset = cpu_to_le64(e->offset);
604 entry->bytes = cpu_to_le64(e->bytes);
605 if (e->bitmap) {
606 entry->type = BTRFS_FREE_SPACE_BITMAP;
607 list_add_tail(&e->list, &bitmap_list);
608 bitmaps++;
609 } else {
610 entry->type = BTRFS_FREE_SPACE_EXTENT;
611 }
612 node = rb_next(node);
613 if (!node)
614 break;
	/* Stop once another entry would reach the end of the page. */
615 offset += sizeof(struct btrfs_free_space_entry);
616 if (offset + sizeof(struct btrfs_free_space_entry) >=
617 PAGE_CACHE_SIZE)
618 break;
619 entry++;
620 }
	/*
	 * The checksum covers the page from start_offset on, so on page 0
	 * the checksum array and generation are not part of it.
	 */
621 *crc = ~(u32)0;
622 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
623 PAGE_CACHE_SIZE - start_offset);
624 kunmap(page);
625
626 btrfs_csum_final(*crc, (char *)crc);
627 crc++;
628
629 bytes += PAGE_CACHE_SIZE;
630
631 ClearPageChecked(page);
632 set_page_extent_mapped(page);
633 SetPageUptodate(page);
634 set_page_dirty(page);
635
636 /*
637 * We need to release our reference we got for grab_cache_page,
638 * except for the first page which will hold our checksums, we
639 * do that below.
640 */
641 if (index != 0) {
642 unlock_page(page);
643 page_cache_release(page);
644 }
645
	/* Drops the reference taken by find_get_page() in this iteration. */
646 page_cache_release(page);
647
648 index++;
649 } while (node);
650
651 /* Write out the bitmaps */
652 list_for_each_safe(pos, n, &bitmap_list) {
653 void *addr;
654 struct btrfs_free_space *entry =
655 list_entry(pos, struct btrfs_free_space, list);
656
657 page = find_get_page(inode->i_mapping, index);
658
	/* A bitmap occupies one whole page and is checksummed as such. */
659 addr = kmap(page);
660 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
661 *crc = ~(u32)0;
662 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
663 kunmap(page);
664 btrfs_csum_final(*crc, (char *)crc);
665 crc++;
666 bytes += PAGE_CACHE_SIZE;
667
668 ClearPageChecked(page);
669 set_page_extent_mapped(page);
670 SetPageUptodate(page);
671 set_page_dirty(page);
672 unlock_page(page);
673 page_cache_release(page);
674 page_cache_release(page);
	/* Take the entry back off the on-stack list before returning. */
675 list_del_init(&entry->list);
676 index++;
677 }
678
679 /* Zero out the rest of the pages just to make sure */
	/* These pages get no checksum; their slots stay 0 from kzalloc(). */
680 while (index <= last_index) {
681 void *addr;
682
683 page = find_get_page(inode->i_mapping, index);
684
685 addr = kmap(page);
686 memset(addr, 0, PAGE_CACHE_SIZE);
687 kunmap(page);
688 ClearPageChecked(page);
689 set_page_extent_mapped(page);
690 SetPageUptodate(page);
691 set_page_dirty(page);
692 unlock_page(page);
693 page_cache_release(page);
694 page_cache_release(page);
695 bytes += PAGE_CACHE_SIZE;
696 index++;
697 }
698
699 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
700
701 /* Write the checksums and trans id to the first page */
702 {
703 void *addr;
704 u64 *gen;
705
706 page = find_get_page(inode->i_mapping, 0);
707
708 addr = kmap(page);
709 memcpy(addr, checksums, sizeof(u32) * num_checksums);
710 gen = addr + (sizeof(u32) * num_checksums);
	/*
	 * NOTE(review): the generation is stored in CPU byte order,
	 * unlike the entries above which go through cpu_to_le64() -
	 * confirm this matches what the reader of page 0 expects.
	 */
711 *gen = trans->transid;
712 kunmap(page);
713 ClearPageChecked(page);
714 set_page_extent_mapped(page);
715 SetPageUptodate(page);
716 set_page_dirty(page);
	/* Page 0 was kept locked since the grab loop; let go of it now. */
717 unlock_page(page);
718 page_cache_release(page);
719 page_cache_release(page);
720 }
721 BTRFS_I(inode)->generation = trans->transid;
722
723 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
724 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
725
	/* NOTE(review): a writeback error returned here is not checked. */
726 filemap_write_and_wait(inode->i_mapping);
727
	/* Locate the free space header item of this block group. */
728 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
729 key.offset = block_group->key.objectid;
730 key.type = 0;
731
732 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
733 if (ret < 0) {
734 ret = 0;
735 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
736 EXTENT_DIRTY | EXTENT_DELALLOC |
737 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
738 goto out_free;
739 }
740 leaf = path->nodes[0];
	/*
	 * No exact match: step back one slot and accept it only if that item
	 * carries the expected objectid and offset; otherwise give up.
	 */
741 if (ret > 0) {
742 struct btrfs_key found_key;
743 BUG_ON(!path->slots[0]);
744 path->slots[0]--;
745 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
746 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
747 found_key.offset != block_group->key.objectid) {
748 ret = 0;
749 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
750 EXTENT_DIRTY | EXTENT_DELALLOC |
751 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
752 GFP_NOFS);
753 btrfs_release_path(root, path);
754 goto out_free;
755 }
756 }
	/* Record entry count, bitmap count and generation in the header. */
757 header = btrfs_item_ptr(leaf, path->slots[0],
758 struct btrfs_free_space_header);
759 btrfs_set_free_space_entries(leaf, header, entries);
760 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
761 btrfs_set_free_space_generation(leaf, header, trans->transid);
762 btrfs_mark_buffer_dirty(leaf);
763 btrfs_release_path(root, path);
764
765 ret = 1;
766
767out_free:
	/*
	 * ret == 0 means the cache was not (completely) written: drop the
	 * pages touched so far, flag the block group with BTRFS_DC_ERROR and
	 * reset the inode generation.
	 */
768 if (ret == 0) {
769 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
770 spin_lock(&block_group->lock);
771 block_group->disk_cache_state = BTRFS_DC_ERROR;
772 spin_unlock(&block_group->lock);
773 BTRFS_I(inode)->generation = 0;
774 }
775 kfree(checksums);
	/*
	 * The inode item is updated on both the success and the failure path.
	 * NOTE(review): the return value of btrfs_update_inode() is ignored.
	 */
776 btrfs_update_inode(trans, root, inode);
777 iput(inode);
778 return ret;
779}
780
30static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 781static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
31 u64 offset) 782 u64 offset)
32{ 783{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path);
33int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path);
37
38int btrfs_truncate_free_space_cache(struct btrfs_root *root,
39 struct btrfs_trans_handle *trans,
40 struct btrfs_path *path,
41 struct inode *inode);
42int load_free_space_cache(struct btrfs_fs_info *fs_info,
43 struct btrfs_block_group_cache *block_group);
44int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path);
30int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
31 u64 bytenr, u64 size); 49 u64 bytenr, u64 size);
32int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce0..558cac2dfa54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
319 struct btrfs_root *root = BTRFS_I(inode)->root; 319 struct btrfs_root *root = BTRFS_I(inode)->root;
320 struct btrfs_trans_handle *trans; 320 struct btrfs_trans_handle *trans;
321 u64 num_bytes; 321 u64 num_bytes;
322 u64 orig_start;
323 u64 disk_num_bytes;
324 u64 blocksize = root->sectorsize; 322 u64 blocksize = root->sectorsize;
325 u64 actual_end; 323 u64 actual_end;
326 u64 isize = i_size_read(inode); 324 u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
335 int i; 333 int i;
336 int will_compress; 334 int will_compress;
337 335
338 orig_start = start;
339
340 actual_end = min_t(u64, isize, end + 1); 336 actual_end = min_t(u64, isize, end + 1);
341again: 337again:
342 will_compress = 0; 338 will_compress = 0;
@@ -371,7 +367,6 @@ again:
371 total_compressed = min(total_compressed, max_uncompressed); 367 total_compressed = min(total_compressed, max_uncompressed);
372 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 368 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
373 num_bytes = max(blocksize, num_bytes); 369 num_bytes = max(blocksize, num_bytes);
374 disk_num_bytes = num_bytes;
375 total_in = 0; 370 total_in = 0;
376 ret = 0; 371 ret = 0;
377 372
@@ -467,7 +462,6 @@ again:
467 if (total_compressed >= total_in) { 462 if (total_compressed >= total_in) {
468 will_compress = 0; 463 will_compress = 0;
469 } else { 464 } else {
470 disk_num_bytes = total_compressed;
471 num_bytes = total_in; 465 num_bytes = total_in;
472 } 466 }
473 } 467 }
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
757 u64 disk_num_bytes; 751 u64 disk_num_bytes;
758 u64 cur_alloc_size; 752 u64 cur_alloc_size;
759 u64 blocksize = root->sectorsize; 753 u64 blocksize = root->sectorsize;
760 u64 actual_end;
761 u64 isize = i_size_read(inode);
762 struct btrfs_key ins; 754 struct btrfs_key ins;
763 struct extent_map *em; 755 struct extent_map *em;
764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 756 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
765 int ret = 0; 757 int ret = 0;
766 758
759 BUG_ON(root == root->fs_info->tree_root);
767 trans = btrfs_join_transaction(root, 1); 760 trans = btrfs_join_transaction(root, 1);
768 BUG_ON(!trans); 761 BUG_ON(!trans);
769 btrfs_set_trans_block_group(trans, inode); 762 btrfs_set_trans_block_group(trans, inode);
770 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 763 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
771 764
772 actual_end = min_t(u64, isize, end + 1);
773
774 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 765 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
775 num_bytes = max(blocksize, num_bytes); 766 num_bytes = max(blocksize, num_bytes);
776 disk_num_bytes = num_bytes; 767 disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1035 int type; 1026 int type;
1036 int nocow; 1027 int nocow;
1037 int check_prev = 1; 1028 int check_prev = 1;
1029 bool nolock = false;
1038 1030
1039 path = btrfs_alloc_path(); 1031 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1032 BUG_ON(!path);
1041 trans = btrfs_join_transaction(root, 1); 1033 if (root == root->fs_info->tree_root) {
1034 nolock = true;
1035 trans = btrfs_join_transaction_nolock(root, 1);
1036 } else {
1037 trans = btrfs_join_transaction(root, 1);
1038 }
1042 BUG_ON(!trans); 1039 BUG_ON(!trans);
1043 1040
1044 cow_start = (u64)-1; 1041 cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
1211 BUG_ON(ret); 1208 BUG_ON(ret);
1212 } 1209 }
1213 1210
1214 ret = btrfs_end_transaction(trans, root); 1211 if (nolock) {
1215 BUG_ON(ret); 1212 ret = btrfs_end_transaction_nolock(trans, root);
1213 BUG_ON(ret);
1214 } else {
1215 ret = btrfs_end_transaction(trans, root);
1216 BUG_ON(ret);
1217 }
1216 btrfs_free_path(path); 1218 btrfs_free_path(path);
1217 return 0; 1219 return 0;
1218} 1220}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1291 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1292 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start; 1293 u64 len = state->end + 1 - state->start;
1294 int do_list = (root->root_key.objectid !=
1295 BTRFS_ROOT_TREE_OBJECTID);
1292 1296
1293 if (*bits & EXTENT_FIRST_DELALLOC) 1297 if (*bits & EXTENT_FIRST_DELALLOC)
1294 *bits &= ~EXTENT_FIRST_DELALLOC; 1298 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1298 spin_lock(&root->fs_info->delalloc_lock); 1302 spin_lock(&root->fs_info->delalloc_lock);
1299 BTRFS_I(inode)->delalloc_bytes += len; 1303 BTRFS_I(inode)->delalloc_bytes += len;
1300 root->fs_info->delalloc_bytes += len; 1304 root->fs_info->delalloc_bytes += len;
1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1305 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1306 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1303 &root->fs_info->delalloc_inodes); 1307 &root->fs_info->delalloc_inodes);
1304 } 1308 }
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1325 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1326 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start; 1327 u64 len = state->end + 1 - state->start;
1328 int do_list = (root->root_key.objectid !=
1329 BTRFS_ROOT_TREE_OBJECTID);
1324 1330
1325 if (*bits & EXTENT_FIRST_DELALLOC) 1331 if (*bits & EXTENT_FIRST_DELALLOC)
1326 *bits &= ~EXTENT_FIRST_DELALLOC; 1332 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1330 if (*bits & EXTENT_DO_ACCOUNTING) 1336 if (*bits & EXTENT_DO_ACCOUNTING)
1331 btrfs_delalloc_release_metadata(inode, len); 1337 btrfs_delalloc_release_metadata(inode, len);
1332 1338
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1339 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1340 && do_list)
1334 btrfs_free_reserved_data_space(inode, len); 1341 btrfs_free_reserved_data_space(inode, len);
1335 1342
1336 spin_lock(&root->fs_info->delalloc_lock); 1343 spin_lock(&root->fs_info->delalloc_lock);
1337 root->fs_info->delalloc_bytes -= len; 1344 root->fs_info->delalloc_bytes -= len;
1338 BTRFS_I(inode)->delalloc_bytes -= len; 1345 BTRFS_I(inode)->delalloc_bytes -= len;
1339 1346
1340 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1347 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1348 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1349 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1343 } 1350 }
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1372 1379
1373 if (map_length < length + size) 1380 if (map_length < length + size)
1374 return 1; 1381 return 1;
1375 return 0; 1382 return ret;
1376} 1383}
1377 1384
1378/* 1385/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1426 1433
1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1434 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1428 1435
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1436 if (root == root->fs_info->tree_root)
1437 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1438 else
1439 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1440 BUG_ON(ret);
1431 1441
1432 if (!(rw & REQ_WRITE)) { 1442 if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1662 struct extent_state *cached_state = NULL; 1672 struct extent_state *cached_state = NULL;
1663 int compressed = 0; 1673 int compressed = 0;
1664 int ret; 1674 int ret;
1675 bool nolock = false;
1665 1676
1666 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1677 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1667 end - start + 1); 1678 end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1669 return 0; 1680 return 0;
1670 BUG_ON(!ordered_extent); 1681 BUG_ON(!ordered_extent);
1671 1682
1683 nolock = (root == root->fs_info->tree_root);
1684
1672 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1685 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1673 BUG_ON(!list_empty(&ordered_extent->list)); 1686 BUG_ON(!list_empty(&ordered_extent->list));
1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1687 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1675 if (!ret) { 1688 if (!ret) {
1676 trans = btrfs_join_transaction(root, 1); 1689 if (nolock)
1690 trans = btrfs_join_transaction_nolock(root, 1);
1691 else
1692 trans = btrfs_join_transaction(root, 1);
1693 BUG_ON(!trans);
1677 btrfs_set_trans_block_group(trans, inode); 1694 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1695 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1679 ret = btrfs_update_inode(trans, root, inode); 1696 ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1686 ordered_extent->file_offset + ordered_extent->len - 1, 1703 ordered_extent->file_offset + ordered_extent->len - 1,
1687 0, &cached_state, GFP_NOFS); 1704 0, &cached_state, GFP_NOFS);
1688 1705
1689 trans = btrfs_join_transaction(root, 1); 1706 if (nolock)
1707 trans = btrfs_join_transaction_nolock(root, 1);
1708 else
1709 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode); 1710 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1692 1712
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1700 ordered_extent->len); 1720 ordered_extent->len);
1701 BUG_ON(ret); 1721 BUG_ON(ret);
1702 } else { 1722 } else {
1723 BUG_ON(root == root->fs_info->tree_root);
1703 ret = insert_reserved_file_extent(trans, inode, 1724 ret = insert_reserved_file_extent(trans, inode,
1704 ordered_extent->file_offset, 1725 ordered_extent->file_offset,
1705 ordered_extent->start, 1726 ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1724 ret = btrfs_update_inode(trans, root, inode); 1745 ret = btrfs_update_inode(trans, root, inode);
1725 BUG_ON(ret); 1746 BUG_ON(ret);
1726out: 1747out:
1727 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1748 if (nolock) {
1728 if (trans) 1749 if (trans)
1729 btrfs_end_transaction(trans, root); 1750 btrfs_end_transaction_nolock(trans, root);
1751 } else {
1752 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1753 if (trans)
1754 btrfs_end_transaction(trans, root);
1755 }
1756
1730 /* once for us */ 1757 /* once for us */
1731 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1732 /* once for the tree */ 1759 /* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2237{ 2264{
2238 struct btrfs_path *path; 2265 struct btrfs_path *path;
2239 struct extent_buffer *leaf; 2266 struct extent_buffer *leaf;
2240 struct btrfs_item *item;
2241 struct btrfs_key key, found_key; 2267 struct btrfs_key key, found_key;
2242 struct btrfs_trans_handle *trans; 2268 struct btrfs_trans_handle *trans;
2243 struct inode *inode; 2269 struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2275 2301
2276 /* pull out the item */ 2302 /* pull out the item */
2277 leaf = path->nodes[0]; 2303 leaf = path->nodes[0];
2278 item = btrfs_item_nr(leaf, path->slots[0]);
2279 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2304 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2280 2305
2281 /* make sure the item matches what we want */ 2306 /* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2651 2676
2652 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2677 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2653 dir, index); 2678 dir, index);
2654 BUG_ON(ret); 2679 if (ret == -ENOENT)
2680 ret = 0;
2655err: 2681err:
2656 btrfs_free_path(path); 2682 btrfs_free_path(path);
2657 if (ret) 2683 if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
2672{ 2698{
2673 struct extent_buffer *eb; 2699 struct extent_buffer *eb;
2674 int level; 2700 int level;
2675 int ret;
2676 u64 refs = 1; 2701 u64 refs = 1;
2702 int uninitialized_var(ret);
2677 2703
2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2704 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2679 if (!path->nodes[level]) 2705 if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
2686 if (refs > 1) 2712 if (refs > 1)
2687 return 1; 2713 return 1;
2688 } 2714 }
2689 return 0; 2715 return ret; /* XXX callers? */
2690} 2716}
2691 2717
2692/* 2718/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3196 3222
3197 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3223 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3198 3224
3199 if (root->ref_cows) 3225 if (root->ref_cows || root == root->fs_info->tree_root)
3200 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3226 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3201 3227
3202 path = btrfs_alloc_path(); 3228 path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
3344 } else { 3370 } else {
3345 break; 3371 break;
3346 } 3372 }
3347 if (found_extent && root->ref_cows) { 3373 if (found_extent && (root->ref_cows ||
3374 root == root->fs_info->tree_root)) {
3348 btrfs_set_path_blocking(path); 3375 btrfs_set_path_blocking(path);
3349 ret = btrfs_free_extent(trans, root, extent_start, 3376 ret = btrfs_free_extent(trans, root, extent_start,
3350 extent_num_bytes, 0, 3377 extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
3675 int ret; 3702 int ret;
3676 3703
3677 truncate_inode_pages(&inode->i_data, 0); 3704 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) 3705 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3706 root == root->fs_info->tree_root))
3679 goto no_delete; 3707 goto no_delete;
3680 3708
3681 if (is_bad_inode(inode)) { 3709 if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
3888 } 3916 }
3889 spin_unlock(&root->inode_lock); 3917 spin_unlock(&root->inode_lock);
3890 3918
3891 if (empty && btrfs_root_refs(&root->root_item) == 0) { 3919 /*
3920 * Free space cache has inodes in the tree root, but the tree root has a
3921 * root_refs of 0, so this could end up dropping the tree root as a
3922 * snapshot, so we need the extra !root->fs_info->tree_root check to
3923 * make sure we don't drop it.
3924 */
3925 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
3926 root != root->fs_info->tree_root) {
3892 synchronize_srcu(&root->fs_info->subvol_srcu); 3927 synchronize_srcu(&root->fs_info->subvol_srcu);
3893 spin_lock(&root->inode_lock); 3928 spin_lock(&root->inode_lock);
3894 empty = RB_EMPTY_ROOT(&root->inode_tree); 3929 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4282 struct btrfs_root *root = BTRFS_I(inode)->root; 4317 struct btrfs_root *root = BTRFS_I(inode)->root;
4283 struct btrfs_trans_handle *trans; 4318 struct btrfs_trans_handle *trans;
4284 int ret = 0; 4319 int ret = 0;
4320 bool nolock = false;
4285 4321
4286 if (BTRFS_I(inode)->dummy_inode) 4322 if (BTRFS_I(inode)->dummy_inode)
4287 return 0; 4323 return 0;
4288 4324
4325 smp_mb();
4326 nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
4327
4289 if (wbc->sync_mode == WB_SYNC_ALL) { 4328 if (wbc->sync_mode == WB_SYNC_ALL) {
4290 trans = btrfs_join_transaction(root, 1); 4329 if (nolock)
4330 trans = btrfs_join_transaction_nolock(root, 1);
4331 else
4332 trans = btrfs_join_transaction(root, 1);
4291 btrfs_set_trans_block_group(trans, inode); 4333 btrfs_set_trans_block_group(trans, inode);
4292 ret = btrfs_commit_transaction(trans, root); 4334 if (nolock)
4335 ret = btrfs_end_transaction_nolock(trans, root);
4336 else
4337 ret = btrfs_commit_transaction(trans, root);
4293 } 4338 }
4294 return ret; 4339 return ret;
4295} 4340}
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5645 struct btrfs_root *root = BTRFS_I(inode)->root; 5690 struct btrfs_root *root = BTRFS_I(inode)->root;
5646 struct btrfs_dio_private *dip; 5691 struct btrfs_dio_private *dip;
5647 struct bio_vec *bvec = bio->bi_io_vec; 5692 struct bio_vec *bvec = bio->bi_io_vec;
5648 u64 start;
5649 int skip_sum; 5693 int skip_sum;
5650 int write = rw & REQ_WRITE; 5694 int write = rw & REQ_WRITE;
5651 int ret = 0; 5695 int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5671 dip->inode = inode; 5715 dip->inode = inode;
5672 dip->logical_offset = file_offset; 5716 dip->logical_offset = file_offset;
5673 5717
5674 start = dip->logical_offset;
5675 dip->bytes = 0; 5718 dip->bytes = 0;
5676 do { 5719 do {
5677 dip->bytes += bvec->bv_len; 5720 dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
6308 spin_unlock(&root->fs_info->ordered_extent_lock); 6351 spin_unlock(&root->fs_info->ordered_extent_lock);
6309 } 6352 }
6310 6353
6354 if (root == root->fs_info->tree_root) {
6355 struct btrfs_block_group_cache *block_group;
6356
6357 block_group = btrfs_lookup_block_group(root->fs_info,
6358 BTRFS_I(inode)->block_group);
6359 if (block_group && block_group->inode == inode) {
6360 spin_lock(&block_group->lock);
6361 block_group->inode = NULL;
6362 spin_unlock(&block_group->lock);
6363 btrfs_put_block_group(block_group);
6364 } else if (block_group) {
6365 btrfs_put_block_group(block_group);
6366 }
6367 }
6368
6311 spin_lock(&root->orphan_lock); 6369 spin_lock(&root->orphan_lock);
6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6370 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6371 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
6340{ 6398{
6341 struct btrfs_root *root = BTRFS_I(inode)->root; 6399 struct btrfs_root *root = BTRFS_I(inode)->root;
6342 6400
6343 if (btrfs_root_refs(&root->root_item) == 0) 6401 if (btrfs_root_refs(&root->root_item) == 0 &&
6402 root != root->fs_info->tree_root)
6344 return 1; 6403 return 1;
6345 else 6404 else
6346 return generic_drop_inode(inode); 6405 return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
6609 return 0; 6668 return 0;
6610} 6669}
6611 6670
6612int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) 6671int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
6672 int sync)
6613{ 6673{
6614 struct btrfs_inode *binode; 6674 struct btrfs_inode *binode;
6615 struct inode *inode = NULL; 6675 struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6631 spin_unlock(&root->fs_info->delalloc_lock); 6691 spin_unlock(&root->fs_info->delalloc_lock);
6632 6692
6633 if (inode) { 6693 if (inode) {
6634 write_inode_now(inode, 0); 6694 if (sync) {
6695 filemap_write_and_wait(inode->i_mapping);
6696 /*
6697 * We have to do this because compression doesn't
6698 * actually set PG_writeback until it submits the pages
6699 * for IO, which happens in an async thread, so we could
6700 * race and not actually wait for any writeback pages
6701 * because they've not been submitted yet. Technically
6702 * this could still be the case for the ordered stuff
6703 * since the async thread may not have started to do its
6704 * work yet. If this becomes the case then we need to
6705 * figure out a way to make sure that in writepage we
6706 * wait for any async pages to be submitted before
6707 * returning so that fdatawait does what its supposed to
6708 * do.
6709 */
6710 btrfs_wait_ordered_range(inode, 0, (u64)-1);
6711 } else {
6712 filemap_flush(inode->i_mapping);
6713 }
6635 if (delay_iput) 6714 if (delay_iput)
6636 btrfs_add_delayed_iput(inode); 6715 btrfs_add_delayed_iput(inode);
6637 else 6716 else
@@ -6757,27 +6836,33 @@ out_unlock:
6757 return err; 6836 return err;
6758} 6837}
6759 6838
6760int btrfs_prealloc_file_range(struct inode *inode, int mode, 6839static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6761 u64 start, u64 num_bytes, u64 min_size, 6840 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint) 6841 loff_t actual_len, u64 *alloc_hint,
6842 struct btrfs_trans_handle *trans)
6763{ 6843{
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root = BTRFS_I(inode)->root; 6844 struct btrfs_root *root = BTRFS_I(inode)->root;
6766 struct btrfs_key ins; 6845 struct btrfs_key ins;
6767 u64 cur_offset = start; 6846 u64 cur_offset = start;
6768 int ret = 0; 6847 int ret = 0;
6848 bool own_trans = true;
6769 6849
6850 if (trans)
6851 own_trans = false;
6770 while (num_bytes > 0) { 6852 while (num_bytes > 0) {
6771 trans = btrfs_start_transaction(root, 3); 6853 if (own_trans) {
6772 if (IS_ERR(trans)) { 6854 trans = btrfs_start_transaction(root, 3);
6773 ret = PTR_ERR(trans); 6855 if (IS_ERR(trans)) {
6774 break; 6856 ret = PTR_ERR(trans);
6857 break;
6858 }
6775 } 6859 }
6776 6860
6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6861 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1); 6862 0, *alloc_hint, (u64)-1, &ins, 1);
6779 if (ret) { 6863 if (ret) {
6780 btrfs_end_transaction(trans, root); 6864 if (own_trans)
6865 btrfs_end_transaction(trans, root);
6781 break; 6866 break;
6782 } 6867 }
6783 6868
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
6810 ret = btrfs_update_inode(trans, root, inode); 6895 ret = btrfs_update_inode(trans, root, inode);
6811 BUG_ON(ret); 6896 BUG_ON(ret);
6812 6897
6813 btrfs_end_transaction(trans, root); 6898 if (own_trans)
6899 btrfs_end_transaction(trans, root);
6814 } 6900 }
6815 return ret; 6901 return ret;
6816} 6902}
6817 6903
6904int btrfs_prealloc_file_range(struct inode *inode, int mode,
6905 u64 start, u64 num_bytes, u64 min_size,
6906 loff_t actual_len, u64 *alloc_hint)
6907{
6908 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6909 min_size, actual_len, alloc_hint,
6910 NULL);
6911}
6912
6913int btrfs_prealloc_file_range_trans(struct inode *inode,
6914 struct btrfs_trans_handle *trans, int mode,
6915 u64 start, u64 num_bytes, u64 min_size,
6916 loff_t actual_len, u64 *alloc_hint)
6917{
6918 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6919 min_size, actual_len, alloc_hint, trans);
6920}
6921
6818static long btrfs_fallocate(struct inode *inode, int mode, 6922static long btrfs_fallocate(struct inode *inode, int mode,
6819 loff_t offset, loff_t len) 6923 loff_t offset, loff_t len)
6820{ 6924{
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58dbe..463d91b4dd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
224 224
225static noinline int create_subvol(struct btrfs_root *root, 225static noinline int create_subvol(struct btrfs_root *root,
226 struct dentry *dentry, 226 struct dentry *dentry,
227 char *name, int namelen) 227 char *name, int namelen,
228 u64 *async_transid)
228{ 229{
229 struct btrfs_trans_handle *trans; 230 struct btrfs_trans_handle *trans;
230 struct btrfs_key key; 231 struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
338 339
339 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
340fail: 341fail:
341 err = btrfs_commit_transaction(trans, root); 342 if (async_transid) {
343 *async_transid = trans->transid;
344 err = btrfs_commit_transaction_async(trans, root, 1);
345 } else {
346 err = btrfs_commit_transaction(trans, root);
347 }
342 if (err && !ret) 348 if (err && !ret)
343 ret = err; 349 ret = err;
344 return ret; 350 return ret;
345} 351}
346 352
347static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) 353static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
354 char *name, int namelen, u64 *async_transid)
348{ 355{
349 struct inode *inode; 356 struct inode *inode;
350 struct btrfs_pending_snapshot *pending_snapshot; 357 struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
373 380
374 list_add(&pending_snapshot->list, 381 list_add(&pending_snapshot->list,
375 &trans->transaction->pending_snapshots); 382 &trans->transaction->pending_snapshots);
376 ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); 383 if (async_transid) {
384 *async_transid = trans->transid;
385 ret = btrfs_commit_transaction_async(trans,
386 root->fs_info->extent_root, 1);
387 } else {
388 ret = btrfs_commit_transaction(trans,
389 root->fs_info->extent_root);
390 }
377 BUG_ON(ret); 391 BUG_ON(ret);
378 392
379 ret = pending_snapshot->error; 393 ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
395 return ret; 409 return ret;
396} 410}
397 411
412/* copy of check_sticky in fs/namei.c()
413* It's inline, so penalty for filesystems that don't use sticky bit is
414* minimal.
415*/
416static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
417{
418 uid_t fsuid = current_fsuid();
419
420 if (!(dir->i_mode & S_ISVTX))
421 return 0;
422 if (inode->i_uid == fsuid)
423 return 0;
424 if (dir->i_uid == fsuid)
425 return 0;
426 return !capable(CAP_FOWNER);
427}
428
429/* copy of may_delete in fs/namei.c()
430 * Check whether we can remove a link victim from directory dir, check
431 * whether the type of victim is right.
432 * 1. We can't do it if dir is read-only (done in permission())
433 * 2. We should have write and exec permissions on dir
434 * 3. We can't remove anything from append-only dir
435 * 4. We can't do anything with immutable dir (done in permission())
436 * 5. If the sticky bit on dir is set we should either
437 * a. be owner of dir, or
438 * b. be owner of victim, or
439 * c. have CAP_FOWNER capability
440 * 6. If the victim is append-only or immutable we can't do antyhing with
441 * links pointing to it.
442 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
443 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
444 * 9. We can't remove a root or mountpoint.
445 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
446 * nfs_async_unlink().
447 */
448
449static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
450{
451 int error;
452
453 if (!victim->d_inode)
454 return -ENOENT;
455
456 BUG_ON(victim->d_parent->d_inode != dir);
457 audit_inode_child(victim, dir);
458
459 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
460 if (error)
461 return error;
462 if (IS_APPEND(dir))
463 return -EPERM;
464 if (btrfs_check_sticky(dir, victim->d_inode)||
465 IS_APPEND(victim->d_inode)||
466 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
467 return -EPERM;
468 if (isdir) {
469 if (!S_ISDIR(victim->d_inode->i_mode))
470 return -ENOTDIR;
471 if (IS_ROOT(victim))
472 return -EBUSY;
473 } else if (S_ISDIR(victim->d_inode->i_mode))
474 return -EISDIR;
475 if (IS_DEADDIR(dir))
476 return -ENOENT;
477 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
478 return -EBUSY;
479 return 0;
480}
481
398/* copy of may_create in fs/namei.c() */ 482/* copy of may_create in fs/namei.c() */
399static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 483static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
400{ 484{
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
412 */ 496 */
413static noinline int btrfs_mksubvol(struct path *parent, 497static noinline int btrfs_mksubvol(struct path *parent,
414 char *name, int namelen, 498 char *name, int namelen,
415 struct btrfs_root *snap_src) 499 struct btrfs_root *snap_src,
500 u64 *async_transid)
416{ 501{
417 struct inode *dir = parent->dentry->d_inode; 502 struct inode *dir = parent->dentry->d_inode;
418 struct dentry *dentry; 503 struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
443 goto out_up_read; 528 goto out_up_read;
444 529
445 if (snap_src) { 530 if (snap_src) {
446 error = create_snapshot(snap_src, dentry); 531 error = create_snapshot(snap_src, dentry,
532 name, namelen, async_transid);
447 } else { 533 } else {
448 error = create_subvol(BTRFS_I(dir)->root, dentry, 534 error = create_subvol(BTRFS_I(dir)->root, dentry,
449 name, namelen); 535 name, namelen, async_transid);
450 } 536 }
451 if (!error) 537 if (!error)
452 fsnotify_mkdir(dir, dentry); 538 fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
708 char *sizestr; 794 char *sizestr;
709 char *devstr = NULL; 795 char *devstr = NULL;
710 int ret = 0; 796 int ret = 0;
711 int namelen;
712 int mod = 0; 797 int mod = 0;
713 798
714 if (root->fs_info->sb->s_flags & MS_RDONLY) 799 if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
722 return PTR_ERR(vol_args); 807 return PTR_ERR(vol_args);
723 808
724 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 809 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
725 namelen = strlen(vol_args->name);
726 810
727 mutex_lock(&root->fs_info->volume_mutex); 811 mutex_lock(&root->fs_info->volume_mutex);
728 sizestr = vol_args->name; 812 sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
801 return ret; 885 return ret;
802} 886}
803 887
804static noinline int btrfs_ioctl_snap_create(struct file *file, 888static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
805 void __user *arg, int subvol) 889 char *name,
890 unsigned long fd,
891 int subvol,
892 u64 *transid)
806{ 893{
807 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 894 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
808 struct btrfs_ioctl_vol_args *vol_args;
809 struct file *src_file; 895 struct file *src_file;
810 int namelen; 896 int namelen;
811 int ret = 0; 897 int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
813 if (root->fs_info->sb->s_flags & MS_RDONLY) 899 if (root->fs_info->sb->s_flags & MS_RDONLY)
814 return -EROFS; 900 return -EROFS;
815 901
816 vol_args = memdup_user(arg, sizeof(*vol_args)); 902 namelen = strlen(name);
817 if (IS_ERR(vol_args)) 903 if (strchr(name, '/')) {
818 return PTR_ERR(vol_args);
819
820 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
821 namelen = strlen(vol_args->name);
822 if (strchr(vol_args->name, '/')) {
823 ret = -EINVAL; 904 ret = -EINVAL;
824 goto out; 905 goto out;
825 } 906 }
826 907
827 if (subvol) { 908 if (subvol) {
828 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 909 ret = btrfs_mksubvol(&file->f_path, name, namelen,
829 NULL); 910 NULL, transid);
830 } else { 911 } else {
831 struct inode *src_inode; 912 struct inode *src_inode;
832 src_file = fget(vol_args->fd); 913 src_file = fget(fd);
833 if (!src_file) { 914 if (!src_file) {
834 ret = -EINVAL; 915 ret = -EINVAL;
835 goto out; 916 goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
843 fput(src_file); 924 fput(src_file);
844 goto out; 925 goto out;
845 } 926 }
846 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 927 ret = btrfs_mksubvol(&file->f_path, name, namelen,
847 BTRFS_I(src_inode)->root); 928 BTRFS_I(src_inode)->root,
929 transid);
848 fput(src_file); 930 fput(src_file);
849 } 931 }
850out: 932out:
933 return ret;
934}
935
936static noinline int btrfs_ioctl_snap_create(struct file *file,
937 void __user *arg, int subvol,
938 int async)
939{
940 struct btrfs_ioctl_vol_args *vol_args = NULL;
941 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
942 char *name;
943 u64 fd;
944 u64 transid = 0;
945 int ret;
946
947 if (async) {
948 async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
949 if (IS_ERR(async_vol_args))
950 return PTR_ERR(async_vol_args);
951
952 name = async_vol_args->name;
953 fd = async_vol_args->fd;
954 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
955 } else {
956 vol_args = memdup_user(arg, sizeof(*vol_args));
957 if (IS_ERR(vol_args))
958 return PTR_ERR(vol_args);
959 name = vol_args->name;
960 fd = vol_args->fd;
961 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
962 }
963
964 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
965 subvol, &transid);
966
967 if (!ret && async) {
968 if (copy_to_user(arg +
969 offsetof(struct btrfs_ioctl_async_vol_args,
970 transid), &transid, sizeof(transid)))
971 return -EFAULT;
972 }
973
851 kfree(vol_args); 974 kfree(vol_args);
975 kfree(async_vol_args);
976
852 return ret; 977 return ret;
853} 978}
854 979
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
1073 if (!capable(CAP_SYS_ADMIN)) 1198 if (!capable(CAP_SYS_ADMIN))
1074 return -EPERM; 1199 return -EPERM;
1075 1200
1076 args = kmalloc(sizeof(*args), GFP_KERNEL); 1201 args = memdup_user(argp, sizeof(*args));
1077 if (!args) 1202 if (IS_ERR(args))
1078 return -ENOMEM; 1203 return PTR_ERR(args);
1079 1204
1080 if (copy_from_user(args, argp, sizeof(*args))) {
1081 kfree(args);
1082 return -EFAULT;
1083 }
1084 inode = fdentry(file)->d_inode; 1205 inode = fdentry(file)->d_inode;
1085 ret = search_ioctl(inode, args); 1206 ret = search_ioctl(inode, args);
1086 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1207 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1188 if (!capable(CAP_SYS_ADMIN)) 1309 if (!capable(CAP_SYS_ADMIN))
1189 return -EPERM; 1310 return -EPERM;
1190 1311
1191 args = kmalloc(sizeof(*args), GFP_KERNEL); 1312 args = memdup_user(argp, sizeof(*args));
1192 if (!args) 1313 if (IS_ERR(args))
1193 return -ENOMEM; 1314 return PTR_ERR(args);
1194 1315
1195 if (copy_from_user(args, argp, sizeof(*args))) {
1196 kfree(args);
1197 return -EFAULT;
1198 }
1199 inode = fdentry(file)->d_inode; 1316 inode = fdentry(file)->d_inode;
1200 1317
1201 if (args->treeid == 0) 1318 if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1227 int ret; 1344 int ret;
1228 int err = 0; 1345 int err = 0;
1229 1346
1230 if (!capable(CAP_SYS_ADMIN))
1231 return -EPERM;
1232
1233 vol_args = memdup_user(arg, sizeof(*vol_args)); 1347 vol_args = memdup_user(arg, sizeof(*vol_args));
1234 if (IS_ERR(vol_args)) 1348 if (IS_ERR(vol_args))
1235 return PTR_ERR(vol_args); 1349 return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1259 } 1373 }
1260 1374
1261 inode = dentry->d_inode; 1375 inode = dentry->d_inode;
1376 dest = BTRFS_I(inode)->root;
1377 if (!capable(CAP_SYS_ADMIN)){
1378 /*
1379 * Regular user. Only allow this with a special mount
1380 * option, when the user has write+exec access to the
1381 * subvol root, and when rmdir(2) would have been
1382 * allowed.
1383 *
1384 * Note that this is _not_ a check that the subvol is
1385 * empty or doesn't contain data that we wouldn't
1386 * otherwise be able to delete.
1387 *
1388 * Users who want to delete empty subvols should try
1389 * rmdir(2).
1390 */
1391 err = -EPERM;
1392 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
1393 goto out_dput;
1394
1395 /*
1396 * Do not allow deletion if the parent dir is the same
1397 * as the dir to be deleted. That means the ioctl
1398 * must be called on the dentry referencing the root
1399 * of the subvol, not a random directory contained
1400 * within it.
1401 */
1402 err = -EINVAL;
1403 if (root == dest)
1404 goto out_dput;
1405
1406 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
1407 if (err)
1408 goto out_dput;
1409
1410 /* check if subvolume may be deleted by a non-root user */
1411 err = btrfs_may_delete(dir, dentry, 1);
1412 if (err)
1413 goto out_dput;
1414 }
1415
1262 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1416 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
1263 err = -EINVAL; 1417 err = -EINVAL;
1264 goto out_dput; 1418 goto out_dput;
1265 } 1419 }
1266 1420
1267 dest = BTRFS_I(inode)->root;
1268
1269 mutex_lock(&inode->i_mutex); 1421 mutex_lock(&inode->i_mutex);
1270 err = d_invalidate(dentry); 1422 err = d_invalidate(dentry);
1271 if (err) 1423 if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1304 BUG_ON(ret); 1456 BUG_ON(ret);
1305 } 1457 }
1306 1458
1307 ret = btrfs_commit_transaction(trans, root); 1459 ret = btrfs_end_transaction(trans, root);
1308 BUG_ON(ret); 1460 BUG_ON(ret);
1309 inode->i_flags |= S_DEAD; 1461 inode->i_flags |= S_DEAD;
1310out_up_write: 1462out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1502 path->reada = 2; 1654 path->reada = 2;
1503 1655
1504 if (inode < src) { 1656 if (inode < src) {
1505 mutex_lock(&inode->i_mutex); 1657 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1506 mutex_lock(&src->i_mutex); 1658 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
1507 } else { 1659 } else {
1508 mutex_lock(&src->i_mutex); 1660 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
1509 mutex_lock(&inode->i_mutex); 1661 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1510 } 1662 }
1511 1663
1512 /* determine range to clone */ 1664 /* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1530 while (1) { 1682 while (1) {
1531 struct btrfs_ordered_extent *ordered; 1683 struct btrfs_ordered_extent *ordered;
1532 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1684 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1533 ordered = btrfs_lookup_first_ordered_extent(inode, off+len); 1685 ordered = btrfs_lookup_first_ordered_extent(src, off+len);
1534 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) 1686 if (!ordered &&
1687 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
1688 EXTENT_DELALLOC, 0, NULL))
1535 break; 1689 break;
1536 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1690 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1537 if (ordered) 1691 if (ordered)
1538 btrfs_put_ordered_extent(ordered); 1692 btrfs_put_ordered_extent(ordered);
1539 btrfs_wait_ordered_range(src, off, off+len); 1693 btrfs_wait_ordered_range(src, off, len);
1540 } 1694 }
1541 1695
1542 /* clone data */ 1696 /* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1605 } 1759 }
1606 btrfs_release_path(root, path); 1760 btrfs_release_path(root, path);
1607 1761
1608 if (key.offset + datal < off || 1762 if (key.offset + datal <= off ||
1609 key.offset >= off+len) 1763 key.offset >= off+len)
1610 goto next; 1764 goto next;
1611 1765
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1879 return 0; 2033 return 0;
1880} 2034}
1881 2035
2036static void get_block_group_info(struct list_head *groups_list,
2037 struct btrfs_ioctl_space_info *space)
2038{
2039 struct btrfs_block_group_cache *block_group;
2040
2041 space->total_bytes = 0;
2042 space->used_bytes = 0;
2043 space->flags = 0;
2044 list_for_each_entry(block_group, groups_list, list) {
2045 space->flags = block_group->flags;
2046 space->total_bytes += block_group->key.offset;
2047 space->used_bytes +=
2048 btrfs_block_group_used(&block_group->item);
2049 }
2050}
2051
1882long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2052long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1883{ 2053{
1884 struct btrfs_ioctl_space_args space_args; 2054 struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1887 struct btrfs_ioctl_space_info *dest_orig; 2057 struct btrfs_ioctl_space_info *dest_orig;
1888 struct btrfs_ioctl_space_info *user_dest; 2058 struct btrfs_ioctl_space_info *user_dest;
1889 struct btrfs_space_info *info; 2059 struct btrfs_space_info *info;
2060 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2061 BTRFS_BLOCK_GROUP_SYSTEM,
2062 BTRFS_BLOCK_GROUP_METADATA,
2063 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2064 int num_types = 4;
1890 int alloc_size; 2065 int alloc_size;
1891 int ret = 0; 2066 int ret = 0;
1892 int slot_count = 0; 2067 int slot_count = 0;
2068 int i, c;
1893 2069
1894 if (copy_from_user(&space_args, 2070 if (copy_from_user(&space_args,
1895 (struct btrfs_ioctl_space_args __user *)arg, 2071 (struct btrfs_ioctl_space_args __user *)arg,
1896 sizeof(space_args))) 2072 sizeof(space_args)))
1897 return -EFAULT; 2073 return -EFAULT;
1898 2074
1899 /* first we count slots */ 2075 for (i = 0; i < num_types; i++) {
1900 rcu_read_lock(); 2076 struct btrfs_space_info *tmp;
1901 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) 2077
1902 slot_count++; 2078 info = NULL;
1903 rcu_read_unlock(); 2079 rcu_read_lock();
2080 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2081 list) {
2082 if (tmp->flags == types[i]) {
2083 info = tmp;
2084 break;
2085 }
2086 }
2087 rcu_read_unlock();
2088
2089 if (!info)
2090 continue;
2091
2092 down_read(&info->groups_sem);
2093 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2094 if (!list_empty(&info->block_groups[c]))
2095 slot_count++;
2096 }
2097 up_read(&info->groups_sem);
2098 }
1904 2099
1905 /* space_slots == 0 means they are asking for a count */ 2100 /* space_slots == 0 means they are asking for a count */
1906 if (space_args.space_slots == 0) { 2101 if (space_args.space_slots == 0) {
1907 space_args.total_spaces = slot_count; 2102 space_args.total_spaces = slot_count;
1908 goto out; 2103 goto out;
1909 } 2104 }
2105
2106 slot_count = min_t(int, space_args.space_slots, slot_count);
2107
1910 alloc_size = sizeof(*dest) * slot_count; 2108 alloc_size = sizeof(*dest) * slot_count;
2109
1911 /* we generally have at most 6 or so space infos, one for each raid 2110 /* we generally have at most 6 or so space infos, one for each raid
1912 * level. So, a whole page should be more than enough for everyone 2111 * level. So, a whole page should be more than enough for everyone
1913 */ 2112 */
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1921 dest_orig = dest; 2120 dest_orig = dest;
1922 2121
1923 /* now we have a buffer to copy into */ 2122 /* now we have a buffer to copy into */
1924 rcu_read_lock(); 2123 for (i = 0; i < num_types; i++) {
1925 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { 2124 struct btrfs_space_info *tmp;
1926 /* make sure we don't copy more than we allocated 2125
1927 * in our buffer 2126 info = NULL;
1928 */ 2127 rcu_read_lock();
1929 if (slot_count == 0) 2128 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
1930 break; 2129 list) {
1931 slot_count--; 2130 if (tmp->flags == types[i]) {
1932 2131 info = tmp;
1933 /* make sure userland has enough room in their buffer */ 2132 break;
1934 if (space_args.total_spaces >= space_args.space_slots) 2133 }
1935 break; 2134 }
2135 rcu_read_unlock();
1936 2136
1937 space.flags = info->flags; 2137 if (!info)
1938 space.total_bytes = info->total_bytes; 2138 continue;
1939 space.used_bytes = info->bytes_used; 2139 down_read(&info->groups_sem);
1940 memcpy(dest, &space, sizeof(space)); 2140 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
1941 dest++; 2141 if (!list_empty(&info->block_groups[c])) {
1942 space_args.total_spaces++; 2142 get_block_group_info(&info->block_groups[c],
2143 &space);
2144 memcpy(dest, &space, sizeof(space));
2145 dest++;
2146 space_args.total_spaces++;
2147 }
2148 }
2149 up_read(&info->groups_sem);
1943 } 2150 }
1944 rcu_read_unlock();
1945 2151
1946 user_dest = (struct btrfs_ioctl_space_info *) 2152 user_dest = (struct btrfs_ioctl_space_info *)
1947 (arg + sizeof(struct btrfs_ioctl_space_args)); 2153 (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
1984 return 0; 2190 return 0;
1985} 2191}
1986 2192
2193static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
2194{
2195 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2196 struct btrfs_trans_handle *trans;
2197 u64 transid;
2198
2199 trans = btrfs_start_transaction(root, 0);
2200 transid = trans->transid;
2201 btrfs_commit_transaction_async(trans, root, 0);
2202
2203 if (argp)
2204 if (copy_to_user(argp, &transid, sizeof(transid)))
2205 return -EFAULT;
2206 return 0;
2207}
2208
2209static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2210{
2211 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2212 u64 transid;
2213
2214 if (argp) {
2215 if (copy_from_user(&transid, argp, sizeof(transid)))
2216 return -EFAULT;
2217 } else {
2218 transid = 0; /* current trans */
2219 }
2220 return btrfs_wait_for_commit(root, transid);
2221}
2222
1987long btrfs_ioctl(struct file *file, unsigned int 2223long btrfs_ioctl(struct file *file, unsigned int
1988 cmd, unsigned long arg) 2224 cmd, unsigned long arg)
1989{ 2225{
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
1998 case FS_IOC_GETVERSION: 2234 case FS_IOC_GETVERSION:
1999 return btrfs_ioctl_getversion(file, argp); 2235 return btrfs_ioctl_getversion(file, argp);
2000 case BTRFS_IOC_SNAP_CREATE: 2236 case BTRFS_IOC_SNAP_CREATE:
2001 return btrfs_ioctl_snap_create(file, argp, 0); 2237 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2238 case BTRFS_IOC_SNAP_CREATE_ASYNC:
2239 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2002 case BTRFS_IOC_SUBVOL_CREATE: 2240 case BTRFS_IOC_SUBVOL_CREATE:
2003 return btrfs_ioctl_snap_create(file, argp, 1); 2241 return btrfs_ioctl_snap_create(file, argp, 1, 0);
2004 case BTRFS_IOC_SNAP_DESTROY: 2242 case BTRFS_IOC_SNAP_DESTROY:
2005 return btrfs_ioctl_snap_destroy(file, argp); 2243 return btrfs_ioctl_snap_destroy(file, argp);
2006 case BTRFS_IOC_DEFAULT_SUBVOL: 2244 case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2034 case BTRFS_IOC_SYNC: 2272 case BTRFS_IOC_SYNC:
2035 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2273 btrfs_sync_fs(file->f_dentry->d_sb, 1);
2036 return 0; 2274 return 0;
2275 case BTRFS_IOC_START_SYNC:
2276 return btrfs_ioctl_start_sync(file, argp);
2277 case BTRFS_IOC_WAIT_SYNC:
2278 return btrfs_ioctl_wait_sync(file, argp);
2037 } 2279 }
2038 2280
2039 return -ENOTTY; 2281 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517f..17c99ebdf960 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
22 22
23#define BTRFS_IOCTL_MAGIC 0x94 23#define BTRFS_IOCTL_MAGIC 0x94
24#define BTRFS_VOL_NAME_MAX 255 24#define BTRFS_VOL_NAME_MAX 255
25#define BTRFS_PATH_NAME_MAX 4087
26 25
27/* this should be 4k */ 26/* this should be 4k */
27#define BTRFS_PATH_NAME_MAX 4087
28struct btrfs_ioctl_vol_args { 28struct btrfs_ioctl_vol_args {
29 __s64 fd; 29 __s64 fd;
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079
34struct btrfs_ioctl_async_vol_args {
35 __s64 fd;
36 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
38};
39
33#define BTRFS_INO_LOOKUP_PATH_MAX 4080 40#define BTRFS_INO_LOOKUP_PATH_MAX 4080
34struct btrfs_ioctl_ino_lookup_args { 41struct btrfs_ioctl_ino_lookup_args {
35 __u64 treeid; 42 __u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
178#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 185#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
179#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 186#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
180 struct btrfs_ioctl_space_args) 187 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args)
181#endif 192#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5add..f4621f6deca1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
526{ 526{
527 u64 end; 527 u64 end;
528 u64 orig_end; 528 u64 orig_end;
529 u64 wait_end;
530 struct btrfs_ordered_extent *ordered; 529 struct btrfs_ordered_extent *ordered;
531 int found; 530 int found;
532 531
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
537 if (orig_end > INT_LIMIT(loff_t)) 536 if (orig_end > INT_LIMIT(loff_t))
538 orig_end = INT_LIMIT(loff_t); 537 orig_end = INT_LIMIT(loff_t);
539 } 538 }
540 wait_end = orig_end;
541again: 539again:
542 /* start IO across the range first to instantiate any delalloc 540 /* start IO across the range first to instantiate any delalloc
543 * extents 541 * extents
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..045c9c2b2d7e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
29#include "locking.h" 29#include "locking.h"
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h"
32 33
33/* 34/*
34 * backref_node, mapping_node and tree_block start with this 35 * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
178 u64 search_start; 179 u64 search_start;
179 u64 extents_found; 180 u64 extents_found;
180 181
181 int block_rsv_retries;
182
183 unsigned int stage:8; 182 unsigned int stage:8;
184 unsigned int create_reloc_tree:1; 183 unsigned int create_reloc_tree:1;
185 unsigned int merge_reloc_tree:1; 184 unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2133 LIST_HEAD(reloc_roots); 2132 LIST_HEAD(reloc_roots);
2134 u64 num_bytes = 0; 2133 u64 num_bytes = 0;
2135 int ret; 2134 int ret;
2136 int retries = 0;
2137 2135
2138 mutex_lock(&root->fs_info->trans_mutex); 2136 mutex_lock(&root->fs_info->trans_mutex);
2139 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2137 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
2143 if (!err) { 2141 if (!err) {
2144 num_bytes = rc->merging_rsv_size; 2142 num_bytes = rc->merging_rsv_size;
2145 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2143 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
2146 num_bytes, &retries); 2144 num_bytes);
2147 if (ret) 2145 if (ret)
2148 err = ret; 2146 err = ret;
2149 } 2147 }
@@ -2155,7 +2153,6 @@ again:
2155 btrfs_end_transaction(trans, rc->extent_root); 2153 btrfs_end_transaction(trans, rc->extent_root);
2156 btrfs_block_rsv_release(rc->extent_root, 2154 btrfs_block_rsv_release(rc->extent_root,
2157 rc->block_rsv, num_bytes); 2155 rc->block_rsv, num_bytes);
2158 retries = 0;
2159 goto again; 2156 goto again;
2160 } 2157 }
2161 } 2158 }
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2405 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2402 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2406 2403
2407 trans->block_rsv = rc->block_rsv; 2404 trans->block_rsv = rc->block_rsv;
2408 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, 2405 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
2409 &rc->block_rsv_retries);
2410 if (ret) { 2406 if (ret) {
2411 if (ret == -EAGAIN) 2407 if (ret == -EAGAIN)
2412 rc->commit_transaction = 1; 2408 rc->commit_transaction = 1;
2413 return ret; 2409 return ret;
2414 } 2410 }
2415 2411
2416 rc->block_rsv_retries = 0;
2417 return 0; 2412 return 0;
2418} 2413}
2419 2414
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
3099 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3094 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3100 ret = get_ref_objectid_v0(rc, path, extent_key, 3095 ret = get_ref_objectid_v0(rc, path, extent_key,
3101 &ref_owner, NULL); 3096 &ref_owner, NULL);
3097 if (ret < 0)
3098 return ret;
3102 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3099 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3103 level = (int)ref_owner; 3100 level = (int)ref_owner;
3104 /* FIXME: get real generation */ 3101 /* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
3191 return ret; 3188 return ret;
3192} 3189}
3193 3190
3191static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3192 struct inode *inode, u64 ino)
3193{
3194 struct btrfs_key key;
3195 struct btrfs_path *path;
3196 struct btrfs_root *root = fs_info->tree_root;
3197 struct btrfs_trans_handle *trans;
3198 unsigned long nr;
3199 int ret = 0;
3200
3201 if (inode)
3202 goto truncate;
3203
3204 key.objectid = ino;
3205 key.type = BTRFS_INODE_ITEM_KEY;
3206 key.offset = 0;
3207
3208 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3209 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
3210 if (inode && !IS_ERR(inode))
3211 iput(inode);
3212 return -ENOENT;
3213 }
3214
3215truncate:
3216 path = btrfs_alloc_path();
3217 if (!path) {
3218 ret = -ENOMEM;
3219 goto out;
3220 }
3221
3222 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) {
3224 btrfs_free_path(path);
3225 goto out;
3226 }
3227
3228 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3229
3230 btrfs_free_path(path);
3231 nr = trans->blocks_used;
3232 btrfs_end_transaction(trans, root);
3233 btrfs_btree_balance_dirty(root, nr);
3234out:
3235 iput(inode);
3236 return ret;
3237}
3238
3194/* 3239/*
3195 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3240 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3196 * this function scans fs tree to find blocks reference the data extent 3241 * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
3217 int counted; 3262 int counted;
3218 int ret; 3263 int ret;
3219 3264
3220 path = btrfs_alloc_path();
3221 if (!path)
3222 return -ENOMEM;
3223
3224 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3265 ref_root = btrfs_extent_data_ref_root(leaf, ref);
3225 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3266 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3226 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3267 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3227 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3268 ref_count = btrfs_extent_data_ref_count(leaf, ref);
3228 3269
3270 /*
3271 * This is an extent belonging to the free space cache, lets just delete
3272 * it and redo the search.
3273 */
3274 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3275 ret = delete_block_group_cache(rc->extent_root->fs_info,
3276 NULL, ref_objectid);
3277 if (ret != -ENOENT)
3278 return ret;
3279 ret = 0;
3280 }
3281
3282 path = btrfs_alloc_path();
3283 if (!path)
3284 return -ENOMEM;
3285
3229 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3286 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3230 if (IS_ERR(root)) { 3287 if (IS_ERR(root)) {
3231 err = PTR_ERR(root); 3288 err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3554 * is no reservation in transaction handle. 3611 * is no reservation in transaction handle.
3555 */ 3612 */
3556 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3613 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
3557 rc->extent_root->nodesize * 256, 3614 rc->extent_root->nodesize * 256);
3558 &rc->block_rsv_retries);
3559 if (ret) 3615 if (ret)
3560 return ret; 3616 return ret;
3561 3617
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
3567 rc->extents_found = 0; 3623 rc->extents_found = 0;
3568 rc->nodes_relocated = 0; 3624 rc->nodes_relocated = 0;
3569 rc->merging_rsv_size = 0; 3625 rc->merging_rsv_size = 0;
3570 rc->block_rsv_retries = 0;
3571 3626
3572 rc->create_reloc_tree = 1; 3627 rc->create_reloc_tree = 1;
3573 set_reloc_control(rc); 3628 set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3860{ 3915{
3861 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3916 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3862 struct reloc_control *rc; 3917 struct reloc_control *rc;
3918 struct inode *inode;
3919 struct btrfs_path *path;
3863 int ret; 3920 int ret;
3864 int rw = 0; 3921 int rw = 0;
3865 int err = 0; 3922 int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3882 rw = 1; 3939 rw = 1;
3883 } 3940 }
3884 3941
3942 path = btrfs_alloc_path();
3943 if (!path) {
3944 err = -ENOMEM;
3945 goto out;
3946 }
3947
3948 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
3949 path);
3950 btrfs_free_path(path);
3951
3952 if (!IS_ERR(inode))
3953 ret = delete_block_group_cache(fs_info, inode, 0);
3954 else
3955 ret = PTR_ERR(inode);
3956
3957 if (ret && ret != -ENOENT) {
3958 err = ret;
3959 goto out;
3960 }
3961
3885 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3962 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3886 if (IS_ERR(rc->data_inode)) { 3963 if (IS_ERR(rc->data_inode)) {
3887 err = PTR_ERR(rc->data_inode); 3964 err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4143 btrfs_add_ordered_sum(inode, ordered, sums); 4220 btrfs_add_ordered_sum(inode, ordered, sums);
4144 } 4221 }
4145 btrfs_put_ordered_extent(ordered); 4222 btrfs_put_ordered_extent(ordered);
4146 return 0; 4223 return ret;
4147} 4224}
4148 4225
4149void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4226void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c8..6a1086e83ffc 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) 181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
182{ 182{
183 struct btrfs_root *dead_root; 183 struct btrfs_root *dead_root;
184 struct btrfs_item *item;
185 struct btrfs_root_item *ri; 184 struct btrfs_root_item *ri;
186 struct btrfs_key key; 185 struct btrfs_key key;
187 struct btrfs_key found_key; 186 struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
214 nritems = btrfs_header_nritems(leaf); 213 nritems = btrfs_header_nritems(leaf);
215 slot = path->slots[0]; 214 slot = path->slots[0];
216 } 215 }
217 item = btrfs_item_nr(leaf, slot);
218 btrfs_item_key_to_cpu(leaf, &key, slot); 216 btrfs_item_key_to_cpu(leaf, &key, slot);
219 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) 217 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
220 goto next; 218 goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f5..8299a25ffc8f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
61 61
62 ret = close_ctree(root); 62 ret = close_ctree(root);
63 sb->s_fs_info = NULL; 63 sb->s_fs_info = NULL;
64
65 (void)ret; /* FIXME: need to fix VFS to return error? */
64} 66}
65 67
66enum { 68enum {
@@ -68,7 +70,8 @@ enum {
68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 72 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
71 Opt_discard, Opt_err, 73 Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
74 Opt_user_subvol_rm_allowed,
72}; 75};
73 76
74static match_table_t tokens = { 77static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 95 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 96 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 97 {Opt_discard, "discard"},
98 {Opt_space_cache, "space_cache"},
99 {Opt_clear_cache, "clear_cache"},
100 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
95 {Opt_err, NULL}, 101 {Opt_err, NULL},
96}; 102};
97 103
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
235 case Opt_discard: 241 case Opt_discard:
236 btrfs_set_opt(info->mount_opt, DISCARD); 242 btrfs_set_opt(info->mount_opt, DISCARD);
237 break; 243 break;
244 case Opt_space_cache:
245 printk(KERN_INFO "btrfs: enabling disk space caching\n");
246 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
247 case Opt_clear_cache:
248 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
249 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
250 break;
251 case Opt_user_subvol_rm_allowed:
252 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
253 break;
238 case Opt_err: 254 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option " 255 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p); 256 "'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
380find_root: 396find_root:
381 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 397 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
382 if (IS_ERR(new_root)) 398 if (IS_ERR(new_root))
383 return ERR_PTR(PTR_ERR(new_root)); 399 return ERR_CAST(new_root);
384 400
385 if (btrfs_root_refs(&new_root->root_item) == 0) 401 if (btrfs_root_refs(&new_root->root_item) == 0)
386 return ERR_PTR(-ENOENT); 402 return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
436{ 452{
437 struct inode *inode; 453 struct inode *inode;
438 struct dentry *root_dentry; 454 struct dentry *root_dentry;
439 struct btrfs_super_block *disk_super;
440 struct btrfs_root *tree_root; 455 struct btrfs_root *tree_root;
441 struct btrfs_key key; 456 struct btrfs_key key;
442 int err; 457 int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
458 return PTR_ERR(tree_root); 473 return PTR_ERR(tree_root);
459 } 474 }
460 sb->s_fs_info = tree_root; 475 sb->s_fs_info = tree_root;
461 disk_super = &tree_root->fs_info->super_copy;
462 476
463 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 477 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
464 key.type = BTRFS_INODE_ITEM_KEY; 478 key.type = BTRFS_INODE_ITEM_KEY;
@@ -560,8 +574,8 @@ static int btrfs_test_super(struct super_block *s, void *data)
560 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 574 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
561 * for multiple device setup. Make sure to keep it in sync. 575 * for multiple device setup. Make sure to keep it in sync.
562 */ 576 */
563static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 577static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
564 const char *dev_name, void *data, struct vfsmount *mnt) 578 const char *dev_name, void *data)
565{ 579{
566 struct block_device *bdev = NULL; 580 struct block_device *bdev = NULL;
567 struct super_block *s; 581 struct super_block *s;
@@ -571,7 +585,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
571 char *subvol_name = NULL; 585 char *subvol_name = NULL;
572 u64 subvol_objectid = 0; 586 u64 subvol_objectid = 0;
573 int error = 0; 587 int error = 0;
574 int found = 0;
575 588
576 if (!(flags & MS_RDONLY)) 589 if (!(flags & MS_RDONLY))
577 mode |= FMODE_WRITE; 590 mode |= FMODE_WRITE;
@@ -580,7 +593,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
580 &subvol_name, &subvol_objectid, 593 &subvol_name, &subvol_objectid,
581 &fs_devices); 594 &fs_devices);
582 if (error) 595 if (error)
583 return error; 596 return ERR_PTR(error);
584 597
585 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 598 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
586 if (error) 599 if (error)
@@ -607,7 +620,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
607 goto error_close_devices; 620 goto error_close_devices;
608 } 621 }
609 622
610 found = 1;
611 btrfs_close_devices(fs_devices); 623 btrfs_close_devices(fs_devices);
612 } else { 624 } else {
613 char b[BDEVNAME_SIZE]; 625 char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
629 if (IS_ERR(root)) { 641 if (IS_ERR(root)) {
630 error = PTR_ERR(root); 642 error = PTR_ERR(root);
631 deactivate_locked_super(s); 643 deactivate_locked_super(s);
632 goto error; 644 goto error_free_subvol_name;
633 } 645 }
634 /* if they gave us a subvolume name bind mount into that */ 646 /* if they gave us a subvolume name bind mount into that */
635 if (strcmp(subvol_name, ".")) { 647 if (strcmp(subvol_name, ".")) {
@@ -643,24 +655,21 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
643 deactivate_locked_super(s); 655 deactivate_locked_super(s);
644 error = PTR_ERR(new_root); 656 error = PTR_ERR(new_root);
645 dput(root); 657 dput(root);
646 goto error_close_devices; 658 goto error_free_subvol_name;
647 } 659 }
648 if (!new_root->d_inode) { 660 if (!new_root->d_inode) {
649 dput(root); 661 dput(root);
650 dput(new_root); 662 dput(new_root);
651 deactivate_locked_super(s); 663 deactivate_locked_super(s);
652 error = -ENXIO; 664 error = -ENXIO;
653 goto error_close_devices; 665 goto error_free_subvol_name;
654 } 666 }
655 dput(root); 667 dput(root);
656 root = new_root; 668 root = new_root;
657 } 669 }
658 670
659 mnt->mnt_sb = s;
660 mnt->mnt_root = root;
661
662 kfree(subvol_name); 671 kfree(subvol_name);
663 return 0; 672 return root;
664 673
665error_s: 674error_s:
666 error = PTR_ERR(s); 675 error = PTR_ERR(s);
@@ -668,8 +677,7 @@ error_close_devices:
668 btrfs_close_devices(fs_devices); 677 btrfs_close_devices(fs_devices);
669error_free_subvol_name: 678error_free_subvol_name:
670 kfree(subvol_name); 679 kfree(subvol_name);
671error: 680 return ERR_PTR(error);
672 return error;
673} 681}
674 682
675static int btrfs_remount(struct super_block *sb, int *flags, char *data) 683static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
716 struct list_head *head = &root->fs_info->space_info; 724 struct list_head *head = &root->fs_info->space_info;
717 struct btrfs_space_info *found; 725 struct btrfs_space_info *found;
718 u64 total_used = 0; 726 u64 total_used = 0;
727 u64 total_used_data = 0;
719 int bits = dentry->d_sb->s_blocksize_bits; 728 int bits = dentry->d_sb->s_blocksize_bits;
720 __be32 *fsid = (__be32 *)root->fs_info->fsid; 729 __be32 *fsid = (__be32 *)root->fs_info->fsid;
721 730
722 rcu_read_lock(); 731 rcu_read_lock();
723 list_for_each_entry_rcu(found, head, list) 732 list_for_each_entry_rcu(found, head, list) {
733 if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
734 BTRFS_BLOCK_GROUP_SYSTEM))
735 total_used_data += found->disk_total;
736 else
737 total_used_data += found->disk_used;
724 total_used += found->disk_used; 738 total_used += found->disk_used;
739 }
725 rcu_read_unlock(); 740 rcu_read_unlock();
726 741
727 buf->f_namelen = BTRFS_NAME_LEN; 742 buf->f_namelen = BTRFS_NAME_LEN;
728 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 743 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
729 buf->f_bfree = buf->f_blocks - (total_used >> bits); 744 buf->f_bfree = buf->f_blocks - (total_used >> bits);
730 buf->f_bavail = buf->f_bfree; 745 buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
731 buf->f_bsize = dentry->d_sb->s_blocksize; 746 buf->f_bsize = dentry->d_sb->s_blocksize;
732 buf->f_type = BTRFS_SUPER_MAGIC; 747 buf->f_type = BTRFS_SUPER_MAGIC;
733 748
@@ -746,7 +761,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
746static struct file_system_type btrfs_fs_type = { 761static struct file_system_type btrfs_fs_type = {
747 .owner = THIS_MODULE, 762 .owner = THIS_MODULE,
748 .name = "btrfs", 763 .name = "btrfs",
749 .get_sb = btrfs_get_sb, 764 .mount = btrfs_mount,
750 .kill_sb = kill_anon_super, 765 .kill_sb = kill_anon_super,
751 .fs_flags = FS_REQUIRES_DEV, 766 .fs_flags = FS_REQUIRES_DEV,
752}; 767};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..1fffbc017bdf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
163 TRANS_START, 163 TRANS_START,
164 TRANS_JOIN, 164 TRANS_JOIN,
165 TRANS_USERSPACE, 165 TRANS_USERSPACE,
166 TRANS_JOIN_NOLOCK,
166}; 167};
167 168
168static int may_wait_transaction(struct btrfs_root *root, int type) 169static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
179{ 180{
180 struct btrfs_trans_handle *h; 181 struct btrfs_trans_handle *h;
181 struct btrfs_transaction *cur_trans; 182 struct btrfs_transaction *cur_trans;
182 int retries = 0;
183 int ret; 183 int ret;
184again: 184again:
185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
186 if (!h) 186 if (!h)
187 return ERR_PTR(-ENOMEM); 187 return ERR_PTR(-ENOMEM);
188 188
189 mutex_lock(&root->fs_info->trans_mutex); 189 if (type != TRANS_JOIN_NOLOCK)
190 mutex_lock(&root->fs_info->trans_mutex);
190 if (may_wait_transaction(root, type)) 191 if (may_wait_transaction(root, type))
191 wait_current_trans(root); 192 wait_current_trans(root);
192 193
@@ -195,7 +196,8 @@ again:
195 196
196 cur_trans = root->fs_info->running_transaction; 197 cur_trans = root->fs_info->running_transaction;
197 cur_trans->use_count++; 198 cur_trans->use_count++;
198 mutex_unlock(&root->fs_info->trans_mutex); 199 if (type != TRANS_JOIN_NOLOCK)
200 mutex_unlock(&root->fs_info->trans_mutex);
199 201
200 h->transid = cur_trans->transid; 202 h->transid = cur_trans->transid;
201 h->transaction = cur_trans; 203 h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
212 } 214 }
213 215
214 if (num_items > 0) { 216 if (num_items > 0) {
215 ret = btrfs_trans_reserve_metadata(h, root, num_items, 217 ret = btrfs_trans_reserve_metadata(h, root, num_items);
216 &retries);
217 if (ret == -EAGAIN) { 218 if (ret == -EAGAIN) {
218 btrfs_commit_transaction(h, root); 219 btrfs_commit_transaction(h, root);
219 goto again; 220 goto again;
@@ -224,9 +225,11 @@ again:
224 } 225 }
225 } 226 }
226 227
227 mutex_lock(&root->fs_info->trans_mutex); 228 if (type != TRANS_JOIN_NOLOCK)
229 mutex_lock(&root->fs_info->trans_mutex);
228 record_root_in_trans(h, root); 230 record_root_in_trans(h, root);
229 mutex_unlock(&root->fs_info->trans_mutex); 231 if (type != TRANS_JOIN_NOLOCK)
232 mutex_unlock(&root->fs_info->trans_mutex);
230 233
231 if (!current->journal_info && type != TRANS_USERSPACE) 234 if (!current->journal_info && type != TRANS_USERSPACE)
232 current->journal_info = h; 235 current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
244 return start_transaction(root, 0, TRANS_JOIN); 247 return start_transaction(root, 0, TRANS_JOIN);
245} 248}
246 249
250struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
251 int num_blocks)
252{
253 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
254}
255
247struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 256struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
248 int num_blocks) 257 int num_blocks)
249{ 258{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
270 return 0; 279 return 0;
271} 280}
272 281
282int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
283{
284 struct btrfs_transaction *cur_trans = NULL, *t;
285 int ret;
286
287 mutex_lock(&root->fs_info->trans_mutex);
288
289 ret = 0;
290 if (transid) {
291 if (transid <= root->fs_info->last_trans_committed)
292 goto out_unlock;
293
294 /* find specified transaction */
295 list_for_each_entry(t, &root->fs_info->trans_list, list) {
296 if (t->transid == transid) {
297 cur_trans = t;
298 break;
299 }
300 if (t->transid > transid)
301 break;
302 }
303 ret = -EINVAL;
304 if (!cur_trans)
305 goto out_unlock; /* bad transid */
306 } else {
307 /* find newest transaction that is committing | committed */
308 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
309 list) {
310 if (t->in_commit) {
311 if (t->commit_done)
312 goto out_unlock;
313 cur_trans = t;
314 break;
315 }
316 }
317 if (!cur_trans)
318 goto out_unlock; /* nothing committing|committed */
319 }
320
321 cur_trans->use_count++;
322 mutex_unlock(&root->fs_info->trans_mutex);
323
324 wait_for_commit(root, cur_trans);
325
326 mutex_lock(&root->fs_info->trans_mutex);
327 put_transaction(cur_trans);
328 ret = 0;
329out_unlock:
330 mutex_unlock(&root->fs_info->trans_mutex);
331 return ret;
332}
333
273#if 0 334#if 0
274/* 335/*
275 * rate limit against the drop_snapshot code. This helps to slow down new 336 * rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
348} 409}
349 410
350static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 411static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
351 struct btrfs_root *root, int throttle) 412 struct btrfs_root *root, int throttle, int lock)
352{ 413{
353 struct btrfs_transaction *cur_trans = trans->transaction; 414 struct btrfs_transaction *cur_trans = trans->transaction;
354 struct btrfs_fs_info *info = root->fs_info; 415 struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
376 437
377 btrfs_trans_release_metadata(trans, root); 438 btrfs_trans_release_metadata(trans, root);
378 439
379 if (!root->fs_info->open_ioctl_trans && 440 if (lock && !root->fs_info->open_ioctl_trans &&
380 should_end_transaction(trans, root)) 441 should_end_transaction(trans, root))
381 trans->transaction->blocked = 1; 442 trans->transaction->blocked = 1;
382 443
383 if (cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
384 if (throttle) 445 if (throttle)
385 return btrfs_commit_transaction(trans, root); 446 return btrfs_commit_transaction(trans, root);
386 else 447 else
387 wake_up_process(info->transaction_kthread); 448 wake_up_process(info->transaction_kthread);
388 } 449 }
389 450
390 mutex_lock(&info->trans_mutex); 451 if (lock)
452 mutex_lock(&info->trans_mutex);
391 WARN_ON(cur_trans != info->running_transaction); 453 WARN_ON(cur_trans != info->running_transaction);
392 WARN_ON(cur_trans->num_writers < 1); 454 WARN_ON(cur_trans->num_writers < 1);
393 cur_trans->num_writers--; 455 cur_trans->num_writers--;
394 456
457 smp_mb();
395 if (waitqueue_active(&cur_trans->writer_wait)) 458 if (waitqueue_active(&cur_trans->writer_wait))
396 wake_up(&cur_trans->writer_wait); 459 wake_up(&cur_trans->writer_wait);
397 put_transaction(cur_trans); 460 put_transaction(cur_trans);
398 mutex_unlock(&info->trans_mutex); 461 if (lock)
462 mutex_unlock(&info->trans_mutex);
399 463
400 if (current->journal_info == trans) 464 if (current->journal_info == trans)
401 current->journal_info = NULL; 465 current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
411int btrfs_end_transaction(struct btrfs_trans_handle *trans, 475int btrfs_end_transaction(struct btrfs_trans_handle *trans,
412 struct btrfs_root *root) 476 struct btrfs_root *root)
413{ 477{
414 return __btrfs_end_transaction(trans, root, 0); 478 return __btrfs_end_transaction(trans, root, 0, 1);
415} 479}
416 480
417int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 481int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root) 482 struct btrfs_root *root)
419{ 483{
420 return __btrfs_end_transaction(trans, root, 1); 484 return __btrfs_end_transaction(trans, root, 1, 1);
485}
486
487int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root)
489{
490 return __btrfs_end_transaction(trans, root, 0, 0);
421} 491}
422 492
423/* 493/*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
836 struct extent_buffer *tmp; 906 struct extent_buffer *tmp;
837 struct extent_buffer *old; 907 struct extent_buffer *old;
838 int ret; 908 int ret;
839 int retries = 0;
840 u64 to_reserve = 0; 909 u64 to_reserve = 0;
841 u64 index = 0; 910 u64 index = 0;
842 u64 objectid; 911 u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
858 927
859 if (to_reserve > 0) { 928 if (to_reserve > 0) {
860 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 929 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
861 to_reserve, &retries); 930 to_reserve);
862 if (ret) { 931 if (ret) {
863 pending->error = ret; 932 pending->error = ret;
864 goto fail; 933 goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
966 super->root = root_item->bytenr; 1035 super->root = root_item->bytenr;
967 super->generation = root_item->generation; 1036 super->generation = root_item->generation;
968 super->root_level = root_item->level; 1037 super->root_level = root_item->level;
1038 if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1039 super->cache_generation = root_item->generation;
969} 1040}
970 1041
971int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1042int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
988 return ret; 1059 return ret;
989} 1060}
990 1061
1062/*
1063 * wait for the current transaction commit to start and block subsequent
1064 * transaction joins
1065 */
1066static void wait_current_trans_commit_start(struct btrfs_root *root,
1067 struct btrfs_transaction *trans)
1068{
1069 DEFINE_WAIT(wait);
1070
1071 if (trans->in_commit)
1072 return;
1073
1074 while (1) {
1075 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1076 TASK_UNINTERRUPTIBLE);
1077 if (trans->in_commit) {
1078 finish_wait(&root->fs_info->transaction_blocked_wait,
1079 &wait);
1080 break;
1081 }
1082 mutex_unlock(&root->fs_info->trans_mutex);
1083 schedule();
1084 mutex_lock(&root->fs_info->trans_mutex);
1085 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1086 }
1087}
1088
1089/*
1090 * wait for the current transaction to start and then become unblocked.
1091 * caller holds ref.
1092 */
1093static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1094 struct btrfs_transaction *trans)
1095{
1096 DEFINE_WAIT(wait);
1097
1098 if (trans->commit_done || (trans->in_commit && !trans->blocked))
1099 return;
1100
1101 while (1) {
1102 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1103 TASK_UNINTERRUPTIBLE);
1104 if (trans->commit_done ||
1105 (trans->in_commit && !trans->blocked)) {
1106 finish_wait(&root->fs_info->transaction_wait,
1107 &wait);
1108 break;
1109 }
1110 mutex_unlock(&root->fs_info->trans_mutex);
1111 schedule();
1112 mutex_lock(&root->fs_info->trans_mutex);
1113 finish_wait(&root->fs_info->transaction_wait,
1114 &wait);
1115 }
1116}
1117
1118/*
1119 * commit transactions asynchronously. once btrfs_commit_transaction_async
1120 * returns, any subsequent transaction will not be allowed to join.
1121 */
1122struct btrfs_async_commit {
1123 struct btrfs_trans_handle *newtrans;
1124 struct btrfs_root *root;
1125 struct delayed_work work;
1126};
1127
1128static void do_async_commit(struct work_struct *work)
1129{
1130 struct btrfs_async_commit *ac =
1131 container_of(work, struct btrfs_async_commit, work.work);
1132
1133 btrfs_commit_transaction(ac->newtrans, ac->root);
1134 kfree(ac);
1135}
1136
1137int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1138 struct btrfs_root *root,
1139 int wait_for_unblock)
1140{
1141 struct btrfs_async_commit *ac;
1142 struct btrfs_transaction *cur_trans;
1143
1144 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1145 BUG_ON(!ac);
1146
1147 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1148 ac->root = root;
1149 ac->newtrans = btrfs_join_transaction(root, 0);
1150
1151 /* take transaction reference */
1152 mutex_lock(&root->fs_info->trans_mutex);
1153 cur_trans = trans->transaction;
1154 cur_trans->use_count++;
1155 mutex_unlock(&root->fs_info->trans_mutex);
1156
1157 btrfs_end_transaction(trans, root);
1158 schedule_delayed_work(&ac->work, 0);
1159
1160 /* wait for transaction to start and unblock */
1161 mutex_lock(&root->fs_info->trans_mutex);
1162 if (wait_for_unblock)
1163 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1164 else
1165 wait_current_trans_commit_start(root, cur_trans);
1166 put_transaction(cur_trans);
1167 mutex_unlock(&root->fs_info->trans_mutex);
1168
1169 return 0;
1170}
1171
1172/*
1173 * btrfs_transaction state sequence:
1174 * in_commit = 0, blocked = 0 (initial)
1175 * in_commit = 1, blocked = 1
1176 * blocked = 0
1177 * commit_done = 1
1178 */
991int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1179int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
992 struct btrfs_root *root) 1180 struct btrfs_root *root)
993{ 1181{
994 unsigned long joined = 0; 1182 unsigned long joined = 0;
995 unsigned long timeout = 1;
996 struct btrfs_transaction *cur_trans; 1183 struct btrfs_transaction *cur_trans;
997 struct btrfs_transaction *prev_trans = NULL; 1184 struct btrfs_transaction *prev_trans = NULL;
998 DEFINE_WAIT(wait); 1185 DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1039 1226
1040 trans->transaction->in_commit = 1; 1227 trans->transaction->in_commit = 1;
1041 trans->transaction->blocked = 1; 1228 trans->transaction->blocked = 1;
1229 wake_up(&root->fs_info->transaction_blocked_wait);
1230
1042 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1231 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1043 prev_trans = list_entry(cur_trans->list.prev, 1232 prev_trans = list_entry(cur_trans->list.prev,
1044 struct btrfs_transaction, list); 1233 struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1063 snap_pending = 1; 1252 snap_pending = 1;
1064 1253
1065 WARN_ON(cur_trans != trans->transaction); 1254 WARN_ON(cur_trans != trans->transaction);
1066 if (cur_trans->num_writers > 1)
1067 timeout = MAX_SCHEDULE_TIMEOUT;
1068 else if (should_grow)
1069 timeout = 1;
1070
1071 mutex_unlock(&root->fs_info->trans_mutex); 1255 mutex_unlock(&root->fs_info->trans_mutex);
1072 1256
1073 if (flush_on_commit || snap_pending) { 1257 if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1089 TASK_UNINTERRUPTIBLE); 1273 TASK_UNINTERRUPTIBLE);
1090 1274
1091 smp_mb(); 1275 smp_mb();
1092 if (cur_trans->num_writers > 1 || should_grow) 1276 if (cur_trans->num_writers > 1)
1093 schedule_timeout(timeout); 1277 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1278 else if (should_grow)
1279 schedule_timeout(1);
1094 1280
1095 mutex_lock(&root->fs_info->trans_mutex); 1281 mutex_lock(&root->fs_info->trans_mutex);
1096 finish_wait(&cur_trans->writer_wait, &wait); 1282 finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..f104b57ad4ef 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
87 87
88int btrfs_end_transaction(struct btrfs_trans_handle *trans, 88int btrfs_end_transaction(struct btrfs_trans_handle *trans,
89 struct btrfs_root *root); 89 struct btrfs_root *root);
90int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root);
90struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 92struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
91 int num_items); 93 int num_items);
92struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 94struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
93 int num_blocks); 95 int num_blocks);
96struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
97 int num_blocks);
94struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 98struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
95 int num_blocks); 99 int num_blocks);
100int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
96int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 101int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root); 102 struct btrfs_root *root);
98int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 103int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
104int btrfs_clean_old_snapshots(struct btrfs_root *root); 109int btrfs_clean_old_snapshots(struct btrfs_root *root);
105int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 110int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
106 struct btrfs_root *root); 111 struct btrfs_root *root);
112int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root,
114 int wait_for_unblock);
107int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 115int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 116 struct btrfs_root *root);
109int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed7..992ab425599d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
36 int ret = 0; 36 int ret = 0;
37 int wret; 37 int wret;
38 int level; 38 int level;
39 int orig_level;
40 int is_extent = 0; 39 int is_extent = 0;
41 int next_key_ret = 0; 40 int next_key_ret = 0;
42 u64 last_ret = 0; 41 u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
64 return -ENOMEM; 63 return -ENOMEM;
65 64
66 level = btrfs_header_level(root->node); 65 level = btrfs_header_level(root->node);
67 orig_level = level;
68 66
69 if (level == 0) 67 if (level == 0)
70 goto out; 68 goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9c..a29f19384a27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
786{ 786{
787 struct inode *dir; 787 struct inode *dir;
788 int ret; 788 int ret;
789 struct btrfs_key location;
790 struct btrfs_inode_ref *ref; 789 struct btrfs_inode_ref *ref;
791 struct btrfs_dir_item *di; 790 struct btrfs_dir_item *di;
792 struct inode *inode; 791 struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
795 unsigned long ref_ptr; 794 unsigned long ref_ptr;
796 unsigned long ref_end; 795 unsigned long ref_end;
797 796
798 location.objectid = key->objectid;
799 location.type = BTRFS_INODE_ITEM_KEY;
800 location.offset = 0;
801
802 /* 797 /*
803 * it is possible that we didn't log all the parent directories 798 * it is possible that we didn't log all the parent directories
804 * for a given inode. If we don't find the dir, just don't 799 * for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1583 struct btrfs_path *path; 1578 struct btrfs_path *path;
1584 struct btrfs_root *root = wc->replay_dest; 1579 struct btrfs_root *root = wc->replay_dest;
1585 struct btrfs_key key; 1580 struct btrfs_key key;
1586 u32 item_size;
1587 int level; 1581 int level;
1588 int i; 1582 int i;
1589 int ret; 1583 int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1601 nritems = btrfs_header_nritems(eb); 1595 nritems = btrfs_header_nritems(eb);
1602 for (i = 0; i < nritems; i++) { 1596 for (i = 0; i < nritems; i++) {
1603 btrfs_item_key_to_cpu(eb, &key, i); 1597 btrfs_item_key_to_cpu(eb, &key, i);
1604 item_size = btrfs_item_size_nr(eb, i);
1605 1598
1606 /* inode keys are done during the first stage */ 1599 /* inode keys are done during the first stage */
1607 if (key.type == BTRFS_INODE_ITEM_KEY && 1600 if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1668 struct walk_control *wc) 1661 struct walk_control *wc)
1669{ 1662{
1670 u64 root_owner; 1663 u64 root_owner;
1671 u64 root_gen;
1672 u64 bytenr; 1664 u64 bytenr;
1673 u64 ptr_gen; 1665 u64 ptr_gen;
1674 struct extent_buffer *next; 1666 struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1698 1690
1699 parent = path->nodes[*level]; 1691 parent = path->nodes[*level];
1700 root_owner = btrfs_header_owner(parent); 1692 root_owner = btrfs_header_owner(parent);
1701 root_gen = btrfs_header_generation(parent);
1702 1693
1703 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1704 1695
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1749 struct walk_control *wc) 1740 struct walk_control *wc)
1750{ 1741{
1751 u64 root_owner; 1742 u64 root_owner;
1752 u64 root_gen;
1753 int i; 1743 int i;
1754 int slot; 1744 int slot;
1755 int ret; 1745 int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1757 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1747 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1758 slot = path->slots[i]; 1748 slot = path->slots[i];
1759 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1749 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
1760 struct extent_buffer *node;
1761 node = path->nodes[i];
1762 path->slots[i]++; 1750 path->slots[i]++;
1763 *level = i; 1751 *level = i;
1764 WARN_ON(*level == 0); 1752 WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1771 parent = path->nodes[*level + 1]; 1759 parent = path->nodes[*level + 1];
1772 1760
1773 root_owner = btrfs_header_owner(parent); 1761 root_owner = btrfs_header_owner(parent);
1774 root_gen = btrfs_header_generation(parent);
1775 wc->process_func(root, path->nodes[*level], wc, 1762 wc->process_func(root, path->nodes[*level], wc,
1776 btrfs_header_generation(path->nodes[*level])); 1763 btrfs_header_generation(path->nodes[*level]));
1777 if (wc->free) { 1764 if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
2273 } 2260 }
2274 btrfs_end_log_trans(root); 2261 btrfs_end_log_trans(root);
2275 2262
2276 return 0; 2263 return err;
2277} 2264}
2278 2265
2279/* see comments for btrfs_del_dir_entries_in_log */ 2266/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2729 struct btrfs_key max_key; 2716 struct btrfs_key max_key;
2730 struct btrfs_root *log = root->log_root; 2717 struct btrfs_root *log = root->log_root;
2731 struct extent_buffer *src = NULL; 2718 struct extent_buffer *src = NULL;
2732 u32 size;
2733 int err = 0; 2719 int err = 0;
2734 int ret; 2720 int ret;
2735 int nritems; 2721 int nritems;
@@ -2793,7 +2779,6 @@ again:
2793 break; 2779 break;
2794 2780
2795 src = path->nodes[0]; 2781 src = path->nodes[0];
2796 size = btrfs_item_size_nr(src, path->slots[0]);
2797 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 2782 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
2798 ins_nr++; 2783 ins_nr++;
2799 goto next_slot; 2784 goto next_slot;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e2..cc04dc1445d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1898 u64 size_to_free; 1898 u64 size_to_free;
1899 struct btrfs_path *path; 1899 struct btrfs_path *path;
1900 struct btrfs_key key; 1900 struct btrfs_key key;
1901 struct btrfs_chunk *chunk;
1902 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; 1901 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1903 struct btrfs_trans_handle *trans; 1902 struct btrfs_trans_handle *trans;
1904 struct btrfs_key found_key; 1903 struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1962 if (found_key.objectid != key.objectid) 1961 if (found_key.objectid != key.objectid)
1963 break; 1962 break;
1964 1963
1965 chunk = btrfs_item_ptr(path->nodes[0],
1966 path->slots[0],
1967 struct btrfs_chunk);
1968 /* chunk zero is special */ 1964 /* chunk zero is special */
1969 if (found_key.offset == 0) 1965 if (found_key.offset == 0)
1970 break; 1966 break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
3031 } 3027 }
3032 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 3028 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
3033 dev = multi->stripes[dev_nr].dev; 3029 dev = multi->stripes[dev_nr].dev;
3034 BUG_ON(rw == WRITE && !dev->writeable); 3030 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
3035 if (dev && dev->bdev) {
3036 bio->bi_bdev = dev->bdev; 3031 bio->bi_bdev = dev->bdev;
3037 if (async_submit) 3032 if (async_submit)
3038 schedule_bio(root, dev, rw, bio); 3033 schedule_bio(root, dev, rw, bio);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb215878..698fdd2c739c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
178 struct inode *inode = dentry->d_inode; 178 struct inode *inode = dentry->d_inode;
179 struct btrfs_root *root = BTRFS_I(inode)->root; 179 struct btrfs_root *root = BTRFS_I(inode)->root;
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct btrfs_item *item;
182 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
183 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
184 int ret = 0, slot, advance; 183 int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
234 } 233 }
235 advance = 1; 234 advance = 1;
236 235
237 item = btrfs_item_nr(leaf, slot);
238 btrfs_item_key_to_cpu(leaf, &found_key, slot); 236 btrfs_item_key_to_cpu(leaf, &found_key, slot);
239 237
240 /* check to make sure this item is what we want */ 238 /* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa239..b9cd5445f71c 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
199 int nr_pages = 0; 199 int nr_pages = 0;
200 struct page *in_page = NULL; 200 struct page *in_page = NULL;
201 struct page *out_page = NULL; 201 struct page *out_page = NULL;
202 int out_written = 0;
203 int in_read = 0;
204 unsigned long bytes_left; 202 unsigned long bytes_left;
205 203
206 *out_pages = 0; 204 *out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235 233
236 out_written = 0;
237 in_read = 0;
238
239 while (workspace->def_strm.total_in < len) { 234 while (workspace->def_strm.total_in < len) {
240 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); 235 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 if (ret != Z_OK) { 236 if (ret != Z_OK) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e0421891..08b460ae0539 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
635/* 635/*
636 * mount: join the ceph cluster, and open root directory. 636 * mount: join the ceph cluster, and open root directory.
637 */ 637 */
638static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, 638static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
639 const char *path) 639 const char *path)
640{ 640{
641 int err; 641 int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
678 } 678 }
679 } 679 }
680 680
681 mnt->mnt_root = root;
682 mnt->mnt_sb = fsc->sb;
683
684 fsc->mount_state = CEPH_MOUNT_MOUNTED; 681 fsc->mount_state = CEPH_MOUNT_MOUNTED;
685 dout("mount success\n"); 682 dout("mount success\n");
686 err = 0; 683 mutex_unlock(&fsc->client->mount_mutex);
684 return root;
687 685
688out: 686out:
689 mutex_unlock(&fsc->client->mount_mutex); 687 mutex_unlock(&fsc->client->mount_mutex);
690 return err; 688 return ERR_PTR(err);
691 689
692fail: 690fail:
693 if (first) { 691 if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
777 return err; 775 return err;
778} 776}
779 777
780static int ceph_get_sb(struct file_system_type *fs_type, 778static struct dentry *ceph_mount(struct file_system_type *fs_type,
781 int flags, const char *dev_name, void *data, 779 int flags, const char *dev_name, void *data)
782 struct vfsmount *mnt)
783{ 780{
784 struct super_block *sb; 781 struct super_block *sb;
785 struct ceph_fs_client *fsc; 782 struct ceph_fs_client *fsc;
783 struct dentry *res;
786 int err; 784 int err;
787 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 785 int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
788 const char *path = NULL; 786 const char *path = NULL;
789 struct ceph_mount_options *fsopt = NULL; 787 struct ceph_mount_options *fsopt = NULL;
790 struct ceph_options *opt = NULL; 788 struct ceph_options *opt = NULL;
791 789
792 dout("ceph_get_sb\n"); 790 dout("ceph_mount\n");
793 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); 791 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
794 if (err < 0) 792 if (err < 0) {
793 res = ERR_PTR(err);
795 goto out_final; 794 goto out_final;
795 }
796 796
797 /* create client (which we may/may not use) */ 797 /* create client (which we may/may not use) */
798 fsc = create_fs_client(fsopt, opt); 798 fsc = create_fs_client(fsopt, opt);
799 if (IS_ERR(fsc)) { 799 if (IS_ERR(fsc)) {
800 err = PTR_ERR(fsc); 800 res = ERR_CAST(fsc);
801 kfree(fsopt); 801 kfree(fsopt);
802 kfree(opt); 802 kfree(opt);
803 goto out_final; 803 goto out_final;
804 } 804 }
805 805
806 err = ceph_mdsc_init(fsc); 806 err = ceph_mdsc_init(fsc);
807 if (err < 0) 807 if (err < 0) {
808 res = ERR_PTR(err);
808 goto out; 809 goto out;
810 }
809 811
810 if (ceph_test_opt(fsc->client, NOSHARE)) 812 if (ceph_test_opt(fsc->client, NOSHARE))
811 compare_super = NULL; 813 compare_super = NULL;
812 sb = sget(fs_type, compare_super, ceph_set_super, fsc); 814 sb = sget(fs_type, compare_super, ceph_set_super, fsc);
813 if (IS_ERR(sb)) { 815 if (IS_ERR(sb)) {
814 err = PTR_ERR(sb); 816 res = ERR_CAST(sb);
815 goto out; 817 goto out;
816 } 818 }
817 819
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
823 } else { 825 } else {
824 dout("get_sb using new client %p\n", fsc); 826 dout("get_sb using new client %p\n", fsc);
825 err = ceph_register_bdi(sb, fsc); 827 err = ceph_register_bdi(sb, fsc);
826 if (err < 0) 828 if (err < 0) {
829 res = ERR_PTR(err);
827 goto out_splat; 830 goto out_splat;
831 }
828 } 832 }
829 833
830 err = ceph_mount(fsc, mnt, path); 834 res = ceph_real_mount(fsc, path);
831 if (err < 0) 835 if (IS_ERR(res))
832 goto out_splat; 836 goto out_splat;
833 dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, 837 dout("root %p inode %p ino %llx.%llx\n", res,
834 mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode)); 838 res->d_inode, ceph_vinop(res->d_inode));
835 return 0; 839 return res;
836 840
837out_splat: 841out_splat:
838 ceph_mdsc_close_sessions(fsc->mdsc); 842 ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
843 ceph_mdsc_destroy(fsc); 847 ceph_mdsc_destroy(fsc);
844 destroy_fs_client(fsc); 848 destroy_fs_client(fsc);
845out_final: 849out_final:
846 dout("ceph_get_sb fail %d\n", err); 850 dout("ceph_mount fail %ld\n", PTR_ERR(res));
847 return err; 851 return res;
848} 852}
849 853
850static void ceph_kill_sb(struct super_block *s) 854static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
860static struct file_system_type ceph_fs_type = { 864static struct file_system_type ceph_fs_type = {
861 .owner = THIS_MODULE, 865 .owner = THIS_MODULE,
862 .name = "ceph", 866 .name = "ceph",
863 .get_sb = ceph_get_sb, 867 .mount = ceph_mount,
864 .kill_sb = ceph_kill_sb, 868 .kill_sb = ceph_kill_sb,
865 .fs_flags = FS_RENAME_DOES_D_MOVE, 869 .fs_flags = FS_RENAME_DOES_D_MOVE,
866}; 870};
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0ed213970ced 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,9 @@ config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)" 2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO
6 select CRYPTO_MD5
7 select CRYPTO_ARC4
5 help 8 help
6 This is the client VFS module for the Common Internet File System 9 This is the client VFS module for the Common Internet File System
7 (CIFS) protocol which is the successor to the Server Message Block 10 (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e52..355abcdcda98 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
81 81
82v) mount check for unmatched uids 82v) mount check for unmatched uids
83 83
84w) Add support for new vfs entry points for setlease and fallocate 84w) Add support for new vfs entry point for fallocate
85 85
86x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 86x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
87processes can proceed better in parallel (on the server) 87processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a4105..e9a393c9c2ca 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
15 * the GNU Lesser General Public License for more details. 15 * the GNU Lesser General Public License for more details.
16 * 16 *
17 */ 17 */
18#include <linux/radix-tree.h> 18#include <linux/rbtree.h>
19 19
20#ifndef _CIFS_FS_SB_H 20#ifndef _CIFS_FS_SB_H
21#define _CIFS_FS_SB_H 21#define _CIFS_FS_SB_H
@@ -42,9 +42,9 @@
42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ 42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
43 43
44struct cifs_sb_info { 44struct cifs_sb_info {
45 struct radix_tree_root tlink_tree; 45 struct rb_root tlink_tree;
46#define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */
47 spinlock_t tlink_tree_lock; 46 spinlock_t tlink_tree_lock;
47 struct tcon_link *master_tlink;
48 struct nls_table *local_nls; 48 struct nls_table *local_nls;
49 unsigned int rsize; 49 unsigned int rsize;
50 unsigned int wsize; 50 unsigned int wsize;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294cf..f856732161ab 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24); 43 unsigned char *p24);
44 44
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 const struct session_key *key, char *signature) 46 struct TCP_Server_Info *server, char *signature)
47{ 47{
48 struct MD5Context context; 48 int rc;
49 49
50 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) 50 if (cifs_pdu == NULL || signature == NULL || server == NULL)
51 return -EINVAL; 51 return -EINVAL;
52 52
53 cifs_MD5_init(&context); 53 if (!server->secmech.sdescmd5) {
54 cifs_MD5_update(&context, (char *)&key->data, key->len); 54 cERROR(1, "%s: Can't generate signature\n", __func__);
55 cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 55 return -1;
56 }
57
58 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
59 if (rc) {
60 cERROR(1, "%s: Oould not init md5\n", __func__);
61 return rc;
62 }
63
64 crypto_shash_update(&server->secmech.sdescmd5->shash,
65 server->session_key.response, server->session_key.len);
66
67 crypto_shash_update(&server->secmech.sdescmd5->shash,
68 cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
69
70 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
56 71
57 cifs_MD5_final(signature, &context);
58 return 0; 72 return 0;
59} 73}
60 74
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
79 server->sequence_number++; 93 server->sequence_number++;
80 spin_unlock(&GlobalMid_Lock); 94 spin_unlock(&GlobalMid_Lock);
81 95
82 rc = cifs_calculate_signature(cifs_pdu, &server->session_key, 96 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
83 smb_signature);
84 if (rc) 97 if (rc)
85 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 98 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
86 else 99 else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
90} 103}
91 104
92static int cifs_calc_signature2(const struct kvec *iov, int n_vec, 105static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
93 const struct session_key *key, char *signature) 106 struct TCP_Server_Info *server, char *signature)
94{ 107{
95 struct MD5Context context;
96 int i; 108 int i;
109 int rc;
97 110
98 if ((iov == NULL) || (signature == NULL) || (key == NULL)) 111 if (iov == NULL || signature == NULL || server == NULL)
99 return -EINVAL; 112 return -EINVAL;
100 113
101 cifs_MD5_init(&context); 114 if (!server->secmech.sdescmd5) {
102 cifs_MD5_update(&context, (char *)&key->data, key->len); 115 cERROR(1, "%s: Can't generate signature\n", __func__);
116 return -1;
117 }
118
119 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
120 if (rc) {
121 cERROR(1, "%s: Oould not init md5\n", __func__);
122 return rc;
123 }
124
125 crypto_shash_update(&server->secmech.sdescmd5->shash,
126 server->session_key.response, server->session_key.len);
127
103 for (i = 0; i < n_vec; i++) { 128 for (i = 0; i < n_vec; i++) {
104 if (iov[i].iov_len == 0) 129 if (iov[i].iov_len == 0)
105 continue; 130 continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
112 if (i == 0) { 137 if (i == 0) {
113 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ 138 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
114 break; /* nothing to sign or corrupt header */ 139 break; /* nothing to sign or corrupt header */
115 cifs_MD5_update(&context, iov[0].iov_base+4, 140 crypto_shash_update(&server->secmech.sdescmd5->shash,
116 iov[0].iov_len-4); 141 iov[i].iov_base + 4, iov[i].iov_len - 4);
117 } else 142 } else
118 cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); 143 crypto_shash_update(&server->secmech.sdescmd5->shash,
144 iov[i].iov_base, iov[i].iov_len);
119 } 145 }
120 146
121 cifs_MD5_final(signature, &context); 147 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
122 148
123 return 0; 149 return rc;
124} 150}
125 151
126
127int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 152int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
128 __u32 *pexpected_response_sequence_number) 153 __u32 *pexpected_response_sequence_number)
129{ 154{
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
146 server->sequence_number++; 171 server->sequence_number++;
147 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
148 173
149 rc = cifs_calc_signature2(iov, n_vec, &server->session_key, 174 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
150 smb_signature);
151 if (rc) 175 if (rc)
152 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 176 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
153 else 177 else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
157} 181}
158 182
159int cifs_verify_signature(struct smb_hdr *cifs_pdu, 183int cifs_verify_signature(struct smb_hdr *cifs_pdu,
160 const struct session_key *session_key, 184 struct TCP_Server_Info *server,
161 __u32 expected_sequence_number) 185 __u32 expected_sequence_number)
162{ 186{
163 unsigned int rc; 187 unsigned int rc;
164 char server_response_sig[8]; 188 char server_response_sig[8];
165 char what_we_think_sig_should_be[20]; 189 char what_we_think_sig_should_be[20];
166 190
167 if (cifs_pdu == NULL || session_key == NULL) 191 if (cifs_pdu == NULL || server == NULL)
168 return -EINVAL; 192 return -EINVAL;
169 193
170 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 194 if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
193 cpu_to_le32(expected_sequence_number); 217 cpu_to_le32(expected_sequence_number);
194 cifs_pdu->Signature.Sequence.Reserved = 0; 218 cifs_pdu->Signature.Sequence.Reserved = 0;
195 219
196 rc = cifs_calculate_signature(cifs_pdu, session_key, 220 rc = cifs_calculate_signature(cifs_pdu, server,
197 what_we_think_sig_should_be); 221 what_we_think_sig_should_be);
198 222
199 if (rc) 223 if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 233
210} 234}
211 235
212/* We fill in key by putting in 40 byte array which was allocated by caller */ 236/* first calculate 24 bytes ntlm response and then 16 byte session key */
213int cifs_calculate_session_key(struct session_key *key, const char *rn, 237int setup_ntlm_response(struct cifsSesInfo *ses)
214 const char *password)
215{ 238{
216 char temp_key[16]; 239 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
217 if ((key == NULL) || (rn == NULL)) 240 char temp_key[CIFS_SESS_KEY_SIZE];
241
242 if (!ses)
218 return -EINVAL; 243 return -EINVAL;
219 244
220 E_md4hash(password, temp_key); 245 ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
221 mdfour(key->data.ntlm, temp_key, 16); 246 if (!ses->auth_key.response) {
222 memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE); 247 cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
223 key->len = 40; 248 return -ENOMEM;
249 }
250 ses->auth_key.len = temp_len;
251
252 SMBNTencrypt(ses->password, ses->server->cryptkey,
253 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
254
255 E_md4hash(ses->password, temp_key);
256 mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
257
224 return 0; 258 return 0;
225} 259}
226 260
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
294 * two times the unicode length of a server name + 328 * two times the unicode length of a server name +
295 * size of a timestamp (which is 8 bytes). 329 * size of a timestamp (which is 8 bytes).
296 */ 330 */
297 ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; 331 ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
298 ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL); 332 ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
299 if (!ses->tiblob) { 333 if (!ses->auth_key.response) {
300 ses->tilen = 0; 334 ses->auth_key.len = 0;
301 cERROR(1, "Challenge target info allocation failure"); 335 cERROR(1, "Challenge target info allocation failure");
302 return -ENOMEM; 336 return -ENOMEM;
303 } 337 }
304 338
305 blobptr = ses->tiblob; 339 blobptr = ses->auth_key.response;
306 attrptr = (struct ntlmssp2_name *) blobptr; 340 attrptr = (struct ntlmssp2_name *) blobptr;
307 341
308 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); 342 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
357 * about target string i.e. for some, just user name might suffice. 391 * about target string i.e. for some, just user name might suffice.
358 */ 392 */
359static int 393static int
360find_domain_name(struct cifsSesInfo *ses) 394find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
361{ 395{
362 unsigned int attrsize; 396 unsigned int attrsize;
363 unsigned int type; 397 unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
366 unsigned char *blobend; 400 unsigned char *blobend;
367 struct ntlmssp2_name *attrptr; 401 struct ntlmssp2_name *attrptr;
368 402
369 if (!ses->tilen || !ses->tiblob) 403 if (!ses->auth_key.len || !ses->auth_key.response)
370 return 0; 404 return 0;
371 405
372 blobptr = ses->tiblob; 406 blobptr = ses->auth_key.response;
373 blobend = ses->tiblob + ses->tilen; 407 blobend = blobptr + ses->auth_key.len;
374 408
375 while (blobptr + onesize < blobend) { 409 while (blobptr + onesize < blobend) {
376 attrptr = (struct ntlmssp2_name *) blobptr; 410 attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
386 if (!attrsize) 420 if (!attrsize)
387 break; 421 break;
388 if (!ses->domainName) { 422 if (!ses->domainName) {
389 struct nls_table *default_nls;
390 ses->domainName = 423 ses->domainName =
391 kmalloc(attrsize + 1, GFP_KERNEL); 424 kmalloc(attrsize + 1, GFP_KERNEL);
392 if (!ses->domainName) 425 if (!ses->domainName)
393 return -ENOMEM; 426 return -ENOMEM;
394 default_nls = load_nls_default();
395 cifs_from_ucs2(ses->domainName, 427 cifs_from_ucs2(ses->domainName,
396 (__le16 *)blobptr, attrsize, attrsize, 428 (__le16 *)blobptr, attrsize, attrsize,
397 default_nls, false); 429 nls_cp, false);
398 unload_nls(default_nls);
399 break; 430 break;
400 } 431 }
401 } 432 }
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
405 return 0; 436 return 0;
406} 437}
407 438
408static int calc_ntlmv2_hash(struct cifsSesInfo *ses, 439static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
409 const struct nls_table *nls_cp) 440 const struct nls_table *nls_cp)
410{ 441{
411 int rc = 0; 442 int rc = 0;
412 int len; 443 int len;
413 char nt_hash[16]; 444 char nt_hash[CIFS_NTHASH_SIZE];
414 struct HMACMD5Context *pctxt;
415 wchar_t *user; 445 wchar_t *user;
416 wchar_t *domain; 446 wchar_t *domain;
447 wchar_t *server;
417 448
418 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); 449 if (!ses->server->secmech.sdeschmacmd5) {
419 450 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
420 if (pctxt == NULL) 451 return -1;
421 return -ENOMEM; 452 }
422 453
423 /* calculate md4 hash of password */ 454 /* calculate md4 hash of password */
424 E_md4hash(ses->password, nt_hash); 455 E_md4hash(ses->password, nt_hash);
425 456
426 /* convert Domainname to unicode and uppercase */ 457 crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
427 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); 458 CIFS_NTHASH_SIZE);
459
460 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
461 if (rc) {
462 cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
463 return rc;
464 }
428 465
429 /* convert ses->userName to unicode and uppercase */ 466 /* convert ses->userName to unicode and uppercase */
430 len = strlen(ses->userName); 467 len = strlen(ses->userName);
431 user = kmalloc(2 + (len * 2), GFP_KERNEL); 468 user = kmalloc(2 + (len * 2), GFP_KERNEL);
432 if (user == NULL) 469 if (user == NULL) {
470 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
471 rc = -ENOMEM;
433 goto calc_exit_2; 472 goto calc_exit_2;
473 }
434 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 474 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
435 UniStrupr(user); 475 UniStrupr(user);
436 hmac_md5_update((char *)user, 2*len, pctxt); 476
477 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
478 (char *)user, 2 * len);
437 479
438 /* convert ses->domainName to unicode and uppercase */ 480 /* convert ses->domainName to unicode and uppercase */
439 if (ses->domainName) { 481 if (ses->domainName) {
440 len = strlen(ses->domainName); 482 len = strlen(ses->domainName);
441 483
442 domain = kmalloc(2 + (len * 2), GFP_KERNEL); 484 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
443 if (domain == NULL) 485 if (domain == NULL) {
486 cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
487 rc = -ENOMEM;
444 goto calc_exit_1; 488 goto calc_exit_1;
489 }
445 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, 490 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
446 nls_cp); 491 nls_cp);
447 /* the following line was removed since it didn't work well 492 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
448 with lower cased domain name that passed as an option. 493 (char *)domain, 2 * len);
449 Maybe converting the domain name earlier makes sense */
450 /* UniStrupr(domain); */
451
452 hmac_md5_update((char *)domain, 2*len, pctxt);
453
454 kfree(domain); 494 kfree(domain);
495 } else if (ses->serverName) {
496 len = strlen(ses->serverName);
497
498 server = kmalloc(2 + (len * 2), GFP_KERNEL);
499 if (server == NULL) {
500 cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
501 rc = -ENOMEM;
502 goto calc_exit_1;
503 }
504 len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
505 nls_cp);
506 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
507 (char *)server, 2 * len);
508 kfree(server);
455 } 509 }
510
511 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
512 ntlmv2_hash);
513
456calc_exit_1: 514calc_exit_1:
457 kfree(user); 515 kfree(user);
458calc_exit_2: 516calc_exit_2:
459 /* BB FIXME what about bytes 24 through 40 of the signing key? 517 return rc;
460 compare with the NTLM example */ 518}
461 hmac_md5_final(ses->ntlmv2_hash, pctxt); 519
520static int
521CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
522{
523 int rc;
524 unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
525
526 if (!ses->server->secmech.sdeschmacmd5) {
527 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
528 return -1;
529 }
530
531 crypto_shash_setkey(ses->server->secmech.hmacmd5,
532 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
533
534 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
535 if (rc) {
536 cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
537 return rc;
538 }
539
540 if (ses->server->secType == RawNTLMSSP)
541 memcpy(ses->auth_key.response + offset,
542 ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
543 else
544 memcpy(ses->auth_key.response + offset,
545 ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
546 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
547 ses->auth_key.response + offset, ses->auth_key.len - offset);
548
549 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
550 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
462 551
463 kfree(pctxt);
464 return rc; 552 return rc;
465} 553}
466 554
555
467int 556int
468setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, 557setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
469 const struct nls_table *nls_cp)
470{ 558{
471 int rc; 559 int rc;
472 struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; 560 int baselen;
473 struct HMACMD5Context context; 561 unsigned int tilen;
474 562 struct ntlmv2_resp *buf;
475 buf->blob_signature = cpu_to_le32(0x00000101); 563 char ntlmv2_hash[16];
476 buf->reserved = 0; 564 unsigned char *tiblob = NULL; /* target info blob */
477 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
478 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
479 buf->reserved2 = 0;
480 565
481 if (ses->server->secType == RawNTLMSSP) { 566 if (ses->server->secType == RawNTLMSSP) {
482 if (!ses->domainName) { 567 if (!ses->domainName) {
483 rc = find_domain_name(ses); 568 rc = find_domain_name(ses, nls_cp);
484 if (rc) { 569 if (rc) {
485 cERROR(1, "error %d finding domain name", rc); 570 cERROR(1, "error %d finding domain name", rc);
486 goto setup_ntlmv2_rsp_ret; 571 goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
490 rc = build_avpair_blob(ses, nls_cp); 575 rc = build_avpair_blob(ses, nls_cp);
491 if (rc) { 576 if (rc) {
492 cERROR(1, "error %d building av pair blob", rc); 577 cERROR(1, "error %d building av pair blob", rc);
493 return rc; 578 goto setup_ntlmv2_rsp_ret;
494 } 579 }
495 } 580 }
496 581
497 /* calculate buf->ntlmv2_hash */ 582 baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
498 rc = calc_ntlmv2_hash(ses, nls_cp); 583 tilen = ses->auth_key.len;
584 tiblob = ses->auth_key.response;
585
586 ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
587 if (!ses->auth_key.response) {
588 rc = ENOMEM;
589 ses->auth_key.len = 0;
590 cERROR(1, "%s: Can't allocate auth blob", __func__);
591 goto setup_ntlmv2_rsp_ret;
592 }
593 ses->auth_key.len += baselen;
594
595 buf = (struct ntlmv2_resp *)
596 (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
597 buf->blob_signature = cpu_to_le32(0x00000101);
598 buf->reserved = 0;
599 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
600 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
601 buf->reserved2 = 0;
602
603 memcpy(ses->auth_key.response + baselen, tiblob, tilen);
604
605 /* calculate ntlmv2_hash */
606 rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
499 if (rc) { 607 if (rc) {
500 cERROR(1, "could not get v2 hash rc %d", rc); 608 cERROR(1, "could not get v2 hash rc %d", rc);
501 goto setup_ntlmv2_rsp_ret; 609 goto setup_ntlmv2_rsp_ret;
502 } 610 }
503 CalcNTLMv2_response(ses, resp_buf); 611
612 /* calculate first part of the client response (CR1) */
613 rc = CalcNTLMv2_response(ses, ntlmv2_hash);
614 if (rc) {
615 cERROR(1, "Could not calculate CR1 rc: %d", rc);
616 goto setup_ntlmv2_rsp_ret;
617 }
504 618
505 /* now calculate the session key for NTLMv2 */ 619 /* now calculate the session key for NTLMv2 */
506 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 620 crypto_shash_setkey(ses->server->secmech.hmacmd5,
507 hmac_md5_update(resp_buf, 16, &context); 621 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
508 hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context); 622
623 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
624 if (rc) {
625 cERROR(1, "%s: Could not init hmacmd5\n", __func__);
626 goto setup_ntlmv2_rsp_ret;
627 }
509 628
510 memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf, 629 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
511 sizeof(struct ntlmv2_resp)); 630 ses->auth_key.response + CIFS_SESS_KEY_SIZE,
512 ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp); 631 CIFS_HMAC_MD5_HASH_SIZE);
513 632
514 return 0; 633 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
634 ses->auth_key.response);
515 635
516setup_ntlmv2_rsp_ret: 636setup_ntlmv2_rsp_ret:
517 kfree(ses->tiblob); 637 kfree(tiblob);
518 ses->tiblob = NULL;
519 ses->tilen = 0;
520 638
521 return rc; 639 return rc;
522} 640}
523 641
524void CalcNTLMv2_response(const struct cifsSesInfo *ses, 642int
525 char *v2_session_response) 643calc_seckey(struct cifsSesInfo *ses)
526{ 644{
527 struct HMACMD5Context context; 645 int rc;
528 /* rest of v2 struct already generated */ 646 struct crypto_blkcipher *tfm_arc4;
529 memcpy(v2_session_response + 8, ses->cryptKey, 8); 647 struct scatterlist sgin, sgout;
530 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 648 struct blkcipher_desc desc;
649 unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
650
651 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
652
653 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
654 if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
655 cERROR(1, "could not allocate crypto API arc4\n");
656 return PTR_ERR(tfm_arc4);
657 }
531 658
532 hmac_md5_update(v2_session_response+8, 659 desc.tfm = tfm_arc4;
533 sizeof(struct ntlmv2_resp) - 8, &context);
534 660
535 if (ses->tilen) 661 crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
536 hmac_md5_update(ses->tiblob, ses->tilen, &context); 662 CIFS_SESS_KEY_SIZE);
537 663
538 hmac_md5_final(v2_session_response, &context); 664 sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
539/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ 665 sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
666
667 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
668 if (rc) {
669 cERROR(1, "could not encrypt session key rc: %d\n", rc);
670 crypto_free_blkcipher(tfm_arc4);
671 return rc;
672 }
673
674 /* make secondary_key/nonce as session key */
675 memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
676 /* and make len as that of session key only */
677 ses->auth_key.len = CIFS_SESS_KEY_SIZE;
678
679 crypto_free_blkcipher(tfm_arc4);
680
681 return 0;
682}
683
684void
685cifs_crypto_shash_release(struct TCP_Server_Info *server)
686{
687 if (server->secmech.md5)
688 crypto_free_shash(server->secmech.md5);
689
690 if (server->secmech.hmacmd5)
691 crypto_free_shash(server->secmech.hmacmd5);
692
693 kfree(server->secmech.sdeschmacmd5);
694
695 kfree(server->secmech.sdescmd5);
696}
697
698int
699cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
700{
701 int rc;
702 unsigned int size;
703
704 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
705 if (!server->secmech.hmacmd5 ||
706 IS_ERR(server->secmech.hmacmd5)) {
707 cERROR(1, "could not allocate crypto hmacmd5\n");
708 return PTR_ERR(server->secmech.hmacmd5);
709 }
710
711 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
712 if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
713 cERROR(1, "could not allocate crypto md5\n");
714 rc = PTR_ERR(server->secmech.md5);
715 goto crypto_allocate_md5_fail;
716 }
717
718 size = sizeof(struct shash_desc) +
719 crypto_shash_descsize(server->secmech.hmacmd5);
720 server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
721 if (!server->secmech.sdeschmacmd5) {
722 cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
723 rc = -ENOMEM;
724 goto crypto_allocate_hmacmd5_sdesc_fail;
725 }
726 server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
727 server->secmech.sdeschmacmd5->shash.flags = 0x0;
728
729
730 size = sizeof(struct shash_desc) +
731 crypto_shash_descsize(server->secmech.md5);
732 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
733 if (!server->secmech.sdescmd5) {
734 cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
735 rc = -ENOMEM;
736 goto crypto_allocate_md5_sdesc_fail;
737 }
738 server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
739 server->secmech.sdescmd5->shash.flags = 0x0;
740
741 return 0;
742
743crypto_allocate_md5_sdesc_fail:
744 kfree(server->secmech.sdeschmacmd5);
745
746crypto_allocate_hmacmd5_sdesc_fail:
747 crypto_free_shash(server->secmech.md5);
748
749crypto_allocate_md5_fail:
750 crypto_free_shash(server->secmech.hmacmd5);
751
752 return rc;
540} 753}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f210..9c3789762ab7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
116 return -ENOMEM; 116 return -ENOMEM;
117 117
118 spin_lock_init(&cifs_sb->tlink_tree_lock); 118 spin_lock_init(&cifs_sb->tlink_tree_lock);
119 INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL); 119 cifs_sb->tlink_tree = RB_ROOT;
120 120
121 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 121 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
122 if (rc) { 122 if (rc) {
@@ -318,12 +318,10 @@ cifs_alloc_inode(struct super_block *sb)
318 return NULL; 318 return NULL;
319 cifs_inode->cifsAttrs = 0x20; /* default */ 319 cifs_inode->cifsAttrs = 0x20; /* default */
320 cifs_inode->time = 0; 320 cifs_inode->time = 0;
321 cifs_inode->write_behind_rc = 0;
322 /* Until the file is open and we have gotten oplock 321 /* Until the file is open and we have gotten oplock
323 info back from the server, can not assume caching of 322 info back from the server, can not assume caching of
324 file data or metadata */ 323 file data or metadata */
325 cifs_inode->clientCanCacheRead = false; 324 cifs_set_oplock_level(cifs_inode, 0);
326 cifs_inode->clientCanCacheAll = false;
327 cifs_inode->delete_pending = false; 325 cifs_inode->delete_pending = false;
328 cifs_inode->invalid_mapping = false; 326 cifs_inode->invalid_mapping = false;
329 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 327 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -545,9 +543,9 @@ static const struct super_operations cifs_super_ops = {
545#endif 543#endif
546}; 544};
547 545
548static int 546static struct dentry *
549cifs_get_sb(struct file_system_type *fs_type, 547cifs_do_mount(struct file_system_type *fs_type,
550 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 548 int flags, const char *dev_name, void *data)
551{ 549{
552 int rc; 550 int rc;
553 struct super_block *sb; 551 struct super_block *sb;
@@ -557,18 +555,17 @@ cifs_get_sb(struct file_system_type *fs_type,
557 cFYI(1, "Devname: %s flags: %d ", dev_name, flags); 555 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
558 556
559 if (IS_ERR(sb)) 557 if (IS_ERR(sb))
560 return PTR_ERR(sb); 558 return ERR_CAST(sb);
561 559
562 sb->s_flags = flags; 560 sb->s_flags = flags;
563 561
564 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 562 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
565 if (rc) { 563 if (rc) {
566 deactivate_locked_super(sb); 564 deactivate_locked_super(sb);
567 return rc; 565 return ERR_PTR(rc);
568 } 566 }
569 sb->s_flags |= MS_ACTIVE; 567 sb->s_flags |= MS_ACTIVE;
570 simple_set_mnt(mnt, sb); 568 return dget(sb->s_root);
571 return 0;
572} 569}
573 570
574static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 571static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +631,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
634struct file_system_type cifs_fs_type = { 631struct file_system_type cifs_fs_type = {
635 .owner = THIS_MODULE, 632 .owner = THIS_MODULE,
636 .name = "cifs", 633 .name = "cifs",
637 .get_sb = cifs_get_sb, 634 .mount = cifs_do_mount,
638 .kill_sb = kill_anon_super, 635 .kill_sb = kill_anon_super,
639 /* .fs_flags */ 636 /* .fs_flags */
640}; 637};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b42..897b2b2b28b5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
112extern const struct export_operations cifs_export_ops; 112extern const struct export_operations cifs_export_ops;
113#endif /* EXPERIMENTAL */ 113#endif /* EXPERIMENTAL */
114 114
115#define CIFS_VERSION "1.67" 115#define CIFS_VERSION "1.68"
116#endif /* _CIFSFS_H */ 116#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f24..b577bf0a1bb3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include "cifs_fs_sb.h" 26#include "cifs_fs_sb.h"
27#include "cifsacl.h" 27#include "cifsacl.h"
28#include <crypto/internal/hash.h>
29#include <linux/scatterlist.h>
30
28/* 31/*
29 * The sizes of various internal tables and strings 32 * The sizes of various internal tables and strings
30 */ 33 */
@@ -74,7 +77,7 @@
74 * CIFS vfs client Status information (based on what we know.) 77 * CIFS vfs client Status information (based on what we know.)
75 */ 78 */
76 79
77 /* associated with each tcp and smb session */ 80/* associated with each tcp and smb session */
78enum statusEnum { 81enum statusEnum {
79 CifsNew = 0, 82 CifsNew = 0,
80 CifsGood, 83 CifsGood,
@@ -99,14 +102,29 @@ enum protocolEnum {
99 102
100struct session_key { 103struct session_key {
101 unsigned int len; 104 unsigned int len;
102 union { 105 char *response;
103 char ntlm[CIFS_SESS_KEY_SIZE + 16]; 106};
104 char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */ 107
105 struct { 108/* crypto security descriptor definition */
106 char key[16]; 109struct sdesc {
107 struct ntlmv2_resp resp; 110 struct shash_desc shash;
108 } ntlmv2; 111 char ctx[];
109 } data; 112};
113
114/* crypto hashing related structure/fields, not specific to a sec mech */
115struct cifs_secmech {
116 struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
117 struct crypto_shash *md5; /* md5 hash function */
118 struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
119 struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
120};
121
122/* per smb session structure/fields */
123struct ntlmssp_auth {
124 __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
125 __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
126 unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
127 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
110}; 128};
111 129
112struct cifs_cred { 130struct cifs_cred {
@@ -179,12 +197,14 @@ struct TCP_Server_Info {
179 int capabilities; /* allow selective disabling of caps by smb sess */ 197 int capabilities; /* allow selective disabling of caps by smb sess */
180 int timeAdj; /* Adjust for difference in server time zone in sec */ 198 int timeAdj; /* Adjust for difference in server time zone in sec */
181 __u16 CurrentMid; /* multiplex id - rotating counter */ 199 __u16 CurrentMid; /* multiplex id - rotating counter */
200 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
182 /* 16th byte of RFC1001 workstation name is always null */ 201 /* 16th byte of RFC1001 workstation name is always null */
183 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 202 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
184 __u32 sequence_number; /* needed for CIFS PDU signature */ 203 __u32 sequence_number; /* needed for CIFS PDU signature */
185 struct session_key session_key; 204 struct session_key session_key;
186 unsigned long lstrp; /* when we got last response from this server */ 205 unsigned long lstrp; /* when we got last response from this server */
187 u16 dialect; /* dialect index that server chose */ 206 u16 dialect; /* dialect index that server chose */
207 struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
188 /* extended security flavors that server supports */ 208 /* extended security flavors that server supports */
189 bool sec_kerberos; /* supports plain Kerberos */ 209 bool sec_kerberos; /* supports plain Kerberos */
190 bool sec_mskerberos; /* supports legacy MS Kerberos */ 210 bool sec_mskerberos; /* supports legacy MS Kerberos */
@@ -222,11 +242,8 @@ struct cifsSesInfo {
222 char userName[MAX_USERNAME_SIZE + 1]; 242 char userName[MAX_USERNAME_SIZE + 1];
223 char *domainName; 243 char *domainName;
224 char *password; 244 char *password;
225 char cryptKey[CIFS_CRYPTO_KEY_SIZE];
226 struct session_key auth_key; 245 struct session_key auth_key;
227 char ntlmv2_hash[16]; 246 struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
228 unsigned int tilen; /* length of the target info blob */
229 unsigned char *tiblob; /* target info blob in challenge response */
230 bool need_reconnect:1; /* connection reset, uid now invalid */ 247 bool need_reconnect:1; /* connection reset, uid now invalid */
231}; 248};
232/* no more than one of the following three session flags may be set */ 249/* no more than one of the following three session flags may be set */
@@ -319,7 +336,8 @@ struct cifsTconInfo {
319 * "get" on the container. 336 * "get" on the container.
320 */ 337 */
321struct tcon_link { 338struct tcon_link {
322 unsigned long tl_index; 339 struct rb_node tl_rbnode;
340 uid_t tl_uid;
323 unsigned long tl_flags; 341 unsigned long tl_flags;
324#define TCON_LINK_MASTER 0 342#define TCON_LINK_MASTER 0
325#define TCON_LINK_PENDING 1 343#define TCON_LINK_PENDING 1
@@ -395,16 +413,19 @@ struct cifsFileInfo {
395 struct list_head llist; /* list of byte range locks we have. */ 413 struct list_head llist; /* list of byte range locks we have. */
396 bool invalidHandle:1; /* file closed via session abend */ 414 bool invalidHandle:1; /* file closed via session abend */
397 bool oplock_break_cancelled:1; 415 bool oplock_break_cancelled:1;
398 atomic_t count; /* reference count */ 416 int count; /* refcount protected by cifs_file_list_lock */
399 struct mutex fh_mutex; /* prevents reopen race after dead ses*/ 417 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
400 struct cifs_search_info srch_inf; 418 struct cifs_search_info srch_inf;
401 struct work_struct oplock_break; /* work for oplock breaks */ 419 struct work_struct oplock_break; /* work for oplock breaks */
402}; 420};
403 421
404/* Take a reference on the file private data */ 422/*
423 * Take a reference on the file private data. Must be called with
424 * cifs_file_list_lock held.
425 */
405static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) 426static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
406{ 427{
407 atomic_inc(&cifs_file->count); 428 ++cifs_file->count;
408} 429}
409 430
410void cifsFileInfo_put(struct cifsFileInfo *cifs_file); 431void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +438,6 @@ struct cifsInodeInfo {
417 struct list_head lockList; 438 struct list_head lockList;
418 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 439 /* BB add in lists for dirty pages i.e. write caching info for oplock */
419 struct list_head openFileList; 440 struct list_head openFileList;
420 int write_behind_rc;
421 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 441 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
422 unsigned long time; /* jiffies of last update/check of inode */ 442 unsigned long time; /* jiffies of last update/check of inode */
423 bool clientCanCacheRead:1; /* read oplock */ 443 bool clientCanCacheRead:1; /* read oplock */
@@ -668,7 +688,7 @@ require use of the stronger protocol */
668 * GlobalMid_Lock protects: 688 * GlobalMid_Lock protects:
669 * list operations on pending_mid_q and oplockQ 689 * list operations on pending_mid_q and oplockQ
670 * updates to XID counters, multiplex id and SMB sequence numbers 690 * updates to XID counters, multiplex id and SMB sequence numbers
671 * GlobalSMBSesLock protects: 691 * cifs_file_list_lock protects:
672 * list operations on tcp and SMB session lists and tCon lists 692 * list operations on tcp and SMB session lists and tCon lists
673 * f_owner.lock protects certain per file struct operations 693 * f_owner.lock protects certain per file struct operations
674 * mapping->page_lock protects certain per page operations 694 * mapping->page_lock protects certain per page operations
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4c..de36b09763a8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
131#define CIFS_CRYPTO_KEY_SIZE (8) 131#define CIFS_CRYPTO_KEY_SIZE (8)
132 132
133/* 133/*
134 * Size of the ntlm client response
135 */
136#define CIFS_AUTH_RESP_SIZE (24)
137
138/*
134 * Size of the session key (crypto key encrypted with the password 139 * Size of the session key (crypto key encrypted with the password
135 */ 140 */
136#define CIFS_SESS_KEY_SIZE (24) 141#define CIFS_SESS_KEY_SIZE (16)
142
143#define CIFS_CLIENT_CHALLENGE_SIZE (8)
144#define CIFS_SERVER_CHALLENGE_SIZE (8)
145#define CIFS_HMAC_MD5_HASH_SIZE (16)
146#define CIFS_CPHTXT_SIZE (16)
147#define CIFS_NTHASH_SIZE (16)
137 148
138/* 149/*
139 * Maximum user name length 150 * Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7ba..7ed69b6b5fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -104,6 +104,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
104extern u64 cifs_UnixTimeToNT(struct timespec); 104extern u64 cifs_UnixTimeToNT(struct timespec);
105extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 105extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
106 int offset); 106 int offset);
107extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
107 108
108extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, 109extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
109 struct file *file, struct tcon_link *tlink, 110 struct file *file, struct tcon_link *tlink,
@@ -362,13 +363,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 363extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
363 __u32 *); 364 __u32 *);
364extern int cifs_verify_signature(struct smb_hdr *, 365extern int cifs_verify_signature(struct smb_hdr *,
365 const struct session_key *session_key, 366 struct TCP_Server_Info *server,
366 __u32 expected_sequence_number); 367 __u32 expected_sequence_number);
367extern int cifs_calculate_session_key(struct session_key *key, const char *rn, 368extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
368 const char *pass); 369extern int setup_ntlm_response(struct cifsSesInfo *);
369extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); 370extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
370extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 371extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
371 const struct nls_table *); 372extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
373extern int calc_seckey(struct cifsSesInfo *);
374
372#ifdef CONFIG_CIFS_WEAK_PW_HASH 375#ifdef CONFIG_CIFS_WEAK_PW_HASH
373extern void calc_lanman_hash(const char *password, const char *cryptkey, 376extern void calc_lanman_hash(const char *password, const char *cryptkey,
374 bool encrypt, char *lnm_session_key); 377 bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b15..2f2632b6df5a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
503 503
504 if (rsp->EncryptionKeyLength == 504 if (rsp->EncryptionKeyLength ==
505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { 505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
506 memcpy(ses->cryptKey, rsp->EncryptionKey, 506 memcpy(ses->server->cryptkey, rsp->EncryptionKey,
507 CIFS_CRYPTO_KEY_SIZE); 507 CIFS_CRYPTO_KEY_SIZE);
508 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 508 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
509 rc = -EIO; /* need cryptkey unless plain text */ 509 rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
575 server->timeAdj *= 60; 575 server->timeAdj *= 60;
576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
577 memcpy(ses->cryptKey, pSMBr->u.EncryptionKey, 577 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
578 CIFS_CRYPTO_KEY_SIZE); 578 CIFS_CRYPTO_KEY_SIZE);
579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
580 && (pSMBr->EncryptionKeyLength == 0)) { 580 && (pSMBr->EncryptionKeyLength == 0)) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb58..251a17c03545 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -116,6 +116,7 @@ struct smb_vol {
116 116
117static int ipv4_connect(struct TCP_Server_Info *server); 117static int ipv4_connect(struct TCP_Server_Info *server);
118static int ipv6_connect(struct TCP_Server_Info *server); 118static int ipv6_connect(struct TCP_Server_Info *server);
119static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
119static void cifs_prune_tlinks(struct work_struct *work); 120static void cifs_prune_tlinks(struct work_struct *work);
120 121
121/* 122/*
@@ -175,6 +176,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
175 } 176 }
176 server->sequence_number = 0; 177 server->sequence_number = 0;
177 server->session_estab = false; 178 server->session_estab = false;
179 kfree(server->session_key.response);
180 server->session_key.response = NULL;
181 server->session_key.len = 0;
178 182
179 spin_lock(&GlobalMid_Lock); 183 spin_lock(&GlobalMid_Lock);
180 list_for_each(tmp, &server->pending_mid_q) { 184 list_for_each(tmp, &server->pending_mid_q) {
@@ -1064,7 +1068,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1064 } 1068 }
1065 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1069 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1066 value, strlen(value)); 1070 value, strlen(value));
1067 if (i < 0) { 1071 if (i == 0) {
1068 printk(KERN_WARNING "CIFS: Could not parse" 1072 printk(KERN_WARNING "CIFS: Could not parse"
1069 " srcaddr: %s\n", 1073 " srcaddr: %s\n",
1070 value); 1074 value);
@@ -1560,8 +1564,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1560 server->tcpStatus = CifsExiting; 1564 server->tcpStatus = CifsExiting;
1561 spin_unlock(&GlobalMid_Lock); 1565 spin_unlock(&GlobalMid_Lock);
1562 1566
1567 cifs_crypto_shash_release(server);
1563 cifs_fscache_release_client_cookie(server); 1568 cifs_fscache_release_client_cookie(server);
1564 1569
1570 kfree(server->session_key.response);
1571 server->session_key.response = NULL;
1572 server->session_key.len = 0;
1573
1565 task = xchg(&server->tsk, NULL); 1574 task = xchg(&server->tsk, NULL);
1566 if (task) 1575 if (task)
1567 force_sig(SIGKILL, task); 1576 force_sig(SIGKILL, task);
@@ -1614,10 +1623,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1614 goto out_err; 1623 goto out_err;
1615 } 1624 }
1616 1625
1626 rc = cifs_crypto_shash_allocate(tcp_ses);
1627 if (rc) {
1628 cERROR(1, "could not setup hash structures rc %d", rc);
1629 goto out_err;
1630 }
1631
1617 tcp_ses->hostname = extract_hostname(volume_info->UNC); 1632 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1618 if (IS_ERR(tcp_ses->hostname)) { 1633 if (IS_ERR(tcp_ses->hostname)) {
1619 rc = PTR_ERR(tcp_ses->hostname); 1634 rc = PTR_ERR(tcp_ses->hostname);
1620 goto out_err; 1635 goto out_err_crypto_release;
1621 } 1636 }
1622 1637
1623 tcp_ses->noblocksnd = volume_info->noblocksnd; 1638 tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1676,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1661 } 1676 }
1662 if (rc < 0) { 1677 if (rc < 0) {
1663 cERROR(1, "Error connecting to socket. Aborting operation"); 1678 cERROR(1, "Error connecting to socket. Aborting operation");
1664 goto out_err; 1679 goto out_err_crypto_release;
1665 } 1680 }
1666 1681
1667 /* 1682 /*
@@ -1675,7 +1690,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1675 rc = PTR_ERR(tcp_ses->tsk); 1690 rc = PTR_ERR(tcp_ses->tsk);
1676 cERROR(1, "error %d create cifsd thread", rc); 1691 cERROR(1, "error %d create cifsd thread", rc);
1677 module_put(THIS_MODULE); 1692 module_put(THIS_MODULE);
1678 goto out_err; 1693 goto out_err_crypto_release;
1679 } 1694 }
1680 1695
1681 /* thread spawned, put it on the list */ 1696 /* thread spawned, put it on the list */
@@ -1687,6 +1702,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1687 1702
1688 return tcp_ses; 1703 return tcp_ses;
1689 1704
1705out_err_crypto_release:
1706 cifs_crypto_shash_release(tcp_ses);
1707
1690out_err: 1708out_err:
1691 if (tcp_ses) { 1709 if (tcp_ses) {
1692 if (!IS_ERR(tcp_ses->hostname)) 1710 if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1819,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1801 if (ses == NULL) 1819 if (ses == NULL)
1802 goto get_ses_fail; 1820 goto get_ses_fail;
1803 1821
1804 ses->tilen = 0;
1805 ses->tiblob = NULL;
1806 /* new SMB session uses our server ref */ 1822 /* new SMB session uses our server ref */
1807 ses->server = server; 1823 ses->server = server;
1808 if (server->addr.sockAddr6.sin6_family == AF_INET6) 1824 if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1839,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1823 goto get_ses_fail; 1839 goto get_ses_fail;
1824 } 1840 }
1825 if (volume_info->domainname) { 1841 if (volume_info->domainname) {
1826 int len = strlen(volume_info->domainname); 1842 ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
1827 ses->domainName = kmalloc(len + 1, GFP_KERNEL); 1843 if (!ses->domainName)
1828 if (ses->domainName) 1844 goto get_ses_fail;
1829 strcpy(ses->domainName, volume_info->domainname);
1830 } 1845 }
1831 ses->cred_uid = volume_info->cred_uid; 1846 ses->cred_uid = volume_info->cred_uid;
1832 ses->linux_uid = volume_info->linux_uid; 1847 ses->linux_uid = volume_info->linux_uid;
@@ -2886,24 +2901,16 @@ remote_path_check:
2886 goto mount_fail_check; 2901 goto mount_fail_check;
2887 } 2902 }
2888 2903
2889 tlink->tl_index = pSesInfo->linux_uid; 2904 tlink->tl_uid = pSesInfo->linux_uid;
2890 tlink->tl_tcon = tcon; 2905 tlink->tl_tcon = tcon;
2891 tlink->tl_time = jiffies; 2906 tlink->tl_time = jiffies;
2892 set_bit(TCON_LINK_MASTER, &tlink->tl_flags); 2907 set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
2893 set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); 2908 set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
2894 2909
2895 rc = radix_tree_preload(GFP_KERNEL); 2910 cifs_sb->master_tlink = tlink;
2896 if (rc == -ENOMEM) {
2897 kfree(tlink);
2898 goto mount_fail_check;
2899 }
2900
2901 spin_lock(&cifs_sb->tlink_tree_lock); 2911 spin_lock(&cifs_sb->tlink_tree_lock);
2902 radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink); 2912 tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
2903 radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
2904 CIFS_TLINK_MASTER_TAG);
2905 spin_unlock(&cifs_sb->tlink_tree_lock); 2913 spin_unlock(&cifs_sb->tlink_tree_lock);
2906 radix_tree_preload_end();
2907 2914
2908 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, 2915 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
2909 TLINK_IDLE_EXPIRE); 2916 TLINK_IDLE_EXPIRE);
@@ -2985,13 +2992,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2985#ifdef CONFIG_CIFS_WEAK_PW_HASH 2992#ifdef CONFIG_CIFS_WEAK_PW_HASH
2986 if ((global_secflags & CIFSSEC_MAY_LANMAN) && 2993 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
2987 (ses->server->secType == LANMAN)) 2994 (ses->server->secType == LANMAN))
2988 calc_lanman_hash(tcon->password, ses->cryptKey, 2995 calc_lanman_hash(tcon->password, ses->server->cryptkey,
2989 ses->server->secMode & 2996 ses->server->secMode &
2990 SECMODE_PW_ENCRYPT ? true : false, 2997 SECMODE_PW_ENCRYPT ? true : false,
2991 bcc_ptr); 2998 bcc_ptr);
2992 else 2999 else
2993#endif /* CIFS_WEAK_PW_HASH */ 3000#endif /* CIFS_WEAK_PW_HASH */
2994 SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr); 3001 SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
2995 3002
2996 bcc_ptr += CIFS_SESS_KEY_SIZE; 3003 bcc_ptr += CIFS_SESS_KEY_SIZE;
2997 if (ses->capabilities & CAP_UNICODE) { 3004 if (ses->capabilities & CAP_UNICODE) {
@@ -3093,32 +3100,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3093int 3100int
3094cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) 3101cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3095{ 3102{
3096 int i, ret; 3103 struct rb_root *root = &cifs_sb->tlink_tree;
3104 struct rb_node *node;
3105 struct tcon_link *tlink;
3097 char *tmp; 3106 char *tmp;
3098 struct tcon_link *tlink[8];
3099 unsigned long index = 0;
3100 3107
3101 cancel_delayed_work_sync(&cifs_sb->prune_tlinks); 3108 cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
3102 3109
3103 do { 3110 spin_lock(&cifs_sb->tlink_tree_lock);
3104 spin_lock(&cifs_sb->tlink_tree_lock); 3111 while ((node = rb_first(root))) {
3105 ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, 3112 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3106 (void **)tlink, index, 3113 cifs_get_tlink(tlink);
3107 ARRAY_SIZE(tlink)); 3114 clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
3108 /* increment index for next pass */ 3115 rb_erase(node, root);
3109 if (ret > 0)
3110 index = tlink[ret - 1]->tl_index + 1;
3111 for (i = 0; i < ret; i++) {
3112 cifs_get_tlink(tlink[i]);
3113 clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
3114 radix_tree_delete(&cifs_sb->tlink_tree,
3115 tlink[i]->tl_index);
3116 }
3117 spin_unlock(&cifs_sb->tlink_tree_lock);
3118 3116
3119 for (i = 0; i < ret; i++) 3117 spin_unlock(&cifs_sb->tlink_tree_lock);
3120 cifs_put_tlink(tlink[i]); 3118 cifs_put_tlink(tlink);
3121 } while (ret != 0); 3119 spin_lock(&cifs_sb->tlink_tree_lock);
3120 }
3121 spin_unlock(&cifs_sb->tlink_tree_lock);
3122 3122
3123 tmp = cifs_sb->prepath; 3123 tmp = cifs_sb->prepath;
3124 cifs_sb->prepathlen = 0; 3124 cifs_sb->prepathlen = 0;
@@ -3178,10 +3178,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3178 } else { 3178 } else {
3179 mutex_lock(&ses->server->srv_mutex); 3179 mutex_lock(&ses->server->srv_mutex);
3180 if (!server->session_estab) { 3180 if (!server->session_estab) {
3181 memcpy(&server->session_key.data, 3181 server->session_key.response = ses->auth_key.response;
3182 &ses->auth_key.data, ses->auth_key.len);
3183 server->session_key.len = ses->auth_key.len; 3182 server->session_key.len = ses->auth_key.len;
3184 ses->server->session_estab = true; 3183 server->sequence_number = 0x2;
3184 server->session_estab = true;
3185 ses->auth_key.response = NULL;
3185 } 3186 }
3186 mutex_unlock(&server->srv_mutex); 3187 mutex_unlock(&server->srv_mutex);
3187 3188
@@ -3192,6 +3193,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3192 spin_unlock(&GlobalMid_Lock); 3193 spin_unlock(&GlobalMid_Lock);
3193 } 3194 }
3194 3195
3196 kfree(ses->auth_key.response);
3197 ses->auth_key.response = NULL;
3198 ses->auth_key.len = 0;
3199 kfree(ses->ntlmssp);
3200 ses->ntlmssp = NULL;
3201
3195 return rc; 3202 return rc;
3196} 3203}
3197 3204
@@ -3250,22 +3257,10 @@ out:
3250 return tcon; 3257 return tcon;
3251} 3258}
3252 3259
3253static struct tcon_link * 3260static inline struct tcon_link *
3254cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) 3261cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3255{ 3262{
3256 struct tcon_link *tlink; 3263 return cifs_sb->master_tlink;
3257 unsigned int ret;
3258
3259 spin_lock(&cifs_sb->tlink_tree_lock);
3260 ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
3261 0, 1, CIFS_TLINK_MASTER_TAG);
3262 spin_unlock(&cifs_sb->tlink_tree_lock);
3263
3264 /* the master tcon should always be present */
3265 if (ret == 0)
3266 BUG();
3267
3268 return tlink;
3269} 3264}
3270 3265
3271struct cifsTconInfo * 3266struct cifsTconInfo *
@@ -3281,6 +3276,47 @@ cifs_sb_tcon_pending_wait(void *unused)
3281 return signal_pending(current) ? -ERESTARTSYS : 0; 3276 return signal_pending(current) ? -ERESTARTSYS : 0;
3282} 3277}
3283 3278
3279/* find and return a tlink with given uid */
3280static struct tcon_link *
3281tlink_rb_search(struct rb_root *root, uid_t uid)
3282{
3283 struct rb_node *node = root->rb_node;
3284 struct tcon_link *tlink;
3285
3286 while (node) {
3287 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3288
3289 if (tlink->tl_uid > uid)
3290 node = node->rb_left;
3291 else if (tlink->tl_uid < uid)
3292 node = node->rb_right;
3293 else
3294 return tlink;
3295 }
3296 return NULL;
3297}
3298
3299/* insert a tcon_link into the tree */
3300static void
3301tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
3302{
3303 struct rb_node **new = &(root->rb_node), *parent = NULL;
3304 struct tcon_link *tlink;
3305
3306 while (*new) {
3307 tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
3308 parent = *new;
3309
3310 if (tlink->tl_uid > new_tlink->tl_uid)
3311 new = &((*new)->rb_left);
3312 else
3313 new = &((*new)->rb_right);
3314 }
3315
3316 rb_link_node(&new_tlink->tl_rbnode, parent, new);
3317 rb_insert_color(&new_tlink->tl_rbnode, root);
3318}
3319
3284/* 3320/*
3285 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the 3321 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
3286 * current task. 3322 * current task.
@@ -3288,7 +3324,7 @@ cifs_sb_tcon_pending_wait(void *unused)
3288 * If the superblock doesn't refer to a multiuser mount, then just return 3324 * If the superblock doesn't refer to a multiuser mount, then just return
3289 * the master tcon for the mount. 3325 * the master tcon for the mount.
3290 * 3326 *
3291 * First, search the radix tree for an existing tcon for this fsuid. If one 3327 * First, search the rbtree for an existing tcon for this fsuid. If one
3292 * exists, then check to see if it's pending construction. If it is then wait 3328 * exists, then check to see if it's pending construction. If it is then wait
3293 * for construction to complete. Once it's no longer pending, check to see if 3329 * for construction to complete. Once it's no longer pending, check to see if
3294 * it failed and either return an error or retry construction, depending on 3330 * it failed and either return an error or retry construction, depending on
@@ -3301,14 +3337,14 @@ struct tcon_link *
3301cifs_sb_tlink(struct cifs_sb_info *cifs_sb) 3337cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
3302{ 3338{
3303 int ret; 3339 int ret;
3304 unsigned long fsuid = (unsigned long) current_fsuid(); 3340 uid_t fsuid = current_fsuid();
3305 struct tcon_link *tlink, *newtlink; 3341 struct tcon_link *tlink, *newtlink;
3306 3342
3307 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 3343 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
3308 return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); 3344 return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
3309 3345
3310 spin_lock(&cifs_sb->tlink_tree_lock); 3346 spin_lock(&cifs_sb->tlink_tree_lock);
3311 tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); 3347 tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
3312 if (tlink) 3348 if (tlink)
3313 cifs_get_tlink(tlink); 3349 cifs_get_tlink(tlink);
3314 spin_unlock(&cifs_sb->tlink_tree_lock); 3350 spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3317,36 +3353,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
3317 newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL); 3353 newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
3318 if (newtlink == NULL) 3354 if (newtlink == NULL)
3319 return ERR_PTR(-ENOMEM); 3355 return ERR_PTR(-ENOMEM);
3320 newtlink->tl_index = fsuid; 3356 newtlink->tl_uid = fsuid;
3321 newtlink->tl_tcon = ERR_PTR(-EACCES); 3357 newtlink->tl_tcon = ERR_PTR(-EACCES);
3322 set_bit(TCON_LINK_PENDING, &newtlink->tl_flags); 3358 set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
3323 set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags); 3359 set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
3324 cifs_get_tlink(newtlink); 3360 cifs_get_tlink(newtlink);
3325 3361
3326 ret = radix_tree_preload(GFP_KERNEL);
3327 if (ret != 0) {
3328 kfree(newtlink);
3329 return ERR_PTR(ret);
3330 }
3331
3332 spin_lock(&cifs_sb->tlink_tree_lock); 3362 spin_lock(&cifs_sb->tlink_tree_lock);
3333 /* was one inserted after previous search? */ 3363 /* was one inserted after previous search? */
3334 tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); 3364 tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
3335 if (tlink) { 3365 if (tlink) {
3336 cifs_get_tlink(tlink); 3366 cifs_get_tlink(tlink);
3337 spin_unlock(&cifs_sb->tlink_tree_lock); 3367 spin_unlock(&cifs_sb->tlink_tree_lock);
3338 radix_tree_preload_end();
3339 kfree(newtlink); 3368 kfree(newtlink);
3340 goto wait_for_construction; 3369 goto wait_for_construction;
3341 } 3370 }
3342 ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
3343 spin_unlock(&cifs_sb->tlink_tree_lock);
3344 radix_tree_preload_end();
3345 if (ret) {
3346 kfree(newtlink);
3347 return ERR_PTR(ret);
3348 }
3349 tlink = newtlink; 3371 tlink = newtlink;
3372 tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
3373 spin_unlock(&cifs_sb->tlink_tree_lock);
3350 } else { 3374 } else {
3351wait_for_construction: 3375wait_for_construction:
3352 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, 3376 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3392,39 +3416,39 @@ cifs_prune_tlinks(struct work_struct *work)
3392{ 3416{
3393 struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, 3417 struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
3394 prune_tlinks.work); 3418 prune_tlinks.work);
3395 struct tcon_link *tlink[8]; 3419 struct rb_root *root = &cifs_sb->tlink_tree;
3396 unsigned long now = jiffies; 3420 struct rb_node *node = rb_first(root);
3397 unsigned long index = 0; 3421 struct rb_node *tmp;
3398 int i, ret; 3422 struct tcon_link *tlink;
3399 3423
3400 do { 3424 /*
3401 spin_lock(&cifs_sb->tlink_tree_lock); 3425 * Because we drop the spinlock in the loop in order to put the tlink
3402 ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, 3426 * it's not guarded against removal of links from the tree. The only
3403 (void **)tlink, index, 3427 * places that remove entries from the tree are this function and
3404 ARRAY_SIZE(tlink)); 3428 * umounts. Because this function is non-reentrant and is canceled
3405 /* increment index for next pass */ 3429 * before umount can proceed, this is safe.
3406 if (ret > 0) 3430 */
3407 index = tlink[ret - 1]->tl_index + 1; 3431 spin_lock(&cifs_sb->tlink_tree_lock);
3408 for (i = 0; i < ret; i++) { 3432 node = rb_first(root);
3409 if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) || 3433 while (node != NULL) {
3410 atomic_read(&tlink[i]->tl_count) != 0 || 3434 tmp = node;
3411 time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE, 3435 node = rb_next(tmp);
3412 now)) { 3436 tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
3413 tlink[i] = NULL; 3437
3414 continue; 3438 if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
3415 } 3439 atomic_read(&tlink->tl_count) != 0 ||
3416 cifs_get_tlink(tlink[i]); 3440 time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
3417 clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); 3441 continue;
3418 radix_tree_delete(&cifs_sb->tlink_tree,
3419 tlink[i]->tl_index);
3420 }
3421 spin_unlock(&cifs_sb->tlink_tree_lock);
3422 3442
3423 for (i = 0; i < ret; i++) { 3443 cifs_get_tlink(tlink);
3424 if (tlink[i] != NULL) 3444 clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
3425 cifs_put_tlink(tlink[i]); 3445 rb_erase(tmp, root);
3426 } 3446
3427 } while (ret != 0); 3447 spin_unlock(&cifs_sb->tlink_tree_lock);
3448 cifs_put_tlink(tlink);
3449 spin_lock(&cifs_sb->tlink_tree_lock);
3450 }
3451 spin_unlock(&cifs_sb->tlink_tree_lock);
3428 3452
3429 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, 3453 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
3430 TLINK_IDLE_EXPIRE); 3454 TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 45af003865d2..06c3e83fa387 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
131 /* BB no need to lock inode until after invalidate 131 /* BB no need to lock inode until after invalidate
132 since namei code should already have it locked? */ 132 since namei code should already have it locked? */
133 rc = filemap_write_and_wait(inode->i_mapping); 133 rc = filemap_write_and_wait(inode->i_mapping);
134 if (rc != 0) 134 mapping_set_error(inode->i_mapping, rc);
135 pCifsInode->write_behind_rc = rc;
136 } 135 }
137 cFYI(1, "invalidating remote inode since open detected it " 136 cFYI(1, "invalidating remote inode since open detected it "
138 "changed"); 137 "changed");
@@ -147,12 +146,7 @@ client_can_cache:
147 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 146 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
148 xid, NULL); 147 xid, NULL);
149 148
150 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 149 cifs_set_oplock_level(pCifsInode, oplock);
151 pCifsInode->clientCanCacheAll = true;
152 pCifsInode->clientCanCacheRead = true;
153 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
154 } else if ((oplock & 0xF) == OPLOCK_READ)
155 pCifsInode->clientCanCacheRead = true;
156 150
157 return rc; 151 return rc;
158} 152}
@@ -232,6 +226,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
232 if (pCifsFile == NULL) 226 if (pCifsFile == NULL)
233 return pCifsFile; 227 return pCifsFile;
234 228
229 pCifsFile->count = 1;
235 pCifsFile->netfid = fileHandle; 230 pCifsFile->netfid = fileHandle;
236 pCifsFile->pid = current->tgid; 231 pCifsFile->pid = current->tgid;
237 pCifsFile->uid = current_fsuid(); 232 pCifsFile->uid = current_fsuid();
@@ -242,7 +237,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
242 mutex_init(&pCifsFile->fh_mutex); 237 mutex_init(&pCifsFile->fh_mutex);
243 mutex_init(&pCifsFile->lock_mutex); 238 mutex_init(&pCifsFile->lock_mutex);
244 INIT_LIST_HEAD(&pCifsFile->llist); 239 INIT_LIST_HEAD(&pCifsFile->llist);
245 atomic_set(&pCifsFile->count, 1);
246 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); 240 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
247 241
248 spin_lock(&cifs_file_list_lock); 242 spin_lock(&cifs_file_list_lock);
@@ -254,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
254 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); 248 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
255 spin_unlock(&cifs_file_list_lock); 249 spin_unlock(&cifs_file_list_lock);
256 250
257 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 251 cifs_set_oplock_level(pCifsInode, oplock);
258 pCifsInode->clientCanCacheAll = true;
259 pCifsInode->clientCanCacheRead = true;
260 cFYI(1, "Exclusive Oplock inode %p", inode);
261 } else if ((oplock & 0xF) == OPLOCK_READ)
262 pCifsInode->clientCanCacheRead = true;
263 252
264 file->private_data = pCifsFile; 253 file->private_data = pCifsFile;
265 return pCifsFile; 254 return pCifsFile;
@@ -267,16 +256,18 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
267 256
268/* 257/*
269 * Release a reference on the file private data. This may involve closing 258 * Release a reference on the file private data. This may involve closing
270 * the filehandle out on the server. 259 * the filehandle out on the server. Must be called without holding
260 * cifs_file_list_lock.
271 */ 261 */
272void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 262void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
273{ 263{
264 struct inode *inode = cifs_file->dentry->d_inode;
274 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); 265 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
275 struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode); 266 struct cifsInodeInfo *cifsi = CIFS_I(inode);
276 struct cifsLockInfo *li, *tmp; 267 struct cifsLockInfo *li, *tmp;
277 268
278 spin_lock(&cifs_file_list_lock); 269 spin_lock(&cifs_file_list_lock);
279 if (!atomic_dec_and_test(&cifs_file->count)) { 270 if (--cifs_file->count > 0) {
280 spin_unlock(&cifs_file_list_lock); 271 spin_unlock(&cifs_file_list_lock);
281 return; 272 return;
282 } 273 }
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
288 if (list_empty(&cifsi->openFileList)) { 279 if (list_empty(&cifsi->openFileList)) {
289 cFYI(1, "closing last open instance for inode %p", 280 cFYI(1, "closing last open instance for inode %p",
290 cifs_file->dentry->d_inode); 281 cifs_file->dentry->d_inode);
291 cifsi->clientCanCacheRead = false; 282 cifs_set_oplock_level(cifsi, 0);
292 cifsi->clientCanCacheAll = false;
293 } 283 }
294 spin_unlock(&cifs_file_list_lock); 284 spin_unlock(&cifs_file_list_lock);
295 285
@@ -605,11 +595,8 @@ reopen_success:
605 595
606 if (can_flush) { 596 if (can_flush) {
607 rc = filemap_write_and_wait(inode->i_mapping); 597 rc = filemap_write_and_wait(inode->i_mapping);
608 if (rc != 0) 598 mapping_set_error(inode->i_mapping, rc);
609 CIFS_I(inode)->write_behind_rc = rc;
610 599
611 pCifsInode->clientCanCacheAll = false;
612 pCifsInode->clientCanCacheRead = false;
613 if (tcon->unix_ext) 600 if (tcon->unix_ext)
614 rc = cifs_get_inode_info_unix(&inode, 601 rc = cifs_get_inode_info_unix(&inode,
615 full_path, inode->i_sb, xid); 602 full_path, inode->i_sb, xid);
@@ -623,18 +610,9 @@ reopen_success:
623 invalidate the current end of file on the server 610 invalidate the current end of file on the server
624 we can not go to the server to get the new inod 611 we can not go to the server to get the new inod
625 info */ 612 info */
626 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 613
627 pCifsInode->clientCanCacheAll = true; 614 cifs_set_oplock_level(pCifsInode, oplock);
628 pCifsInode->clientCanCacheRead = true; 615
629 cFYI(1, "Exclusive Oplock granted on inode %p",
630 pCifsFile->dentry->d_inode);
631 } else if ((oplock & 0xF) == OPLOCK_READ) {
632 pCifsInode->clientCanCacheRead = true;
633 pCifsInode->clientCanCacheAll = false;
634 } else {
635 pCifsInode->clientCanCacheRead = false;
636 pCifsInode->clientCanCacheAll = false;
637 }
638 cifs_relock_file(pCifsFile); 616 cifs_relock_file(pCifsFile);
639 617
640reopen_error_exit: 618reopen_error_exit:
@@ -776,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
776 754
777 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 755 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
778 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); 756 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
779
780 if (file->private_data == NULL) {
781 rc = -EBADF;
782 FreeXid(xid);
783 return rc;
784 }
785 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 757 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
786 758
787 if ((tcon->ses->capabilities & CAP_UNIX) && 759 if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -957,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
957ssize_t cifs_user_write(struct file *file, const char __user *write_data, 929ssize_t cifs_user_write(struct file *file, const char __user *write_data,
958 size_t write_size, loff_t *poffset) 930 size_t write_size, loff_t *poffset)
959{ 931{
932 struct inode *inode = file->f_path.dentry->d_inode;
960 int rc = 0; 933 int rc = 0;
961 unsigned int bytes_written = 0; 934 unsigned int bytes_written = 0;
962 unsigned int total_written; 935 unsigned int total_written;
@@ -964,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
964 struct cifsTconInfo *pTcon; 937 struct cifsTconInfo *pTcon;
965 int xid, long_op; 938 int xid, long_op;
966 struct cifsFileInfo *open_file; 939 struct cifsFileInfo *open_file;
967 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); 940 struct cifsInodeInfo *cifsi = CIFS_I(inode);
968 941
969 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 942 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
970 943
@@ -1030,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1030 1003
1031 cifs_stats_bytes_written(pTcon, total_written); 1004 cifs_stats_bytes_written(pTcon, total_written);
1032 1005
1033 /* since the write may have blocked check these pointers again */
1034 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1035 struct inode *inode = file->f_path.dentry->d_inode;
1036/* Do not update local mtime - server will set its actual value on write 1006/* Do not update local mtime - server will set its actual value on write
1037 * inode->i_ctime = inode->i_mtime = 1007 * inode->i_ctime = inode->i_mtime =
1038 * current_fs_time(inode->i_sb);*/ 1008 * current_fs_time(inode->i_sb);*/
1039 if (total_written > 0) { 1009 if (total_written > 0) {
1040 spin_lock(&inode->i_lock); 1010 spin_lock(&inode->i_lock);
1041 if (*poffset > file->f_path.dentry->d_inode->i_size) 1011 if (*poffset > inode->i_size)
1042 i_size_write(file->f_path.dentry->d_inode, 1012 i_size_write(inode, *poffset);
1043 *poffset); 1013 spin_unlock(&inode->i_lock);
1044 spin_unlock(&inode->i_lock);
1045 }
1046 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1047 } 1014 }
1015 mark_inode_dirty_sync(inode);
1016
1048 FreeXid(xid); 1017 FreeXid(xid);
1049 return total_written; 1018 return total_written;
1050} 1019}
@@ -1179,7 +1148,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1179 bool fsuid_only) 1148 bool fsuid_only)
1180{ 1149{
1181 struct cifsFileInfo *open_file; 1150 struct cifsFileInfo *open_file;
1182 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); 1151 struct cifs_sb_info *cifs_sb;
1183 bool any_available = false; 1152 bool any_available = false;
1184 int rc; 1153 int rc;
1185 1154
@@ -1193,6 +1162,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1193 return NULL; 1162 return NULL;
1194 } 1163 }
1195 1164
1165 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1166
1196 /* only filter by fsuid on multiuser mounts */ 1167 /* only filter by fsuid on multiuser mounts */
1197 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 1168 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1198 fsuid_only = false; 1169 fsuid_only = false;
@@ -1353,6 +1324,7 @@ static int cifs_writepages(struct address_space *mapping,
1353 if (!experimEnabled && tcon->ses->server->secMode & 1324 if (!experimEnabled && tcon->ses->server->secMode &
1354 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 1325 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1355 cifsFileInfo_put(open_file); 1326 cifsFileInfo_put(open_file);
1327 kfree(iov);
1356 return generic_writepages(mapping, wbc); 1328 return generic_writepages(mapping, wbc);
1357 } 1329 }
1358 cifsFileInfo_put(open_file); 1330 cifsFileInfo_put(open_file);
@@ -1478,12 +1450,7 @@ retry:
1478 if (rc || bytes_written < bytes_to_write) { 1450 if (rc || bytes_written < bytes_to_write) {
1479 cERROR(1, "Write2 ret %d, wrote %d", 1451 cERROR(1, "Write2 ret %d, wrote %d",
1480 rc, bytes_written); 1452 rc, bytes_written);
1481 /* BB what if continued retry is 1453 mapping_set_error(mapping, rc);
1482 requested via mount flags? */
1483 if (rc == -ENOSPC)
1484 set_bit(AS_ENOSPC, &mapping->flags);
1485 else
1486 set_bit(AS_EIO, &mapping->flags);
1487 } else { 1454 } else {
1488 cifs_stats_bytes_written(tcon, bytes_written); 1455 cifs_stats_bytes_written(tcon, bytes_written);
1489 } 1456 }
@@ -1628,11 +1595,10 @@ int cifs_fsync(struct file *file, int datasync)
1628 1595
1629 rc = filemap_write_and_wait(inode->i_mapping); 1596 rc = filemap_write_and_wait(inode->i_mapping);
1630 if (rc == 0) { 1597 if (rc == 0) {
1631 rc = CIFS_I(inode)->write_behind_rc; 1598 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1632 CIFS_I(inode)->write_behind_rc = 0; 1599
1633 tcon = tlink_tcon(smbfile->tlink); 1600 tcon = tlink_tcon(smbfile->tlink);
1634 if (!rc && tcon && smbfile && 1601 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1635 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1636 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1602 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1637 } 1603 }
1638 1604
@@ -1677,21 +1643,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
1677 struct inode *inode = file->f_path.dentry->d_inode; 1643 struct inode *inode = file->f_path.dentry->d_inode;
1678 int rc = 0; 1644 int rc = 0;
1679 1645
1680 /* Rather than do the steps manually: 1646 if (file->f_mode & FMODE_WRITE)
1681 lock the inode for writing 1647 rc = filemap_write_and_wait(inode->i_mapping);
1682 loop through pages looking for write behind data (dirty pages)
1683 coalesce into contiguous 16K (or smaller) chunks to write to server
1684 send to server (prefer in parallel)
1685 deal with writebehind errors
1686 unlock inode for writing
1687 filemapfdatawrite appears easier for the time being */
1688
1689 rc = filemap_fdatawrite(inode->i_mapping);
1690 /* reset wb rc if we were able to write out dirty pages */
1691 if (!rc) {
1692 rc = CIFS_I(inode)->write_behind_rc;
1693 CIFS_I(inode)->write_behind_rc = 0;
1694 }
1695 1648
1696 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc); 1649 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1697 1650
@@ -2270,7 +2223,7 @@ void cifs_oplock_break(struct work_struct *work)
2270 oplock_break); 2223 oplock_break);
2271 struct inode *inode = cfile->dentry->d_inode; 2224 struct inode *inode = cfile->dentry->d_inode;
2272 struct cifsInodeInfo *cinode = CIFS_I(inode); 2225 struct cifsInodeInfo *cinode = CIFS_I(inode);
2273 int rc, waitrc = 0; 2226 int rc = 0;
2274 2227
2275 if (inode && S_ISREG(inode->i_mode)) { 2228 if (inode && S_ISREG(inode->i_mode)) {
2276 if (cinode->clientCanCacheRead) 2229 if (cinode->clientCanCacheRead)
@@ -2279,13 +2232,10 @@ void cifs_oplock_break(struct work_struct *work)
2279 break_lease(inode, O_WRONLY); 2232 break_lease(inode, O_WRONLY);
2280 rc = filemap_fdatawrite(inode->i_mapping); 2233 rc = filemap_fdatawrite(inode->i_mapping);
2281 if (cinode->clientCanCacheRead == 0) { 2234 if (cinode->clientCanCacheRead == 0) {
2282 waitrc = filemap_fdatawait(inode->i_mapping); 2235 rc = filemap_fdatawait(inode->i_mapping);
2236 mapping_set_error(inode->i_mapping, rc);
2283 invalidate_remote_inode(inode); 2237 invalidate_remote_inode(inode);
2284 } 2238 }
2285 if (!rc)
2286 rc = waitrc;
2287 if (rc)
2288 cinode->write_behind_rc = rc;
2289 cFYI(1, "Oplock flush inode %p rc %d", inode, rc); 2239 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2290 } 2240 }
2291 2241
@@ -2304,7 +2254,7 @@ void cifs_oplock_break(struct work_struct *work)
2304 /* 2254 /*
2305 * We might have kicked in before is_valid_oplock_break() 2255 * We might have kicked in before is_valid_oplock_break()
2306 * finished grabbing reference for us. Make sure it's done by 2256 * finished grabbing reference for us. Make sure it's done by
2307 * waiting for GlobalSMSSeslock. 2257 * waiting for cifs_file_list_lock.
2308 */ 2258 */
2309 spin_lock(&cifs_file_list_lock); 2259 spin_lock(&cifs_file_list_lock);
2310 spin_unlock(&cifs_file_list_lock); 2260 spin_unlock(&cifs_file_list_lock);
@@ -2312,6 +2262,7 @@ void cifs_oplock_break(struct work_struct *work)
2312 cifs_oplock_break_put(cfile); 2262 cifs_oplock_break_put(cfile);
2313} 2263}
2314 2264
2265/* must be called while holding cifs_file_list_lock */
2315void cifs_oplock_break_get(struct cifsFileInfo *cfile) 2266void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2316{ 2267{
2317 cifs_sb_active(cfile->dentry->d_sb); 2268 cifs_sb_active(cfile->dentry->d_sb);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698a..ef3a55bf86b6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1682,8 +1682,7 @@ cifs_invalidate_mapping(struct inode *inode)
1682 /* write back any cached data */ 1682 /* write back any cached data */
1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1684 rc = filemap_write_and_wait(inode->i_mapping); 1684 rc = filemap_write_and_wait(inode->i_mapping);
1685 if (rc) 1685 mapping_set_error(inode->i_mapping, rc);
1686 cifs_i->write_behind_rc = rc;
1687 } 1686 }
1688 invalidate_remote_inode(inode); 1687 invalidate_remote_inode(inode);
1689 cifs_fscache_reset_inode_cookie(inode); 1688 cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1942,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1943 * the flush returns error? 1942 * the flush returns error?
1944 */ 1943 */
1945 rc = filemap_write_and_wait(inode->i_mapping); 1944 rc = filemap_write_and_wait(inode->i_mapping);
1946 if (rc != 0) { 1945 mapping_set_error(inode->i_mapping, rc);
1947 cifsInode->write_behind_rc = rc; 1946 rc = 0;
1948 rc = 0;
1949 }
1950 1947
1951 if (attrs->ia_valid & ATTR_SIZE) { 1948 if (attrs->ia_valid & ATTR_SIZE) {
1952 rc = cifs_set_file_size(inode, attrs, xid, full_path); 1949 rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2084,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087 * the flush returns error? 2084 * the flush returns error?
2088 */ 2085 */
2089 rc = filemap_write_and_wait(inode->i_mapping); 2086 rc = filemap_write_and_wait(inode->i_mapping);
2090 if (rc != 0) { 2087 mapping_set_error(inode->i_mapping, rc);
2091 cifsInode->write_behind_rc = rc; 2088 rc = 0;
2092 rc = 0;
2093 }
2094 2089
2095 if (attrs->ia_valid & ATTR_SIZE) { 2090 if (attrs->ia_valid & ATTR_SIZE) {
2096 rc = cifs_set_file_size(inode, attrs, xid, full_path); 2091 rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2182,7 +2177,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2182 2177
2183 setattr_copy(inode, attrs); 2178 setattr_copy(inode, attrs);
2184 mark_inode_dirty(inode); 2179 mark_inode_dirty(inode);
2185 return 0;
2186 2180
2187cifs_setattr_exit: 2181cifs_setattr_exit:
2188 kfree(full_path); 2182 kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f342..0c98672d0122 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
38 struct cifs_sb_info *cifs_sb; 38 struct cifs_sb_info *cifs_sb;
39#ifdef CONFIG_CIFS_POSIX 39#ifdef CONFIG_CIFS_POSIX
40 struct cifsFileInfo *pSMBFile = filep->private_data; 40 struct cifsFileInfo *pSMBFile = filep->private_data;
41 struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink); 41 struct cifsTconInfo *tcon;
42 __u64 ExtAttrBits = 0; 42 __u64 ExtAttrBits = 0;
43 __u64 ExtAttrMask = 0; 43 __u64 ExtAttrMask = 0;
44 __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); 44 __u64 caps;
45#endif /* CONFIG_CIFS_POSIX */ 45#endif /* CONFIG_CIFS_POSIX */
46 46
47 xid = GetXid(); 47 xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
62 break; 62 break;
63#ifdef CONFIG_CIFS_POSIX 63#ifdef CONFIG_CIFS_POSIX
64 case FS_IOC_GETFLAGS: 64 case FS_IOC_GETFLAGS:
65 if (pSMBFile == NULL)
66 break;
67 tcon = tlink_tcon(pSMBFile->tlink);
68 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
65 if (CIFS_UNIX_EXTATTR_CAP & caps) { 69 if (CIFS_UNIX_EXTATTR_CAP & caps) {
66 if (pSMBFile == NULL)
67 break;
68 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, 70 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
69 &ExtAttrBits, &ExtAttrMask); 71 &ExtAttrBits, &ExtAttrMask);
70 if (rc == 0) 72 if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
75 break; 77 break;
76 78
77 case FS_IOC_SETFLAGS: 79 case FS_IOC_SETFLAGS:
80 if (pSMBFile == NULL)
81 break;
82 tcon = tlink_tcon(pSMBFile->tlink);
83 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
78 if (CIFS_UNIX_EXTATTR_CAP & caps) { 84 if (CIFS_UNIX_EXTATTR_CAP & caps) {
79 if (get_user(ExtAttrBits, (int __user *)arg)) { 85 if (get_user(ExtAttrBits, (int __user *)arg)) {
80 rc = -EFAULT; 86 rc = -EFAULT;
81 break; 87 break;
82 } 88 }
83 if (pSMBFile == NULL)
84 break;
85 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 89 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
86 extAttrBits, &ExtAttrMask);*/ 90 extAttrBits, &ExtAttrMask);*/
87 } 91 }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a6803..43f10281bc19 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,15 +569,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
569 569
570 cFYI(1, "file id match, oplock break"); 570 cFYI(1, "file id match, oplock break");
571 pCifsInode = CIFS_I(netfile->dentry->d_inode); 571 pCifsInode = CIFS_I(netfile->dentry->d_inode);
572 pCifsInode->clientCanCacheAll = false;
573 if (pSMB->OplockLevel == 0)
574 pCifsInode->clientCanCacheRead = false;
575 572
573 cifs_set_oplock_level(pCifsInode,
574 pSMB->OplockLevel);
576 /* 575 /*
577 * cifs_oplock_break_put() can't be called 576 * cifs_oplock_break_put() can't be called
578 * from here. Get reference after queueing 577 * from here. Get reference after queueing
579 * succeeded. cifs_oplock_break() will 578 * succeeded. cifs_oplock_break() will
580 * synchronize using GlobalSMSSeslock. 579 * synchronize using cifs_file_list_lock.
581 */ 580 */
582 if (queue_work(system_nrt_wq, 581 if (queue_work(system_nrt_wq,
583 &netfile->oplock_break)) 582 &netfile->oplock_break))
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
722 cifs_sb_master_tcon(cifs_sb)->treeName); 721 cifs_sb_master_tcon(cifs_sb)->treeName);
723 } 722 }
724} 723}
724
725void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
726{
727 oplock &= 0xF;
728
729 if (oplock == OPLOCK_EXCLUSIVE) {
730 cinode->clientCanCacheAll = true;
731 cinode->clientCanCacheRead = true;
732 cFYI(1, "Exclusive Oplock granted on inode %p",
733 &cinode->vfs_inode);
734 } else if (oplock == OPLOCK_READ) {
735 cinode->clientCanCacheAll = false;
736 cinode->clientCanCacheRead = true;
737 cFYI(1, "Level II Oplock granted on inode %p",
738 &cinode->vfs_inode);
739 } else {
740 cinode->clientCanCacheAll = false;
741 cinode->clientCanCacheRead = false;
742 }
743}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd96592..7b01d3f6eed6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include "cifs_spnego.h" 33#include "cifs_spnego.h"
34 34
35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
36 unsigned char *p24);
37
38/* 35/*
39 * Checks if this is the first smb session to be reconnected after 36 * Checks if this is the first smb session to be reconnected after
40 * the socket has been reestablished (so we know whether to use vc 0). 37 * the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
402 return -EINVAL; 399 return -EINVAL;
403 } 400 }
404 401
405 memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); 402 memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
406 /* BB we could decode pblob->NegotiateFlags; some may be useful */ 403 /* BB we could decode pblob->NegotiateFlags; some may be useful */
407 /* In particular we can examine sign flags */ 404 /* In particular we can examine sign flags */
408 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
409 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 406 we must set the MIC field of the AUTHENTICATE_MESSAGE */
410 407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
411 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
412 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
413 ses->tilen = tilen; 410 if (tilen) {
414 if (ses->tilen) { 411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
415 ses->tiblob = kmalloc(tilen, GFP_KERNEL); 412 if (!ses->auth_key.response) {
416 if (!ses->tiblob) {
417 cERROR(1, "Challenge target info allocation failure"); 413 cERROR(1, "Challenge target info allocation failure");
418 ses->tilen = 0;
419 return -ENOMEM; 414 return -ENOMEM;
420 } 415 }
421 memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen); 416 memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
417 ses->auth_key.len = tilen;
422 } 418 }
423 419
424 return 0; 420 return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
443 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
444 NTLMSSP_NEGOTIATE_NTLM; 440 NTLMSSP_NEGOTIATE_NTLM;
445 if (ses->server->secMode & 441 if (ses->server->secMode &
446 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
447 flags |= NTLMSSP_NEGOTIATE_SIGN; 443 flags |= NTLMSSP_NEGOTIATE_SIGN;
448 if (ses->server->secMode & SECMODE_SIGN_REQUIRED) 444 if (!ses->server->session_estab)
449 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 445 flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
446 NTLMSSP_NEGOTIATE_EXTENDED_SEC;
447 }
450 448
451 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 449 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
452 450
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
469 const struct nls_table *nls_cp) 467 const struct nls_table *nls_cp)
470{ 468{
471 int rc; 469 int rc;
472 unsigned int size;
473 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 470 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
474 __u32 flags; 471 __u32 flags;
475 unsigned char *tmp; 472 unsigned char *tmp;
476 struct ntlmv2_resp ntlmv2_response = {};
477 473
478 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 474 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
479 sec_blob->MessageType = NtLmAuthenticate; 475 sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
497 sec_blob->LmChallengeResponse.MaximumLength = 0; 493 sec_blob->LmChallengeResponse.MaximumLength = 0;
498 494
499 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); 495 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
500 rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); 496 rc = setup_ntlmv2_rsp(ses, nls_cp);
501 if (rc) { 497 if (rc) {
502 cERROR(1, "Error %d during NTLMSSP authentication", rc); 498 cERROR(1, "Error %d during NTLMSSP authentication", rc);
503 goto setup_ntlmv2_ret; 499 goto setup_ntlmv2_ret;
504 } 500 }
505 size = sizeof(struct ntlmv2_resp); 501 memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
506 memcpy(tmp, (char *)&ntlmv2_response, size); 502 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
507 tmp += size; 503 tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
508 if (ses->tilen > 0) {
509 memcpy(tmp, ses->tiblob, ses->tilen);
510 tmp += ses->tilen;
511 }
512 504
513 sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen); 505 sec_blob->NtChallengeResponse.Length =
506 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
514 sec_blob->NtChallengeResponse.MaximumLength = 507 sec_blob->NtChallengeResponse.MaximumLength =
515 cpu_to_le16(size + ses->tilen); 508 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
516 kfree(ses->tiblob);
517 ses->tiblob = NULL;
518 ses->tilen = 0;
519 509
520 if (ses->domainName == NULL) { 510 if (ses->domainName == NULL) {
521 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 511 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
554 sec_blob->WorkstationName.MaximumLength = 0; 544 sec_blob->WorkstationName.MaximumLength = 0;
555 tmp += 2; 545 tmp += 2;
556 546
557 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); 547 if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
558 sec_blob->SessionKey.Length = 0; 548 !calc_seckey(ses)) {
559 sec_blob->SessionKey.MaximumLength = 0; 549 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
550 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
551 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
552 sec_blob->SessionKey.MaximumLength =
553 cpu_to_le16(CIFS_CPHTXT_SIZE);
554 tmp += CIFS_CPHTXT_SIZE;
555 } else {
556 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
557 sec_blob->SessionKey.Length = 0;
558 sec_blob->SessionKey.MaximumLength = 0;
559 }
560 560
561setup_ntlmv2_ret: 561setup_ntlmv2_ret:
562 *buflen = tmp - pbuffer; 562 *buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
600 return -EINVAL; 600 return -EINVAL;
601 601
602 type = ses->server->secType; 602 type = ses->server->secType;
603
604 cFYI(1, "sess setup type %d", type); 603 cFYI(1, "sess setup type %d", type);
604 if (type == RawNTLMSSP) {
605 /* if memory allocation is successful, caller of this function
606 * frees it.
607 */
608 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
609 if (!ses->ntlmssp)
610 return -ENOMEM;
611 }
612
605ssetup_ntlmssp_authenticate: 613ssetup_ntlmssp_authenticate:
606 if (phase == NtLmChallenge) 614 if (phase == NtLmChallenge)
607 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 615 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
666 /* no capabilities flags in old lanman negotiation */ 674 /* no capabilities flags in old lanman negotiation */
667 675
668 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 676 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
669 /* BB calculate hash with password */
670 /* and copy into bcc */
671 677
672 calc_lanman_hash(ses->password, ses->cryptKey, 678 /* Calculate hash with password and copy into bcc_ptr.
679 * Encryption Key (stored as in cryptkey) gets used if the
680 * security mode bit in Negottiate Protocol response states
681 * to use challenge/response method (i.e. Password bit is 1).
682 */
683
684 calc_lanman_hash(ses->password, ses->server->cryptkey,
673 ses->server->secMode & SECMODE_PW_ENCRYPT ? 685 ses->server->secMode & SECMODE_PW_ENCRYPT ?
674 true : false, lnm_session_key); 686 true : false, lnm_session_key);
675 687
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
687 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 699 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
688#endif 700#endif
689 } else if (type == NTLM) { 701 } else if (type == NTLM) {
690 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
691
692 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 702 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
693 pSMB->req_no_secext.CaseInsensitivePasswordLength = 703 pSMB->req_no_secext.CaseInsensitivePasswordLength =
694 cpu_to_le16(CIFS_SESS_KEY_SIZE); 704 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
695 pSMB->req_no_secext.CaseSensitivePasswordLength = 705 pSMB->req_no_secext.CaseSensitivePasswordLength =
696 cpu_to_le16(CIFS_SESS_KEY_SIZE); 706 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
707
708 /* calculate ntlm response and session key */
709 rc = setup_ntlm_response(ses);
710 if (rc) {
711 cERROR(1, "Error %d during NTLM authentication", rc);
712 goto ssetup_exit;
713 }
697 714
698 /* calculate session key */ 715 /* copy ntlm response */
699 SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key); 716 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
717 CIFS_AUTH_RESP_SIZE);
718 bcc_ptr += CIFS_AUTH_RESP_SIZE;
719 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
720 CIFS_AUTH_RESP_SIZE);
721 bcc_ptr += CIFS_AUTH_RESP_SIZE;
700 722
701 cifs_calculate_session_key(&ses->auth_key,
702 ntlm_session_key, ses->password);
703 /* copy session key */
704 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
705 bcc_ptr += CIFS_SESS_KEY_SIZE;
706 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
707 bcc_ptr += CIFS_SESS_KEY_SIZE;
708 if (ses->capabilities & CAP_UNICODE) { 723 if (ses->capabilities & CAP_UNICODE) {
709 /* unicode strings must be word aligned */ 724 /* unicode strings must be word aligned */
710 if (iov[0].iov_len % 2) { 725 if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
715 } else 730 } else
716 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 731 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
717 } else if (type == NTLMv2) { 732 } else if (type == NTLMv2) {
718 char *v2_sess_key =
719 kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
720
721 /* BB FIXME change all users of v2_sess_key to
722 struct ntlmv2_resp */
723
724 if (v2_sess_key == NULL) {
725 rc = -ENOMEM;
726 goto ssetup_exit;
727 }
728
729 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 733 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
730 734
731 /* LM2 password would be here if we supported it */ 735 /* LM2 password would be here if we supported it */
732 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; 736 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
733 /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
734 737
735 /* calculate session key */ 738 /* calculate ntlmv2 response and session key */
736 rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 739 rc = setup_ntlmv2_rsp(ses, nls_cp);
737 if (rc) { 740 if (rc) {
738 cERROR(1, "Error %d during NTLMv2 authentication", rc); 741 cERROR(1, "Error %d during NTLMv2 authentication", rc);
739 kfree(v2_sess_key);
740 goto ssetup_exit; 742 goto ssetup_exit;
741 } 743 }
742 memcpy(bcc_ptr, (char *)v2_sess_key, 744 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
743 sizeof(struct ntlmv2_resp)); 745 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
744 bcc_ptr += sizeof(struct ntlmv2_resp); 746 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
745 kfree(v2_sess_key); 747
746 /* set case sensitive password length after tilen may get 748 /* set case sensitive password length after tilen may get
747 * assigned, tilen is 0 otherwise. 749 * assigned, tilen is 0 otherwise.
748 */ 750 */
749 pSMB->req_no_secext.CaseSensitivePasswordLength = 751 pSMB->req_no_secext.CaseSensitivePasswordLength =
750 cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen); 752 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
751 if (ses->tilen > 0) {
752 memcpy(bcc_ptr, ses->tiblob, ses->tilen);
753 bcc_ptr += ses->tilen;
754 /* we never did allocate ses->domainName to free */
755 kfree(ses->tiblob);
756 ses->tiblob = NULL;
757 ses->tilen = 0;
758 }
759 753
760 if (ses->capabilities & CAP_UNICODE) { 754 if (ses->capabilities & CAP_UNICODE) {
761 if (iov[0].iov_len % 2) { 755 if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
768 } else if (type == Kerberos) { 762 } else if (type == Kerberos) {
769#ifdef CONFIG_CIFS_UPCALL 763#ifdef CONFIG_CIFS_UPCALL
770 struct cifs_spnego_msg *msg; 764 struct cifs_spnego_msg *msg;
765
771 spnego_key = cifs_get_spnego_key(ses); 766 spnego_key = cifs_get_spnego_key(ses);
772 if (IS_ERR(spnego_key)) { 767 if (IS_ERR(spnego_key)) {
773 rc = PTR_ERR(spnego_key); 768 rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
785 rc = -EKEYREJECTED; 780 rc = -EKEYREJECTED;
786 goto ssetup_exit; 781 goto ssetup_exit;
787 } 782 }
788 /* bail out if key is too long */ 783
789 if (msg->sesskey_len > 784 ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
790 sizeof(ses->auth_key.data.krb5)) { 785 if (!ses->auth_key.response) {
791 cERROR(1, "Kerberos signing key too long (%u bytes)", 786 cERROR(1, "Kerberos can't allocate (%u bytes) memory",
792 msg->sesskey_len); 787 msg->sesskey_len);
793 rc = -EOVERFLOW; 788 rc = -ENOMEM;
794 goto ssetup_exit; 789 goto ssetup_exit;
795 } 790 }
791 memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
796 ses->auth_key.len = msg->sesskey_len; 792 ses->auth_key.len = msg->sesskey_len;
797 memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len); 793
798 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 794 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
799 capabilities |= CAP_EXTENDED_SECURITY; 795 capabilities |= CAP_EXTENDED_SECURITY;
800 pSMB->req.Capabilities = cpu_to_le32(capabilities); 796 pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
897 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); 893 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
898 /* SMB request buf freed in SendReceive2 */ 894 /* SMB request buf freed in SendReceive2 */
899 895
900 cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
901
902 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 896 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
903 smb_buf = (struct smb_hdr *)iov[0].iov_base; 897 smb_buf = (struct smb_hdr *)iov[0].iov_base;
904 898
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb4..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
544 SECMODE_SIGN_ENABLED))) { 544 SECMODE_SIGN_ENABLED))) {
545 rc = cifs_verify_signature(midQ->resp_buf, 545 rc = cifs_verify_signature(midQ->resp_buf,
546 &ses->server->session_key, 546 ses->server,
547 midQ->sequence_number+1); 547 midQ->sequence_number+1);
548 if (rc) { 548 if (rc) {
549 cERROR(1, "Unexpected SMB signature"); 549 cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
732 SECMODE_SIGN_ENABLED))) { 732 SECMODE_SIGN_ENABLED))) {
733 rc = cifs_verify_signature(out_buf, 733 rc = cifs_verify_signature(out_buf,
734 &ses->server->session_key, 734 ses->server,
735 midQ->sequence_number+1); 735 midQ->sequence_number+1);
736 if (rc) { 736 if (rc) {
737 cERROR(1, "Unexpected SMB signature"); 737 cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
982 SECMODE_SIGN_ENABLED))) { 982 SECMODE_SIGN_ENABLED))) {
983 rc = cifs_verify_signature(out_buf, 983 rc = cifs_verify_signature(out_buf,
984 &ses->server->session_key, 984 ses->server,
985 midQ->sequence_number+1); 985 midQ->sequence_number+1);
986 if (rc) { 986 if (rc) {
987 cERROR(1, "Unexpected SMB signature"); 987 cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96ca348..5ea57c8c7f97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
306 306
307/* init_coda: used by filesystems.c to register coda */ 307/* init_coda: used by filesystems.c to register coda */
308 308
309static int coda_get_sb(struct file_system_type *fs_type, 309static struct dentry *coda_mount(struct file_system_type *fs_type,
310 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 310 int flags, const char *dev_name, void *data)
311{ 311{
312 return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); 312 return mount_nodev(fs_type, flags, data, coda_fill_super);
313} 313}
314 314
315struct file_system_type coda_fs_type = { 315struct file_system_type coda_fs_type = {
316 .owner = THIS_MODULE, 316 .owner = THIS_MODULE,
317 .name = "coda", 317 .name = "coda",
318 .get_sb = coda_get_sb, 318 .mount = coda_mount,
319 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
320 .fs_flags = FS_BINARY_MOUNTDATA, 320 .fs_flags = FS_BINARY_MOUNTDATA,
321}; 321};
diff --git a/fs/compat.c b/fs/compat.c
index 52cfeb61da77..c580c322fa6b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -49,6 +49,7 @@
49#include <linux/eventpoll.h> 49#include <linux/eventpoll.h>
50#include <linux/fs_struct.h> 50#include <linux/fs_struct.h>
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/pagemap.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/mmu_context.h> 55#include <asm/mmu_context.h>
@@ -606,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
606 /* 607 /*
607 * Single unix specification: 608 * Single unix specification:
608 * We should -EINVAL if an element length is not >= 0 and fitting an 609 * We should -EINVAL if an element length is not >= 0 and fitting an
609 * ssize_t. The total length is fitting an ssize_t 610 * ssize_t.
610 * 611 *
611 * Be careful here because iov_len is a size_t not an ssize_t 612 * In Linux, the total length is limited to MAX_RW_COUNT, there is
613 * no overflow possibility.
612 */ 614 */
613 tot_len = 0; 615 tot_len = 0;
614 ret = -EINVAL; 616 ret = -EINVAL;
615 for (seg = 0; seg < nr_segs; seg++) { 617 for (seg = 0; seg < nr_segs; seg++) {
616 compat_ssize_t tmp = tot_len;
617 compat_uptr_t buf; 618 compat_uptr_t buf;
618 compat_ssize_t len; 619 compat_ssize_t len;
619 620
@@ -624,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
624 } 625 }
625 if (len < 0) /* size_t not fitting in compat_ssize_t .. */ 626 if (len < 0) /* size_t not fitting in compat_ssize_t .. */
626 goto out; 627 goto out;
627 tot_len += len;
628 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
629 goto out;
630 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { 628 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
631 ret = -EFAULT; 629 ret = -EFAULT;
632 goto out; 630 goto out;
633 } 631 }
632 if (len > MAX_RW_COUNT - tot_len)
633 len = MAX_RW_COUNT - tot_len;
634 tot_len += len;
634 iov->iov_base = compat_ptr(buf); 635 iov->iov_base = compat_ptr(buf);
635 iov->iov_len = (compat_size_t) len; 636 iov->iov_len = (compat_size_t) len;
636 uvector++; 637 uvector++;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2d..7d3607febe1c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
104 return 0; 104 return 0;
105} 105}
106 106
107static int configfs_get_sb(struct file_system_type *fs_type, 107static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 108 int flags, const char *dev_name, void *data)
109{ 109{
110 return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); 110 return mount_single(fs_type, flags, data, configfs_fill_super);
111} 111}
112 112
113static struct file_system_type configfs_fs_type = { 113static struct file_system_type configfs_fs_type = {
114 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
115 .name = "configfs", 115 .name = "configfs",
116 .get_sb = configfs_get_sb, 116 .mount = configfs_do_mount,
117 .kill_sb = kill_litter_super, 117 .kill_sb = kill_litter_super,
118}; 118};
119 119
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d33..32fd5fe9ca0e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
533 .statfs = cramfs_statfs, 533 .statfs = cramfs_statfs,
534}; 534};
535 535
536static int cramfs_get_sb(struct file_system_type *fs_type, 536static struct dentry *cramfs_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 537 int flags, const char *dev_name, void *data)
538{ 538{
539 return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, 539 return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
540 mnt);
541} 540}
542 541
543static struct file_system_type cramfs_fs_type = { 542static struct file_system_type cramfs_fs_type = {
544 .owner = THIS_MODULE, 543 .owner = THIS_MODULE,
545 .name = "cramfs", 544 .name = "cramfs",
546 .get_sb = cramfs_get_sb, 545 .mount = cramfs_mount,
547 .kill_sb = kill_block_super, 546 .kill_sb = kill_block_super,
548 .fs_flags = FS_REQUIRES_DEV, 547 .fs_flags = FS_REQUIRES_DEV,
549}; 548};
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a4ed8380e98a..37a8ca7c1222 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
135 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 135 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
136} 136}
137 137
138static int debug_get_sb(struct file_system_type *fs_type, 138static struct dentry *debug_mount(struct file_system_type *fs_type,
139 int flags, const char *dev_name, 139 int flags, const char *dev_name,
140 void *data, struct vfsmount *mnt) 140 void *data)
141{ 141{
142 return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); 142 return mount_single(fs_type, flags, data, debug_fill_super);
143} 143}
144 144
145static struct file_system_type debug_fs_type = { 145static struct file_system_type debug_fs_type = {
146 .owner = THIS_MODULE, 146 .owner = THIS_MODULE,
147 .name = "debugfs", 147 .name = "debugfs",
148 .get_sb = debug_get_sb, 148 .mount = debug_mount,
149 .kill_sb = kill_litter_super, 149 .kill_sb = kill_litter_super,
150}; 150};
151 151
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b5235..1bb547c9cad6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
331} 331}
332 332
333/* 333/*
334 * devpts_get_sb() 334 * devpts_mount()
335 * 335 *
336 * If the '-o newinstance' mount option was specified, mount a new 336 * If the '-o newinstance' mount option was specified, mount a new
337 * (private) instance of devpts. PTYs created in this instance are 337 * (private) instance of devpts. PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
345 * semantics in devpts while preserving backward compatibility of the 345 * semantics in devpts while preserving backward compatibility of the
346 * current 'single-namespace' semantics. i.e all mounts of devpts 346 * current 'single-namespace' semantics. i.e all mounts of devpts
347 * without the 'newinstance' mount option should bind to the initial 347 * without the 'newinstance' mount option should bind to the initial
348 * kernel mount, like get_sb_single(). 348 * kernel mount, like mount_single().
349 * 349 *
350 * Mounts with 'newinstance' option create a new, private namespace. 350 * Mounts with 'newinstance' option create a new, private namespace.
351 * 351 *
352 * NOTE: 352 * NOTE:
353 * 353 *
354 * For single-mount semantics, devpts cannot use get_sb_single(), 354 * For single-mount semantics, devpts cannot use mount_single(),
355 * because get_sb_single()/sget() find and use the super-block from 355 * because mount_single()/sget() find and use the super-block from
356 * the most recent mount of devpts. But that recent mount may be a 356 * the most recent mount of devpts. But that recent mount may be a
357 * 'newinstance' mount and get_sb_single() would pick the newinstance 357 * 'newinstance' mount and mount_single() would pick the newinstance
358 * super-block instead of the initial super-block. 358 * super-block instead of the initial super-block.
359 */ 359 */
360static int devpts_get_sb(struct file_system_type *fs_type, 360static struct dentry *devpts_mount(struct file_system_type *fs_type,
361 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 361 int flags, const char *dev_name, void *data)
362{ 362{
363 int error; 363 int error;
364 struct pts_mount_opts opts; 364 struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
366 366
367 error = parse_mount_options(data, PARSE_MOUNT, &opts); 367 error = parse_mount_options(data, PARSE_MOUNT, &opts);
368 if (error) 368 if (error)
369 return error; 369 return ERR_PTR(error);
370 370
371 if (opts.newinstance) 371 if (opts.newinstance)
372 s = sget(fs_type, NULL, set_anon_super, NULL); 372 s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); 374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
375 375
376 if (IS_ERR(s)) 376 if (IS_ERR(s))
377 return PTR_ERR(s); 377 return ERR_CAST(s);
378 378
379 if (!s->s_root) { 379 if (!s->s_root) {
380 s->s_flags = flags; 380 s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
390 if (error) 390 if (error)
391 goto out_undo_sget; 391 goto out_undo_sget;
392 392
393 simple_set_mnt(mnt, s); 393 return dget(s->s_root);
394
395 return 0;
396 394
397out_undo_sget: 395out_undo_sget:
398 deactivate_locked_super(s); 396 deactivate_locked_super(s);
399 return error; 397 return ERR_PTR(error);
400} 398}
401 399
402#else 400#else
@@ -404,10 +402,10 @@ out_undo_sget:
404 * This supports only the legacy single-instance semantics (no 402 * This supports only the legacy single-instance semantics (no
405 * multiple-instance semantics) 403 * multiple-instance semantics)
406 */ 404 */
407static int devpts_get_sb(struct file_system_type *fs_type, int flags, 405static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
408 const char *dev_name, void *data, struct vfsmount *mnt) 406 const char *dev_name, void *data)
409{ 407{
410 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); 408 return mount_single(fs_type, flags, data, devpts_fill_super);
411} 409}
412#endif 410#endif
413 411
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
421 419
422static struct file_system_type devpts_fs_type = { 420static struct file_system_type devpts_fs_type = {
423 .name = "devpts", 421 .name = "devpts",
424 .get_sb = devpts_get_sb, 422 .mount = devpts_mount,
425 .kill_sb = devpts_kill_sb, 423 .kill_sb = devpts_kill_sb,
426}; 424};
427 425
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b959429..413a3c48f0bb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010 377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020 378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040 379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
380#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY 0x00000080
380 u32 flags; 381 u32 flags;
381 struct list_head global_auth_tok_list; 382 struct list_head global_auth_tok_list;
382 struct mutex global_auth_tok_list_mutex; 383 struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc94203380..9d1a22d62765 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/xattr.h>
35#include <asm/unaligned.h> 36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
70 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 71 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
71 struct dentry *dentry_save; 72 struct dentry *dentry_save;
72 struct vfsmount *vfsmount_save; 73 struct vfsmount *vfsmount_save;
74 unsigned int flags_save;
73 int rc; 75 int rc;
74 76
75 dentry_save = nd->path.dentry; 77 dentry_save = nd->path.dentry;
76 vfsmount_save = nd->path.mnt; 78 vfsmount_save = nd->path.mnt;
79 flags_save = nd->flags;
77 nd->path.dentry = lower_dentry; 80 nd->path.dentry = lower_dentry;
78 nd->path.mnt = lower_mnt; 81 nd->path.mnt = lower_mnt;
82 nd->flags &= ~LOOKUP_OPEN;
79 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
80 nd->path.dentry = dentry_save; 84 nd->path.dentry = dentry_save;
81 nd->path.mnt = vfsmount_save; 85 nd->path.mnt = vfsmount_save;
86 nd->flags = flags_save;
82 return rc; 87 return rc;
83} 88}
84 89
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1108 rc = -EOPNOTSUPP; 1113 rc = -EOPNOTSUPP;
1109 goto out; 1114 goto out;
1110 } 1115 }
1111 mutex_lock(&lower_dentry->d_inode->i_mutex); 1116
1112 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, 1117 rc = vfs_setxattr(lower_dentry, name, value, size, flags);
1113 size, flags);
1114 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1115out: 1118out:
1116 return rc; 1119 return rc;
1117} 1120}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea4..b1f6858a5223 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
446 */ 446 */
447static int 447static int
448ecryptfs_find_auth_tok_for_sig( 448ecryptfs_find_auth_tok_for_sig(
449 struct key **auth_tok_key,
449 struct ecryptfs_auth_tok **auth_tok, 450 struct ecryptfs_auth_tok **auth_tok,
450 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 451 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
451 char *sig) 452 char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
453 struct ecryptfs_global_auth_tok *global_auth_tok; 454 struct ecryptfs_global_auth_tok *global_auth_tok;
454 int rc = 0; 455 int rc = 0;
455 456
457 (*auth_tok_key) = NULL;
456 (*auth_tok) = NULL; 458 (*auth_tok) = NULL;
457 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 459 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
458 mount_crypt_stat, sig)) { 460 mount_crypt_stat, sig)) {
459 struct key *auth_tok_key;
460 461
461 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, 462 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
463 * mount_crypt_stat structure, we prevent to use auth toks that
464 * are not inserted through the ecryptfs_add_global_auth_tok
465 * function.
466 */
467 if (mount_crypt_stat->flags
468 & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
469 return -EINVAL;
470
471 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
462 sig); 472 sig);
463 } else 473 } else
464 (*auth_tok) = global_auth_tok->global_auth_tok; 474 (*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
509 char *filename, size_t filename_size) 519 char *filename, size_t filename_size)
510{ 520{
511 struct ecryptfs_write_tag_70_packet_silly_stack *s; 521 struct ecryptfs_write_tag_70_packet_silly_stack *s;
522 struct key *auth_tok_key = NULL;
512 int rc = 0; 523 int rc = 0;
513 524
514 s = kmalloc(sizeof(*s), GFP_KERNEL); 525 s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
606 } 617 }
607 dest[s->i++] = s->cipher_code; 618 dest[s->i++] = s->cipher_code;
608 rc = ecryptfs_find_auth_tok_for_sig( 619 rc = ecryptfs_find_auth_tok_for_sig(
620 &auth_tok_key,
609 &s->auth_tok, mount_crypt_stat, 621 &s->auth_tok, mount_crypt_stat,
610 mount_crypt_stat->global_default_fnek_sig); 622 mount_crypt_stat->global_default_fnek_sig);
611 if (rc) { 623 if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
753out_unlock: 765out_unlock:
754 mutex_unlock(s->tfm_mutex); 766 mutex_unlock(s->tfm_mutex);
755out: 767out:
768 if (auth_tok_key)
769 key_put(auth_tok_key);
756 kfree(s); 770 kfree(s);
757 return rc; 771 return rc;
758} 772}
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
798 char *data, size_t max_packet_size) 812 char *data, size_t max_packet_size)
799{ 813{
800 struct ecryptfs_parse_tag_70_packet_silly_stack *s; 814 struct ecryptfs_parse_tag_70_packet_silly_stack *s;
815 struct key *auth_tok_key = NULL;
801 int rc = 0; 816 int rc = 0;
802 817
803 (*packet_size) = 0; 818 (*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
910 * >= ECRYPTFS_MAX_IV_BYTES. */ 925 * >= ECRYPTFS_MAX_IV_BYTES. */
911 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); 926 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
912 s->desc.info = s->iv; 927 s->desc.info = s->iv;
913 rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat, 928 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
929 &s->auth_tok, mount_crypt_stat,
914 s->fnek_sig_hex); 930 s->fnek_sig_hex);
915 if (rc) { 931 if (rc) {
916 printk(KERN_ERR "%s: Error attempting to find auth tok for " 932 printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
986 (*filename_size) = 0; 1002 (*filename_size) = 0;
987 (*filename) = NULL; 1003 (*filename) = NULL;
988 } 1004 }
1005 if (auth_tok_key)
1006 key_put(auth_tok_key);
989 kfree(s); 1007 kfree(s);
990 return rc; 1008 return rc;
991} 1009}
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1557 ECRYPTFS_VERSION_MAJOR, 1575 ECRYPTFS_VERSION_MAJOR,
1558 ECRYPTFS_VERSION_MINOR); 1576 ECRYPTFS_VERSION_MINOR);
1559 rc = -EINVAL; 1577 rc = -EINVAL;
1560 goto out; 1578 goto out_release_key;
1561 } 1579 }
1562 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD 1580 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
1563 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) { 1581 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
1564 printk(KERN_ERR "Invalid auth_tok structure " 1582 printk(KERN_ERR "Invalid auth_tok structure "
1565 "returned from key query\n"); 1583 "returned from key query\n");
1566 rc = -EINVAL; 1584 rc = -EINVAL;
1567 goto out; 1585 goto out_release_key;
1586 }
1587out_release_key:
1588 if (rc) {
1589 key_put(*auth_tok_key);
1590 (*auth_tok_key) = NULL;
1568 } 1591 }
1569out: 1592out:
1570 return rc; 1593 return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1688 struct ecryptfs_auth_tok_list_item *auth_tok_list_item; 1711 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
1689 size_t tag_11_contents_size; 1712 size_t tag_11_contents_size;
1690 size_t tag_11_packet_size; 1713 size_t tag_11_packet_size;
1714 struct key *auth_tok_key = NULL;
1691 int rc = 0; 1715 int rc = 0;
1692 1716
1693 INIT_LIST_HEAD(&auth_tok_list); 1717 INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1784 * just one will be sufficient to decrypt to get the FEK. */ 1808 * just one will be sufficient to decrypt to get the FEK. */
1785find_next_matching_auth_tok: 1809find_next_matching_auth_tok:
1786 found_auth_tok = 0; 1810 found_auth_tok = 0;
1811 if (auth_tok_key) {
1812 key_put(auth_tok_key);
1813 auth_tok_key = NULL;
1814 }
1787 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { 1815 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
1788 candidate_auth_tok = &auth_tok_list_item->auth_tok; 1816 candidate_auth_tok = &auth_tok_list_item->auth_tok;
1789 if (unlikely(ecryptfs_verbosity > 0)) { 1817 if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
1800 rc = -EINVAL; 1828 rc = -EINVAL;
1801 goto out_wipe_list; 1829 goto out_wipe_list;
1802 } 1830 }
1803 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, 1831 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
1832 &matching_auth_tok,
1804 crypt_stat->mount_crypt_stat, 1833 crypt_stat->mount_crypt_stat,
1805 candidate_auth_tok_sig); 1834 candidate_auth_tok_sig);
1806 if (matching_auth_tok) { 1835 if (!rc) {
1807 found_auth_tok = 1; 1836 found_auth_tok = 1;
1808 goto found_matching_auth_tok; 1837 goto found_matching_auth_tok;
1809 } 1838 }
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
1866out_wipe_list: 1895out_wipe_list:
1867 wipe_auth_tok_list(&auth_tok_list); 1896 wipe_auth_tok_list(&auth_tok_list);
1868out: 1897out:
1898 if (auth_tok_key)
1899 key_put(auth_tok_key);
1869 return rc; 1900 return rc;
1870} 1901}
1871 1902
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb20..a9dbd62518e6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
212 ecryptfs_opt_err };
212 213
213static const match_table_t tokens = { 214static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 215 {ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 224 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 225 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, 226 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
227 {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
226 {ecryptfs_opt_err, NULL} 228 {ecryptfs_opt_err, NULL}
227}; 229};
228 230
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
406 case ecryptfs_opt_unlink_sigs: 408 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS; 409 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break; 410 break;
411 case ecryptfs_opt_mount_auth_tok_only:
412 mount_crypt_stat->flags |=
413 ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
414 break;
409 case ecryptfs_opt_err: 415 case ecryptfs_opt_err:
410 default: 416 default:
411 printk(KERN_WARNING 417 printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
540 * ecryptfs_interpose to perform most of the linking 546 * ecryptfs_interpose to perform most of the linking
541 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) 547 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
542 */ 548 */
543static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 549static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
544 const char *dev_name, void *raw_data, 550 const char *dev_name, void *raw_data)
545 struct vfsmount *mnt)
546{ 551{
547 struct super_block *s; 552 struct super_block *s;
548 struct ecryptfs_sb_info *sbi; 553 struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
607 err = "Reading sb failed"; 612 err = "Reading sb failed";
608 goto out; 613 goto out;
609 } 614 }
610 simple_set_mnt(mnt, s); 615 return dget(s->s_root);
611 return 0;
612 616
613out: 617out:
614 if (sbi) { 618 if (sbi) {
@@ -616,7 +620,7 @@ out:
616 kmem_cache_free(ecryptfs_sb_info_cache, sbi); 620 kmem_cache_free(ecryptfs_sb_info_cache, sbi);
617 } 621 }
618 printk(KERN_ERR "%s; rc = [%d]\n", err, rc); 622 printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
619 return rc; 623 return ERR_PTR(rc);
620} 624}
621 625
622/** 626/**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
639static struct file_system_type ecryptfs_fs_type = { 643static struct file_system_type ecryptfs_fs_type = {
640 .owner = THIS_MODULE, 644 .owner = THIS_MODULE,
641 .name = "ecryptfs", 645 .name = "ecryptfs",
642 .get_sb = ecryptfs_get_sb, 646 .mount = ecryptfs_mount,
643 .kill_sb = ecryptfs_kill_block_super, 647 .kill_sb = ecryptfs_kill_block_super,
644 .fs_flags = 0 648 .fs_flags = 0
645}; 649};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa9..253732382d37 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -180,6 +180,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
180 seq_printf(m, ",ecryptfs_encrypted_view"); 180 seq_printf(m, ",ecryptfs_encrypted_view");
181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) 181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
182 seq_printf(m, ",ecryptfs_unlink_sigs"); 182 seq_printf(m, ",ecryptfs_unlink_sigs");
183 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
184 seq_printf(m, ",ecryptfs_mount_auth_tok_only");
183 185
184 return 0; 186 return 0;
185} 187}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f04942810818..5073a07652cc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); 20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
21static int efs_fill_super(struct super_block *s, void *d, int silent); 21static int efs_fill_super(struct super_block *s, void *d, int silent);
22 22
23static int efs_get_sb(struct file_system_type *fs_type, 23static struct dentry *efs_mount(struct file_system_type *fs_type,
24 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 24 int flags, const char *dev_name, void *data)
25{ 25{
26 return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); 26 return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
27} 27}
28 28
29static struct file_system_type efs_fs_type = { 29static struct file_system_type efs_fs_type = {
30 .owner = THIS_MODULE, 30 .owner = THIS_MODULE,
31 .name = "efs", 31 .name = "efs",
32 .get_sb = efs_get_sb, 32 .mount = efs_mount,
33 .kill_sb = kill_block_super, 33 .kill_sb = kill_block_super,
34 .fs_flags = FS_REQUIRES_DEV, 34 .fs_flags = FS_REQUIRES_DEV,
35}; 35};
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..79c3ae6e0456 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
659/* 659/*
660 * Set up the superblock (calls exofs_fill_super eventually) 660 * Set up the superblock (calls exofs_fill_super eventually)
661 */ 661 */
662static int exofs_get_sb(struct file_system_type *type, 662static struct dentry *exofs_mount(struct file_system_type *type,
663 int flags, const char *dev_name, 663 int flags, const char *dev_name,
664 void *data, struct vfsmount *mnt) 664 void *data)
665{ 665{
666 struct exofs_mountopt opts; 666 struct exofs_mountopt opts;
667 int ret; 667 int ret;
668 668
669 ret = parse_options(data, &opts); 669 ret = parse_options(data, &opts);
670 if (ret) 670 if (ret)
671 return ret; 671 return ERR_PTR(ret);
672 672
673 opts.dev_name = dev_name; 673 opts.dev_name = dev_name;
674 return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); 674 return mount_nodev(type, flags, &opts, exofs_fill_super);
675} 675}
676 676
677/* 677/*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
809static struct file_system_type exofs_type = { 809static struct file_system_type exofs_type = {
810 .owner = THIS_MODULE, 810 .owner = THIS_MODULE,
811 .name = "exofs", 811 .name = "exofs",
812 .get_sb = exofs_get_sb, 812 .mount = exofs_mount,
813 .kill_sb = generic_shutdown_super, 813 .kill_sb = generic_shutdown_super,
814}; 814};
815 815
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0901320671da..d89e0b6a2d78 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1356 return 0; 1356 return 0;
1357} 1357}
1358 1358
1359static int ext2_get_sb(struct file_system_type *fs_type, 1359static struct dentry *ext2_mount(struct file_system_type *fs_type,
1360 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1360 int flags, const char *dev_name, void *data)
1361{ 1361{
1362 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); 1362 return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
1363} 1363}
1364 1364
1365#ifdef CONFIG_QUOTA 1365#ifdef CONFIG_QUOTA
@@ -1473,7 +1473,7 @@ out:
1473static struct file_system_type ext2_fs_type = { 1473static struct file_system_type ext2_fs_type = {
1474 .owner = THIS_MODULE, 1474 .owner = THIS_MODULE,
1475 .name = "ext2", 1475 .name = "ext2",
1476 .get_sb = ext2_get_sb, 1476 .mount = ext2_mount,
1477 .kill_sb = kill_block_super, 1477 .kill_sb = kill_block_super,
1478 .fs_flags = FS_REQUIRES_DEV, 1478 .fs_flags = FS_REQUIRES_DEV,
1479}; 1479};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index db87413d3479..2fedaf8b5012 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -3020,16 +3020,16 @@ out:
3020 3020
3021#endif 3021#endif
3022 3022
3023static int ext3_get_sb(struct file_system_type *fs_type, 3023static struct dentry *ext3_mount(struct file_system_type *fs_type,
3024 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3024 int flags, const char *dev_name, void *data)
3025{ 3025{
3026 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 3026 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3027} 3027}
3028 3028
3029static struct file_system_type ext3_fs_type = { 3029static struct file_system_type ext3_fs_type = {
3030 .owner = THIS_MODULE, 3030 .owner = THIS_MODULE,
3031 .name = "ext3", 3031 .name = "ext3",
3032 .get_sb = ext3_get_sb, 3032 .mount = ext3_mount,
3033 .kill_sb = kill_block_super, 3033 .kill_sb = kill_block_super,
3034 .fs_flags = FS_REQUIRES_DEV, 3034 .fs_flags = FS_REQUIRES_DEV,
3035}; 3035};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82..6a5edea2d70b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,7 +177,7 @@ struct mpage_da_data {
177 177
178struct ext4_io_page { 178struct ext4_io_page {
179 struct page *p_page; 179 struct page *p_page;
180 int p_count; 180 atomic_t p_count;
181}; 181};
182 182
183#define MAX_IO_PAGES 128 183#define MAX_IO_PAGES 128
@@ -858,6 +858,7 @@ struct ext4_inode_info {
858 spinlock_t i_completed_io_lock; 858 spinlock_t i_completed_io_lock;
859 /* current io_end structure for async DIO write*/ 859 /* current io_end structure for async DIO write*/
860 ext4_io_end_t *cur_aio_dio; 860 ext4_io_end_t *cur_aio_dio;
861 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
861 862
862 /* 863 /*
863 * Transactions that contain inode's metadata needed to complete 864 * Transactions that contain inode's metadata needed to complete
@@ -2060,6 +2061,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2060/* page-io.c */ 2061/* page-io.c */
2061extern int __init ext4_init_pageio(void); 2062extern int __init ext4_init_pageio(void);
2062extern void ext4_exit_pageio(void); 2063extern void ext4_exit_pageio(void);
2064extern void ext4_ioend_wait(struct inode *);
2063extern void ext4_free_io_end(ext4_io_end_t *io); 2065extern void ext4_free_io_end(ext4_io_end_t *io);
2064extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2066extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2065extern int ext4_end_io_nolock(ext4_io_end_t *io); 2067extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2d6c6c8c036d..bdbe69902207 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
53static inline int ext4_begin_ordered_truncate(struct inode *inode, 53static inline int ext4_begin_ordered_truncate(struct inode *inode,
54 loff_t new_size) 54 loff_t new_size)
55{ 55{
56 trace_ext4_begin_ordered_truncate(inode, new_size);
56 return jbd2_journal_begin_ordered_truncate( 57 return jbd2_journal_begin_ordered_truncate(
57 EXT4_SB(inode->i_sb)->s_journal, 58 EXT4_SB(inode->i_sb)->s_journal,
58 &EXT4_I(inode)->jinode, 59 &EXT4_I(inode)->jinode,
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
178 handle_t *handle; 179 handle_t *handle;
179 int err; 180 int err;
180 181
182 trace_ext4_evict_inode(inode);
181 if (inode->i_nlink) { 183 if (inode->i_nlink) {
182 truncate_inode_pages(&inode->i_data, 0); 184 truncate_inode_pages(&inode->i_data, 0);
183 goto no_delete; 185 goto no_delete;
@@ -2718,7 +2720,7 @@ static int ext4_writepage(struct page *page,
2718 * try to create them using __block_write_begin. If this 2720 * try to create them using __block_write_begin. If this
2719 * fails, redirty the page and move on. 2721 * fails, redirty the page and move on.
2720 */ 2722 */
2721 if (!page_buffers(page)) { 2723 if (!page_has_buffers(page)) {
2722 if (__block_write_begin(page, 0, len, 2724 if (__block_write_begin(page, 0, len,
2723 noalloc_get_block_write)) { 2725 noalloc_get_block_write)) {
2724 redirty_page: 2726 redirty_page:
@@ -2732,12 +2734,10 @@ static int ext4_writepage(struct page *page,
2732 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2734 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2733 ext4_bh_delay_or_unwritten)) { 2735 ext4_bh_delay_or_unwritten)) {
2734 /* 2736 /*
2735 * We don't want to do block allocation So redirty the 2737 * We don't want to do block allocation, so redirty
2736 * page and return We may reach here when we do a 2738 * the page and return. We may reach here when we do
2737 * journal commit via 2739 * a journal commit via journal_submit_inode_data_buffers.
2738 * journal_submit_inode_data_buffers. If we don't 2740 * We can also reach here via shrink_page_list
2739 * have mapping block we just ignore them. We can also
2740 * reach here via shrink_page_list
2741 */ 2741 */
2742 goto redirty_page; 2742 goto redirty_page;
2743 } 2743 }
@@ -5412,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
5412 * will return the blocks that include the delayed allocation 5412 * will return the blocks that include the delayed allocation
5413 * blocks for this file. 5413 * blocks for this file.
5414 */ 5414 */
5415 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
5416 delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; 5415 delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
5417 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
5418 5416
5419 stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; 5417 stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
5420 return 0; 5418 return 0;
@@ -5651,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5651 int err, ret; 5649 int err, ret;
5652 5650
5653 might_sleep(); 5651 might_sleep();
5652 trace_ext4_mark_inode_dirty(inode, _RET_IP_);
5654 err = ext4_reserve_inode_write(handle, inode, &iloc); 5653 err = ext4_reserve_inode_write(handle, inode, &iloc);
5655 if (ext4_handle_valid(handle) && 5654 if (ext4_handle_valid(handle) &&
5656 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5655 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34724a..5b4d4e3a4d58 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4640,8 +4640,6 @@ do_more:
4640 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4641 * them with group lock_held 4641 * them with group lock_held
4642 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4645 ext4_lock_group(sb, block_group); 4643 ext4_lock_group(sb, block_group);
4646 mb_clear_bits(bitmap_bh->b_data, bit, count); 4644 mb_clear_bits(bitmap_bh->b_data, bit, count);
4647 mb_free_blocks(inode, &e4b, bit, count); 4645 mb_free_blocks(inode, &e4b, bit, count);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d976..7f5451cd1d38 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
35int __init ext4_init_pageio(void) 39int __init ext4_init_pageio(void)
36{ 40{
41 int i;
42
37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
38 if (io_page_cachep == NULL) 44 if (io_page_cachep == NULL)
39 return -ENOMEM; 45 return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
42 kmem_cache_destroy(io_page_cachep); 48 kmem_cache_destroy(io_page_cachep);
43 return -ENOMEM; 49 return -ENOMEM;
44 } 50 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
45 53
46 return 0; 54 return 0;
47} 55}
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
52 kmem_cache_destroy(io_page_cachep); 60 kmem_cache_destroy(io_page_cachep);
53} 61}
54 62
63void ext4_ioend_wait(struct inode *inode)
64{
65 wait_queue_head_t *wq = to_ioend_wq(inode);
66
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68}
69
70static void put_io_page(struct ext4_io_page *io_page)
71{
72 if (atomic_dec_and_test(&io_page->p_count)) {
73 end_page_writeback(io_page->p_page);
74 put_page(io_page->p_page);
75 kmem_cache_free(io_page_cachep, io_page);
76 }
77}
78
55void ext4_free_io_end(ext4_io_end_t *io) 79void ext4_free_io_end(ext4_io_end_t *io)
56{ 80{
57 int i; 81 int i;
82 wait_queue_head_t *wq;
58 83
59 BUG_ON(!io); 84 BUG_ON(!io);
60 if (io->page) 85 if (io->page)
61 put_page(io->page); 86 put_page(io->page);
62 for (i = 0; i < io->num_io_pages; i++) { 87 for (i = 0; i < io->num_io_pages; i++)
63 if (--io->pages[i]->p_count == 0) { 88 put_io_page(io->pages[i]);
64 struct page *page = io->pages[i]->p_page;
65
66 end_page_writeback(page);
67 put_page(page);
68 kmem_cache_free(io_page_cachep, io->pages[i]);
69 }
70 }
71 io->num_io_pages = 0; 89 io->num_io_pages = 0;
72 iput(io->inode); 90 wq = to_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq))
93 wake_up_all(wq);
73 kmem_cache_free(io_end_cachep, io); 94 kmem_cache_free(io_end_cachep, io);
74} 95}
75 96
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
142 io = kmem_cache_alloc(io_end_cachep, flags); 163 io = kmem_cache_alloc(io_end_cachep, flags);
143 if (io) { 164 if (io) {
144 memset(io, 0, sizeof(*io)); 165 memset(io, 0, sizeof(*io));
145 io->inode = igrab(inode); 166 atomic_inc(&EXT4_I(inode)->i_ioend_count);
146 BUG_ON(!io->inode); 167 io->inode = inode;
147 INIT_WORK(&io->work, ext4_end_io_work); 168 INIT_WORK(&io->work, ext4_end_io_work);
148 INIT_LIST_HEAD(&io->list); 169 INIT_LIST_HEAD(&io->list);
149 } 170 }
@@ -171,35 +192,15 @@ static void ext4_end_bio(struct bio *bio, int error)
171 struct workqueue_struct *wq; 192 struct workqueue_struct *wq;
172 struct inode *inode; 193 struct inode *inode;
173 unsigned long flags; 194 unsigned long flags;
174 ext4_fsblk_t err_block;
175 int i; 195 int i;
176 196
177 BUG_ON(!io_end); 197 BUG_ON(!io_end);
178 inode = io_end->inode;
179 bio->bi_private = NULL; 198 bio->bi_private = NULL;
180 bio->bi_end_io = NULL; 199 bio->bi_end_io = NULL;
181 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
182 error = 0; 201 error = 0;
183 err_block = bio->bi_sector >> (inode->i_blkbits - 9);
184 bio_put(bio); 202 bio_put(bio);
185 203
186 if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
187 pr_err("sb umounted, discard end_io request for inode %lu\n",
188 io_end->inode->i_ino);
189 ext4_free_io_end(io_end);
190 return;
191 }
192
193 if (error) {
194 io_end->flag |= EXT4_IO_END_ERROR;
195 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
196 "(offset %llu size %ld starting block %llu)",
197 inode->i_ino,
198 (unsigned long long) io_end->offset,
199 (long) io_end->size,
200 (unsigned long long) err_block);
201 }
202
203 for (i = 0; i < io_end->num_io_pages; i++) { 204 for (i = 0; i < io_end->num_io_pages; i++) {
204 struct page *page = io_end->pages[i]->p_page; 205 struct page *page = io_end->pages[i]->p_page;
205 struct buffer_head *bh, *head; 206 struct buffer_head *bh, *head;
@@ -236,13 +237,7 @@ static void ext4_end_bio(struct bio *bio, int error)
236 } while (bh != head); 237 } while (bh != head);
237 } 238 }
238 239
239 if (--io_end->pages[i]->p_count == 0) { 240 put_io_page(io_end->pages[i]);
240 struct page *page = io_end->pages[i]->p_page;
241
242 end_page_writeback(page);
243 put_page(page);
244 kmem_cache_free(io_page_cachep, io_end->pages[i]);
245 }
246 241
247 /* 242 /*
248 * If this is a partial write which happened to make 243 * If this is a partial write which happened to make
@@ -254,8 +249,19 @@ static void ext4_end_bio(struct bio *bio, int error)
254 if (!partial_write) 249 if (!partial_write)
255 SetPageUptodate(page); 250 SetPageUptodate(page);
256 } 251 }
257
258 io_end->num_io_pages = 0; 252 io_end->num_io_pages = 0;
253 inode = io_end->inode;
254
255 if (error) {
256 io_end->flag |= EXT4_IO_END_ERROR;
257 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
258 "(offset %llu size %ld starting block %llu)",
259 inode->i_ino,
260 (unsigned long long) io_end->offset,
261 (long) io_end->size,
262 (unsigned long long)
263 bio->bi_sector >> (inode->i_blkbits - 9));
264 }
259 265
260 /* Add the io_end to per-inode completed io list*/ 266 /* Add the io_end to per-inode completed io list*/
261 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 267 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io,
305 bio->bi_private = io->io_end = io_end; 311 bio->bi_private = io->io_end = io_end;
306 bio->bi_end_io = ext4_end_bio; 312 bio->bi_end_io = ext4_end_bio;
307 313
308 io_end->inode = inode;
309 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 314 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
310 315
311 io->io_bio = bio; 316 io->io_bio = bio;
@@ -360,7 +365,7 @@ submit_and_retry:
360 if ((io_end->num_io_pages == 0) || 365 if ((io_end->num_io_pages == 0) ||
361 (io_end->pages[io_end->num_io_pages-1] != io_page)) { 366 (io_end->pages[io_end->num_io_pages-1] != io_page)) {
362 io_end->pages[io_end->num_io_pages++] = io_page; 367 io_end->pages[io_end->num_io_pages++] = io_page;
363 io_page->p_count++; 368 atomic_inc(&io_page->p_count);
364 } 369 }
365 return 0; 370 return 0;
366} 371}
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
389 return -ENOMEM; 394 return -ENOMEM;
390 } 395 }
391 io_page->p_page = page; 396 io_page->p_page = page;
392 io_page->p_count = 0; 397 atomic_set(&io_page->p_count, 1);
393 get_page(page); 398 get_page(page);
394 399
395 for (bh = head = page_buffers(page), block_start = 0; 400 for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
421 * PageWriteback bit from the page to prevent the system from 426 * PageWriteback bit from the page to prevent the system from
422 * wedging later on. 427 * wedging later on.
423 */ 428 */
424 if (io_page->p_count == 0) { 429 put_io_page(io_page);
425 put_page(page);
426 end_page_writeback(page);
427 kmem_cache_free(io_page_cachep, io_page);
428 }
429 return ret; 430 return ret;
430} 431}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0348ce066592..61182fe6254e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -73,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
73static int ext4_unfreeze(struct super_block *sb); 73static int ext4_unfreeze(struct super_block *sb);
74static void ext4_write_super(struct super_block *sb); 74static void ext4_write_super(struct super_block *sb);
75static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
76static int ext4_get_sb(struct file_system_type *fs_type, int flags, 76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data, struct vfsmount *mnt); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80 80
@@ -82,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb);
82static struct file_system_type ext3_fs_type = { 82static struct file_system_type ext3_fs_type = {
83 .owner = THIS_MODULE, 83 .owner = THIS_MODULE,
84 .name = "ext3", 84 .name = "ext3",
85 .get_sb = ext4_get_sb, 85 .mount = ext4_mount,
86 .kill_sb = kill_block_super, 86 .kill_sb = kill_block_super,
87 .fs_flags = FS_REQUIRES_DEV, 87 .fs_flags = FS_REQUIRES_DEV,
88}; 88};
@@ -828,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
828 ei->cur_aio_dio = NULL; 828 ei->cur_aio_dio = NULL;
829 ei->i_sync_tid = 0; 829 ei->i_sync_tid = 0;
830 ei->i_datasync_tid = 0; 830 ei->i_datasync_tid = 0;
831 atomic_set(&ei->i_ioend_count, 0);
831 832
832 return &ei->vfs_inode; 833 return &ei->vfs_inode;
833} 834}
834 835
836static int ext4_drop_inode(struct inode *inode)
837{
838 int drop = generic_drop_inode(inode);
839
840 trace_ext4_drop_inode(inode, drop);
841 return drop;
842}
843
835static void ext4_destroy_inode(struct inode *inode) 844static void ext4_destroy_inode(struct inode *inode)
836{ 845{
846 ext4_ioend_wait(inode);
837 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 847 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
838 ext4_msg(inode->i_sb, KERN_ERR, 848 ext4_msg(inode->i_sb, KERN_ERR,
839 "Inode %lu (%p): orphan list check failed!", 849 "Inode %lu (%p): orphan list check failed!",
@@ -1173,6 +1183,7 @@ static const struct super_operations ext4_sops = {
1173 .destroy_inode = ext4_destroy_inode, 1183 .destroy_inode = ext4_destroy_inode,
1174 .write_inode = ext4_write_inode, 1184 .write_inode = ext4_write_inode,
1175 .dirty_inode = ext4_dirty_inode, 1185 .dirty_inode = ext4_dirty_inode,
1186 .drop_inode = ext4_drop_inode,
1176 .evict_inode = ext4_evict_inode, 1187 .evict_inode = ext4_evict_inode,
1177 .put_super = ext4_put_super, 1188 .put_super = ext4_put_super,
1178 .sync_fs = ext4_sync_fs, 1189 .sync_fs = ext4_sync_fs,
@@ -1194,6 +1205,7 @@ static const struct super_operations ext4_nojournal_sops = {
1194 .destroy_inode = ext4_destroy_inode, 1205 .destroy_inode = ext4_destroy_inode,
1195 .write_inode = ext4_write_inode, 1206 .write_inode = ext4_write_inode,
1196 .dirty_inode = ext4_dirty_inode, 1207 .dirty_inode = ext4_dirty_inode,
1208 .drop_inode = ext4_drop_inode,
1197 .evict_inode = ext4_evict_inode, 1209 .evict_inode = ext4_evict_inode,
1198 .write_super = ext4_write_super, 1210 .write_super = ext4_write_super,
1199 .put_super = ext4_put_super, 1211 .put_super = ext4_put_super,
@@ -2699,7 +2711,6 @@ static int ext4_lazyinit_thread(void *arg)
2699 struct ext4_li_request *elr; 2711 struct ext4_li_request *elr;
2700 unsigned long next_wakeup; 2712 unsigned long next_wakeup;
2701 DEFINE_WAIT(wait); 2713 DEFINE_WAIT(wait);
2702 int ret;
2703 2714
2704 BUG_ON(NULL == eli); 2715 BUG_ON(NULL == eli);
2705 2716
@@ -2723,13 +2734,12 @@ cont_thread:
2723 elr = list_entry(pos, struct ext4_li_request, 2734 elr = list_entry(pos, struct ext4_li_request,
2724 lr_request); 2735 lr_request);
2725 2736
2726 if (time_after_eq(jiffies, elr->lr_next_sched)) 2737 if (time_after_eq(jiffies, elr->lr_next_sched)) {
2727 ret = ext4_run_li_request(elr); 2738 if (ext4_run_li_request(elr) != 0) {
2728 2739 /* error, remove the lazy_init job */
2729 if (ret) { 2740 ext4_remove_li_request(elr);
2730 ret = 0; 2741 continue;
2731 ext4_remove_li_request(elr); 2742 }
2732 continue;
2733 } 2743 }
2734 2744
2735 if (time_before(elr->lr_next_sched, next_wakeup)) 2745 if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2750,8 @@ cont_thread:
2740 if (freezing(current)) 2750 if (freezing(current))
2741 refrigerator(); 2751 refrigerator();
2742 2752
2743 if (time_after_eq(jiffies, next_wakeup)) { 2753 if ((time_after_eq(jiffies, next_wakeup)) ||
2754 (MAX_JIFFY_OFFSET == next_wakeup)) {
2744 cond_resched(); 2755 cond_resched();
2745 continue; 2756 continue;
2746 } 2757 }
@@ -3348,6 +3359,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3348 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3359 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3349 spin_lock_init(&sbi->s_next_gen_lock); 3360 spin_lock_init(&sbi->s_next_gen_lock);
3350 3361
3362 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3363 ext4_count_free_blocks(sb));
3364 if (!err) {
3365 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3366 ext4_count_free_inodes(sb));
3367 }
3368 if (!err) {
3369 err = percpu_counter_init(&sbi->s_dirs_counter,
3370 ext4_count_dirs(sb));
3371 }
3372 if (!err) {
3373 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
3374 }
3375 if (err) {
3376 ext4_msg(sb, KERN_ERR, "insufficient memory");
3377 goto failed_mount3;
3378 }
3379
3351 sbi->s_stripe = ext4_get_stripe_size(sbi); 3380 sbi->s_stripe = ext4_get_stripe_size(sbi);
3352 sbi->s_max_writeback_mb_bump = 128; 3381 sbi->s_max_writeback_mb_bump = 128;
3353 3382
@@ -3446,22 +3475,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3446 } 3475 }
3447 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3476 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3448 3477
3449no_journal: 3478 /*
3450 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3479 * The journal may have updated the bg summary counts, so we
3451 ext4_count_free_blocks(sb)); 3480 * need to update the global counters.
3452 if (!err) 3481 */
3453 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3482 percpu_counter_set(&sbi->s_freeblocks_counter,
3454 ext4_count_free_inodes(sb)); 3483 ext4_count_free_blocks(sb));
3455 if (!err) 3484 percpu_counter_set(&sbi->s_freeinodes_counter,
3456 err = percpu_counter_init(&sbi->s_dirs_counter, 3485 ext4_count_free_inodes(sb));
3457 ext4_count_dirs(sb)); 3486 percpu_counter_set(&sbi->s_dirs_counter,
3458 if (!err) 3487 ext4_count_dirs(sb));
3459 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3488 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3460 if (err) {
3461 ext4_msg(sb, KERN_ERR, "insufficient memory");
3462 goto failed_mount_wq;
3463 }
3464 3489
3490no_journal:
3465 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3491 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
3466 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3492 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3467 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3493 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3611,10 +3637,6 @@ failed_mount_wq:
3611 jbd2_journal_destroy(sbi->s_journal); 3637 jbd2_journal_destroy(sbi->s_journal);
3612 sbi->s_journal = NULL; 3638 sbi->s_journal = NULL;
3613 } 3639 }
3614 percpu_counter_destroy(&sbi->s_freeblocks_counter);
3615 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3616 percpu_counter_destroy(&sbi->s_dirs_counter);
3617 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
3618failed_mount3: 3640failed_mount3:
3619 if (sbi->s_flex_groups) { 3641 if (sbi->s_flex_groups) {
3620 if (is_vmalloc_addr(sbi->s_flex_groups)) 3642 if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3644,10 @@ failed_mount3:
3622 else 3644 else
3623 kfree(sbi->s_flex_groups); 3645 kfree(sbi->s_flex_groups);
3624 } 3646 }
3647 percpu_counter_destroy(&sbi->s_freeblocks_counter);
3648 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3649 percpu_counter_destroy(&sbi->s_dirs_counter);
3650 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
3625failed_mount2: 3651failed_mount2:
3626 for (i = 0; i < db_count; i++) 3652 for (i = 0; i < db_count; i++)
3627 brelse(sbi->s_group_desc[i]); 3653 brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3975,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3949 else 3975 else
3950 es->s_kbytes_written = 3976 es->s_kbytes_written =
3951 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3977 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3952 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) 3978 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3953 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3979 &EXT4_SB(sb)->s_freeblocks_counter));
3954 &EXT4_SB(sb)->s_freeblocks_counter)); 3980 es->s_free_inodes_count =
3955 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) 3981 cpu_to_le32(percpu_counter_sum_positive(
3956 es->s_free_inodes_count = 3982 &EXT4_SB(sb)->s_freeinodes_counter));
3957 cpu_to_le32(percpu_counter_sum_positive(
3958 &EXT4_SB(sb)->s_freeinodes_counter));
3959 sb->s_dirt = 0; 3983 sb->s_dirt = 0;
3960 BUFFER_TRACE(sbh, "marking dirty"); 3984 BUFFER_TRACE(sbh, "marking dirty");
3961 mark_buffer_dirty(sbh); 3985 mark_buffer_dirty(sbh);
@@ -4556,12 +4580,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4556 4580
4557static int ext4_quota_off(struct super_block *sb, int type) 4581static int ext4_quota_off(struct super_block *sb, int type)
4558{ 4582{
4559 /* Force all delayed allocation blocks to be allocated */ 4583 /* Force all delayed allocation blocks to be allocated.
4560 if (test_opt(sb, DELALLOC)) { 4584 * Caller already holds s_umount sem */
4561 down_read(&sb->s_umount); 4585 if (test_opt(sb, DELALLOC))
4562 sync_filesystem(sb); 4586 sync_filesystem(sb);
4563 up_read(&sb->s_umount);
4564 }
4565 4587
4566 return dquot_quota_off(sb, type); 4588 return dquot_quota_off(sb, type);
4567} 4589}
@@ -4667,17 +4689,17 @@ out:
4667 4689
4668#endif 4690#endif
4669 4691
4670static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4692static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
4671 const char *dev_name, void *data, struct vfsmount *mnt) 4693 const char *dev_name, void *data)
4672{ 4694{
4673 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4695 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
4674} 4696}
4675 4697
4676#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4698#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4677static struct file_system_type ext2_fs_type = { 4699static struct file_system_type ext2_fs_type = {
4678 .owner = THIS_MODULE, 4700 .owner = THIS_MODULE,
4679 .name = "ext2", 4701 .name = "ext2",
4680 .get_sb = ext4_get_sb, 4702 .mount = ext4_mount,
4681 .kill_sb = kill_block_super, 4703 .kill_sb = kill_block_super,
4682 .fs_flags = FS_REQUIRES_DEV, 4704 .fs_flags = FS_REQUIRES_DEV,
4683}; 4705};
@@ -4722,7 +4744,7 @@ static inline void unregister_as_ext3(void) { }
4722static struct file_system_type ext4_fs_type = { 4744static struct file_system_type ext4_fs_type = {
4723 .owner = THIS_MODULE, 4745 .owner = THIS_MODULE,
4724 .name = "ext4", 4746 .name = "ext4",
4725 .get_sb = ext4_get_sb, 4747 .mount = ext4_mount,
4726 .kill_sb = kill_block_super, 4748 .kill_sb = kill_block_super,
4727 .fs_flags = FS_REQUIRES_DEV, 4749 .fs_flags = FS_REQUIRES_DEV,
4728}; 4750};
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae7..3345aabd1dd7 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
675 return 0; 675 return 0;
676} 676}
677 677
678static int msdos_get_sb(struct file_system_type *fs_type, 678static struct dentry *msdos_mount(struct file_system_type *fs_type,
679 int flags, const char *dev_name, 679 int flags, const char *dev_name,
680 void *data, struct vfsmount *mnt) 680 void *data)
681{ 681{
682 return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, 682 return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
683 mnt);
684} 683}
685 684
686static struct file_system_type msdos_fs_type = { 685static struct file_system_type msdos_fs_type = {
687 .owner = THIS_MODULE, 686 .owner = THIS_MODULE,
688 .name = "msdos", 687 .name = "msdos",
689 .get_sb = msdos_get_sb, 688 .mount = msdos_mount,
690 .kill_sb = kill_block_super, 689 .kill_sb = kill_block_super,
691 .fs_flags = FS_REQUIRES_DEV, 690 .fs_flags = FS_REQUIRES_DEV,
692}; 691};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a0152..b936703b8924 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1071 return 0; 1071 return 0;
1072} 1072}
1073 1073
1074static int vfat_get_sb(struct file_system_type *fs_type, 1074static struct dentry *vfat_mount(struct file_system_type *fs_type,
1075 int flags, const char *dev_name, 1075 int flags, const char *dev_name,
1076 void *data, struct vfsmount *mnt) 1076 void *data)
1077{ 1077{
1078 return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, 1078 return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
1079 mnt);
1080} 1079}
1081 1080
1082static struct file_system_type vfat_fs_type = { 1081static struct file_system_type vfat_fs_type = {
1083 .owner = THIS_MODULE, 1082 .owner = THIS_MODULE,
1084 .name = "vfat", 1083 .name = "vfat",
1085 .get_sb = vfat_get_sb, 1084 .mount = vfat_mount,
1086 .kill_sb = kill_block_super, 1085 .kill_sb = kill_block_super,
1087 .fs_flags = FS_REQUIRES_DEV, 1086 .fs_flags = FS_REQUIRES_DEV,
1088}; 1087};
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b8784..9d1c99558389 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
246/* 246/*
247 * The usual module blurb. 247 * The usual module blurb.
248 */ 248 */
249static int vxfs_get_sb(struct file_system_type *fs_type, 249static struct dentry *vxfs_mount(struct file_system_type *fs_type,
250 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 250 int flags, const char *dev_name, void *data)
251{ 251{
252 return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, 252 return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
253 mnt);
254} 253}
255 254
256static struct file_system_type vxfs_fs_type = { 255static struct file_system_type vxfs_fs_type = {
257 .owner = THIS_MODULE, 256 .owner = THIS_MODULE,
258 .name = "vxfs", 257 .name = "vxfs",
259 .get_sb = vxfs_get_sb, 258 .mount = vxfs_mount,
260 .kill_sb = kill_block_super, 259 .kill_sb = kill_block_super,
261 .fs_flags = FS_REQUIRES_DEV, 260 .fs_flags = FS_REQUIRES_DEV,
262}; 261};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aed881a76b22..3d06ccc953aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -707,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
707 return work; 707 return work;
708} 708}
709 709
710/*
711 * Add in the number of potentially dirty inodes, because each inode
712 * write can dirty pagecache in the underlying blockdev.
713 */
714static unsigned long get_nr_dirty_pages(void)
715{
716 return global_page_state(NR_FILE_DIRTY) +
717 global_page_state(NR_UNSTABLE_NFS) +
718 get_nr_dirty_inodes();
719}
720
710static long wb_check_old_data_flush(struct bdi_writeback *wb) 721static long wb_check_old_data_flush(struct bdi_writeback *wb)
711{ 722{
712 unsigned long expired; 723 unsigned long expired;
@@ -724,13 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
724 return 0; 735 return 0;
725 736
726 wb->last_old_flush = jiffies; 737 wb->last_old_flush = jiffies;
727 /* 738 nr_pages = get_nr_dirty_pages();
728 * Add in the number of potentially dirty inodes, because each inode
729 * write can dirty pagecache in the underlying blockdev.
730 */
731 nr_pages = global_page_state(NR_FILE_DIRTY) +
732 global_page_state(NR_UNSTABLE_NFS) +
733 get_nr_dirty_inodes();
734 739
735 if (nr_pages) { 740 if (nr_pages) {
736 struct wb_writeback_work work = { 741 struct wb_writeback_work work = {
@@ -1076,32 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
1076} 1081}
1077 1082
1078/** 1083/**
1079 * writeback_inodes_sb - writeback dirty inodes from given super_block 1084 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1080 * @sb: the superblock 1085 * @sb: the superblock
1086 * @nr: the number of pages to write
1081 * 1087 *
1082 * Start writeback on some inodes on this super_block. No guarantees are made 1088 * Start writeback on some inodes on this super_block. No guarantees are made
1083 * on how many (if any) will be written, and this function does not wait 1089 * on how many (if any) will be written, and this function does not wait
1084 * for IO completion of submitted IO. The number of pages submitted is 1090 * for IO completion of submitted IO.
1085 * returned.
1086 */ 1091 */
1087void writeback_inodes_sb(struct super_block *sb) 1092void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
1088{ 1093{
1089 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1090 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1091 DECLARE_COMPLETION_ONSTACK(done); 1094 DECLARE_COMPLETION_ONSTACK(done);
1092 struct wb_writeback_work work = { 1095 struct wb_writeback_work work = {
1093 .sb = sb, 1096 .sb = sb,
1094 .sync_mode = WB_SYNC_NONE, 1097 .sync_mode = WB_SYNC_NONE,
1095 .done = &done, 1098 .done = &done,
1099 .nr_pages = nr,
1096 }; 1100 };
1097 1101
1098 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1102 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1099
1100 work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
1101
1102 bdi_queue_work(sb->s_bdi, &work); 1103 bdi_queue_work(sb->s_bdi, &work);
1103 wait_for_completion(&done); 1104 wait_for_completion(&done);
1104} 1105}
1106EXPORT_SYMBOL(writeback_inodes_sb_nr);
1107
1108/**
1109 * writeback_inodes_sb - writeback dirty inodes from given super_block
1110 * @sb: the superblock
1111 *
1112 * Start writeback on some inodes on this super_block. No guarantees are made
1113 * on how many (if any) will be written, and this function does not wait
1114 * for IO completion of submitted IO.
1115 */
1116void writeback_inodes_sb(struct super_block *sb)
1117{
1118 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
1119}
1105EXPORT_SYMBOL(writeback_inodes_sb); 1120EXPORT_SYMBOL(writeback_inodes_sb);
1106 1121
1107/** 1122/**
@@ -1124,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
1124EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1139EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1125 1140
1126/** 1141/**
1142 * writeback_inodes_sb_if_idle - start writeback if none underway
1143 * @sb: the superblock
1144 * @nr: the number of pages to write
1145 *
1146 * Invoke writeback_inodes_sb if no writeback is currently underway.
1147 * Returns 1 if writeback was started, 0 if not.
1148 */
1149int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
1150 unsigned long nr)
1151{
1152 if (!writeback_in_progress(sb->s_bdi)) {
1153 down_read(&sb->s_umount);
1154 writeback_inodes_sb_nr(sb, nr);
1155 up_read(&sb->s_umount);
1156 return 1;
1157 } else
1158 return 0;
1159}
1160EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
1161
1162/**
1127 * sync_inodes_sb - sync sb inode pages 1163 * sync_inodes_sb - sync sb inode pages
1128 * @sb: the superblock 1164 * @sb: the superblock
1129 * 1165 *
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4eba07661e5c..85542a7daf40 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
322 return 0; 322 return 0;
323} 323}
324 324
325static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, 325static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
326 const char *dev_name, void *raw_data, 326 int flags, const char *dev_name, void *raw_data)
327 struct vfsmount *mnt)
328{ 327{
329 return get_sb_single(fs_type, flags, raw_data, 328 return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
330 fuse_ctl_fill_super, mnt);
331} 329}
332 330
333static void fuse_ctl_kill_sb(struct super_block *sb) 331static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
346static struct file_system_type fuse_ctl_fs_type = { 344static struct file_system_type fuse_ctl_fs_type = {
347 .owner = THIS_MODULE, 345 .owner = THIS_MODULE,
348 .name = "fusectl", 346 .name = "fusectl",
349 .get_sb = fuse_ctl_get_sb, 347 .mount = fuse_ctl_mount,
350 .kill_sb = fuse_ctl_kill_sb, 348 .kill_sb = fuse_ctl_kill_sb,
351}; 349};
352 350
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374c..cfce3ad86a92 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1041 return err; 1041 return err;
1042} 1042}
1043 1043
1044static int fuse_get_sb(struct file_system_type *fs_type, 1044static struct dentry *fuse_mount(struct file_system_type *fs_type,
1045 int flags, const char *dev_name, 1045 int flags, const char *dev_name,
1046 void *raw_data, struct vfsmount *mnt) 1046 void *raw_data)
1047{ 1047{
1048 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); 1048 return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
1049} 1049}
1050 1050
1051static void fuse_kill_sb_anon(struct super_block *sb) 1051static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
1065 .owner = THIS_MODULE, 1065 .owner = THIS_MODULE,
1066 .name = "fuse", 1066 .name = "fuse",
1067 .fs_flags = FS_HAS_SUBTYPE, 1067 .fs_flags = FS_HAS_SUBTYPE,
1068 .get_sb = fuse_get_sb, 1068 .mount = fuse_mount,
1069 .kill_sb = fuse_kill_sb_anon, 1069 .kill_sb = fuse_kill_sb_anon,
1070}; 1070};
1071 1071
1072#ifdef CONFIG_BLOCK 1072#ifdef CONFIG_BLOCK
1073static int fuse_get_sb_blk(struct file_system_type *fs_type, 1073static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
1074 int flags, const char *dev_name, 1074 int flags, const char *dev_name,
1075 void *raw_data, struct vfsmount *mnt) 1075 void *raw_data)
1076{ 1076{
1077 return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, 1077 return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
1078 mnt);
1079} 1078}
1080 1079
1081static void fuse_kill_sb_blk(struct super_block *sb) 1080static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
1094static struct file_system_type fuseblk_fs_type = { 1093static struct file_system_type fuseblk_fs_type = {
1095 .owner = THIS_MODULE, 1094 .owner = THIS_MODULE,
1096 .name = "fuseblk", 1095 .name = "fuseblk",
1097 .get_sb = fuse_get_sb_blk, 1096 .mount = fuse_mount_blk,
1098 .kill_sb = fuse_kill_sb_blk, 1097 .kill_sb = fuse_kill_sb_blk,
1099 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1098 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1100}; 1099};
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cade1acbcea9..3eb1393f7b81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1250,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1250} 1250}
1251 1251
1252/** 1252/**
1253 * gfs2_get_sb - Get the GFS2 superblock 1253 * gfs2_mount - Get the GFS2 superblock
1254 * @fs_type: The GFS2 filesystem type 1254 * @fs_type: The GFS2 filesystem type
1255 * @flags: Mount flags 1255 * @flags: Mount flags
1256 * @dev_name: The name of the device 1256 * @dev_name: The name of the device
1257 * @data: The mount arguments 1257 * @data: The mount arguments
1258 * @mnt: The vfsmnt for this mount
1259 * 1258 *
1260 * Q. Why not use get_sb_bdev() ? 1259 * Q. Why not use get_sb_bdev() ?
1261 * A. We need to select one of two root directories to mount, independent 1260 * A. We need to select one of two root directories to mount, independent
@@ -1264,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1264 * Returns: 0 or -ve on error 1263 * Returns: 0 or -ve on error
1265 */ 1264 */
1266 1265
1267static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1266static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1268 const char *dev_name, void *data, struct vfsmount *mnt) 1267 const char *dev_name, void *data)
1269{ 1268{
1270 struct block_device *bdev; 1269 struct block_device *bdev;
1271 struct super_block *s; 1270 struct super_block *s;
@@ -1279,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1279 1278
1280 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1279 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1281 if (IS_ERR(bdev)) 1280 if (IS_ERR(bdev))
1282 return PTR_ERR(bdev); 1281 return ERR_CAST(bdev);
1283 1282
1284 /* 1283 /*
1285 * once the super is inserted into the list by sget, s_umount 1284 * once the super is inserted into the list by sget, s_umount
@@ -1298,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1298 if (IS_ERR(s)) 1297 if (IS_ERR(s))
1299 goto error_bdev; 1298 goto error_bdev;
1300 1299
1300 if (s->s_root)
1301 close_bdev_exclusive(bdev, mode);
1302
1301 memset(&args, 0, sizeof(args)); 1303 memset(&args, 0, sizeof(args));
1302 args.ar_quota = GFS2_QUOTA_DEFAULT; 1304 args.ar_quota = GFS2_QUOTA_DEFAULT;
1303 args.ar_data = GFS2_DATA_DEFAULT; 1305 args.ar_data = GFS2_DATA_DEFAULT;
@@ -1309,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1309 error = gfs2_mount_args(&args, data); 1311 error = gfs2_mount_args(&args, data);
1310 if (error) { 1312 if (error) {
1311 printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); 1313 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1312 if (s->s_root) 1314 goto error_super;
1313 goto error_super;
1314 deactivate_locked_super(s);
1315 return error;
1316 } 1315 }
1317 1316
1318 if (s->s_root) { 1317 if (s->s_root) {
1319 error = -EBUSY; 1318 error = -EBUSY;
1320 if ((flags ^ s->s_flags) & MS_RDONLY) 1319 if ((flags ^ s->s_flags) & MS_RDONLY)
1321 goto error_super; 1320 goto error_super;
1322 close_bdev_exclusive(bdev, mode);
1323 } else { 1321 } else {
1324 char b[BDEVNAME_SIZE]; 1322 char b[BDEVNAME_SIZE];
1325 1323
@@ -1328,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1328 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1326 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1329 sb_set_blocksize(s, block_size(bdev)); 1327 sb_set_blocksize(s, block_size(bdev));
1330 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); 1328 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
1331 if (error) { 1329 if (error)
1332 deactivate_locked_super(s); 1330 goto error_super;
1333 return error;
1334 }
1335 s->s_flags |= MS_ACTIVE; 1331 s->s_flags |= MS_ACTIVE;
1336 bdev->bd_super = s; 1332 bdev->bd_super = s;
1337 } 1333 }
1338 1334
1339 sdp = s->s_fs_info; 1335 sdp = s->s_fs_info;
1340 mnt->mnt_sb = s;
1341 if (args.ar_meta) 1336 if (args.ar_meta)
1342 mnt->mnt_root = dget(sdp->sd_master_dir); 1337 return dget(sdp->sd_master_dir);
1343 else 1338 else
1344 mnt->mnt_root = dget(sdp->sd_root_dir); 1339 return dget(sdp->sd_root_dir);
1345 return 0;
1346 1340
1347error_super: 1341error_super:
1348 deactivate_locked_super(s); 1342 deactivate_locked_super(s);
1343 return ERR_PTR(error);
1349error_bdev: 1344error_bdev:
1350 close_bdev_exclusive(bdev, mode); 1345 close_bdev_exclusive(bdev, mode);
1351 return error; 1346 return ERR_PTR(error);
1352} 1347}
1353 1348
1354static int set_meta_super(struct super_block *s, void *ptr) 1349static int set_meta_super(struct super_block *s, void *ptr)
@@ -1356,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
1356 return -EINVAL; 1351 return -EINVAL;
1357} 1352}
1358 1353
1359static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, 1354static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
1360 const char *dev_name, void *data, struct vfsmount *mnt) 1355 int flags, const char *dev_name, void *data)
1361{ 1356{
1362 struct super_block *s; 1357 struct super_block *s;
1363 struct gfs2_sbd *sdp; 1358 struct gfs2_sbd *sdp;
@@ -1368,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
1368 if (error) { 1363 if (error) {
1369 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", 1364 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
1370 dev_name, error); 1365 dev_name, error);
1371 return error; 1366 return ERR_PTR(error);
1372 } 1367 }
1373 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, 1368 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
1374 path.dentry->d_inode->i_sb->s_bdev); 1369 path.dentry->d_inode->i_sb->s_bdev);
1375 path_put(&path); 1370 path_put(&path);
1376 if (IS_ERR(s)) { 1371 if (IS_ERR(s)) {
1377 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1372 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
1378 return PTR_ERR(s); 1373 return ERR_CAST(s);
1379 } 1374 }
1380 if ((flags ^ s->s_flags) & MS_RDONLY) { 1375 if ((flags ^ s->s_flags) & MS_RDONLY) {
1381 deactivate_locked_super(s); 1376 deactivate_locked_super(s);
1382 return -EBUSY; 1377 return ERR_PTR(-EBUSY);
1383 } 1378 }
1384 sdp = s->s_fs_info; 1379 sdp = s->s_fs_info;
1385 mnt->mnt_sb = s; 1380 return dget(sdp->sd_master_dir);
1386 mnt->mnt_root = dget(sdp->sd_master_dir);
1387 return 0;
1388} 1381}
1389 1382
1390static void gfs2_kill_sb(struct super_block *sb) 1383static void gfs2_kill_sb(struct super_block *sb)
@@ -1410,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1410struct file_system_type gfs2_fs_type = { 1403struct file_system_type gfs2_fs_type = {
1411 .name = "gfs2", 1404 .name = "gfs2",
1412 .fs_flags = FS_REQUIRES_DEV, 1405 .fs_flags = FS_REQUIRES_DEV,
1413 .get_sb = gfs2_get_sb, 1406 .mount = gfs2_mount,
1414 .kill_sb = gfs2_kill_sb, 1407 .kill_sb = gfs2_kill_sb,
1415 .owner = THIS_MODULE, 1408 .owner = THIS_MODULE,
1416}; 1409};
@@ -1418,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
1418struct file_system_type gfs2meta_fs_type = { 1411struct file_system_type gfs2meta_fs_type = {
1419 .name = "gfs2meta", 1412 .name = "gfs2meta",
1420 .fs_flags = FS_REQUIRES_DEV, 1413 .fs_flags = FS_REQUIRES_DEV,
1421 .get_sb = gfs2_get_sb_meta, 1414 .mount = gfs2_mount_meta,
1422 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1423}; 1416};
1424 1417
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6ee1586f2334..4824c27cebb8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -441,17 +441,16 @@ bail:
441 return res; 441 return res;
442} 442}
443 443
444static int hfs_get_sb(struct file_system_type *fs_type, 444static struct dentry *hfs_mount(struct file_system_type *fs_type,
445 int flags, const char *dev_name, void *data, 445 int flags, const char *dev_name, void *data)
446 struct vfsmount *mnt)
447{ 446{
448 return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); 447 return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
449} 448}
450 449
451static struct file_system_type hfs_fs_type = { 450static struct file_system_type hfs_fs_type = {
452 .owner = THIS_MODULE, 451 .owner = THIS_MODULE,
453 .name = "hfs", 452 .name = "hfs",
454 .get_sb = hfs_get_sb, 453 .mount = hfs_mount,
455 .kill_sb = kill_block_super, 454 .kill_sb = kill_block_super,
456 .fs_flags = FS_REQUIRES_DEV, 455 .fs_flags = FS_REQUIRES_DEV,
457}; 456};
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d7536103..52cc746d3ba3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
495 495
496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
497 497
498static int hfsplus_get_sb(struct file_system_type *fs_type, 498static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
499 int flags, const char *dev_name, void *data, 499 int flags, const char *dev_name, void *data)
500 struct vfsmount *mnt)
501{ 500{
502 return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, 501 return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
503 mnt);
504} 502}
505 503
506static struct file_system_type hfsplus_fs_type = { 504static struct file_system_type hfsplus_fs_type = {
507 .owner = THIS_MODULE, 505 .owner = THIS_MODULE,
508 .name = "hfsplus", 506 .name = "hfsplus",
509 .get_sb = hfsplus_get_sb, 507 .mount = hfsplus_mount,
510 .kill_sb = kill_block_super, 508 .kill_sb = kill_block_super,
511 .fs_flags = FS_REQUIRES_DEV, 509 .fs_flags = FS_REQUIRES_DEV,
512}; 510};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cd7c93917cc7..2c0f148a49e6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -962,11 +962,11 @@ out:
962 return err; 962 return err;
963} 963}
964 964
965static int hostfs_read_sb(struct file_system_type *type, 965static struct dentry *hostfs_read_sb(struct file_system_type *type,
966 int flags, const char *dev_name, 966 int flags, const char *dev_name,
967 void *data, struct vfsmount *mnt) 967 void *data)
968{ 968{
969 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); 969 return mount_nodev(type, flags, data, hostfs_fill_sb_common);
970} 970}
971 971
972static void hostfs_kill_sb(struct super_block *s) 972static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
978static struct file_system_type hostfs_type = { 978static struct file_system_type hostfs_type = {
979 .owner = THIS_MODULE, 979 .owner = THIS_MODULE,
980 .name = "hostfs", 980 .name = "hostfs",
981 .get_sb = hostfs_read_sb, 981 .mount = hostfs_read_sb,
982 .kill_sb = hostfs_kill_sb, 982 .kill_sb = hostfs_kill_sb,
983 .fs_flags = 0, 983 .fs_flags = 0,
984}; 984};
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index eac5f96323e3..793cb9d943d2 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s)
14#ifdef DEBUG_LOCKS 14#ifdef DEBUG_LOCKS
15 printk("lock creation\n"); 15 printk("lock creation\n");
16#endif 16#endif
17 down(&hpfs_sb(s)->hpfs_creation_de); 17 mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
18} 18}
19 19
20void hpfs_unlock_creation(struct super_block *s) 20void hpfs_unlock_creation(struct super_block *s)
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s)
22#ifdef DEBUG_LOCKS 22#ifdef DEBUG_LOCKS
23 printk("unlock creation\n"); 23 printk("unlock creation\n");
24#endif 24#endif
25 up(&hpfs_sb(s)->hpfs_creation_de); 25 mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
26} 26}
27 27
28/* Map a sector into a buffer and return pointers to it and to the buffer. */ 28/* Map a sector into a buffer and return pointers to it and to the buffer. */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index b59eac0232a0..2fee17d0d9ab 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -87,7 +87,7 @@ struct hpfs_sb_info {
87 unsigned *sb_bmp_dir; /* main bitmap directory */ 87 unsigned *sb_bmp_dir; /* main bitmap directory */
88 unsigned sb_c_bitmap; /* current bitmap */ 88 unsigned sb_c_bitmap; /* current bitmap */
89 unsigned sb_max_fwd_alloc; /* max forwad allocation */ 89 unsigned sb_max_fwd_alloc; /* max forwad allocation */
90 struct semaphore hpfs_creation_de; /* when creating dirents, nobody else 90 struct mutex hpfs_creation_de; /* when creating dirents, nobody else
91 can alloc blocks */ 91 can alloc blocks */
92 /*unsigned sb_mounting : 1;*/ 92 /*unsigned sb_mounting : 1;*/
93 int sb_timeshift; 93 int sb_timeshift;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163a..6c5f01597c3a 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -491,7 +491,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
491 sbi->sb_bmp_dir = NULL; 491 sbi->sb_bmp_dir = NULL;
492 sbi->sb_cp_table = NULL; 492 sbi->sb_cp_table = NULL;
493 493
494 init_MUTEX(&sbi->hpfs_creation_de); 494 mutex_init(&sbi->hpfs_creation_de);
495 495
496 uid = current_uid(); 496 uid = current_uid();
497 gid = current_gid(); 497 gid = current_gid();
@@ -686,17 +686,16 @@ bail0:
686 return -EINVAL; 686 return -EINVAL;
687} 687}
688 688
689static int hpfs_get_sb(struct file_system_type *fs_type, 689static struct dentry *hpfs_mount(struct file_system_type *fs_type,
690 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 690 int flags, const char *dev_name, void *data)
691{ 691{
692 return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, 692 return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
693 mnt);
694} 693}
695 694
696static struct file_system_type hpfs_fs_type = { 695static struct file_system_type hpfs_fs_type = {
697 .owner = THIS_MODULE, 696 .owner = THIS_MODULE,
698 .name = "hpfs", 697 .name = "hpfs",
699 .get_sb = hpfs_get_sb, 698 .mount = hpfs_mount,
700 .kill_sb = kill_block_super, 699 .kill_sb = kill_block_super,
701 .fs_flags = FS_REQUIRES_DEV, 700 .fs_flags = FS_REQUIRES_DEV,
702}; 701};
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea6140..f702b5f713fc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
748 return(err); 748 return(err);
749} 749}
750 750
751static int hppfs_read_super(struct file_system_type *type, 751static struct dentry *hppfs_read_super(struct file_system_type *type,
752 int flags, const char *dev_name, 752 int flags, const char *dev_name,
753 void *data, struct vfsmount *mnt) 753 void *data)
754{ 754{
755 return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); 755 return mount_nodev(type, flags, data, hppfs_fill_super);
756} 756}
757 757
758static struct file_system_type hppfs_type = { 758static struct file_system_type hppfs_type = {
759 .owner = THIS_MODULE, 759 .owner = THIS_MODULE,
760 .name = "hppfs", 760 .name = "hppfs",
761 .get_sb = hppfs_read_super, 761 .mount = hppfs_read_super,
762 .kill_sb = kill_anon_super, 762 .kill_sb = kill_anon_super,
763 .fs_flags = 0, 763 .fs_flags = 0,
764}; 764};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b14be3f781c7..a5fe68189eed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
896 } 896 }
897} 897}
898 898
899static int hugetlbfs_get_sb(struct file_system_type *fs_type, 899static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
900 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 900 int flags, const char *dev_name, void *data)
901{ 901{
902 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 902 return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
903} 903}
904 904
905static struct file_system_type hugetlbfs_fs_type = { 905static struct file_system_type hugetlbfs_fs_type = {
906 .name = "hugetlbfs", 906 .name = "hugetlbfs",
907 .get_sb = hugetlbfs_get_sb, 907 .mount = hugetlbfs_mount,
908 .kill_sb = kill_litter_super, 908 .kill_sb = kill_litter_super,
909}; 909};
910 910
@@ -932,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
932 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { 932 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
933 *user = current_user(); 933 *user = current_user();
934 if (user_shm_lock(size, *user)) { 934 if (user_shm_lock(size, *user)) {
935 WARN_ONCE(1, 935 printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
936 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
937 } else { 936 } else {
938 *user = NULL; 937 *user = NULL;
939 return ERR_PTR(-EPERM); 938 return ERR_PTR(-EPERM);
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b90752d..e43b9a4dbf4e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,5 +106,5 @@ extern void release_open_intent(struct nameidata *);
106 * inode.c 106 * inode.c
107 */ 107 */
108extern int get_nr_dirty_inodes(void); 108extern int get_nr_dirty_inodes(void);
109extern int evict_inodes(struct super_block *); 109extern void evict_inodes(struct super_block *);
110extern int invalidate_inodes(struct super_block *); 110extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..2f7d05c89922 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,12 +111,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
111 read_lock(&tasklist_lock); 111 read_lock(&tasklist_lock);
112 switch (which) { 112 switch (which) {
113 case IOPRIO_WHO_PROCESS: 113 case IOPRIO_WHO_PROCESS:
114 rcu_read_lock();
114 if (!who) 115 if (!who)
115 p = current; 116 p = current;
116 else 117 else
117 p = find_task_by_vpid(who); 118 p = find_task_by_vpid(who);
118 if (p) 119 if (p)
119 ret = set_task_ioprio(p, ioprio); 120 ret = set_task_ioprio(p, ioprio);
121 rcu_read_unlock();
120 break; 122 break;
121 case IOPRIO_WHO_PGRP: 123 case IOPRIO_WHO_PGRP:
122 if (!who) 124 if (!who)
@@ -139,7 +141,12 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
139 break; 141 break;
140 142
141 do_each_thread(g, p) { 143 do_each_thread(g, p) {
142 if (__task_cred(p)->uid != who) 144 int match;
145
146 rcu_read_lock();
147 match = __task_cred(p)->uid == who;
148 rcu_read_unlock();
149 if (!match)
143 continue; 150 continue;
144 ret = set_task_ioprio(p, ioprio); 151 ret = set_task_ioprio(p, ioprio);
145 if (ret) 152 if (ret)
@@ -200,12 +207,14 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
200 read_lock(&tasklist_lock); 207 read_lock(&tasklist_lock);
201 switch (which) { 208 switch (which) {
202 case IOPRIO_WHO_PROCESS: 209 case IOPRIO_WHO_PROCESS:
210 rcu_read_lock();
203 if (!who) 211 if (!who)
204 p = current; 212 p = current;
205 else 213 else
206 p = find_task_by_vpid(who); 214 p = find_task_by_vpid(who);
207 if (p) 215 if (p)
208 ret = get_task_ioprio(p); 216 ret = get_task_ioprio(p);
217 rcu_read_unlock();
209 break; 218 break;
210 case IOPRIO_WHO_PGRP: 219 case IOPRIO_WHO_PGRP:
211 if (!who) 220 if (!who)
@@ -232,7 +241,12 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
232 break; 241 break;
233 242
234 do_each_thread(g, p) { 243 do_each_thread(g, p) {
235 if (__task_cred(p)->uid != user->uid) 244 int match;
245
246 rcu_read_lock();
247 match = __task_cred(p)->uid == user->uid;
248 rcu_read_unlock();
249 if (!match)
236 continue; 250 continue;
237 tmpio = get_task_ioprio(p); 251 tmpio = get_task_ioprio(p);
238 if (tmpio < 0) 252 if (tmpio < 0)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 79cf7f616bbe..bfdeb82a53be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1507,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
1507 return inode; 1507 return inode;
1508} 1508}
1509 1509
1510static int isofs_get_sb(struct file_system_type *fs_type, 1510static struct dentry *isofs_mount(struct file_system_type *fs_type,
1511 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1511 int flags, const char *dev_name, void *data)
1512{ 1512{
1513 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, 1513 return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
1514 mnt);
1515} 1514}
1516 1515
1517static struct file_system_type iso9660_fs_type = { 1516static struct file_system_type iso9660_fs_type = {
1518 .owner = THIS_MODULE, 1517 .owner = THIS_MODULE,
1519 .name = "iso9660", 1518 .name = "iso9660",
1520 .get_sb = isofs_get_sb, 1519 .mount = isofs_mount,
1521 .kill_sb = kill_block_super, 1520 .kill_sb = kill_block_super,
1522 .fs_flags = FS_REQUIRES_DEV, 1521 .fs_flags = FS_REQUIRES_DEV,
1523}; 1522};
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 538417c1fdbb..c590d155c095 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1838,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal)
1838 */ 1838 */
1839#define JBD2_MAX_SLABS 8 1839#define JBD2_MAX_SLABS 8
1840static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; 1840static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
1841static DECLARE_MUTEX(jbd2_slab_create_sem);
1842 1841
1843static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { 1842static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
1844 "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", 1843 "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
@@ -1859,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void)
1859 1858
1860static int jbd2_journal_create_slab(size_t size) 1859static int jbd2_journal_create_slab(size_t size)
1861{ 1860{
1861 static DEFINE_MUTEX(jbd2_slab_create_mutex);
1862 int i = order_base_2(size) - 10; 1862 int i = order_base_2(size) - 10;
1863 size_t slab_size; 1863 size_t slab_size;
1864 1864
@@ -1870,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size)
1870 1870
1871 if (unlikely(i < 0)) 1871 if (unlikely(i < 0))
1872 i = 0; 1872 i = 0;
1873 down(&jbd2_slab_create_sem); 1873 mutex_lock(&jbd2_slab_create_mutex);
1874 if (jbd2_slab[i]) { 1874 if (jbd2_slab[i]) {
1875 up(&jbd2_slab_create_sem); 1875 mutex_unlock(&jbd2_slab_create_mutex);
1876 return 0; /* Already created */ 1876 return 0; /* Already created */
1877 } 1877 }
1878 1878
1879 slab_size = 1 << (i+10); 1879 slab_size = 1 << (i+10);
1880 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, 1880 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
1881 slab_size, 0, NULL); 1881 slab_size, 0, NULL);
1882 up(&jbd2_slab_create_sem); 1882 mutex_unlock(&jbd2_slab_create_mutex);
1883 if (!jbd2_slab[i]) { 1883 if (!jbd2_slab[i]) {
1884 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); 1884 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
1885 return -ENOMEM; 1885 return -ENOMEM;
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f538d11c..85c6be2db02f 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
23static inline struct jffs2_inode_cache * 23static inline struct jffs2_inode_cache *
24first_inode_chain(int *i, struct jffs2_sb_info *c) 24first_inode_chain(int *i, struct jffs2_sb_info *c)
25{ 25{
26 for (; *i < INOCACHE_HASHSIZE; (*i)++) { 26 for (; *i < c->inocache_hashsize; (*i)++) {
27 if (c->inocache_list[*i]) 27 if (c->inocache_list[*i])
28 return c->inocache_list[*i]; 28 return c->inocache_list[*i];
29 } 29 }
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5694c1..de4247021d25 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
103 spin_unlock(&jffs2_compressor_list_lock); 103 spin_unlock(&jffs2_compressor_list_lock);
104 *datalen = orig_slen; 104 *datalen = orig_slen;
105 *cdatalen = orig_dlen; 105 *cdatalen = orig_dlen;
106 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL); 106 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
107 spin_lock(&jffs2_compressor_list_lock); 107 spin_lock(&jffs2_compressor_list_lock);
108 this->usecount--; 108 this->usecount--;
109 if (!compr_ret) { 109 if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
152 spin_unlock(&jffs2_compressor_list_lock); 152 spin_unlock(&jffs2_compressor_list_lock);
153 *datalen = orig_slen; 153 *datalen = orig_slen;
154 *cdatalen = orig_dlen; 154 *cdatalen = orig_dlen;
155 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL); 155 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
156 spin_lock(&jffs2_compressor_list_lock); 156 spin_lock(&jffs2_compressor_list_lock);
157 this->usecount--; 157 this->usecount--;
158 if (!compr_ret) { 158 if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
220 if (comprtype == this->compr) { 220 if (comprtype == this->compr) {
221 this->usecount++; 221 this->usecount++;
222 spin_unlock(&jffs2_compressor_list_lock); 222 spin_unlock(&jffs2_compressor_list_lock);
223 ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL); 223 ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
224 spin_lock(&jffs2_compressor_list_lock); 224 spin_lock(&jffs2_compressor_list_lock);
225 if (ret) { 225 if (ret) {
226 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret); 226 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a9106fd9..13bb7597ab39 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
49 char *name; 49 char *name;
50 char compr; /* JFFS2_COMPR_XXX */ 50 char compr; /* JFFS2_COMPR_XXX */
51 int (*compress)(unsigned char *data_in, unsigned char *cpage_out, 51 int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
52 uint32_t *srclen, uint32_t *destlen, void *model); 52 uint32_t *srclen, uint32_t *destlen);
53 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out, 53 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
54 uint32_t cdatalen, uint32_t datalen, void *model); 54 uint32_t cdatalen, uint32_t datalen);
55 int usecount; 55 int usecount;
56 int disabled; /* if set the compressor won't compress */ 56 int disabled; /* if set the compressor won't compress */
57 unsigned char *compr_buf; /* used by size compr. mode */ 57 unsigned char *compr_buf; /* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c98eb..af186ee674d8 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
42} 42}
43 43
44static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out, 44static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
45 uint32_t *sourcelen, uint32_t *dstlen, void *model) 45 uint32_t *sourcelen, uint32_t *dstlen)
46{ 46{
47 size_t compress_size; 47 size_t compress_size;
48 int ret; 48 int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
67} 67}
68 68
69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, 69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
70 uint32_t srclen, uint32_t destlen, void *model) 70 uint32_t srclen, uint32_t destlen)
71{ 71{
72 size_t dl = destlen; 72 size_t dl = destlen;
73 int ret; 73 int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9ef5f7..16a5047903a6 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
31/* _compress returns the compressed size, -1 if bigger */ 31/* _compress returns the compressed size, -1 if bigger */
32static int jffs2_rtime_compress(unsigned char *data_in, 32static int jffs2_rtime_compress(unsigned char *data_in,
33 unsigned char *cpage_out, 33 unsigned char *cpage_out,
34 uint32_t *sourcelen, uint32_t *dstlen, 34 uint32_t *sourcelen, uint32_t *dstlen)
35 void *model)
36{ 35{
37 short positions[256]; 36 short positions[256];
38 int outpos = 0; 37 int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
73 72
74static int jffs2_rtime_decompress(unsigned char *data_in, 73static int jffs2_rtime_decompress(unsigned char *data_in,
75 unsigned char *cpage_out, 74 unsigned char *cpage_out,
76 uint32_t srclen, uint32_t destlen, 75 uint32_t srclen, uint32_t destlen)
77 void *model)
78{ 76{
79 short positions[256]; 77 short positions[256];
80 int outpos = 0; 78 int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f763373..9e7cec808c4c 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
298#if 0 298#if 0
299/* _compress returns the compressed size, -1 if bigger */ 299/* _compress returns the compressed size, -1 if bigger */
300int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out, 300int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
301 uint32_t *sourcelen, uint32_t *dstlen, void *model) 301 uint32_t *sourcelen, uint32_t *dstlen)
302{ 302{
303 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in, 303 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
304 cpage_out, sourcelen, dstlen); 304 cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
306#endif 306#endif
307static int jffs2_dynrubin_compress(unsigned char *data_in, 307static int jffs2_dynrubin_compress(unsigned char *data_in,
308 unsigned char *cpage_out, 308 unsigned char *cpage_out,
309 uint32_t *sourcelen, uint32_t *dstlen, 309 uint32_t *sourcelen, uint32_t *dstlen)
310 void *model)
311{ 310{
312 int bits[8]; 311 int bits[8];
313 unsigned char histo[256]; 312 unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
387 386
388static int jffs2_rubinmips_decompress(unsigned char *data_in, 387static int jffs2_rubinmips_decompress(unsigned char *data_in,
389 unsigned char *cpage_out, 388 unsigned char *cpage_out,
390 uint32_t sourcelen, uint32_t dstlen, 389 uint32_t sourcelen, uint32_t dstlen)
391 void *model)
392{ 390{
393 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, 391 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
394 cpage_out, sourcelen, dstlen); 392 cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
397 395
398static int jffs2_dynrubin_decompress(unsigned char *data_in, 396static int jffs2_dynrubin_decompress(unsigned char *data_in,
399 unsigned char *cpage_out, 397 unsigned char *cpage_out,
400 uint32_t sourcelen, uint32_t dstlen, 398 uint32_t sourcelen, uint32_t dstlen)
401 void *model)
402{ 399{
403 int bits[8]; 400 int bits[8];
404 int c; 401 int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45de6f81..fd05a0b9431d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
68 68
69static int jffs2_zlib_compress(unsigned char *data_in, 69static int jffs2_zlib_compress(unsigned char *data_in,
70 unsigned char *cpage_out, 70 unsigned char *cpage_out,
71 uint32_t *sourcelen, uint32_t *dstlen, 71 uint32_t *sourcelen, uint32_t *dstlen)
72 void *model)
73{ 72{
74 int ret; 73 int ret;
75 74
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
136 135
137static int jffs2_zlib_decompress(unsigned char *data_in, 136static int jffs2_zlib_decompress(unsigned char *data_in,
138 unsigned char *cpage_out, 137 unsigned char *cpage_out,
139 uint32_t srclen, uint32_t destlen, 138 uint32_t srclen, uint32_t destlen)
140 void *model)
141{ 139{
142 int ret; 140 int ret;
143 int wbits = MAX_WBITS; 141 int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 79121aa5858b..92978658ed18 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
367 } 367 }
368 368
369 /* We use f->target field to store the target path. */ 369 /* We use f->target field to store the target path. */
370 f->target = kmalloc(targetlen + 1, GFP_KERNEL); 370 f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
371 if (!f->target) { 371 if (!f->target) {
372 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1); 372 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
373 mutex_unlock(&f->sem); 373 mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
376 goto fail; 376 goto fail;
377 } 377 }
378 378
379 memcpy(f->target, target, targetlen + 1);
380 D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target)); 379 D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
381 380
382 /* No data here. Only a metadata node, which will be 381 /* No data here. Only a metadata node, which will be
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961f617b..e513f1913c15 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
151 } 151 }
152 152
153 /* Be nice */ 153 /* Be nice */
154 yield(); 154 cond_resched();
155 mutex_lock(&c->erase_free_sem); 155 mutex_lock(&c->erase_free_sem);
156 spin_lock(&c->erase_completion_lock); 156 spin_lock(&c->erase_completion_lock);
157 } 157 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06e6fca..e896e67767eb 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
474 return inode; 474 return inode;
475} 475}
476 476
477static int calculate_inocache_hashsize(uint32_t flash_size)
478{
479 /*
480 * Pick a inocache hash size based on the size of the medium.
481 * Count how many megabytes we're dealing with, apply a hashsize twice
482 * that size, but rounding down to the usual big powers of 2. And keep
483 * to sensible bounds.
484 */
485
486 int size_mb = flash_size / 1024 / 1024;
487 int hashsize = (size_mb * 2) & ~0x3f;
488
489 if (hashsize < INOCACHE_HASHSIZE_MIN)
490 return INOCACHE_HASHSIZE_MIN;
491 if (hashsize > INOCACHE_HASHSIZE_MAX)
492 return INOCACHE_HASHSIZE_MAX;
493
494 return hashsize;
495}
477 496
478int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) 497int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
479{ 498{
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
520 if (ret) 539 if (ret)
521 return ret; 540 return ret;
522 541
523 c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL); 542 c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
543 c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
524 if (!c->inocache_list) { 544 if (!c->inocache_list) {
525 ret = -ENOMEM; 545 ret = -ENOMEM;
526 goto out_wbuf; 546 goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a79452497..31dce611337c 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
219 if (!list_empty(&c->erase_complete_list) || 219 if (!list_empty(&c->erase_complete_list) ||
220 !list_empty(&c->erase_pending_list)) { 220 !list_empty(&c->erase_pending_list)) {
221 spin_unlock(&c->erase_completion_lock); 221 spin_unlock(&c->erase_completion_lock);
222 mutex_unlock(&c->alloc_sem);
222 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n")); 223 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
223 if (jffs2_erase_pending_blocks(c, 1)) { 224 if (jffs2_erase_pending_blocks(c, 1))
224 mutex_unlock(&c->alloc_sem);
225 return 0; 225 return 0;
226 } 226
227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n")); 227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228 spin_lock(&c->erase_completion_lock); 228 spin_lock(&c->erase_completion_lock);
229 mutex_lock(&c->alloc_sem);
229 } 230 }
230 231
231 /* First, work out which block we're garbage-collecting */ 232 /* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc89add1..f864005de64c 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
100 wait_queue_head_t erase_wait; /* For waiting for erases to complete */ 100 wait_queue_head_t erase_wait; /* For waiting for erases to complete */
101 101
102 wait_queue_head_t inocache_wq; 102 wait_queue_head_t inocache_wq;
103 int inocache_hashsize;
103 struct jffs2_inode_cache **inocache_list; 104 struct jffs2_inode_cache **inocache_list;
104 spinlock_t inocache_lock; 105 spinlock_t inocache_lock;
105 106
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd138469..5e03233c2363 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
420{ 420{
421 struct jffs2_inode_cache *ret; 421 struct jffs2_inode_cache *ret;
422 422
423 ret = c->inocache_list[ino % INOCACHE_HASHSIZE]; 423 ret = c->inocache_list[ino % c->inocache_hashsize];
424 while (ret && ret->ino < ino) { 424 while (ret && ret->ino < ino) {
425 ret = ret->next; 425 ret = ret->next;
426 } 426 }
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
441 441
442 dbg_inocache("add %p (ino #%u)\n", new, new->ino); 442 dbg_inocache("add %p (ino #%u)\n", new, new->ino);
443 443
444 prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE]; 444 prev = &c->inocache_list[new->ino % c->inocache_hashsize];
445 445
446 while ((*prev) && (*prev)->ino < new->ino) { 446 while ((*prev) && (*prev)->ino < new->ino) {
447 prev = &(*prev)->next; 447 prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
462 dbg_inocache("del %p (ino #%u)\n", old, old->ino); 462 dbg_inocache("del %p (ino #%u)\n", old, old->ino);
463 spin_lock(&c->inocache_lock); 463 spin_lock(&c->inocache_lock);
464 464
465 prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE]; 465 prev = &c->inocache_list[old->ino % c->inocache_hashsize];
466 466
467 while ((*prev) && (*prev)->ino < old->ino) { 467 while ((*prev) && (*prev)->ino < old->ino) {
468 prev = &(*prev)->next; 468 prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
487 int i; 487 int i;
488 struct jffs2_inode_cache *this, *next; 488 struct jffs2_inode_cache *this, *next;
489 489
490 for (i=0; i<INOCACHE_HASHSIZE; i++) { 490 for (i=0; i < c->inocache_hashsize; i++) {
491 this = c->inocache_list[i]; 491 this = c->inocache_list[i];
492 while (this) { 492 while (this) {
493 next = this->next; 493 next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a91691052..5a53d9bdb2b5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
199#define RAWNODE_CLASS_XATTR_DATUM 1 199#define RAWNODE_CLASS_XATTR_DATUM 1
200#define RAWNODE_CLASS_XATTR_REF 2 200#define RAWNODE_CLASS_XATTR_REF 2
201 201
202#define INOCACHE_HASHSIZE 128 202#define INOCACHE_HASHSIZE_MIN 128
203#define INOCACHE_HASHSIZE_MAX 1024
203 204
204#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size) 205#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
205 206
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d1cc36..b632dddcb482 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
20#include "summary.h" 20#include "summary.h"
21#include "debug.h" 21#include "debug.h"
22 22
23#define DEFAULT_EMPTY_SCAN_SIZE 1024 23#define DEFAULT_EMPTY_SCAN_SIZE 256
24 24
25#define noisy_printk(noise, args...) do { \ 25#define noisy_printk(noise, args...) do { \
26 if (*(noise)) { \ 26 if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
435 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) { 435 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
436 struct jffs2_unknown_node *node; 436 struct jffs2_unknown_node *node;
437 struct jffs2_unknown_node crcnode; 437 struct jffs2_unknown_node crcnode;
438 uint32_t ofs, prevofs; 438 uint32_t ofs, prevofs, max_ofs;
439 uint32_t hdr_crc, buf_ofs, buf_len; 439 uint32_t hdr_crc, buf_ofs, buf_len;
440 int err; 440 int err;
441 int noise = 0; 441 int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
550 550
551 /* We temporarily use 'ofs' as a pointer into the buffer/jeb */ 551 /* We temporarily use 'ofs' as a pointer into the buffer/jeb */
552 ofs = 0; 552 ofs = 0;
553 553 max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
554 /* Scan only 4KiB of 0xFF before declaring it's empty */ 554 /* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
555 while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF) 555 while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
556 ofs += 4; 556 ofs += 4;
557 557
558 if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) { 558 if (ofs == max_ofs) {
559#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 559#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
560 if (jffs2_cleanmarker_oob(c)) { 560 if (jffs2_cleanmarker_oob(c)) {
561 /* scan oob, take care of cleanmarker */ 561 /* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b9..c86041b866a4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
179 return ret; 179 return ret;
180} 180}
181 181
182static int jffs2_get_sb(struct file_system_type *fs_type, 182static struct dentry *jffs2_mount(struct file_system_type *fs_type,
183 int flags, const char *dev_name, 183 int flags, const char *dev_name,
184 void *data, struct vfsmount *mnt) 184 void *data)
185{ 185{
186 return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, 186 return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
187 mnt);
188} 187}
189 188
190static void jffs2_put_super (struct super_block *sb) 189static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
229static struct file_system_type jffs2_fs_type = { 228static struct file_system_type jffs2_fs_type = {
230 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
231 .name = "jffs2", 230 .name = "jffs2",
232 .get_sb = jffs2_get_sb, 231 .mount = jffs2_mount,
233 .kill_sb = jffs2_kill_sb, 232 .kill_sb = jffs2_kill_sb,
234}; 233};
235 234
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629e..0669fc1cc3bf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
583 return 0; 583 return 0;
584} 584}
585 585
586static int jfs_get_sb(struct file_system_type *fs_type, 586static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
587 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 587 int flags, const char *dev_name, void *data)
588{ 588{
589 return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, 589 return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
590 mnt);
591} 590}
592 591
593static int jfs_sync_fs(struct super_block *sb, int wait) 592static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
770static struct file_system_type jfs_fs_type = { 769static struct file_system_type jfs_fs_type = {
771 .owner = THIS_MODULE, 770 .owner = THIS_MODULE,
772 .name = "jfs", 771 .name = "jfs",
773 .get_sb = jfs_get_sb, 772 .mount = jfs_do_mount,
774 .kill_sb = kill_block_super, 773 .kill_sb = kill_block_super,
775 .fs_flags = FS_REQUIRES_DEV, 774 .fs_flags = FS_REQUIRES_DEV,
776}; 775};
diff --git a/fs/libfs.c b/fs/libfs.c
index 304a5132ca27..a3accdf528ad 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
202 * will never be mountable) 202 * will never be mountable)
203 */ 203 */
204int get_sb_pseudo(struct file_system_type *fs_type, char *name, 204struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
205 const struct super_operations *ops, unsigned long magic, 205 const struct super_operations *ops, unsigned long magic)
206 struct vfsmount *mnt)
207{ 206{
208 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 207 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
209 struct dentry *dentry; 208 struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
211 struct qstr d_name = {.name = name, .len = strlen(name)}; 210 struct qstr d_name = {.name = name, .len = strlen(name)};
212 211
213 if (IS_ERR(s)) 212 if (IS_ERR(s))
214 return PTR_ERR(s); 213 return ERR_CAST(s);
215 214
216 s->s_flags = MS_NOUSER; 215 s->s_flags = MS_NOUSER;
217 s->s_maxbytes = MAX_LFS_FILESIZE; 216 s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
241 d_instantiate(dentry, root); 240 d_instantiate(dentry, root);
242 s->s_root = dentry; 241 s->s_root = dentry;
243 s->s_flags |= MS_ACTIVE; 242 s->s_flags |= MS_ACTIVE;
244 simple_set_mnt(mnt, s); 243 return dget(s->s_root);
245 return 0;
246 244
247Enomem: 245Enomem:
248 deactivate_locked_super(s); 246 deactivate_locked_super(s);
249 return -ENOMEM; 247 return ERR_PTR(-ENOMEM);
250} 248}
251 249
252int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 250int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
951EXPORT_SYMBOL(dcache_dir_open); 949EXPORT_SYMBOL(dcache_dir_open);
952EXPORT_SYMBOL(dcache_readdir); 950EXPORT_SYMBOL(dcache_readdir);
953EXPORT_SYMBOL(generic_read_dir); 951EXPORT_SYMBOL(generic_read_dir);
954EXPORT_SYMBOL(get_sb_pseudo); 952EXPORT_SYMBOL(mount_pseudo);
955EXPORT_SYMBOL(simple_write_begin); 953EXPORT_SYMBOL(simple_write_begin);
956EXPORT_SYMBOL(simple_write_end); 954EXPORT_SYMBOL(simple_write_end);
957EXPORT_SYMBOL(simple_dir_inode_operations); 955EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/locks.c b/fs/locks.c
index 50ec15927aab..0e62dd35d088 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -186,7 +186,7 @@ void locks_release_private(struct file_lock *fl)
186EXPORT_SYMBOL_GPL(locks_release_private); 186EXPORT_SYMBOL_GPL(locks_release_private);
187 187
188/* Free a lock which is not in use. */ 188/* Free a lock which is not in use. */
189static void locks_free_lock(struct file_lock *fl) 189void locks_free_lock(struct file_lock *fl)
190{ 190{
191 BUG_ON(waitqueue_active(&fl->fl_wait)); 191 BUG_ON(waitqueue_active(&fl->fl_wait));
192 BUG_ON(!list_empty(&fl->fl_block)); 192 BUG_ON(!list_empty(&fl->fl_block));
@@ -195,6 +195,7 @@ static void locks_free_lock(struct file_lock *fl)
195 locks_release_private(fl); 195 locks_release_private(fl);
196 kmem_cache_free(filelock_cache, fl); 196 kmem_cache_free(filelock_cache, fl);
197} 197}
198EXPORT_SYMBOL(locks_free_lock);
198 199
199void locks_init_lock(struct file_lock *fl) 200void locks_init_lock(struct file_lock *fl)
200{ 201{
@@ -234,11 +235,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
234 fl->fl_ops->fl_copy_lock(new, fl); 235 fl->fl_ops->fl_copy_lock(new, fl);
235 new->fl_ops = fl->fl_ops; 236 new->fl_ops = fl->fl_ops;
236 } 237 }
237 if (fl->fl_lmops) { 238 if (fl->fl_lmops)
238 if (fl->fl_lmops->fl_copy_lock)
239 fl->fl_lmops->fl_copy_lock(new, fl);
240 new->fl_lmops = fl->fl_lmops; 239 new->fl_lmops = fl->fl_lmops;
241 }
242} 240}
243 241
244/* 242/*
@@ -1371,20 +1369,22 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1371 struct inode *inode = dentry->d_inode; 1369 struct inode *inode = dentry->d_inode;
1372 int error, rdlease_count = 0, wrlease_count = 0; 1370 int error, rdlease_count = 0, wrlease_count = 0;
1373 1371
1372 lease = *flp;
1373
1374 error = -EACCES;
1374 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) 1375 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
1375 return -EACCES; 1376 goto out;
1377 error = -EINVAL;
1376 if (!S_ISREG(inode->i_mode)) 1378 if (!S_ISREG(inode->i_mode))
1377 return -EINVAL; 1379 goto out;
1378 error = security_file_lock(filp, arg); 1380 error = security_file_lock(filp, arg);
1379 if (error) 1381 if (error)
1380 return error; 1382 goto out;
1381 1383
1382 time_out_leases(inode); 1384 time_out_leases(inode);
1383 1385
1384 BUG_ON(!(*flp)->fl_lmops->fl_break); 1386 BUG_ON(!(*flp)->fl_lmops->fl_break);
1385 1387
1386 lease = *flp;
1387
1388 if (arg != F_UNLCK) { 1388 if (arg != F_UNLCK) {
1389 error = -EAGAIN; 1389 error = -EAGAIN;
1390 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1390 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1425,8 +1425,9 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1425 goto out; 1425 goto out;
1426 1426
1427 if (my_before != NULL) { 1427 if (my_before != NULL) {
1428 *flp = *my_before;
1429 error = lease->fl_lmops->fl_change(my_before, arg); 1428 error = lease->fl_lmops->fl_change(my_before, arg);
1429 if (!error)
1430 *flp = *my_before;
1430 goto out; 1431 goto out;
1431 } 1432 }
1432 1433
@@ -1441,7 +1442,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1441 return 0; 1442 return 0;
1442 1443
1443out: 1444out:
1444 locks_free_lock(lease);
1445 return error; 1445 return error;
1446} 1446}
1447EXPORT_SYMBOL(generic_setlease); 1447EXPORT_SYMBOL(generic_setlease);
@@ -1493,21 +1493,19 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1493} 1493}
1494EXPORT_SYMBOL_GPL(vfs_setlease); 1494EXPORT_SYMBOL_GPL(vfs_setlease);
1495 1495
1496/** 1496static int do_fcntl_delete_lease(struct file *filp)
1497 * fcntl_setlease - sets a lease on an open file
1498 * @fd: open file descriptor
1499 * @filp: file pointer
1500 * @arg: type of lease to obtain
1501 *
1502 * Call this fcntl to establish a lease on the file.
1503 * Note that you also need to call %F_SETSIG to
1504 * receive a signal when the lease is broken.
1505 */
1506int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1507{ 1497{
1508 struct file_lock *fl; 1498 struct file_lock fl, *flp = &fl;
1499
1500 lease_init(filp, F_UNLCK, flp);
1501
1502 return vfs_setlease(filp, F_UNLCK, &flp);
1503}
1504
1505static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1506{
1507 struct file_lock *fl, *ret;
1509 struct fasync_struct *new; 1508 struct fasync_struct *new;
1510 struct inode *inode = filp->f_path.dentry->d_inode;
1511 int error; 1509 int error;
1512 1510
1513 fl = lease_alloc(filp, arg); 1511 fl = lease_alloc(filp, arg);
@@ -1519,10 +1517,16 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1519 locks_free_lock(fl); 1517 locks_free_lock(fl);
1520 return -ENOMEM; 1518 return -ENOMEM;
1521 } 1519 }
1520 ret = fl;
1522 lock_flocks(); 1521 lock_flocks();
1523 error = __vfs_setlease(filp, arg, &fl); 1522 error = __vfs_setlease(filp, arg, &ret);
1524 if (error || arg == F_UNLCK) 1523 if (error) {
1525 goto out_unlock; 1524 unlock_flocks();
1525 locks_free_lock(fl);
1526 goto out_free_fasync;
1527 }
1528 if (ret != fl)
1529 locks_free_lock(fl);
1526 1530
1527 /* 1531 /*
1528 * fasync_insert_entry() returns the old entry if any. 1532 * fasync_insert_entry() returns the old entry if any.
@@ -1530,26 +1534,36 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1530 * inserted it into the fasync list. Clear new so that 1534 * inserted it into the fasync list. Clear new so that
1531 * we don't release it here. 1535 * we don't release it here.
1532 */ 1536 */
1533 if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) 1537 if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
1534 new = NULL; 1538 new = NULL;
1535 1539
1536 if (error < 0) {
1537 /* remove lease just inserted by setlease */
1538 fl->fl_type = F_UNLCK | F_INPROGRESS;
1539 fl->fl_break_time = jiffies - 10;
1540 time_out_leases(inode);
1541 goto out_unlock;
1542 }
1543
1544 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1540 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1545out_unlock:
1546 unlock_flocks(); 1541 unlock_flocks();
1542
1543out_free_fasync:
1547 if (new) 1544 if (new)
1548 fasync_free(new); 1545 fasync_free(new);
1549 return error; 1546 return error;
1550} 1547}
1551 1548
1552/** 1549/**
1550 * fcntl_setlease - sets a lease on an open file
1551 * @fd: open file descriptor
1552 * @filp: file pointer
1553 * @arg: type of lease to obtain
1554 *
1555 * Call this fcntl to establish a lease on the file.
1556 * Note that you also need to call %F_SETSIG to
1557 * receive a signal when the lease is broken.
1558 */
1559int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1560{
1561 if (arg == F_UNLCK)
1562 return do_fcntl_delete_lease(filp);
1563 return do_fcntl_add_lease(fd, filp, arg);
1564}
1565
1566/**
1553 * flock_lock_file_wait - Apply a FLOCK-style lock to a file 1567 * flock_lock_file_wait - Apply a FLOCK-style lock to a file
1554 * @filp: The file to apply the lock to 1568 * @filp: The file to apply the lock to
1555 * @fl: The lock to be applied 1569 * @fl: The lock to be applied
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a3040..92ca6fbe09bd 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
298 return sync_request(page, bdev, WRITE); 298 return sync_request(page, bdev, WRITE);
299} 299}
300 300
301static void bdev_put_device(struct super_block *sb) 301static void bdev_put_device(struct logfs_super *s)
302{ 302{
303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); 303 close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
320 .put_device = bdev_put_device, 320 .put_device = bdev_put_device,
321}; 321};
322 322
323int logfs_get_sb_bdev(struct file_system_type *type, int flags, 323int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
324 const char *devname, struct vfsmount *mnt) 324 const char *devname)
325{ 325{
326 struct block_device *bdev; 326 struct block_device *bdev;
327 327
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
333 int mtdnr = MINOR(bdev->bd_dev); 333 int mtdnr = MINOR(bdev->bd_dev);
334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
335 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 335 return logfs_get_sb_mtd(p, mtdnr);
336 } 336 }
337 337
338 return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt); 338 p->s_bdev = bdev;
339 p->s_mtd = NULL;
340 p->s_devops = &bd_devops;
341 return 0;
339} 342}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4b..7466e9dcc8c5 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
231} 231}
232 232
233static void mtd_put_device(struct super_block *sb) 233static void mtd_put_device(struct logfs_super *s)
234{ 234{
235 put_mtd_device(logfs_super(sb)->s_mtd); 235 put_mtd_device(s->s_mtd);
236} 236}
237 237
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs) 238static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
265 .put_device = mtd_put_device, 265 .put_device = mtd_put_device,
266}; 266};
267 267
268int logfs_get_sb_mtd(struct file_system_type *type, int flags, 268int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
269 int mtdnr, struct vfsmount *mnt)
270{ 269{
271 struct mtd_info *mtd; 270 struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
272 const struct logfs_device_ops *devops = &mtd_devops;
273
274 mtd = get_mtd_device(NULL, mtdnr);
275 if (IS_ERR(mtd)) 271 if (IS_ERR(mtd))
276 return PTR_ERR(mtd); 272 return PTR_ERR(mtd);
277 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); 273
274 s->s_bdev = NULL;
275 s->s_mtd = mtd;
276 s->s_devops = &mtd_devops;
277 return 0;
278} 278}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d243..57afd4a6fabb 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
136 int (*erase_segment)(struct logfs_area *area); 136 int (*erase_segment)(struct logfs_area *area);
137}; 137};
138 138
139struct logfs_super; /* forward */
139/** 140/**
140 * struct logfs_device_ops - device access operations 141 * struct logfs_device_ops - device access operations
141 * 142 *
@@ -156,7 +157,7 @@ struct logfs_device_ops {
156 int ensure_write); 157 int ensure_write);
157 int (*can_write_buf)(struct super_block *sb, u64 ofs); 158 int (*can_write_buf)(struct super_block *sb, u64 ofs);
158 void (*sync)(struct super_block *sb); 159 void (*sync)(struct super_block *sb);
159 void (*put_device)(struct super_block *sb); 160 void (*put_device)(struct logfs_super *s);
160}; 161};
161 162
162/** 163/**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
471 472
472/* dev_bdev.c */ 473/* dev_bdev.c */
473#ifdef CONFIG_BLOCK 474#ifdef CONFIG_BLOCK
474int logfs_get_sb_bdev(struct file_system_type *type, int flags, 475int logfs_get_sb_bdev(struct logfs_super *s,
475 const char *devname, struct vfsmount *mnt); 476 struct file_system_type *type,
477 const char *devname);
476#else 478#else
477static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags, 479static inline int logfs_get_sb_bdev(struct logfs_super *s,
478 const char *devname, struct vfsmount *mnt) 480 struct file_system_type *type,
481 const char *devname)
479{ 482{
480 return -ENODEV; 483 return -ENODEV;
481} 484}
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
483 486
484/* dev_mtd.c */ 487/* dev_mtd.c */
485#ifdef CONFIG_MTD 488#ifdef CONFIG_MTD
486int logfs_get_sb_mtd(struct file_system_type *type, int flags, 489int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
487 int mtdnr, struct vfsmount *mnt);
488#else 490#else
489static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags, 491static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
490 int mtdnr, struct vfsmount *mnt)
491{ 492{
492 return -ENODEV; 493 return -ENODEV;
493} 494}
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
619void logfs_crash_dump(struct super_block *sb); 620void logfs_crash_dump(struct super_block *sb);
620void *memchr_inv(const void *s, int c, size_t n); 621void *memchr_inv(const void *s, int c, size_t n);
621int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); 622int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
622int logfs_get_sb_device(struct file_system_type *type, int flags,
623 struct mtd_info *mtd, struct block_device *bdev,
624 const struct logfs_device_ops *devops, struct vfsmount *mnt);
625int logfs_check_ds(struct logfs_disk_super *ds); 623int logfs_check_ds(struct logfs_disk_super *ds);
626int logfs_write_sb(struct super_block *sb); 624int logfs_write_sb(struct super_block *sb);
627 625
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d81..33435e4b14d2 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
325 return 0; 325 return 0;
326} 326}
327 327
328static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) 328static int logfs_get_sb_final(struct super_block *sb)
329{ 329{
330 struct logfs_super *super = logfs_super(sb); 330 struct logfs_super *super = logfs_super(sb);
331 struct inode *rootdir; 331 struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
356 } 356 }
357 357
358 log_super("LogFS: Finished mounting\n"); 358 log_super("LogFS: Finished mounting\n");
359 simple_set_mnt(mnt, sb);
360 return 0; 359 return 0;
361 360
362fail: 361fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
529 logfs_cleanup_rw(sb); 528 logfs_cleanup_rw(sb);
530 if (super->s_erase_page) 529 if (super->s_erase_page)
531 __free_page(super->s_erase_page); 530 __free_page(super->s_erase_page);
532 super->s_devops->put_device(sb); 531 super->s_devops->put_device(super);
533 logfs_mempool_destroy(super->s_btree_pool); 532 logfs_mempool_destroy(super->s_btree_pool);
534 logfs_mempool_destroy(super->s_alias_pool); 533 logfs_mempool_destroy(super->s_alias_pool);
535 kfree(super); 534 kfree(super);
536 log_super("LogFS: Finished unmounting\n"); 535 log_super("LogFS: Finished unmounting\n");
537} 536}
538 537
539int logfs_get_sb_device(struct file_system_type *type, int flags, 538static struct dentry *logfs_get_sb_device(struct logfs_super *super,
540 struct mtd_info *mtd, struct block_device *bdev, 539 struct file_system_type *type, int flags)
541 const struct logfs_device_ops *devops, struct vfsmount *mnt)
542{ 540{
543 struct logfs_super *super;
544 struct super_block *sb; 541 struct super_block *sb;
545 int err = -ENOMEM; 542 int err = -ENOMEM;
546 static int mount_count; 543 static int mount_count;
547 544
548 log_super("LogFS: Start mount %x\n", mount_count++); 545 log_super("LogFS: Start mount %x\n", mount_count++);
549 super = kzalloc(sizeof(*super), GFP_KERNEL);
550 if (!super)
551 goto err0;
552 546
553 super->s_mtd = mtd;
554 super->s_bdev = bdev;
555 err = -EINVAL; 547 err = -EINVAL;
556 sb = sget(type, logfs_sb_test, logfs_sb_set, super); 548 sb = sget(type, logfs_sb_test, logfs_sb_set, super);
557 if (IS_ERR(sb)) 549 if (IS_ERR(sb)) {
558 goto err0; 550 super->s_devops->put_device(super);
551 kfree(super);
552 return ERR_CAST(sb);
553 }
559 554
560 if (sb->s_root) { 555 if (sb->s_root) {
561 /* Device is already in use */ 556 /* Device is already in use */
562 err = 0; 557 super->s_devops->put_device(super);
563 simple_set_mnt(mnt, sb); 558 kfree(super);
564 goto err0; 559 return dget(sb->s_root);
565 } 560 }
566 561
567 super->s_devops = devops;
568
569 /* 562 /*
570 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache 563 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
571 * only covers 16TB and the upper 8TB are used for indirect blocks. 564 * only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
581 goto err1; 574 goto err1;
582 575
583 sb->s_flags |= MS_ACTIVE; 576 sb->s_flags |= MS_ACTIVE;
584 err = logfs_get_sb_final(sb, mnt); 577 err = logfs_get_sb_final(sb);
585 if (err) 578 if (err) {
586 deactivate_locked_super(sb); 579 deactivate_locked_super(sb);
587 return err; 580 return ERR_PTR(err);
581 }
582 return dget(sb->s_root);
588 583
589err1: 584err1:
590 /* no ->s_root, no ->put_super() */ 585 /* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
592 iput(super->s_segfile_inode); 587 iput(super->s_segfile_inode);
593 iput(super->s_mapping_inode); 588 iput(super->s_mapping_inode);
594 deactivate_locked_super(sb); 589 deactivate_locked_super(sb);
595 return err; 590 return ERR_PTR(err);
596err0:
597 kfree(super);
598 //devops->put_device(sb);
599 return err;
600} 591}
601 592
602static int logfs_get_sb(struct file_system_type *type, int flags, 593static struct dentry *logfs_mount(struct file_system_type *type, int flags,
603 const char *devname, void *data, struct vfsmount *mnt) 594 const char *devname, void *data)
604{ 595{
605 ulong mtdnr; 596 ulong mtdnr;
597 struct logfs_super *super;
598 int err;
606 599
607 if (!devname) 600 super = kzalloc(sizeof(*super), GFP_KERNEL);
608 return logfs_get_sb_bdev(type, flags, devname, mnt); 601 if (!super)
609 if (strncmp(devname, "mtd", 3)) 602 return ERR_PTR(-ENOMEM);
610 return logfs_get_sb_bdev(type, flags, devname, mnt);
611 603
612 { 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3))
607 err = logfs_get_sb_bdev(super, type, devname);
608 else {
613 char *garbage; 609 char *garbage;
614 mtdnr = simple_strtoul(devname+3, &garbage, 0); 610 mtdnr = simple_strtoul(devname+3, &garbage, 0);
615 if (*garbage) 611 if (*garbage)
616 return -EINVAL; 612 err = -EINVAL;
613 else
614 err = logfs_get_sb_mtd(super, mtdnr);
615 }
616
617 if (err) {
618 kfree(super);
619 return ERR_PTR(err);
617 } 620 }
618 621
619 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 622 return logfs_get_sb_device(super, type, flags);
620} 623}
621 624
622static struct file_system_type logfs_fs_type = { 625static struct file_system_type logfs_fs_type = {
623 .owner = THIS_MODULE, 626 .owner = THIS_MODULE,
624 .name = "logfs", 627 .name = "logfs",
625 .get_sb = logfs_get_sb, 628 .mount = logfs_mount,
626 .kill_sb = logfs_kill_sb, 629 .kill_sb = logfs_kill_sb,
627 .fs_flags = FS_REQUIRES_DEV, 630 .fs_flags = FS_REQUIRES_DEV,
628 631
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8fb..fb2020858a34 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
614 V2_minix_truncate(inode); 614 V2_minix_truncate(inode);
615} 615}
616 616
617static int minix_get_sb(struct file_system_type *fs_type, 617static struct dentry *minix_mount(struct file_system_type *fs_type,
618 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 618 int flags, const char *dev_name, void *data)
619{ 619{
620 return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super, 620 return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
621 mnt);
622} 621}
623 622
624static struct file_system_type minix_fs_type = { 623static struct file_system_type minix_fs_type = {
625 .owner = THIS_MODULE, 624 .owner = THIS_MODULE,
626 .name = "minix", 625 .name = "minix",
627 .get_sb = minix_get_sb, 626 .mount = minix_mount,
628 .kill_sb = kill_block_super, 627 .kill_sb = kill_block_super,
629 .fs_flags = FS_REQUIRES_DEV, 628 .fs_flags = FS_REQUIRES_DEV,
630}; 629};
diff --git a/fs/namei.c b/fs/namei.c
index f7dbc06857ab..5362af9b7372 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1574,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
1574 */ 1574 */
1575 if (will_truncate) 1575 if (will_truncate)
1576 mnt_drop_write(nd->path.mnt); 1576 mnt_drop_write(nd->path.mnt);
1577 path_put(&nd->path);
1577 return filp; 1578 return filp;
1578 1579
1579exit: 1580exit:
@@ -1675,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1675 } 1676 }
1676 filp = nameidata_to_filp(nd); 1677 filp = nameidata_to_filp(nd);
1677 mnt_drop_write(nd->path.mnt); 1678 mnt_drop_write(nd->path.mnt);
1679 path_put(&nd->path);
1678 if (!IS_ERR(filp)) { 1680 if (!IS_ERR(filp)) {
1679 error = ima_file_check(filp, acc_mode); 1681 error = ima_file_check(filp, acc_mode);
1680 if (error) { 1682 if (error) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26aca..d290545aa0c4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1020,16 +1020,16 @@ out:
1020 return result; 1020 return result;
1021} 1021}
1022 1022
1023static int ncp_get_sb(struct file_system_type *fs_type, 1023static struct dentry *ncp_mount(struct file_system_type *fs_type,
1024 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1024 int flags, const char *dev_name, void *data)
1025{ 1025{
1026 return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); 1026 return mount_nodev(fs_type, flags, data, ncp_fill_super);
1027} 1027}
1028 1028
1029static struct file_system_type ncp_fs_type = { 1029static struct file_system_type ncp_fs_type = {
1030 .owner = THIS_MODULE, 1030 .owner = THIS_MODULE,
1031 .name = "ncpfs", 1031 .name = "ncpfs",
1032 .get_sb = ncp_get_sb, 1032 .mount = ncp_mount,
1033 .kill_sb = kill_anon_super, 1033 .kill_sb = kill_anon_super,
1034 .fs_flags = FS_BINARY_MOUNTDATA, 1034 .fs_flags = FS_BINARY_MOUNTDATA,
1035}; 1035};
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a80961677..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -873,7 +873,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
873 dreq->inode = inode; 873 dreq->inode = inode;
874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx); 875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
876 if (dreq->l_ctx != NULL) 876 if (dreq->l_ctx == NULL)
877 goto out_release; 877 goto out_release;
878 if (!is_sync_kiocb(iocb)) 878 if (!is_sync_kiocb(iocb))
879 dreq->iocb = iocb; 879 dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e756075637b0..60677f9f1311 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -884,6 +884,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
884 dprintk("NFS: setlease(%s/%s, arg=%ld)\n", 884 dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
885 file->f_path.dentry->d_parent->d_name.name, 885 file->f_path.dentry->d_parent->d_name.name,
886 file->f_path.dentry->d_name.name, arg); 886 file->f_path.dentry->d_name.name, arg);
887
888 return -EINVAL; 887 return -EINVAL;
889} 888}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed8b6b9..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
123 size_t desclen = typelen + namelen + 2; 123 size_t desclen = typelen + namelen + 2;
124 124
125 *desc = kmalloc(desclen, GFP_KERNEL); 125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!desc) 126 if (!*desc)
127 return -ENOMEM; 127 return -ENOMEM;
128 128
129 cp = *desc; 129 cp = *desc;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758c99fd..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -429,7 +429,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
429 * returned NFS4ERR_DELAY as per Section 2.10.6.2 429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
430 * of RFC5661. 430 * of RFC5661.
431 */ 431 */
432 dprintk("%s: slot=%ld seq=%d: Operation in progress\n", 432 dprintk("%s: slot=%td seq=%d: Operation in progress\n",
433 __func__, 433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots, 434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr); 435 res->sr_slot->seq_nr);
@@ -573,7 +573,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
573 goto out; 573 goto out;
574 } 574 }
575 575
576 dprintk("--> %s clp %p session %p sr_slot %ld\n", 576 dprintk("--> %s clp %p session %p sr_slot %td\n",
577 __func__, session->clp, session, res->sr_slot ? 577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1); 578 res->sr_slot - session->fc_slot_table.slots : -1);
579 579
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e17..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
65 if (req == NULL) 65 if (req == NULL)
66 return ERR_PTR(-ENOMEM); 66 return ERR_PTR(-ENOMEM);
67 67
68 /* get lock context early so we can deal with alloc failures */
69 req->wb_lock_context = nfs_get_lock_context(ctx);
70 if (req->wb_lock_context == NULL) {
71 nfs_page_free(req);
72 return ERR_PTR(-ENOMEM);
73 }
74
68 /* Initialize the request struct. Initially, we assume a 75 /* Initialize the request struct. Initially, we assume a
69 * long write-back delay. This will be adjusted in 76 * long write-back delay. This will be adjusted in
70 * update_nfs_request below if the region is not locked. */ 77 * update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
79 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
80 req->wb_bytes = count; 87 req->wb_bytes = count;
81 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
82 req->wb_lock_context = nfs_get_lock_context(ctx);
83 kref_init(&req->wb_kref); 89 kref_init(&req->wb_kref);
84 return req; 90 return req;
85} 91}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec700d58..0a42e8f4adcb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -260,8 +260,8 @@ static int nfs_statfs(struct dentry *, struct kstatfs *);
260static int nfs_show_options(struct seq_file *, struct vfsmount *); 260static int nfs_show_options(struct seq_file *, struct vfsmount *);
261static int nfs_show_stats(struct seq_file *, struct vfsmount *); 261static int nfs_show_stats(struct seq_file *, struct vfsmount *);
262static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 262static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
263static int nfs_xdev_get_sb(struct file_system_type *fs_type, 263static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
264 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 264 int flags, const char *dev_name, void *raw_data);
265static void nfs_put_super(struct super_block *); 265static void nfs_put_super(struct super_block *);
266static void nfs_kill_super(struct super_block *); 266static void nfs_kill_super(struct super_block *);
267static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 267static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +277,7 @@ static struct file_system_type nfs_fs_type = {
277struct file_system_type nfs_xdev_fs_type = { 277struct file_system_type nfs_xdev_fs_type = {
278 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
279 .name = "nfs", 279 .name = "nfs",
280 .get_sb = nfs_xdev_get_sb, 280 .mount = nfs_xdev_mount,
281 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
283}; 283};
@@ -302,14 +302,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
302 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 302 struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
303static int nfs4_get_sb(struct file_system_type *fs_type, 303static int nfs4_get_sb(struct file_system_type *fs_type,
304 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 304 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
305static int nfs4_remote_get_sb(struct file_system_type *fs_type, 305static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
306 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 306 int flags, const char *dev_name, void *raw_data);
307static int nfs4_xdev_get_sb(struct file_system_type *fs_type, 307static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
308 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 308 int flags, const char *dev_name, void *raw_data);
309static int nfs4_referral_get_sb(struct file_system_type *fs_type, 309static int nfs4_referral_get_sb(struct file_system_type *fs_type,
310 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 310 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
311static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 311static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
312 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 312 int flags, const char *dev_name, void *raw_data);
313static void nfs4_kill_super(struct super_block *sb); 313static void nfs4_kill_super(struct super_block *sb);
314 314
315static struct file_system_type nfs4_fs_type = { 315static struct file_system_type nfs4_fs_type = {
@@ -323,7 +323,7 @@ static struct file_system_type nfs4_fs_type = {
323static struct file_system_type nfs4_remote_fs_type = { 323static struct file_system_type nfs4_remote_fs_type = {
324 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
325 .name = "nfs4", 325 .name = "nfs4",
326 .get_sb = nfs4_remote_get_sb, 326 .mount = nfs4_remote_mount,
327 .kill_sb = nfs4_kill_super, 327 .kill_sb = nfs4_kill_super,
328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
329}; 329};
@@ -331,7 +331,7 @@ static struct file_system_type nfs4_remote_fs_type = {
331struct file_system_type nfs4_xdev_fs_type = { 331struct file_system_type nfs4_xdev_fs_type = {
332 .owner = THIS_MODULE, 332 .owner = THIS_MODULE,
333 .name = "nfs4", 333 .name = "nfs4",
334 .get_sb = nfs4_xdev_get_sb, 334 .mount = nfs4_xdev_mount,
335 .kill_sb = nfs4_kill_super, 335 .kill_sb = nfs4_kill_super,
336 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 336 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
337}; 337};
@@ -339,7 +339,7 @@ struct file_system_type nfs4_xdev_fs_type = {
339static struct file_system_type nfs4_remote_referral_fs_type = { 339static struct file_system_type nfs4_remote_referral_fs_type = {
340 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
341 .name = "nfs4", 341 .name = "nfs4",
342 .get_sb = nfs4_remote_referral_get_sb, 342 .mount = nfs4_remote_referral_mount,
343 .kill_sb = nfs4_kill_super, 343 .kill_sb = nfs4_kill_super,
344 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 344 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
345}; 345};
@@ -2397,9 +2397,9 @@ static void nfs_kill_super(struct super_block *s)
2397/* 2397/*
2398 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2398 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2399 */ 2399 */
2400static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, 2400static struct dentry *
2401 const char *dev_name, void *raw_data, 2401nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2402 struct vfsmount *mnt) 2402 const char *dev_name, void *raw_data)
2403{ 2403{
2404 struct nfs_clone_mount *data = raw_data; 2404 struct nfs_clone_mount *data = raw_data;
2405 struct super_block *s; 2405 struct super_block *s;
@@ -2411,7 +2411,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2411 }; 2411 };
2412 int error; 2412 int error;
2413 2413
2414 dprintk("--> nfs_xdev_get_sb()\n"); 2414 dprintk("--> nfs_xdev_mount()\n");
2415 2415
2416 /* create a new volume representation */ 2416 /* create a new volume representation */
2417 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2417 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2458,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2458 } 2458 }
2459 2459
2460 s->s_flags |= MS_ACTIVE; 2460 s->s_flags |= MS_ACTIVE;
2461 mnt->mnt_sb = s;
2462 mnt->mnt_root = mntroot;
2463 2461
2464 /* clone any lsm security options from the parent to the new sb */ 2462 /* clone any lsm security options from the parent to the new sb */
2465 security_sb_clone_mnt_opts(data->sb, s); 2463 security_sb_clone_mnt_opts(data->sb, s);
2466 2464
2467 dprintk("<-- nfs_xdev_get_sb() = 0\n"); 2465 dprintk("<-- nfs_xdev_mount() = 0\n");
2468 return 0; 2466 return mntroot;
2469 2467
2470out_err_nosb: 2468out_err_nosb:
2471 nfs_free_server(server); 2469 nfs_free_server(server);
2472out_err_noserver: 2470out_err_noserver:
2473 dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); 2471 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
2474 return error; 2472 return ERR_PTR(error);
2475 2473
2476error_splat_super: 2474error_splat_super:
2477 if (server && !s->s_root) 2475 if (server && !s->s_root)
2478 bdi_unregister(&server->backing_dev_info); 2476 bdi_unregister(&server->backing_dev_info);
2479error_splat_bdi: 2477error_splat_bdi:
2480 deactivate_locked_super(s); 2478 deactivate_locked_super(s);
2481 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); 2479 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
2482 return error; 2480 return ERR_PTR(error);
2483} 2481}
2484 2482
2485#ifdef CONFIG_NFS_V4 2483#ifdef CONFIG_NFS_V4
@@ -2649,8 +2647,9 @@ out_no_address:
2649/* 2647/*
2650 * Get the superblock for the NFS4 root partition 2648 * Get the superblock for the NFS4 root partition
2651 */ 2649 */
2652static int nfs4_remote_get_sb(struct file_system_type *fs_type, 2650static struct dentry *
2653 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2651nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2652 const char *dev_name, void *raw_data)
2654{ 2653{
2655 struct nfs_parsed_mount_data *data = raw_data; 2654 struct nfs_parsed_mount_data *data = raw_data;
2656 struct super_block *s; 2655 struct super_block *s;
@@ -2714,15 +2713,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2714 goto error_splat_root; 2713 goto error_splat_root;
2715 2714
2716 s->s_flags |= MS_ACTIVE; 2715 s->s_flags |= MS_ACTIVE;
2717 mnt->mnt_sb = s; 2716
2718 mnt->mnt_root = mntroot; 2717 security_free_mnt_opts(&data->lsm_opts);
2719 error = 0; 2718 nfs_free_fhandle(mntfh);
2719 return mntroot;
2720 2720
2721out: 2721out:
2722 security_free_mnt_opts(&data->lsm_opts); 2722 security_free_mnt_opts(&data->lsm_opts);
2723out_free_fh: 2723out_free_fh:
2724 nfs_free_fhandle(mntfh); 2724 nfs_free_fhandle(mntfh);
2725 return error; 2725 return ERR_PTR(error);
2726 2726
2727out_free: 2727out_free:
2728 nfs_free_server(server); 2728 nfs_free_server(server);
@@ -2968,9 +2968,9 @@ static void nfs4_kill_super(struct super_block *sb)
2968/* 2968/*
2969 * Clone an NFS4 server record on xdev traversal (FSID-change) 2969 * Clone an NFS4 server record on xdev traversal (FSID-change)
2970 */ 2970 */
2971static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, 2971static struct dentry *
2972 const char *dev_name, void *raw_data, 2972nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2973 struct vfsmount *mnt) 2973 const char *dev_name, void *raw_data)
2974{ 2974{
2975 struct nfs_clone_mount *data = raw_data; 2975 struct nfs_clone_mount *data = raw_data;
2976 struct super_block *s; 2976 struct super_block *s;
@@ -2982,7 +2982,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2982 }; 2982 };
2983 int error; 2983 int error;
2984 2984
2985 dprintk("--> nfs4_xdev_get_sb()\n"); 2985 dprintk("--> nfs4_xdev_mount()\n");
2986 2986
2987 /* create a new volume representation */ 2987 /* create a new volume representation */
2988 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2988 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3029,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
3029 } 3029 }
3030 3030
3031 s->s_flags |= MS_ACTIVE; 3031 s->s_flags |= MS_ACTIVE;
3032 mnt->mnt_sb = s;
3033 mnt->mnt_root = mntroot;
3034 3032
3035 security_sb_clone_mnt_opts(data->sb, s); 3033 security_sb_clone_mnt_opts(data->sb, s);
3036 3034
3037 dprintk("<-- nfs4_xdev_get_sb() = 0\n"); 3035 dprintk("<-- nfs4_xdev_mount() = 0\n");
3038 return 0; 3036 return mntroot;
3039 3037
3040out_err_nosb: 3038out_err_nosb:
3041 nfs_free_server(server); 3039 nfs_free_server(server);
3042out_err_noserver: 3040out_err_noserver:
3043 dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); 3041 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3044 return error; 3042 return ERR_PTR(error);
3045 3043
3046error_splat_super: 3044error_splat_super:
3047 if (server && !s->s_root) 3045 if (server && !s->s_root)
3048 bdi_unregister(&server->backing_dev_info); 3046 bdi_unregister(&server->backing_dev_info);
3049error_splat_bdi: 3047error_splat_bdi:
3050 deactivate_locked_super(s); 3048 deactivate_locked_super(s);
3051 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); 3049 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3052 return error; 3050 return ERR_PTR(error);
3053} 3051}
3054 3052
3055static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 3053static struct dentry *
3056 int flags, const char *dev_name, void *raw_data, 3054nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3057 struct vfsmount *mnt) 3055 const char *dev_name, void *raw_data)
3058{ 3056{
3059 struct nfs_clone_mount *data = raw_data; 3057 struct nfs_clone_mount *data = raw_data;
3060 struct super_block *s; 3058 struct super_block *s;
@@ -3118,14 +3116,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
3118 } 3116 }
3119 3117
3120 s->s_flags |= MS_ACTIVE; 3118 s->s_flags |= MS_ACTIVE;
3121 mnt->mnt_sb = s;
3122 mnt->mnt_root = mntroot;
3123 3119
3124 security_sb_clone_mnt_opts(data->sb, s); 3120 security_sb_clone_mnt_opts(data->sb, s);
3125 3121
3126 nfs_free_fhandle(mntfh); 3122 nfs_free_fhandle(mntfh);
3127 dprintk("<-- nfs4_referral_get_sb() = 0\n"); 3123 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3128 return 0; 3124 return mntroot;
3129 3125
3130out_err_nosb: 3126out_err_nosb:
3131 nfs_free_server(server); 3127 nfs_free_server(server);
@@ -3133,7 +3129,7 @@ out_err_noserver:
3133 nfs_free_fhandle(mntfh); 3129 nfs_free_fhandle(mntfh);
3134out_err_nofh: 3130out_err_nofh:
3135 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); 3131 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3136 return error; 3132 return ERR_PTR(error);
3137 3133
3138error_splat_super: 3134error_splat_super:
3139 if (server && !s->s_root) 3135 if (server && !s->s_root)
@@ -3142,7 +3138,7 @@ error_splat_bdi:
3142 deactivate_locked_super(s); 3138 deactivate_locked_super(s);
3143 nfs_free_fhandle(mntfh); 3139 nfs_free_fhandle(mntfh);
3144 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 3140 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3145 return error; 3141 return ERR_PTR(error);
3146} 3142}
3147 3143
3148/* 3144/*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad5d2ea..7bdec8531400 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
444 444
445 /* set up nfs_renamedata */ 445 /* set up nfs_renamedata */
446 data->old_dir = old_dir; 446 data->old_dir = old_dir;
447 atomic_inc(&old_dir->i_count); 447 ihold(old_dir);
448 data->new_dir = new_dir; 448 data->new_dir = new_dir;
449 atomic_inc(&new_dir->i_count); 449 ihold(new_dir);
450 data->old_dentry = dget(old_dentry); 450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry); 451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr); 452 nfs_fattr_init(&data->old_fattr);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 56347e0ac88d..ad2bfa68d534 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
673 spin_unlock(&clp->cl_lock); 673 spin_unlock(&clp->cl_lock);
674} 674}
675 675
676static void nfsd4_register_conn(struct nfsd4_conn *conn) 676static int nfsd4_register_conn(struct nfsd4_conn *conn)
677{ 677{
678 conn->cn_xpt_user.callback = nfsd4_conn_lost; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); 679 return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
680} 680}
681 681
682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
683{ 683{
684 struct nfsd4_conn *conn; 684 struct nfsd4_conn *conn;
685 u32 flags = NFS4_CDFC4_FORE; 685 u32 flags = NFS4_CDFC4_FORE;
686 int ret;
686 687
687 if (ses->se_flags & SESSION4_BACK_CHAN) 688 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK; 689 flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
690 if (!conn) 691 if (!conn)
691 return nfserr_jukebox; 692 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses); 693 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn); 694 ret = nfsd4_register_conn(conn);
695 if (ret)
696 /* oops; xprt is already down: */
697 nfsd4_conn_lost(&conn->cn_xpt_user);
694 return nfs_ok; 698 return nfs_ok;
695} 699}
696 700
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
1644{ 1648{
1645 struct nfs4_client *clp = ses->se_client; 1649 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c; 1650 struct nfsd4_conn *c;
1651 int ret;
1647 1652
1648 spin_lock(&clp->cl_lock); 1653 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses); 1654 c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
1654 } 1659 }
1655 __nfsd4_hash_conn(new, ses); 1660 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock); 1661 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new); 1662 ret = nfsd4_register_conn(new);
1663 if (ret)
1664 /* oops; xprt is already down: */
1665 nfsd4_conn_lost(&new->cn_xpt_user);
1658 return; 1666 return;
1659} 1667}
1660 1668
@@ -2310,22 +2318,6 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
2310} 2318}
2311 2319
2312/* 2320/*
2313 * Set the delegation file_lock back pointer.
2314 *
2315 * Called from setlease() with lock_kernel() held.
2316 */
2317static
2318void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
2319{
2320 struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
2321
2322 dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
2323 if (!dp)
2324 return;
2325 dp->dl_flock = new;
2326}
2327
2328/*
2329 * Called from setlease() with lock_kernel() held 2321 * Called from setlease() with lock_kernel() held
2330 */ 2322 */
2331static 2323static
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
2355static const struct lock_manager_operations nfsd_lease_mng_ops = { 2347static const struct lock_manager_operations nfsd_lease_mng_ops = {
2356 .fl_break = nfsd_break_deleg_cb, 2348 .fl_break = nfsd_break_deleg_cb,
2357 .fl_release_private = nfsd_release_deleg_cb, 2349 .fl_release_private = nfsd_release_deleg_cb,
2358 .fl_copy_lock = nfsd_copy_lock_deleg_cb,
2359 .fl_mylease = nfsd_same_client_deleg_cb, 2350 .fl_mylease = nfsd_same_client_deleg_cb,
2360 .fl_change = nfsd_change_deleg_cb, 2351 .fl_change = nfsd_change_deleg_cb,
2361}; 2352};
@@ -2661,12 +2652,15 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2661 fl->fl_file = find_readable_file(stp->st_file); 2652 fl->fl_file = find_readable_file(stp->st_file);
2662 BUG_ON(!fl->fl_file); 2653 BUG_ON(!fl->fl_file);
2663 fl->fl_pid = current->tgid; 2654 fl->fl_pid = current->tgid;
2655 dp->dl_flock = fl;
2664 2656
2665 /* vfs_setlease checks to see if delegation should be handed out. 2657 /* vfs_setlease checks to see if delegation should be handed out.
2666 * the lock_manager callbacks fl_mylease and fl_change are used 2658 * the lock_manager callbacks fl_mylease and fl_change are used
2667 */ 2659 */
2668 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { 2660 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2669 dprintk("NFSD: setlease failed [%d], no delegation\n", status); 2661 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2662 dp->dl_flock = NULL;
2663 locks_free_lock(fl);
2670 unhash_delegation(dp); 2664 unhash_delegation(dp);
2671 flag = NFS4_OPEN_DELEGATE_NONE; 2665 flag = NFS4_OPEN_DELEGATE_NONE;
2672 goto out; 2666 goto out;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f61f8ba..4514ebbee4d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1405 return simple_fill_super(sb, 0x6e667364, nfsd_files); 1405 return simple_fill_super(sb, 0x6e667364, nfsd_files);
1406} 1406}
1407 1407
1408static int nfsd_get_sb(struct file_system_type *fs_type, 1408static struct dentry *nfsd_mount(struct file_system_type *fs_type,
1409 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1409 int flags, const char *dev_name, void *data)
1410{ 1410{
1411 return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); 1411 return mount_single(fs_type, flags, data, nfsd_fill_super);
1412} 1412}
1413 1413
1414static struct file_system_type nfsd_fs_type = { 1414static struct file_system_type nfsd_fs_type = {
1415 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1416 .name = "nfsd", 1416 .name = "nfsd",
1417 .get_sb = nfsd_get_sb, 1417 .mount = nfsd_mount,
1418 .kill_sb = kill_litter_super, 1418 .kill_sb = kill_litter_super,
1419}; 1419};
1420 1420
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db86..f804d41ec9d3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
1141 return (void *)s->s_bdev == data; 1141 return (void *)s->s_bdev == data;
1142} 1142}
1143 1143
1144static int 1144static struct dentry *
1145nilfs_get_sb(struct file_system_type *fs_type, int flags, 1145nilfs_mount(struct file_system_type *fs_type, int flags,
1146 const char *dev_name, void *data, struct vfsmount *mnt) 1146 const char *dev_name, void *data)
1147{ 1147{
1148 struct nilfs_super_data sd; 1148 struct nilfs_super_data sd;
1149 struct super_block *s; 1149 struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1156 1156
1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1158 if (IS_ERR(sd.bdev)) 1158 if (IS_ERR(sd.bdev))
1159 return PTR_ERR(sd.bdev); 1159 return ERR_CAST(sd.bdev);
1160 1160
1161 sd.cno = 0; 1161 sd.cno = 0;
1162 sd.flags = flags; 1162 sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1235 if (!s_new) 1235 if (!s_new)
1236 close_bdev_exclusive(sd.bdev, mode); 1236 close_bdev_exclusive(sd.bdev, mode);
1237 1237
1238 mnt->mnt_sb = s; 1238 return root_dentry;
1239 mnt->mnt_root = root_dentry;
1240 return 0;
1241 1239
1242 failed_super: 1240 failed_super:
1243 deactivate_locked_super(s); 1241 deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1245 failed: 1243 failed:
1246 if (!s_new) 1244 if (!s_new)
1247 close_bdev_exclusive(sd.bdev, mode); 1245 close_bdev_exclusive(sd.bdev, mode);
1248 return err; 1246 return ERR_PTR(err);
1249} 1247}
1250 1248
1251struct file_system_type nilfs_fs_type = { 1249struct file_system_type nilfs_fs_type = {
1252 .owner = THIS_MODULE, 1250 .owner = THIS_MODULE,
1253 .name = "nilfs2", 1251 .name = "nilfs2",
1254 .get_sb = nilfs_get_sb, 1252 .mount = nilfs_mount,
1255 .kill_sb = kill_block_super, 1253 .kill_sb = kill_block_super,
1256 .fs_flags = FS_REQUIRES_DEV, 1254 .fs_flags = FS_REQUIRES_DEV,
1257}; 1255};
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index b388443c3a09..22c629eedd82 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
3 3
4source "fs/notify/dnotify/Kconfig" 4source "fs/notify/dnotify/Kconfig"
5source "fs/notify/inotify/Kconfig" 5source "fs/notify/inotify/Kconfig"
6#source "fs/notify/fanotify/Kconfig" 6source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 85366c78cc37..b04f88eed09e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
131 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); 131 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
132 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); 132 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
133 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); 133 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
134 BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
134 135
135 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 136 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
136 137
@@ -160,20 +161,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
160 __u32 event_mask, void *data, int data_type) 161 __u32 event_mask, void *data, int data_type)
161{ 162{
162 __u32 marks_mask, marks_ignored_mask; 163 __u32 marks_mask, marks_ignored_mask;
164 struct path *path = data;
163 165
164 pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " 166 pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
165 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, 167 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
166 inode_mark, vfsmnt_mark, event_mask, data, data_type); 168 inode_mark, vfsmnt_mark, event_mask, data, data_type);
167 169
168 /* sorry, fanotify only gives a damn about files and dirs */
169 if (!S_ISREG(to_tell->i_mode) &&
170 !S_ISDIR(to_tell->i_mode))
171 return false;
172
173 /* if we don't have enough info to send an event to userspace say no */ 170 /* if we don't have enough info to send an event to userspace say no */
174 if (data_type != FSNOTIFY_EVENT_PATH) 171 if (data_type != FSNOTIFY_EVENT_PATH)
175 return false; 172 return false;
176 173
174 /* sorry, fanotify only gives a damn about files and dirs */
175 if (!S_ISREG(path->dentry->d_inode->i_mode) &&
176 !S_ISDIR(path->dentry->d_inode->i_mode))
177 return false;
178
177 if (inode_mark && vfsmnt_mark) { 179 if (inode_mark && vfsmnt_mark) {
178 marks_mask = (vfsmnt_mark->mask | inode_mark->mask); 180 marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
179 marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); 181 marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
@@ -194,16 +196,29 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
194 BUG(); 196 BUG();
195 } 197 }
196 198
199 if (S_ISDIR(path->dentry->d_inode->i_mode) &&
200 (marks_ignored_mask & FS_ISDIR))
201 return false;
202
197 if (event_mask & marks_mask & ~marks_ignored_mask) 203 if (event_mask & marks_mask & ~marks_ignored_mask)
198 return true; 204 return true;
199 205
200 return false; 206 return false;
201} 207}
202 208
209static void fanotify_free_group_priv(struct fsnotify_group *group)
210{
211 struct user_struct *user;
212
213 user = group->fanotify_data.user;
214 atomic_dec(&user->fanotify_listeners);
215 free_uid(user);
216}
217
203const struct fsnotify_ops fanotify_fsnotify_ops = { 218const struct fsnotify_ops fanotify_fsnotify_ops = {
204 .handle_event = fanotify_handle_event, 219 .handle_event = fanotify_handle_event,
205 .should_send_event = fanotify_should_send_event, 220 .should_send_event = fanotify_should_send_event,
206 .free_group_priv = NULL, 221 .free_group_priv = fanotify_free_group_priv,
207 .free_event_priv = NULL, 222 .free_event_priv = NULL,
208 .freeing_mark = NULL, 223 .freeing_mark = NULL,
209}; 224};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bbcb98e7fcc6..063224812b7e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,10 @@
16 16
17#include <asm/ioctls.h> 17#include <asm/ioctls.h>
18 18
19#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
20#define FANOTIFY_DEFAULT_MAX_MARKS 8192
21#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
22
19extern const struct fsnotify_ops fanotify_fsnotify_ops; 23extern const struct fsnotify_ops fanotify_fsnotify_ops;
20 24
21static struct kmem_cache *fanotify_mark_cache __read_mostly; 25static struct kmem_cache *fanotify_mark_cache __read_mostly;
@@ -326,7 +330,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
326 ret = -EAGAIN; 330 ret = -EAGAIN;
327 if (file->f_flags & O_NONBLOCK) 331 if (file->f_flags & O_NONBLOCK)
328 break; 332 break;
329 ret = -EINTR; 333 ret = -ERESTARTSYS;
330 if (signal_pending(current)) 334 if (signal_pending(current))
331 break; 335 break;
332 336
@@ -372,11 +376,10 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
372static int fanotify_release(struct inode *ignored, struct file *file) 376static int fanotify_release(struct inode *ignored, struct file *file)
373{ 377{
374 struct fsnotify_group *group = file->private_data; 378 struct fsnotify_group *group = file->private_data;
375 struct fanotify_response_event *re, *lre;
376
377 pr_debug("%s: file=%p group=%p\n", __func__, file, group);
378 379
379#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 380#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
381 struct fanotify_response_event *re, *lre;
382
380 mutex_lock(&group->fanotify_data.access_mutex); 383 mutex_lock(&group->fanotify_data.access_mutex);
381 384
382 group->fanotify_data.bypass_perm = true; 385 group->fanotify_data.bypass_perm = true;
@@ -554,18 +557,24 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
554 __u32 mask, 557 __u32 mask,
555 unsigned int flags) 558 unsigned int flags)
556{ 559{
557 __u32 oldmask; 560 __u32 oldmask = -1;
558 561
559 spin_lock(&fsn_mark->lock); 562 spin_lock(&fsn_mark->lock);
560 if (!(flags & FAN_MARK_IGNORED_MASK)) { 563 if (!(flags & FAN_MARK_IGNORED_MASK)) {
561 oldmask = fsn_mark->mask; 564 oldmask = fsn_mark->mask;
562 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); 565 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
563 } else { 566 } else {
564 oldmask = fsn_mark->ignored_mask; 567 __u32 tmask = fsn_mark->ignored_mask | mask;
565 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask)); 568 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
566 if (flags & FAN_MARK_IGNORED_SURV_MODIFY) 569 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
567 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; 570 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
568 } 571 }
572
573 if (!(flags & FAN_MARK_ONDIR)) {
574 __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
575 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
576 }
577
569 spin_unlock(&fsn_mark->lock); 578 spin_unlock(&fsn_mark->lock);
570 579
571 return mask & ~oldmask; 580 return mask & ~oldmask;
@@ -582,6 +591,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
582 if (!fsn_mark) { 591 if (!fsn_mark) {
583 int ret; 592 int ret;
584 593
594 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
595 return -ENOSPC;
596
585 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); 597 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
586 if (!fsn_mark) 598 if (!fsn_mark)
587 return -ENOMEM; 599 return -ENOMEM;
@@ -610,10 +622,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
610 622
611 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 623 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
612 624
625 /*
626 * If some other task has this inode open for write we should not add
627 * an ignored mark, unless that ignored mark is supposed to survive
628 * modification changes anyway.
629 */
630 if ((flags & FAN_MARK_IGNORED_MASK) &&
631 !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
632 (atomic_read(&inode->i_writecount) > 0))
633 return 0;
634
613 fsn_mark = fsnotify_find_inode_mark(group, inode); 635 fsn_mark = fsnotify_find_inode_mark(group, inode);
614 if (!fsn_mark) { 636 if (!fsn_mark) {
615 int ret; 637 int ret;
616 638
639 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
640 return -ENOSPC;
641
617 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); 642 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
618 if (!fsn_mark) 643 if (!fsn_mark)
619 return -ENOMEM; 644 return -ENOMEM;
@@ -637,6 +662,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
637{ 662{
638 struct fsnotify_group *group; 663 struct fsnotify_group *group;
639 int f_flags, fd; 664 int f_flags, fd;
665 struct user_struct *user;
640 666
641 pr_debug("%s: flags=%d event_f_flags=%d\n", 667 pr_debug("%s: flags=%d event_f_flags=%d\n",
642 __func__, flags, event_f_flags); 668 __func__, flags, event_f_flags);
@@ -647,6 +673,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
647 if (flags & ~FAN_ALL_INIT_FLAGS) 673 if (flags & ~FAN_ALL_INIT_FLAGS)
648 return -EINVAL; 674 return -EINVAL;
649 675
676 user = get_current_user();
677 if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
678 free_uid(user);
679 return -EMFILE;
680 }
681
650 f_flags = O_RDWR | FMODE_NONOTIFY; 682 f_flags = O_RDWR | FMODE_NONOTIFY;
651 if (flags & FAN_CLOEXEC) 683 if (flags & FAN_CLOEXEC)
652 f_flags |= O_CLOEXEC; 684 f_flags |= O_CLOEXEC;
@@ -658,12 +690,47 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
658 if (IS_ERR(group)) 690 if (IS_ERR(group))
659 return PTR_ERR(group); 691 return PTR_ERR(group);
660 692
693 group->fanotify_data.user = user;
694 atomic_inc(&user->fanotify_listeners);
695
661 group->fanotify_data.f_flags = event_f_flags; 696 group->fanotify_data.f_flags = event_f_flags;
662#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 697#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
663 mutex_init(&group->fanotify_data.access_mutex); 698 mutex_init(&group->fanotify_data.access_mutex);
664 init_waitqueue_head(&group->fanotify_data.access_waitq); 699 init_waitqueue_head(&group->fanotify_data.access_waitq);
665 INIT_LIST_HEAD(&group->fanotify_data.access_list); 700 INIT_LIST_HEAD(&group->fanotify_data.access_list);
666#endif 701#endif
702 switch (flags & FAN_ALL_CLASS_BITS) {
703 case FAN_CLASS_NOTIF:
704 group->priority = FS_PRIO_0;
705 break;
706 case FAN_CLASS_CONTENT:
707 group->priority = FS_PRIO_1;
708 break;
709 case FAN_CLASS_PRE_CONTENT:
710 group->priority = FS_PRIO_2;
711 break;
712 default:
713 fd = -EINVAL;
714 goto out_put_group;
715 }
716
717 if (flags & FAN_UNLIMITED_QUEUE) {
718 fd = -EPERM;
719 if (!capable(CAP_SYS_ADMIN))
720 goto out_put_group;
721 group->max_events = UINT_MAX;
722 } else {
723 group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
724 }
725
726 if (flags & FAN_UNLIMITED_MARKS) {
727 fd = -EPERM;
728 if (!capable(CAP_SYS_ADMIN))
729 goto out_put_group;
730 group->fanotify_data.max_marks = UINT_MAX;
731 } else {
732 group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
733 }
667 734
668 fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); 735 fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
669 if (fd < 0) 736 if (fd < 0)
@@ -704,6 +771,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
704 default: 771 default:
705 return -EINVAL; 772 return -EINVAL;
706 } 773 }
774
775 if (mask & FAN_ONDIR) {
776 flags |= FAN_MARK_ONDIR;
777 mask &= ~FAN_ONDIR;
778 }
779
707#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 780#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
708 if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) 781 if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
709#else 782#else
@@ -719,6 +792,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
719 ret = -EINVAL; 792 ret = -EINVAL;
720 if (unlikely(filp->f_op != &fanotify_fops)) 793 if (unlikely(filp->f_op != &fanotify_fops))
721 goto fput_and_out; 794 goto fput_and_out;
795 group = filp->private_data;
796
797 /*
798 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
799 * allowed to set permissions events.
800 */
801 ret = -EINVAL;
802 if (mask & FAN_ALL_PERM_EVENTS &&
803 group->priority == FS_PRIO_0)
804 goto fput_and_out;
722 805
723 ret = fanotify_find_path(dfd, pathname, &path, flags); 806 ret = fanotify_find_path(dfd, pathname, &path, flags);
724 if (ret) 807 if (ret)
@@ -729,7 +812,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
729 inode = path.dentry->d_inode; 812 inode = path.dentry->d_inode;
730 else 813 else
731 mnt = path.mnt; 814 mnt = path.mnt;
732 group = filp->private_data;
733 815
734 /* create/update an inode mark */ 816 /* create/update an inode mark */
735 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { 817 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4498a208df94..20dc218707ca 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
84} 84}
85 85
86/* Notify this dentry's parent about a child's events. */ 86/* Notify this dentry's parent about a child's events. */
87void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) 87int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
88{ 88{
89 struct dentry *parent; 89 struct dentry *parent;
90 struct inode *p_inode; 90 struct inode *p_inode;
91 int ret = 0;
91 92
92 if (!dentry) 93 if (!dentry)
93 dentry = path->dentry; 94 dentry = path->dentry;
94 95
95 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) 96 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
96 return; 97 return 0;
97 98
98 parent = dget_parent(dentry); 99 parent = dget_parent(dentry);
99 p_inode = parent->d_inode; 100 p_inode = parent->d_inode;
@@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
106 mask |= FS_EVENT_ON_CHILD; 107 mask |= FS_EVENT_ON_CHILD;
107 108
108 if (path) 109 if (path)
109 fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, 110 ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
110 dentry->d_name.name, 0); 111 dentry->d_name.name, 0);
111 else 112 else
112 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, 113 ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
113 dentry->d_name.name, 0); 114 dentry->d_name.name, 0);
114 } 115 }
115 116
116 dput(parent); 117 dput(parent);
118
119 return ret;
117} 120}
118EXPORT_SYMBOL_GPL(__fsnotify_parent); 121EXPORT_SYMBOL_GPL(__fsnotify_parent);
119 122
@@ -252,20 +255,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
252 255
253 if (inode_group > vfsmount_group) { 256 if (inode_group > vfsmount_group) {
254 /* handle inode */ 257 /* handle inode */
255 send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, 258 ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
256 data_is, cookie, file_name, &event); 259 data_is, cookie, file_name, &event);
257 /* we didn't use the vfsmount_mark */ 260 /* we didn't use the vfsmount_mark */
258 vfsmount_group = NULL; 261 vfsmount_group = NULL;
259 } else if (vfsmount_group > inode_group) { 262 } else if (vfsmount_group > inode_group) {
260 send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, 263 ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
261 data_is, cookie, file_name, &event); 264 data_is, cookie, file_name, &event);
262 inode_group = NULL; 265 inode_group = NULL;
263 } else { 266 } else {
264 send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, 267 ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
265 mask, data, data_is, cookie, file_name, 268 mask, data, data_is, cookie, file_name,
266 &event); 269 &event);
267 } 270 }
268 271
272 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
273 goto out;
274
269 if (inode_group) 275 if (inode_group)
270 inode_node = srcu_dereference(inode_node->next, 276 inode_node = srcu_dereference(inode_node->next,
271 &fsnotify_mark_srcu); 277 &fsnotify_mark_srcu);
@@ -273,7 +279,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
273 vfsmount_node = srcu_dereference(vfsmount_node->next, 279 vfsmount_node = srcu_dereference(vfsmount_node->next,
274 &fsnotify_mark_srcu); 280 &fsnotify_mark_srcu);
275 } 281 }
276 282 ret = 0;
283out:
277 srcu_read_unlock(&fsnotify_mark_srcu, idx); 284 srcu_read_unlock(&fsnotify_mark_srcu, idx);
278 /* 285 /*
279 * fsnotify_create_event() took a reference so the event can't be cleaned 286 * fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 21ed10660b80..4c29fcf557d1 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
177 * Attach an initialized mark to a given inode. 177 * Attach an initialized mark to a given inode.
178 * These marks may be used for the fsnotify backend to determine which 178 * These marks may be used for the fsnotify backend to determine which
179 * event types should be delivered to which group and for which inodes. These 179 * event types should be delivered to which group and for which inodes. These
180 * marks are ordered according to the group's location in memory. 180 * marks are ordered according to priority, highest number first, and then by
181 * the group's location in memory.
181 */ 182 */
182int fsnotify_add_inode_mark(struct fsnotify_mark *mark, 183int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
183 struct fsnotify_group *group, struct inode *inode, 184 struct fsnotify_group *group, struct inode *inode,
@@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
211 goto out; 212 goto out;
212 } 213 }
213 214
214 if (mark->group < lmark->group) 215 if (mark->group->priority < lmark->group->priority)
216 continue;
217
218 if ((mark->group->priority == lmark->group->priority) &&
219 (mark->group < lmark->group))
215 continue; 220 continue;
216 221
217 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); 222 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 24edc1185d53..444c305a468c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -862,7 +862,7 @@ static int __init inotify_user_setup(void)
862 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); 862 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
863 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); 863 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
864 BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); 864 BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
865 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); 865 BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
866 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); 866 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
867 867
868 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); 868 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b578fbd..85eebff6d0d7 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
169 goto out; 169 goto out;
170 } 170 }
171 171
172 if (mark->group < lmark->group) 172 if (mark->group->priority < lmark->group->priority)
173 continue;
174
175 if ((mark->group->priority == lmark->group->priority) &&
176 (mark->group < lmark->group))
173 continue; 177 continue;
174 178
175 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); 179 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d3fbe5730bfc..a30ecacc01f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
3059/* Driver wide mutex. */ 3059/* Driver wide mutex. */
3060DEFINE_MUTEX(ntfs_lock); 3060DEFINE_MUTEX(ntfs_lock);
3061 3061
3062static int ntfs_get_sb(struct file_system_type *fs_type, 3062static struct dentry *ntfs_mount(struct file_system_type *fs_type,
3063 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3063 int flags, const char *dev_name, void *data)
3064{ 3064{
3065 return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, 3065 return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
3066 mnt);
3067} 3066}
3068 3067
3069static struct file_system_type ntfs_fs_type = { 3068static struct file_system_type ntfs_fs_type = {
3070 .owner = THIS_MODULE, 3069 .owner = THIS_MODULE,
3071 .name = "ntfs", 3070 .name = "ntfs",
3072 .get_sb = ntfs_get_sb, 3071 .mount = ntfs_mount,
3073 .kill_sb = kill_block_super, 3072 .kill_sb = kill_block_super,
3074 .fs_flags = FS_REQUIRES_DEV, 3073 .fs_flags = FS_REQUIRES_DEV,
3075}; 3074};
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 75e115f1bd73..b2df490a19ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
643 .setattr = dlmfs_file_setattr, 643 .setattr = dlmfs_file_setattr,
644}; 644};
645 645
646static int dlmfs_get_sb(struct file_system_type *fs_type, 646static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
647 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 647 int flags, const char *dev_name, void *data)
648{ 648{
649 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); 649 return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
650} 650}
651 651
652static struct file_system_type dlmfs_fs_type = { 652static struct file_system_type dlmfs_fs_type = {
653 .owner = THIS_MODULE, 653 .owner = THIS_MODULE,
654 .name = "ocfs2_dlmfs", 654 .name = "ocfs2_dlmfs",
655 .get_sb = dlmfs_get_sb, 655 .mount = dlmfs_mount,
656 .kill_sb = kill_litter_super, 656 .kill_sb = kill_litter_super,
657}; 657};
658 658
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb395820..f02c0ef31578 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1236,14 +1236,12 @@ read_super_error:
1236 return status; 1236 return status;
1237} 1237}
1238 1238
1239static int ocfs2_get_sb(struct file_system_type *fs_type, 1239static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
1240 int flags, 1240 int flags,
1241 const char *dev_name, 1241 const char *dev_name,
1242 void *data, 1242 void *data)
1243 struct vfsmount *mnt)
1244{ 1243{
1245 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, 1244 return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
1246 mnt);
1247} 1245}
1248 1246
1249static void ocfs2_kill_sb(struct super_block *sb) 1247static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1265,7 @@ out:
1267static struct file_system_type ocfs2_fs_type = { 1265static struct file_system_type ocfs2_fs_type = {
1268 .owner = THIS_MODULE, 1266 .owner = THIS_MODULE,
1269 .name = "ocfs2", 1267 .name = "ocfs2",
1270 .get_sb = ocfs2_get_sb, /* is this called when we mount 1268 .mount = ocfs2_mount,
1271 * the fs? */
1272 .kill_sb = ocfs2_kill_sb, 1269 .kill_sb = ocfs2_kill_sb,
1273 1270
1274 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1271 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291a..e043c4cb9a97 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
557 return ret; 557 return ret;
558} 558}
559 559
560static int omfs_get_sb(struct file_system_type *fs_type, 560static struct dentry *omfs_mount(struct file_system_type *fs_type,
561 int flags, const char *dev_name, 561 int flags, const char *dev_name, void *data)
562 void *data, struct vfsmount *m)
563{ 562{
564 return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); 563 return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
565} 564}
566 565
567static struct file_system_type omfs_fs_type = { 566static struct file_system_type omfs_fs_type = {
568 .owner = THIS_MODULE, 567 .owner = THIS_MODULE,
569 .name = "omfs", 568 .name = "omfs",
570 .get_sb = omfs_get_sb, 569 .mount = omfs_mount,
571 .kill_sb = kill_block_super, 570 .kill_sb = kill_block_super,
572 .fs_flags = FS_REQUIRES_DEV, 571 .fs_flags = FS_REQUIRES_DEV,
573}; 572};
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8dc..4197b9ed023d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
786 /* Pick up the filp from the open intent */ 786 /* Pick up the filp from the open intent */
787 filp = nd->intent.open.file; 787 filp = nd->intent.open.file;
788 /* Has the filesystem initialised the file for us? */ 788 /* Has the filesystem initialised the file for us? */
789 if (filp->f_path.dentry == NULL) 789 if (filp->f_path.dentry == NULL) {
790 path_get(&nd->path);
790 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, 791 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
791 NULL, cred); 792 NULL, cred);
792 else 793 }
793 path_put(&nd->path);
794 return filp; 794 return filp;
795} 795}
796 796
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012c..911e61f348fc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
415 return ret; 415 return ret;
416} 416}
417 417
418static int openprom_get_sb(struct file_system_type *fs_type, 418static struct dentry *openprom_mount(struct file_system_type *fs_type,
419 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 419 int flags, const char *dev_name, void *data)
420{ 420{
421 return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); 421 return mount_single(fs_type, flags, data, openprom_fill_super);
422} 422}
423 423
424static struct file_system_type openprom_fs_type = { 424static struct file_system_type openprom_fs_type = {
425 .owner = THIS_MODULE, 425 .owner = THIS_MODULE,
426 .name = "openpromfs", 426 .name = "openpromfs",
427 .get_sb = openprom_get_sb, 427 .mount = openprom_mount,
428 .kill_sb = kill_anon_super, 428 .kill_sb = kill_anon_super,
429}; 429};
430 430
diff --git a/fs/pipe.c b/fs/pipe.c
index d2d7566ce68e..a8012a955720 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1247,16 +1247,15 @@ out:
1247 * any operations on the root directory. However, we need a non-trivial 1247 * any operations on the root directory. However, we need a non-trivial
1248 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1248 * d_name - pipe: will go nicely and kill the special-casing in procfs.
1249 */ 1249 */
1250static int pipefs_get_sb(struct file_system_type *fs_type, 1250static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1251 int flags, const char *dev_name, void *data, 1251 int flags, const char *dev_name, void *data)
1252 struct vfsmount *mnt)
1253{ 1252{
1254 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1253 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
1255} 1254}
1256 1255
1257static struct file_system_type pipe_fs_type = { 1256static struct file_system_type pipe_fs_type = {
1258 .name = "pipefs", 1257 .name = "pipefs",
1259 .get_sb = pipefs_get_sb, 1258 .mount = pipefs_mount,
1260 .kill_sb = kill_anon_super, 1259 .kill_sb = kill_anon_super,
1261}; 1260};
1262 1261
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b316325..ef9fa8e24ad6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
35 return set_anon_super(sb, NULL); 35 return set_anon_super(sb, NULL);
36} 36}
37 37
38static int proc_get_sb(struct file_system_type *fs_type, 38static struct dentry *proc_mount(struct file_system_type *fs_type,
39 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 39 int flags, const char *dev_name, void *data)
40{ 40{
41 int err; 41 int err;
42 struct super_block *sb; 42 struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
61 61
62 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 62 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
63 if (IS_ERR(sb)) 63 if (IS_ERR(sb))
64 return PTR_ERR(sb); 64 return ERR_CAST(sb);
65 65
66 if (!sb->s_root) { 66 if (!sb->s_root) {
67 sb->s_flags = flags; 67 sb->s_flags = flags;
68 err = proc_fill_super(sb); 68 err = proc_fill_super(sb);
69 if (err) { 69 if (err) {
70 deactivate_locked_super(sb); 70 deactivate_locked_super(sb);
71 return err; 71 return ERR_PTR(err);
72 } 72 }
73 73
74 ei = PROC_I(sb->s_root->d_inode); 74 ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
79 } 79 }
80 80
81 sb->s_flags |= MS_ACTIVE; 81 sb->s_flags |= MS_ACTIVE;
82 ns->proc_mnt = mnt;
83 } 82 }
84 83
85 simple_set_mnt(mnt, sb); 84 return dget(sb->s_root);
86 return 0;
87} 85}
88 86
89static void proc_kill_sb(struct super_block *sb) 87static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
97 95
98static struct file_system_type proc_fs_type = { 96static struct file_system_type proc_fs_type = {
99 .name = "proc", 97 .name = "proc",
100 .get_sb = proc_get_sb, 98 .mount = proc_mount,
101 .kill_sb = proc_kill_sb, 99 .kill_sb = proc_kill_sb,
102}; 100};
103 101
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
115 return; 113 return;
116 } 114 }
117 115
116 init_pid_ns.proc_mnt = proc_mnt;
118 proc_symlink("mounts", NULL, "self/mounts"); 117 proc_symlink("mounts", NULL, "self/mounts");
119 118
120 proc_net_init(); 119 proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
213 if (IS_ERR(mnt)) 212 if (IS_ERR(mnt))
214 return PTR_ERR(mnt); 213 return PTR_ERR(mnt);
215 214
215 ns->proc_mnt = mnt;
216 return 0; 216 return 0;
217} 217}
218 218
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026fc..fcada42f1aa3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
454 kmem_cache_destroy(qnx4_inode_cachep); 454 kmem_cache_destroy(qnx4_inode_cachep);
455} 455}
456 456
457static int qnx4_get_sb(struct file_system_type *fs_type, 457static struct dentry *qnx4_mount(struct file_system_type *fs_type,
458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 458 int flags, const char *dev_name, void *data)
459{ 459{
460 return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, 460 return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
461 mnt);
462} 461}
463 462
464static struct file_system_type qnx4_fs_type = { 463static struct file_system_type qnx4_fs_type = {
465 .owner = THIS_MODULE, 464 .owner = THIS_MODULE,
466 .name = "qnx4", 465 .name = "qnx4",
467 .get_sb = qnx4_get_sb, 466 .mount = qnx4_mount,
468 .kill_sb = kill_block_super, 467 .kill_sb = kill_block_super,
469 .fs_flags = FS_REQUIRES_DEV, 468 .fs_flags = FS_REQUIRES_DEV,
470}; 469};
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 67fadb1ad2c1..eacb166fb259 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -255,17 +255,16 @@ fail:
255 return err; 255 return err;
256} 256}
257 257
258int ramfs_get_sb(struct file_system_type *fs_type, 258struct dentry *ramfs_mount(struct file_system_type *fs_type,
259 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 259 int flags, const char *dev_name, void *data)
260{ 260{
261 return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); 261 return mount_nodev(fs_type, flags, data, ramfs_fill_super);
262} 262}
263 263
264static int rootfs_get_sb(struct file_system_type *fs_type, 264static struct dentry *rootfs_mount(struct file_system_type *fs_type,
265 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 265 int flags, const char *dev_name, void *data)
266{ 266{
267 return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, 267 return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
268 mnt);
269} 268}
270 269
271static void ramfs_kill_sb(struct super_block *sb) 270static void ramfs_kill_sb(struct super_block *sb)
@@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
276 275
277static struct file_system_type ramfs_fs_type = { 276static struct file_system_type ramfs_fs_type = {
278 .name = "ramfs", 277 .name = "ramfs",
279 .get_sb = ramfs_get_sb, 278 .mount = ramfs_mount,
280 .kill_sb = ramfs_kill_sb, 279 .kill_sb = ramfs_kill_sb,
281}; 280};
282static struct file_system_type rootfs_fs_type = { 281static struct file_system_type rootfs_fs_type = {
283 .name = "rootfs", 282 .name = "rootfs",
284 .get_sb = rootfs_get_sb, 283 .mount = rootfs_mount,
285 .kill_sb = kill_litter_super, 284 .kill_sb = kill_litter_super,
286}; 285};
287 286
diff --git a/fs/read_write.c b/fs/read_write.c
index 9cd9d148105d..431a0ed610c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -243,8 +243,6 @@ bad:
243 * them to something that fits in "int" so that others 243 * them to something that fits in "int" so that others
244 * won't have to do range checks all the time. 244 * won't have to do range checks all the time.
245 */ 245 */
246#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
247
248int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 246int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
249{ 247{
250 struct inode *inode; 248 struct inode *inode;
@@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
584 unsigned long nr_segs, unsigned long fast_segs, 582 unsigned long nr_segs, unsigned long fast_segs,
585 struct iovec *fast_pointer, 583 struct iovec *fast_pointer,
586 struct iovec **ret_pointer) 584 struct iovec **ret_pointer)
587 { 585{
588 unsigned long seg; 586 unsigned long seg;
589 ssize_t ret; 587 ssize_t ret;
590 struct iovec *iov = fast_pointer; 588 struct iovec *iov = fast_pointer;
591 589
592 /* 590 /*
593 * SuS says "The readv() function *may* fail if the iovcnt argument 591 * SuS says "The readv() function *may* fail if the iovcnt argument
594 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 592 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
595 * traditionally returned zero for zero segments, so... 593 * traditionally returned zero for zero segments, so...
596 */ 594 */
597 if (nr_segs == 0) { 595 if (nr_segs == 0) {
598 ret = 0; 596 ret = 0;
599 goto out; 597 goto out;
600 } 598 }
601 599
602 /* 600 /*
603 * First get the "struct iovec" from user memory and 601 * First get the "struct iovec" from user memory and
604 * verify all the pointers 602 * verify all the pointers
605 */ 603 */
606 if (nr_segs > UIO_MAXIOV) { 604 if (nr_segs > UIO_MAXIOV) {
607 ret = -EINVAL; 605 ret = -EINVAL;
608 goto out; 606 goto out;
609 } 607 }
610 if (nr_segs > fast_segs) { 608 if (nr_segs > fast_segs) {
611 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 609 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
612 if (iov == NULL) { 610 if (iov == NULL) {
613 ret = -ENOMEM; 611 ret = -ENOMEM;
614 goto out; 612 goto out;
615 } 613 }
616 } 614 }
617 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 615 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
618 ret = -EFAULT; 616 ret = -EFAULT;
619 goto out; 617 goto out;
620 } 618 }
621 619
622 /* 620 /*
623 * According to the Single Unix Specification we should return EINVAL 621 * According to the Single Unix Specification we should return EINVAL
624 * if an element length is < 0 when cast to ssize_t or if the 622 * if an element length is < 0 when cast to ssize_t or if the
625 * total length would overflow the ssize_t return value of the 623 * total length would overflow the ssize_t return value of the
626 * system call. 624 * system call.
627 */ 625 *
626 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
627 * overflow case.
628 */
628 ret = 0; 629 ret = 0;
629 for (seg = 0; seg < nr_segs; seg++) { 630 for (seg = 0; seg < nr_segs; seg++) {
630 void __user *buf = iov[seg].iov_base; 631 void __user *buf = iov[seg].iov_base;
631 ssize_t len = (ssize_t)iov[seg].iov_len; 632 ssize_t len = (ssize_t)iov[seg].iov_len;
632 633
633 /* see if we we're about to use an invalid len or if 634 /* see if we we're about to use an invalid len or if
634 * it's about to overflow ssize_t */ 635 * it's about to overflow ssize_t */
635 if (len < 0 || (ret + len < ret)) { 636 if (len < 0) {
636 ret = -EINVAL; 637 ret = -EINVAL;
637 goto out; 638 goto out;
638 } 639 }
639 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 640 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
640 ret = -EFAULT; 641 ret = -EFAULT;
641 goto out; 642 goto out;
643 }
644 if (len > MAX_RW_COUNT - ret) {
645 len = MAX_RW_COUNT - ret;
646 iov[seg].iov_len = len;
642 } 647 }
643
644 ret += len; 648 ret += len;
645 } 649 }
646out: 650out:
647 *ret_pointer = iov; 651 *ret_pointer = iov;
648 return ret; 652 return ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002d..3bf7a6457f4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2213,12 +2213,11 @@ out:
2213 2213
2214#endif 2214#endif
2215 2215
2216static int get_super_block(struct file_system_type *fs_type, 2216static struct dentry *get_super_block(struct file_system_type *fs_type,
2217 int flags, const char *dev_name, 2217 int flags, const char *dev_name,
2218 void *data, struct vfsmount *mnt) 2218 void *data)
2219{ 2219{
2220 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, 2220 return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
2221 mnt);
2222} 2221}
2223 2222
2224static int __init init_reiserfs_fs(void) 2223static int __init init_reiserfs_fs(void)
@@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void)
2253struct file_system_type reiserfs_fs_type = { 2252struct file_system_type reiserfs_fs_type = {
2254 .owner = THIS_MODULE, 2253 .owner = THIS_MODULE,
2255 .name = "reiserfs", 2254 .name = "reiserfs",
2256 .get_sb = get_super_block, 2255 .mount = get_super_block,
2257 .kill_sb = reiserfs_kill_sb, 2256 .kill_sb = reiserfs_kill_sb,
2258 .fs_flags = FS_REQUIRES_DEV, 2257 .fs_flags = FS_REQUIRES_DEV,
2259}; 2258};
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c92..6647f90e55cd 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
552/* 552/*
553 * get a superblock for mounting 553 * get a superblock for mounting
554 */ 554 */
555static int romfs_get_sb(struct file_system_type *fs_type, 555static struct dentry *romfs_mount(struct file_system_type *fs_type,
556 int flags, const char *dev_name, 556 int flags, const char *dev_name,
557 void *data, struct vfsmount *mnt) 557 void *data)
558{ 558{
559 int ret = -EINVAL; 559 struct dentry *ret = ERR_PTR(-EINVAL);
560 560
561#ifdef CONFIG_ROMFS_ON_MTD 561#ifdef CONFIG_ROMFS_ON_MTD
562 ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, 562 ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
563 mnt);
564#endif 563#endif
565#ifdef CONFIG_ROMFS_ON_BLOCK 564#ifdef CONFIG_ROMFS_ON_BLOCK
566 if (ret == -EINVAL) 565 if (ret == ERR_PTR(-EINVAL))
567 ret = get_sb_bdev(fs_type, flags, dev_name, data, 566 ret = mount_bdev(fs_type, flags, dev_name, data,
568 romfs_fill_super, mnt); 567 romfs_fill_super);
569#endif 568#endif
570 return ret; 569 return ret;
571} 570}
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
592static struct file_system_type romfs_fs_type = { 591static struct file_system_type romfs_fs_type = {
593 .owner = THIS_MODULE, 592 .owner = THIS_MODULE,
594 .name = "romfs", 593 .name = "romfs",
595 .get_sb = romfs_get_sb, 594 .mount = romfs_mount,
596 .kill_sb = romfs_kill_sb, 595 .kill_sb = romfs_kill_sb,
597 .fs_flags = FS_REQUIRES_DEV, 596 .fs_flags = FS_REQUIRES_DEV,
598}; 597};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f1156048..24de30ba34c1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
370} 370}
371 371
372 372
373static int squashfs_get_sb(struct file_system_type *fs_type, int flags, 373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
374 const char *dev_name, void *data, 374 const char *dev_name, void *data)
375 struct vfsmount *mnt)
376{ 375{
377 return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, 376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
378 mnt);
379} 377}
380 378
381 379
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
451static struct file_system_type squashfs_fs_type = { 449static struct file_system_type squashfs_fs_type = {
452 .owner = THIS_MODULE, 450 .owner = THIS_MODULE,
453 .name = "squashfs", 451 .name = "squashfs",
454 .get_sb = squashfs_get_sb, 452 .mount = squashfs_mount,
455 .kill_sb = kill_block_super, 453 .kill_sb = kill_block_super,
456 .fs_flags = FS_REQUIRES_DEV 454 .fs_flags = FS_REQUIRES_DEV
457}; 455};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c6..3876c36699a1 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
158 strncmp(target, name, name_size) == 0) { 158 strncmp(target, name, name_size) == 0) {
159 /* found xattr */ 159 /* found xattr */
160 if (type & SQUASHFS_XATTR_VALUE_OOL) { 160 if (type & SQUASHFS_XATTR_VALUE_OOL) {
161 __le64 xattr; 161 __le64 xattr_val;
162 u64 xattr;
162 /* val is a reference to the real location */ 163 /* val is a reference to the real location */
163 err = squashfs_read_metadata(sb, &val, &start, 164 err = squashfs_read_metadata(sb, &val, &start,
164 &offset, sizeof(val)); 165 &offset, sizeof(val));
165 if (err < 0) 166 if (err < 0)
166 goto failed; 167 goto failed;
167 err = squashfs_read_metadata(sb, &xattr, &start, 168 err = squashfs_read_metadata(sb, &xattr_val,
168 &offset, sizeof(xattr)); 169 &start, &offset, sizeof(xattr_val));
169 if (err < 0) 170 if (err < 0)
170 goto failed; 171 goto failed;
171 xattr = le64_to_cpu(xattr); 172 xattr = le64_to_cpu(xattr_val);
172 start = SQUASHFS_XATTR_BLK(xattr) + 173 start = SQUASHFS_XATTR_BLK(xattr) +
173 msblk->xattr_table; 174 msblk->xattr_table;
174 offset = SQUASHFS_XATTR_OFFSET(xattr); 175 offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fbf..b634efce4bde 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, 25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
26 u64 *, int *); 26 u64 *, int *);
27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, 27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
28 int *, unsigned long long *); 28 unsigned int *, unsigned long long *);
29#else 29#else
30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, 30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
31 u64 start, u64 *xattr_table_start, int *xattr_ids) 31 u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
35} 35}
36 36
37static inline int squashfs_xattr_lookup(struct super_block *sb, 37static inline int squashfs_xattr_lookup(struct super_block *sb,
38 unsigned int index, int *count, int *size, 38 unsigned int index, int *count, unsigned int *size,
39 unsigned long long *xattr) 39 unsigned long long *xattr)
40{ 40{
41 return 0; 41 return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098f..d33be5dd6c32 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
34#include "squashfs_fs_sb.h" 34#include "squashfs_fs_sb.h"
35#include "squashfs_fs_i.h" 35#include "squashfs_fs_i.h"
36#include "squashfs.h" 36#include "squashfs.h"
37#include "xattr.h"
37 38
38/* 39/*
39 * Map xattr id using the xattr id look up table 40 * Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index b9c9869165db..ca696155cd9a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
715 return set_anon_super(sb, NULL); 715 return set_anon_super(sb, NULL);
716} 716}
717 717
718int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 718struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
719 int (*fill_super)(struct super_block *, void *, int), 719 void *data, int (*fill_super)(struct super_block *, void *, int))
720 struct vfsmount *mnt)
721{ 720{
722 struct super_block *sb; 721 struct super_block *sb;
723 722
724 sb = sget(fs_type, ns_test_super, ns_set_super, data); 723 sb = sget(fs_type, ns_test_super, ns_set_super, data);
725 if (IS_ERR(sb)) 724 if (IS_ERR(sb))
726 return PTR_ERR(sb); 725 return ERR_CAST(sb);
727 726
728 if (!sb->s_root) { 727 if (!sb->s_root) {
729 int err; 728 int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
731 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 730 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
732 if (err) { 731 if (err) {
733 deactivate_locked_super(sb); 732 deactivate_locked_super(sb);
734 return err; 733 return ERR_PTR(err);
735 } 734 }
736 735
737 sb->s_flags |= MS_ACTIVE; 736 sb->s_flags |= MS_ACTIVE;
738 } 737 }
739 738
740 simple_set_mnt(mnt, sb); 739 return dget(sb->s_root);
741 return 0;
742} 740}
743 741
744EXPORT_SYMBOL(get_sb_ns); 742EXPORT_SYMBOL(mount_ns);
745 743
746#ifdef CONFIG_BLOCK 744#ifdef CONFIG_BLOCK
747static int set_bdev_super(struct super_block *s, void *data) 745static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
762 return (void *)s->s_bdev == data; 760 return (void *)s->s_bdev == data;
763} 761}
764 762
765int get_sb_bdev(struct file_system_type *fs_type, 763struct dentry *mount_bdev(struct file_system_type *fs_type,
766 int flags, const char *dev_name, void *data, 764 int flags, const char *dev_name, void *data,
767 int (*fill_super)(struct super_block *, void *, int), 765 int (*fill_super)(struct super_block *, void *, int))
768 struct vfsmount *mnt)
769{ 766{
770 struct block_device *bdev; 767 struct block_device *bdev;
771 struct super_block *s; 768 struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
777 774
778 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 775 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
779 if (IS_ERR(bdev)) 776 if (IS_ERR(bdev))
780 return PTR_ERR(bdev); 777 return ERR_CAST(bdev);
781 778
782 /* 779 /*
783 * once the super is inserted into the list by sget, s_umount 780 * once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
829 bdev->bd_super = s; 826 bdev->bd_super = s;
830 } 827 }
831 828
832 simple_set_mnt(mnt, s); 829 return dget(s->s_root);
833 return 0;
834 830
835error_s: 831error_s:
836 error = PTR_ERR(s); 832 error = PTR_ERR(s);
837error_bdev: 833error_bdev:
838 close_bdev_exclusive(bdev, mode); 834 close_bdev_exclusive(bdev, mode);
839error: 835error:
840 return error; 836 return ERR_PTR(error);
837}
838EXPORT_SYMBOL(mount_bdev);
839
840int get_sb_bdev(struct file_system_type *fs_type,
841 int flags, const char *dev_name, void *data,
842 int (*fill_super)(struct super_block *, void *, int),
843 struct vfsmount *mnt)
844{
845 struct dentry *root;
846
847 root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
848 if (IS_ERR(root))
849 return PTR_ERR(root);
850 mnt->mnt_root = root;
851 mnt->mnt_sb = root->d_sb;
852 return 0;
841} 853}
842 854
843EXPORT_SYMBOL(get_sb_bdev); 855EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
856EXPORT_SYMBOL(kill_block_super); 868EXPORT_SYMBOL(kill_block_super);
857#endif 869#endif
858 870
859int get_sb_nodev(struct file_system_type *fs_type, 871struct dentry *mount_nodev(struct file_system_type *fs_type,
860 int flags, void *data, 872 int flags, void *data,
861 int (*fill_super)(struct super_block *, void *, int), 873 int (*fill_super)(struct super_block *, void *, int))
862 struct vfsmount *mnt)
863{ 874{
864 int error; 875 int error;
865 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 876 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
866 877
867 if (IS_ERR(s)) 878 if (IS_ERR(s))
868 return PTR_ERR(s); 879 return ERR_CAST(s);
869 880
870 s->s_flags = flags; 881 s->s_flags = flags;
871 882
872 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 883 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
873 if (error) { 884 if (error) {
874 deactivate_locked_super(s); 885 deactivate_locked_super(s);
875 return error; 886 return ERR_PTR(error);
876 } 887 }
877 s->s_flags |= MS_ACTIVE; 888 s->s_flags |= MS_ACTIVE;
878 simple_set_mnt(mnt, s); 889 return dget(s->s_root);
879 return 0;
880} 890}
891EXPORT_SYMBOL(mount_nodev);
881 892
893int get_sb_nodev(struct file_system_type *fs_type,
894 int flags, void *data,
895 int (*fill_super)(struct super_block *, void *, int),
896 struct vfsmount *mnt)
897{
898 struct dentry *root;
899
900 root = mount_nodev(fs_type, flags, data, fill_super);
901 if (IS_ERR(root))
902 return PTR_ERR(root);
903 mnt->mnt_root = root;
904 mnt->mnt_sb = root->d_sb;
905 return 0;
906}
882EXPORT_SYMBOL(get_sb_nodev); 907EXPORT_SYMBOL(get_sb_nodev);
883 908
884static int compare_single(struct super_block *s, void *p) 909static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
886 return 1; 911 return 1;
887} 912}
888 913
889int get_sb_single(struct file_system_type *fs_type, 914struct dentry *mount_single(struct file_system_type *fs_type,
890 int flags, void *data, 915 int flags, void *data,
891 int (*fill_super)(struct super_block *, void *, int), 916 int (*fill_super)(struct super_block *, void *, int))
892 struct vfsmount *mnt)
893{ 917{
894 struct super_block *s; 918 struct super_block *s;
895 int error; 919 int error;
896 920
897 s = sget(fs_type, compare_single, set_anon_super, NULL); 921 s = sget(fs_type, compare_single, set_anon_super, NULL);
898 if (IS_ERR(s)) 922 if (IS_ERR(s))
899 return PTR_ERR(s); 923 return ERR_CAST(s);
900 if (!s->s_root) { 924 if (!s->s_root) {
901 s->s_flags = flags; 925 s->s_flags = flags;
902 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 926 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
903 if (error) { 927 if (error) {
904 deactivate_locked_super(s); 928 deactivate_locked_super(s);
905 return error; 929 return ERR_PTR(error);
906 } 930 }
907 s->s_flags |= MS_ACTIVE; 931 s->s_flags |= MS_ACTIVE;
908 } else { 932 } else {
909 do_remount_sb(s, flags, data, 0); 933 do_remount_sb(s, flags, data, 0);
910 } 934 }
911 simple_set_mnt(mnt, s); 935 return dget(s->s_root);
936}
937EXPORT_SYMBOL(mount_single);
938
939int get_sb_single(struct file_system_type *fs_type,
940 int flags, void *data,
941 int (*fill_super)(struct super_block *, void *, int),
942 struct vfsmount *mnt)
943{
944 struct dentry *root;
945 root = mount_single(fs_type, flags, data, fill_super);
946 if (IS_ERR(root))
947 return PTR_ERR(root);
948 mnt->mnt_root = root;
949 mnt->mnt_sb = root->d_sb;
912 return 0; 950 return 0;
913} 951}
914 952
@@ -918,6 +956,7 @@ struct vfsmount *
918vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 956vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
919{ 957{
920 struct vfsmount *mnt; 958 struct vfsmount *mnt;
959 struct dentry *root;
921 char *secdata = NULL; 960 char *secdata = NULL;
922 int error; 961 int error;
923 962
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
942 goto out_free_secdata; 981 goto out_free_secdata;
943 } 982 }
944 983
945 error = type->get_sb(type, flags, name, data, mnt); 984 if (type->mount) {
946 if (error < 0) 985 root = type->mount(type, flags, name, data);
947 goto out_free_secdata; 986 if (IS_ERR(root)) {
987 error = PTR_ERR(root);
988 goto out_free_secdata;
989 }
990 mnt->mnt_root = root;
991 mnt->mnt_sb = root->d_sb;
992 } else {
993 error = type->get_sb(type, flags, name, data, mnt);
994 if (error < 0)
995 goto out_free_secdata;
996 }
948 BUG_ON(!mnt->mnt_sb); 997 BUG_ON(!mnt->mnt_sb);
949 WARN_ON(!mnt->mnt_sb->s_bdi); 998 WARN_ON(!mnt->mnt_sb->s_bdi);
950 mnt->mnt_sb->s_flags |= MS_BORN; 999 mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c50..266895783b47 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
23#include "sysfs.h" 23#include "sysfs.h"
24 24
25 25
26static struct vfsmount *sysfs_mount; 26static struct vfsmount *sysfs_mnt;
27struct kmem_cache *sysfs_dir_cachep; 27struct kmem_cache *sysfs_dir_cachep;
28 28
29static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
95 return error; 95 return error;
96} 96}
97 97
98static int sysfs_get_sb(struct file_system_type *fs_type, 98static struct dentry *sysfs_mount(struct file_system_type *fs_type,
99 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 99 int flags, const char *dev_name, void *data)
100{ 100{
101 struct sysfs_super_info *info; 101 struct sysfs_super_info *info;
102 enum kobj_ns_type type; 102 enum kobj_ns_type type;
103 struct super_block *sb; 103 struct super_block *sb;
104 int error; 104 int error;
105 105
106 error = -ENOMEM;
107 info = kzalloc(sizeof(*info), GFP_KERNEL); 106 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info) 107 if (!info)
109 goto out; 108 return ERR_PTR(-ENOMEM);
110 109
111 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
112 info->ns[type] = kobj_ns_current(type); 111 info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
114 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
115 if (IS_ERR(sb) || sb->s_fs_info != info) 114 if (IS_ERR(sb) || sb->s_fs_info != info)
116 kfree(info); 115 kfree(info);
117 if (IS_ERR(sb)) { 116 if (IS_ERR(sb))
118 error = PTR_ERR(sb); 117 return ERR_CAST(sb);
119 goto out;
120 }
121 if (!sb->s_root) { 118 if (!sb->s_root) {
122 sb->s_flags = flags; 119 sb->s_flags = flags;
123 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 120 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
124 if (error) { 121 if (error) {
125 deactivate_locked_super(sb); 122 deactivate_locked_super(sb);
126 goto out; 123 return ERR_PTR(error);
127 } 124 }
128 sb->s_flags |= MS_ACTIVE; 125 sb->s_flags |= MS_ACTIVE;
129 } 126 }
130 127
131 simple_set_mnt(mnt, sb); 128 return dget(sb->s_root);
132 error = 0;
133out:
134 return error;
135} 129}
136 130
137static void sysfs_kill_sb(struct super_block *sb) 131static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
147 141
148static struct file_system_type sysfs_fs_type = { 142static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 143 .name = "sysfs",
150 .get_sb = sysfs_get_sb, 144 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 145 .kill_sb = sysfs_kill_sb,
152}; 146};
153 147
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
189 183
190 err = register_filesystem(&sysfs_fs_type); 184 err = register_filesystem(&sysfs_fs_type);
191 if (!err) { 185 if (!err) {
192 sysfs_mount = kern_mount(&sysfs_fs_type); 186 sysfs_mnt = kern_mount(&sysfs_fs_type);
193 if (IS_ERR(sysfs_mount)) { 187 if (IS_ERR(sysfs_mnt)) {
194 printk(KERN_ERR "sysfs: could not mount!\n"); 188 printk(KERN_ERR "sysfs: could not mount!\n");
195 err = PTR_ERR(sysfs_mount); 189 err = PTR_ERR(sysfs_mnt);
196 sysfs_mount = NULL; 190 sysfs_mnt = NULL;
197 unregister_filesystem(&sysfs_fs_type); 191 unregister_filesystem(&sysfs_fs_type);
198 goto out_err; 192 goto out_err;
199 } 193 }
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927e..3d9c62be0c10 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
526 526
527/* Every kernel module contains stuff like this. */ 527/* Every kernel module contains stuff like this. */
528 528
529static int sysv_get_sb(struct file_system_type *fs_type, 529static struct dentry *sysv_mount(struct file_system_type *fs_type,
530 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 530 int flags, const char *dev_name, void *data)
531{ 531{
532 return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, 532 return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
533 mnt);
534} 533}
535 534
536static int v7_get_sb(struct file_system_type *fs_type, 535static struct dentry *v7_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 536 int flags, const char *dev_name, void *data)
538{ 537{
539 return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); 538 return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
540} 539}
541 540
542static struct file_system_type sysv_fs_type = { 541static struct file_system_type sysv_fs_type = {
543 .owner = THIS_MODULE, 542 .owner = THIS_MODULE,
544 .name = "sysv", 543 .name = "sysv",
545 .get_sb = sysv_get_sb, 544 .mount = sysv_mount,
546 .kill_sb = kill_block_super, 545 .kill_sb = kill_block_super,
547 .fs_flags = FS_REQUIRES_DEV, 546 .fs_flags = FS_REQUIRES_DEV,
548}; 547};
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
550static struct file_system_type v7_fs_type = { 549static struct file_system_type v7_fs_type = {
551 .owner = THIS_MODULE, 550 .owner = THIS_MODULE,
552 .name = "v7", 551 .name = "v7",
553 .get_sb = v7_get_sb, 552 .mount = v7_mount,
554 .kill_sb = kill_block_super, 553 .kill_sb = kill_block_super,
555 .fs_flags = FS_REQUIRES_DEV, 554 .fs_flags = FS_REQUIRES_DEV,
556}; 555};
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad07..91fac54c70e3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
2038 return c->vi.cdev == *dev; 2038 return c->vi.cdev == *dev;
2039} 2039}
2040 2040
2041static int ubifs_get_sb(struct file_system_type *fs_type, int flags, 2041static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2042 const char *name, void *data, struct vfsmount *mnt) 2042 const char *name, void *data)
2043{ 2043{
2044 struct ubi_volume_desc *ubi; 2044 struct ubi_volume_desc *ubi;
2045 struct ubi_volume_info vi; 2045 struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2057 if (IS_ERR(ubi)) { 2057 if (IS_ERR(ubi)) {
2058 dbg_err("cannot open \"%s\", error %d", 2058 dbg_err("cannot open \"%s\", error %d",
2059 name, (int)PTR_ERR(ubi)); 2059 name, (int)PTR_ERR(ubi));
2060 return PTR_ERR(ubi); 2060 return ERR_CAST(ubi);
2061 } 2061 }
2062 ubi_get_volume_info(ubi, &vi); 2062 ubi_get_volume_info(ubi, &vi);
2063 2063
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2095 /* 'fill_super()' opens ubi again so we must close it here */ 2095 /* 'fill_super()' opens ubi again so we must close it here */
2096 ubi_close_volume(ubi); 2096 ubi_close_volume(ubi);
2097 2097
2098 simple_set_mnt(mnt, sb); 2098 return dget(sb->s_root);
2099 return 0;
2100 2099
2101out_deact: 2100out_deact:
2102 deactivate_locked_super(sb); 2101 deactivate_locked_super(sb);
2103out_close: 2102out_close:
2104 ubi_close_volume(ubi); 2103 ubi_close_volume(ubi);
2105 return err; 2104 return ERR_PTR(err);
2106} 2105}
2107 2106
2108static struct file_system_type ubifs_fs_type = { 2107static struct file_system_type ubifs_fs_type = {
2109 .name = "ubifs", 2108 .name = "ubifs",
2110 .owner = THIS_MODULE, 2109 .owner = THIS_MODULE,
2111 .get_sb = ubifs_get_sb, 2110 .mount = ubifs_mount,
2112 .kill_sb = kill_anon_super, 2111 .kill_sb = kill_anon_super,
2113}; 2112};
2114 2113
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b40..4a5c7c61836a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
107} 107}
108 108
109/* UDF filesystem type */ 109/* UDF filesystem type */
110static int udf_get_sb(struct file_system_type *fs_type, 110static struct dentry *udf_mount(struct file_system_type *fs_type,
111 int flags, const char *dev_name, void *data, 111 int flags, const char *dev_name, void *data)
112 struct vfsmount *mnt)
113{ 112{
114 return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); 113 return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
115} 114}
116 115
117static struct file_system_type udf_fstype = { 116static struct file_system_type udf_fstype = {
118 .owner = THIS_MODULE, 117 .owner = THIS_MODULE,
119 .name = "udf", 118 .name = "udf",
120 .get_sb = udf_get_sb, 119 .mount = udf_mount,
121 .kill_sb = kill_block_super, 120 .kill_sb = kill_block_super,
122 .fs_flags = FS_REQUIRES_DEV, 121 .fs_flags = FS_REQUIRES_DEV,
123}; 122};
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7d..2c47daed56da 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
1454 .show_options = ufs_show_options, 1454 .show_options = ufs_show_options,
1455}; 1455};
1456 1456
1457static int ufs_get_sb(struct file_system_type *fs_type, 1457static struct dentry *ufs_mount(struct file_system_type *fs_type,
1458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1458 int flags, const char *dev_name, void *data)
1459{ 1459{
1460 return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); 1460 return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
1461} 1461}
1462 1462
1463static struct file_system_type ufs_fs_type = { 1463static struct file_system_type ufs_fs_type = {
1464 .owner = THIS_MODULE, 1464 .owner = THIS_MODULE,
1465 .name = "ufs", 1465 .name = "ufs",
1466 .get_sb = ufs_get_sb, 1466 .mount = ufs_mount,
1467 .kill_sb = kill_block_super, 1467 .kill_sb = kill_block_super,
1468 .fs_flags = FS_REQUIRES_DEV, 1468 .fs_flags = FS_REQUIRES_DEV,
1469}; 1469};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..7d287afccde5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1111,11 +1111,12 @@ xfs_vm_writepage(
1111 uptodate = 0; 1111 uptodate = 0;
1112 1112
1113 /* 1113 /*
1114 * A hole may still be marked uptodate because discard_buffer 1114 * set_page_dirty dirties all buffers in a page, independent
1115 * leaves the flag set. 1115 * of their state. The dirty state however is entirely
1116 * meaningless for holes (!mapped && uptodate), so skip
1117 * buffers covering holes here.
1116 */ 1118 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 1119 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1119 imap_valid = 0; 1120 imap_valid = 0;
1120 continue; 1121 continue;
1121 } 1122 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..aa1d353def29 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1781,7 +1781,6 @@ xfs_buf_delwri_split(
1781 INIT_LIST_HEAD(list); 1781 INIT_LIST_HEAD(list);
1782 spin_lock(dwlk); 1782 spin_lock(dwlk);
1783 list_for_each_entry_safe(bp, n, dwq, b_list) { 1783 list_for_each_entry_safe(bp, n, dwq, b_list) {
1784 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1785 ASSERT(bp->b_flags & XBF_DELWRI); 1784 ASSERT(bp->b_flags & XBF_DELWRI);
1786 1785
1787 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { 1786 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1794,7 @@ xfs_buf_delwri_split(
1795 _XBF_RUN_QUEUES); 1794 _XBF_RUN_QUEUES);
1796 bp->b_flags |= XBF_WRITE; 1795 bp->b_flags |= XBF_WRITE;
1797 list_move_tail(&bp->b_list, list); 1796 list_move_tail(&bp->b_list, list);
1797 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1798 } else 1798 } else
1799 skipped++; 1799 skipped++;
1800 } 1800 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
416 if (IS_ERR(dentry)) 416 if (IS_ERR(dentry))
417 return PTR_ERR(dentry); 417 return PTR_ERR(dentry);
418 418
419 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); 419 kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
420 if (!kbuf) 420 if (!kbuf)
421 goto out_dput; 421 goto out_dput;
422 422
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
762 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
763 763
764 inode_sb_list_add(inode); 764 inode_sb_list_add(inode);
765 insert_inode_hash(inode); 765 /* make the inode look hashed for the writeback code */
766 hlist_add_fake(&inode->i_hash);
766 767
767 inode->i_mode = ip->i_d.di_mode; 768 inode->i_mode = ip->i_d.di_mode;
768 inode->i_nlink = ip->i_d.di_nlink; 769 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cf808782c065..064f964d4f3c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
353 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
355 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 mp->m_flags |= XFS_MOUNT_DELAYLOG;
356 cmn_err(CE_WARN,
357 "Enabling EXPERIMENTAL delayed logging feature "
358 "- use at your own risk.\n");
359 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
360 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
361 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
@@ -1609,16 +1606,14 @@ xfs_fs_fill_super(
1609 goto out_free_sb; 1606 goto out_free_sb;
1610} 1607}
1611 1608
1612STATIC int 1609STATIC struct dentry *
1613xfs_fs_get_sb( 1610xfs_fs_mount(
1614 struct file_system_type *fs_type, 1611 struct file_system_type *fs_type,
1615 int flags, 1612 int flags,
1616 const char *dev_name, 1613 const char *dev_name,
1617 void *data, 1614 void *data)
1618 struct vfsmount *mnt)
1619{ 1615{
1620 return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, 1616 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1621 mnt);
1622} 1617}
1623 1618
1624static const struct super_operations xfs_super_operations = { 1619static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1634,7 @@ static const struct super_operations xfs_super_operations = {
1639static struct file_system_type xfs_fs_type = { 1634static struct file_system_type xfs_fs_type = {
1640 .owner = THIS_MODULE, 1635 .owner = THIS_MODULE,
1641 .name = "xfs", 1636 .name = "xfs",
1642 .get_sb = xfs_fs_get_sb, 1637 .mount = xfs_fs_mount,
1643 .kill_sb = kill_block_super, 1638 .kill_sb = kill_block_super,
1644 .fs_flags = FS_REQUIRES_DEV, 1639 .fs_flags = FS_REQUIRES_DEV,
1645}; 1640};
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
853 if (trylock) { 853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { 854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++; 855 skipped++;
856 xfs_perag_put(pag);
856 continue; 857 continue;
857 } 858 }
858 first_index = pag->pag_ici_reclaim_cursor; 859 first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
744 * If the file's parent directory is known, take its iolock in exclusive 744 * If the file's parent directory is known, take its iolock in exclusive
745 * mode to prevent two sibling files from racing each other to migrate 745 * mode to prevent two sibling files from racing each other to migrate
746 * themselves and their parent to different AGs. 746 * themselves and their parent to different AGs.
747 *
748 * Note that we lock the parent directory iolock inside the child
749 * iolock here. That's fine as we never hold both parent and child
750 * iolock in any other place. This is different from the ilock,
751 * which requires locking of the child after the parent for namespace
752 * operations.
747 */ 753 */
748 if (pip) 754 if (pip)
749 xfs_ilock(pip, XFS_IOLOCK_EXCL); 755 xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
750 756
751 /* 757 /*
752 * A new AG needs to be found for the file. If the file's parent 758 * A new AG needs to be found for the file. If the file's parent
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
275 pag = radix_tree_delete(&mp->m_perag_tree, agno); 275 pag = radix_tree_delete(&mp->m_perag_tree, agno);
276 spin_unlock(&mp->m_perag_lock); 276 spin_unlock(&mp->m_perag_lock);
277 ASSERT(pag); 277 ASSERT(pag);
278 ASSERT(atomic_read(&pag->pag_ref) == 0);
278 call_rcu(&pag->rcu_head, __xfs_free_perag); 279 call_rcu(&pag->rcu_head, __xfs_free_perag);
279 } 280 }
280} 281}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) 346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
347#define xfs_trans_apply_dquot_deltas(tp) 347#define xfs_trans_apply_dquot_deltas(tp)
348#define xfs_trans_unreserve_and_mod_dquots(tp) 348#define xfs_trans_unreserve_and_mod_dquots(tp)
349#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) 349static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
350#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) 350 struct xfs_inode *ip, long nblks, long ninos, uint flags)
351{
352 return 0;
353}
354static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
355 struct xfs_mount *mp, struct xfs_dquot *udqp,
356 struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
357{
358 return 0;
359}
351#define xfs_qm_vop_create_dqattach(tp, ip, u, g) 360#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
352#define xfs_qm_vop_rename_dqattach(it) (0) 361#define xfs_qm_vop_rename_dqattach(it) (0)
353#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) 362#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
357#define xfs_qm_dqdetach(ip) 366#define xfs_qm_dqdetach(ip)
358#define xfs_qm_dqrele(d) 367#define xfs_qm_dqrele(d)
359#define xfs_qm_statvfs(ip, s) 368#define xfs_qm_statvfs(ip, s)
360#define xfs_qm_sync(mp, fl) (0) 369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
361#define xfs_qm_newmount(mp, a, b) (0) 373#define xfs_qm_newmount(mp, a, b) (0)
362#define xfs_qm_mount_quotas(mp) 374#define xfs_qm_mount_quotas(mp)
363#define xfs_qm_unmount(mp) 375#define xfs_qm_unmount(mp)
364#define xfs_qm_unmount_quotas(mp) (0) 376#define xfs_qm_unmount_quotas(mp)
365#endif /* CONFIG_XFS_QUOTA */ 377#endif /* CONFIG_XFS_QUOTA */
366 378
367#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ 379#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \