aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig13
-rw-r--r--fs/9p/Makefile1
-rw-r--r--fs/9p/acl.c392
-rw-r--r--fs/9p/acl.h49
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/9p/v9fs.c22
-rw-r--r--fs/9p/v9fs.h10
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c30
-rw-r--r--fs/9p/vfs_dir.c4
-rw-r--r--fs/9p/vfs_file.c265
-rw-r--r--fs/9p/vfs_inode.c253
-rw-r--r--fs/9p/vfs_super.c36
-rw-r--r--fs/9p/xattr.c52
-rw-r--r--fs/9p/xattr.h6
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/super.c9
-rw-r--r--fs/affs/super.c9
-rw-r--r--fs/afs/super.c19
-rw-r--r--fs/anon_inodes.c10
-rw-r--r--fs/autofs/Kconfig22
-rw-r--r--fs/autofs/Makefile7
-rw-r--r--fs/autofs/autofs_i.h165
-rw-r--r--fs/autofs/dirhash.c250
-rw-r--r--fs/autofs/init.c52
-rw-r--r--fs/autofs/inode.c288
-rw-r--r--fs/autofs/root.c645
-rw-r--r--fs/autofs/symlink.c26
-rw-r--r--fs/autofs/waitq.c205
-rw-r--r--fs/autofs4/init.c8
-rw-r--r--fs/befs/linuxvfs.c11
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/block_dev.c8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c57
-rw-r--r--fs/btrfs/ctree.h100
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/extent-tree.c694
-rw-r--r--fs/btrfs/extent_io.c168
-rw-r--r--fs/btrfs/extent_io.h4
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/free-space-cache.c751
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/inode.c202
-rw-r--r--fs/btrfs/ioctl.c398
-rw-r--r--fs/btrfs/ioctl.h13
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c109
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c57
-rw-r--r--fs/btrfs/transaction.c234
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c5
-rw-r--r--fs/ceph/super.c50
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/cifsencrypt.c427
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h55
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h14
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c51
-rw-r--r--fs/cifs/file.c57
-rw-r--r--fs/cifs/inode.c15
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/sess.c166
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/compat.c44
-rw-r--r--fs/compat_ioctl.c29
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/cramfs/inode.c9
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/devpts/inode.c32
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/ecryptfs/keystore.c45
-rw-r--r--fs/ecryptfs/main.c20
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/super.c8
-rw-r--r--fs/exofs/super.c10
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/ialloc.c11
-rw-r--r--fs/ext3/inode.c20
-rw-r--r--fs/ext3/resize.c13
-rw-r--r--fs/ext3/super.c49
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/balloc.c5
-rw-r--r--fs/ext4/block_validity.c7
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h110
-rw-r--r--fs/ext4/ext4_extents.h65
-rw-r--r--fs/ext4/extents.c368
-rw-r--r--fs/ext4/file.c44
-rw-r--r--fs/ext4/fsync.c83
-rw-r--r--fs/ext4/ialloc.c135
-rw-r--r--fs/ext4/inode.c585
-rw-r--r--fs/ext4/mballoc.c554
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c22
-rw-r--r--fs/ext4/namei.c63
-rw-r--r--fs/ext4/page-io.c430
-rw-r--r--fs/ext4/resize.c52
-rw-r--r--fs/ext4/super.c547
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h10
-rw-r--r--fs/fat/namei_msdos.c9
-rw-r--r--fs/fat/namei_vfat.c9
-rw-r--r--fs/freevxfs/vxfs_super.c9
-rw-r--r--fs/fs-writeback.c68
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/inode.c17
-rw-r--r--fs/gfs2/ops_fstype.c51
-rw-r--r--fs/hfs/super.c9
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs_kern.c8
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c8
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioctl.c39
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c44
-rw-r--r--fs/jbd/recovery.c2
-rw-r--r--fs/jbd/transaction.c6
-rw-r--r--fs/jbd2/checkpoint.c10
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c6
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/compr.c6
-rw-r--r--fs/jffs2/compr.h4
-rw-r--r--fs/jffs2/compr_lzo.c4
-rw-r--r--fs/jffs2/compr_rtime.c6
-rw-r--r--fs/jffs2/compr_rubin.c11
-rw-r--r--fs/jffs2/compr_zlib.c6
-rw-r--r--fs/jffs2/dir.c3
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/jffs2/fs.c22
-rw-r--r--fs/jffs2/gc.c7
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.c8
-rw-r--r--fs/jffs2/nodelist.h3
-rw-r--r--fs/jffs2/scan.c12
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c14
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/dev_mtd.c18
-rw-r--r--fs/logfs/logfs.h22
-rw-r--r--fs/logfs/super.c77
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/super.c96
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nilfs2/super.c16
-rw-r--r--fs/ntfs/super.c9
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c8
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c6
-rw-r--r--fs/openpromfs/inode.c8
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/root.c16
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/quota/Kconfig4
-rw-r--r--fs/quota/dquot.c30
-rw-r--r--fs/ramfs/inode.c17
-rw-r--r--fs/read_write.c62
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/romfs/super.c17
-rw-r--r--fs/smbfs/Kconfig56
-rw-r--r--fs/smbfs/Makefile18
-rw-r--r--fs/smbfs/cache.c208
-rw-r--r--fs/smbfs/dir.c696
-rw-r--r--fs/smbfs/file.c454
-rw-r--r--fs/smbfs/getopt.c64
-rw-r--r--fs/smbfs/getopt.h14
-rw-r--r--fs/smbfs/inode.c843
-rw-r--r--fs/smbfs/ioctl.c69
-rw-r--r--fs/smbfs/proc.c3503
-rw-r--r--fs/smbfs/proto.h87
-rw-r--r--fs/smbfs/request.c818
-rw-r--r--fs/smbfs/request.h70
-rw-r--r--fs/smbfs/smb_debug.h34
-rw-r--r--fs/smbfs/smbiod.c344
-rw-r--r--fs/smbfs/sock.c386
-rw-r--r--fs/smbfs/symlink.c68
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xattr.c9
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/super.c111
-rw-r--r--fs/sysfs/mount.c32
-rw-r--r--fs/sysv/super.c17
-rw-r--r--fs/ubifs/super.c13
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c12
220 files changed, 7010 insertions, 12076 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 795233702a4e..7e0511476797 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -17,3 +17,16 @@ config 9P_FSCACHE
17 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
18 caching support for 9p clients using FS-Cache 18 caching support for 9p clients using FS-Cache
19 19
20
21config 9P_FS_POSIX_ACL
22 bool "9P POSIX Access Control Lists"
23 depends on 9P_FS
24 select FS_POSIX_ACL
25 help
26 POSIX Access Control Lists (ACLs) support permissions for users and
27 groups beyond the owner/group/world scheme.
28
29 To learn more about Access Control Lists, visit the POSIX ACLs for
30 Linux website <http://acl.bestbits.at/>.
31
32 If you don't know what Access Control Lists are, say N
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 91fba025fcbe..f8ba37effd1b 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_9P_FS) := 9p.o
13 xattr_user.o 13 xattr_user.o
14 14
159p-$(CONFIG_9P_FSCACHE) += cache.o 159p-$(CONFIG_9P_FSCACHE) += cache.o
169p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
new file mode 100644
index 000000000000..12d602351dbe
--- /dev/null
+++ b/fs/9p/acl.c
@@ -0,0 +1,392 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/fs.h>
17#include <net/9p/9p.h>
18#include <net/9p/client.h>
19#include <linux/slab.h>
20#include <linux/sched.h>
21#include <linux/posix_acl_xattr.h>
22#include "xattr.h"
23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h"
26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{
29 ssize_t size;
30 void *value = NULL;
31 struct posix_acl *acl = NULL;;
32
33 size = v9fs_fid_xattr_get(fid, name, NULL, 0);
34 if (size > 0) {
35 value = kzalloc(size, GFP_NOFS);
36 if (!value)
37 return ERR_PTR(-ENOMEM);
38 size = v9fs_fid_xattr_get(fid, name, value, size);
39 if (size > 0) {
40 acl = posix_acl_from_xattr(value, size);
41 if (IS_ERR(acl))
42 goto err_out;
43 }
44 } else if (size == -ENODATA || size == 0 ||
45 size == -ENOSYS || size == -EOPNOTSUPP) {
46 acl = NULL;
47 } else
48 acl = ERR_PTR(-EIO);
49
50err_out:
51 kfree(value);
52 return acl;
53}
54
55int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
56{
57 int retval = 0;
58 struct posix_acl *pacl, *dacl;
59 struct v9fs_session_info *v9ses;
60
61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0;
66 }
67 /* get the default/access acl values and cache them */
68 dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
69 pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
70
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else
77 retval = -EIO;
78
79 return retval;
80}
81
82static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
83{
84 struct posix_acl *acl;
85 /*
86 * 9p Always cache the acl value when
87 * instantiating the inode (v9fs_inode_from_fid)
88 */
89 acl = get_cached_acl(inode, type);
90 BUG_ON(acl == ACL_NOT_CACHED);
91 return acl;
92}
93
94int v9fs_check_acl(struct inode *inode, int mask)
95{
96 struct posix_acl *acl;
97 struct v9fs_session_info *v9ses;
98
99 v9ses = v9fs_inode2v9ses(inode);
100 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
101 /*
102 * On access = client mode get the acl
103 * values from the server
104 */
105 return 0;
106 }
107 acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
108
109 if (IS_ERR(acl))
110 return PTR_ERR(acl);
111 if (acl) {
112 int error = posix_acl_permission(inode, acl, mask);
113 posix_acl_release(acl);
114 return error;
115 }
116 return -EAGAIN;
117}
118
119static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
120{
121 int retval;
122 char *name;
123 size_t size;
124 void *buffer;
125 struct inode *inode = dentry->d_inode;
126
127 set_cached_acl(inode, type, acl);
128 /* Set a setxattr request to server */
129 size = posix_acl_xattr_size(acl->a_count);
130 buffer = kmalloc(size, GFP_KERNEL);
131 if (!buffer)
132 return -ENOMEM;
133 retval = posix_acl_to_xattr(acl, buffer, size);
134 if (retval < 0)
135 goto err_free_out;
136 switch (type) {
137 case ACL_TYPE_ACCESS:
138 name = POSIX_ACL_XATTR_ACCESS;
139 break;
140 case ACL_TYPE_DEFAULT:
141 name = POSIX_ACL_XATTR_DEFAULT;
142 break;
143 default:
144 BUG();
145 }
146 retval = v9fs_xattr_set(dentry, name, buffer, size, 0);
147err_free_out:
148 kfree(buffer);
149 return retval;
150}
151
152int v9fs_acl_chmod(struct dentry *dentry)
153{
154 int retval = 0;
155 struct posix_acl *acl, *clone;
156 struct inode *inode = dentry->d_inode;
157
158 if (S_ISLNK(inode->i_mode))
159 return -EOPNOTSUPP;
160 acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
161 if (acl) {
162 clone = posix_acl_clone(acl, GFP_KERNEL);
163 posix_acl_release(acl);
164 if (!clone)
165 return -ENOMEM;
166 retval = posix_acl_chmod_masq(clone, inode->i_mode);
167 if (!retval)
168 retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone);
169 posix_acl_release(clone);
170 }
171 return retval;
172}
173
174int v9fs_set_create_acl(struct dentry *dentry,
175 struct posix_acl *dpacl, struct posix_acl *pacl)
176{
177 if (dpacl)
178 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
179 if (pacl)
180 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
181 posix_acl_release(dpacl);
182 posix_acl_release(pacl);
183 return 0;
184}
185
186int v9fs_acl_mode(struct inode *dir, mode_t *modep,
187 struct posix_acl **dpacl, struct posix_acl **pacl)
188{
189 int retval = 0;
190 mode_t mode = *modep;
191 struct posix_acl *acl = NULL;
192
193 if (!S_ISLNK(mode)) {
194 acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT);
195 if (IS_ERR(acl))
196 return PTR_ERR(acl);
197 if (!acl)
198 mode &= ~current_umask();
199 }
200 if (acl) {
201 struct posix_acl *clone;
202
203 if (S_ISDIR(mode))
204 *dpacl = acl;
205 clone = posix_acl_clone(acl, GFP_NOFS);
206 retval = -ENOMEM;
207 if (!clone)
208 goto cleanup;
209
210 retval = posix_acl_create_masq(clone, &mode);
211 if (retval < 0) {
212 posix_acl_release(clone);
213 goto cleanup;
214 }
215 if (retval > 0)
216 *pacl = clone;
217 }
218 *modep = mode;
219 return 0;
220cleanup:
221 posix_acl_release(acl);
222 return retval;
223
224}
225
226static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
227 void *buffer, size_t size, int type)
228{
229 char *full_name;
230
231 switch (type) {
232 case ACL_TYPE_ACCESS:
233 full_name = POSIX_ACL_XATTR_ACCESS;
234 break;
235 case ACL_TYPE_DEFAULT:
236 full_name = POSIX_ACL_XATTR_DEFAULT;
237 break;
238 default:
239 BUG();
240 }
241 return v9fs_xattr_get(dentry, full_name, buffer, size);
242}
243
244static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
245 void *buffer, size_t size, int type)
246{
247 struct v9fs_session_info *v9ses;
248 struct posix_acl *acl;
249 int error;
250
251 if (strcmp(name, "") != 0)
252 return -EINVAL;
253
254 v9ses = v9fs_inode2v9ses(dentry->d_inode);
255 /*
256 * We allow set/get/list of acl when access=client is not specified
257 */
258 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
259 return v9fs_remote_get_acl(dentry, name, buffer, size, type);
260
261 acl = v9fs_get_cached_acl(dentry->d_inode, type);
262 if (IS_ERR(acl))
263 return PTR_ERR(acl);
264 if (acl == NULL)
265 return -ENODATA;
266 error = posix_acl_to_xattr(acl, buffer, size);
267 posix_acl_release(acl);
268
269 return error;
270}
271
272static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
273 const void *value, size_t size,
274 int flags, int type)
275{
276 char *full_name;
277
278 switch (type) {
279 case ACL_TYPE_ACCESS:
280 full_name = POSIX_ACL_XATTR_ACCESS;
281 break;
282 case ACL_TYPE_DEFAULT:
283 full_name = POSIX_ACL_XATTR_DEFAULT;
284 break;
285 default:
286 BUG();
287 }
288 return v9fs_xattr_set(dentry, full_name, value, size, flags);
289}
290
291
292static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
293 const void *value, size_t size,
294 int flags, int type)
295{
296 int retval;
297 struct posix_acl *acl;
298 struct v9fs_session_info *v9ses;
299 struct inode *inode = dentry->d_inode;
300
301 if (strcmp(name, "") != 0)
302 return -EINVAL;
303
304 v9ses = v9fs_inode2v9ses(dentry->d_inode);
305 /*
306 * set the attribute on the remote. Without even looking at the
307 * xattr value. We leave it to the server to validate
308 */
309 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
310 return v9fs_remote_set_acl(dentry, name,
311 value, size, flags, type);
312
313 if (S_ISLNK(inode->i_mode))
314 return -EOPNOTSUPP;
315 if (!is_owner_or_cap(inode))
316 return -EPERM;
317 if (value) {
318 /* update the cached acl value */
319 acl = posix_acl_from_xattr(value, size);
320 if (IS_ERR(acl))
321 return PTR_ERR(acl);
322 else if (acl) {
323 retval = posix_acl_valid(acl);
324 if (retval)
325 goto err_out;
326 }
327 } else
328 acl = NULL;
329
330 switch (type) {
331 case ACL_TYPE_ACCESS:
332 name = POSIX_ACL_XATTR_ACCESS;
333 if (acl) {
334 mode_t mode = inode->i_mode;
335 retval = posix_acl_equiv_mode(acl, &mode);
336 if (retval < 0)
337 goto err_out;
338 else {
339 struct iattr iattr;
340 if (retval == 0) {
341 /*
342 * ACL can be represented
343 * by the mode bits. So don't
344 * update ACL.
345 */
346 acl = NULL;
347 value = NULL;
348 size = 0;
349 }
350 /* Updte the mode bits */
351 iattr.ia_mode = ((mode & S_IALLUGO) |
352 (inode->i_mode & ~S_IALLUGO));
353 iattr.ia_valid = ATTR_MODE;
354 /* FIXME should we update ctime ?
355 * What is the following setxattr update the
356 * mode ?
357 */
358 v9fs_vfs_setattr_dotl(dentry, &iattr);
359 }
360 }
361 break;
362 case ACL_TYPE_DEFAULT:
363 name = POSIX_ACL_XATTR_DEFAULT;
364 if (!S_ISDIR(inode->i_mode)) {
365 retval = -EINVAL;
366 goto err_out;
367 }
368 break;
369 default:
370 BUG();
371 }
372 retval = v9fs_xattr_set(dentry, name, value, size, flags);
373 if (!retval)
374 set_cached_acl(inode, type, acl);
375err_out:
376 posix_acl_release(acl);
377 return retval;
378}
379
380const struct xattr_handler v9fs_xattr_acl_access_handler = {
381 .prefix = POSIX_ACL_XATTR_ACCESS,
382 .flags = ACL_TYPE_ACCESS,
383 .get = v9fs_xattr_get_acl,
384 .set = v9fs_xattr_set_acl,
385};
386
387const struct xattr_handler v9fs_xattr_acl_default_handler = {
388 .prefix = POSIX_ACL_XATTR_DEFAULT,
389 .flags = ACL_TYPE_DEFAULT,
390 .get = v9fs_xattr_get_acl,
391 .set = v9fs_xattr_set_acl,
392};
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
new file mode 100644
index 000000000000..59e18c2e8c7e
--- /dev/null
+++ b/fs/9p/acl.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14#ifndef FS_9P_ACL_H
15#define FS_9P_ACL_H
16
17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask);
20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *);
23extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
24 struct posix_acl **dpacl, struct posix_acl **pacl);
25#else
26#define v9fs_check_acl NULL
27static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
28{
29 return 0;
30}
31static inline int v9fs_acl_chmod(struct dentry *dentry)
32{
33 return 0;
34}
35static inline int v9fs_set_create_acl(struct dentry *dentry,
36 struct posix_acl *dpacl,
37 struct posix_acl *pacl)
38{
39 return 0;
40}
41static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
42 struct posix_acl **dpacl,
43 struct posix_acl **pacl)
44{
45 return 0;
46}
47
48#endif
49#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6406f896bf95..b00223c99d70 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -149,6 +149,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
149 switch (access) { 149 switch (access) {
150 case V9FS_ACCESS_SINGLE: 150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER: 151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
152 uid = current_fsuid(); 153 uid = current_fsuid();
153 any = 0; 154 any = 0;
154 break; 155 break;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 38dc0e067599..2f77cd33ba83 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -193,7 +193,17 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
193 v9ses->flags |= V9FS_ACCESS_USER; 193 v9ses->flags |= V9FS_ACCESS_USER;
194 else if (strcmp(s, "any") == 0) 194 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 195 v9ses->flags |= V9FS_ACCESS_ANY;
196 else { 196 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else {
197 v9ses->flags |= V9FS_ACCESS_SINGLE; 207 v9ses->flags |= V9FS_ACCESS_SINGLE;
198 v9ses->uid = simple_strtoul(s, &e, 10); 208 v9ses->uid = simple_strtoul(s, &e, 10);
199 if (*e != '\0') 209 if (*e != '\0')
@@ -278,6 +288,16 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
278 288
279 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
280 290
291 if (!v9fs_proto_dotl(v9ses) &&
292 ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
293 /*
294 * We support ACCESS_CLIENT only for dotl.
295 * Fall back to ACCESS_USER
296 */
297 v9ses->flags &= ~V9FS_ACCESS_MASK;
298 v9ses->flags |= V9FS_ACCESS_USER;
299 }
300 /*FIXME !! */
281 /* for legacy mode, fall back to V9FS_ACCESS_ANY */ 301 /* for legacy mode, fall back to V9FS_ACCESS_ANY */
282 if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) && 302 if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
283 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { 303 ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 4c963c9fc41f..cb6396855e2d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -33,13 +33,17 @@
33 * 33 *
34 * Session flags reflect options selected by users at mount time 34 * Session flags reflect options selected by users at mount time
35 */ 35 */
36#define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \
37 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
40
36enum p9_session_flags { 41enum p9_session_flags {
37 V9FS_PROTO_2000U = 0x01, 42 V9FS_PROTO_2000U = 0x01,
38 V9FS_PROTO_2000L = 0x02, 43 V9FS_PROTO_2000L = 0x02,
39 V9FS_ACCESS_SINGLE = 0x04, 44 V9FS_ACCESS_SINGLE = 0x04,
40 V9FS_ACCESS_USER = 0x08, 45 V9FS_ACCESS_USER = 0x08,
41 V9FS_ACCESS_ANY = 0x0C, 46 V9FS_ACCESS_CLIENT = 0x10
42 V9FS_ACCESS_MASK = 0x0C,
43}; 47};
44 48
45/* possible values of ->cache */ 49/* possible values of ->cache */
@@ -113,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses);
113void v9fs_session_cancel(struct v9fs_session_info *v9ses); 117void v9fs_session_cancel(struct v9fs_session_info *v9ses);
114void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); 118void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
115 119
116#define V9FS_MAGIC 0x01021997
117
118/* other default globals */ 120/* other default globals */
119#define V9FS_PORT 564 121#define V9FS_PORT 564
120#define V9FS_DEFUSER "nobody" 122#define V9FS_DEFUSER "nobody"
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 88418c419ea7..bab0eac873f4 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -64,3 +64,7 @@ int v9fs_uflags2omode(int uflags, int extended);
64 64
65ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 65ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
66void v9fs_blank_wstat(struct p9_wstat *wstat); 66void v9fs_blank_wstat(struct p9_wstat *wstat);
67int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
68int v9fs_file_fsync_dotl(struct file *filp, int datasync);
69
70#define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90e38449f4b3..b7f2a8e3863e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -154,10 +154,40 @@ static int v9fs_launder_page(struct page *page)
154 return 0; 154 return 0;
155} 155}
156 156
157/**
158 * v9fs_direct_IO - 9P address space operation for direct I/O
159 * @rw: direction (read or write)
160 * @iocb: target I/O control block
161 * @iov: array of vectors that define I/O buffer
162 * @pos: offset in file to begin the operation
163 * @nr_segs: size of iovec array
164 *
165 * The presence of v9fs_direct_IO() in the address space ops vector
166 * allowes open() O_DIRECT flags which would have failed otherwise.
167 *
168 * In the non-cached mode, we shunt off direct read and write requests before
169 * the VFS gets them, so this method should never be called.
170 *
171 * Direct IO is not 'yet' supported in the cached mode. Hence when
172 * this routine is called through generic_file_aio_read(), the read/write fails
173 * with an error.
174 *
175 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
177 loff_t pos, unsigned long nr_segs)
178{
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name,
182 (long long) pos, nr_segs);
183
184 return -EINVAL;
185}
157const struct address_space_operations v9fs_addr_operations = { 186const struct address_space_operations v9fs_addr_operations = {
158 .readpage = v9fs_vfs_readpage, 187 .readpage = v9fs_vfs_readpage,
159 .readpages = v9fs_vfs_readpages, 188 .readpages = v9fs_vfs_readpages,
160 .releasepage = v9fs_release_page, 189 .releasepage = v9fs_release_page,
161 .invalidatepage = v9fs_invalidate_page, 190 .invalidatepage = v9fs_invalidate_page,
162 .launder_page = v9fs_launder_page, 191 .launder_page = v9fs_launder_page,
192 .direct_IO = v9fs_direct_IO,
163}; 193};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 899f168fd19c..b84ebe8cefed 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -242,7 +242,8 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
242 while (rdir->head < rdir->tail) { 242 while (rdir->head < rdir->tail) {
243 243
244 err = p9dirent_read(rdir->buf + rdir->head, 244 err = p9dirent_read(rdir->buf + rdir->head,
245 buflen - rdir->head, &curdirent, 245 rdir->tail - rdir->head,
246 &curdirent,
246 fid->clnt->proto_version); 247 fid->clnt->proto_version);
247 if (err < 0) { 248 if (err < 0) {
248 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 249 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -314,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = {
314 .readdir = v9fs_dir_readdir_dotl, 315 .readdir = v9fs_dir_readdir_dotl,
315 .open = v9fs_file_open, 316 .open = v9fs_file_open,
316 .release = v9fs_dir_release, 317 .release = v9fs_dir_release,
318 .fsync = v9fs_file_fsync_dotl,
317}; 319};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e97c92bd6f16..240c30674396 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -33,6 +33,7 @@
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/list.h> 34#include <linux/list.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/utsname.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/idr.h> 38#include <linux/idr.h>
38#include <net/9p/9p.h> 39#include <net/9p/9p.h>
@@ -44,6 +45,7 @@
44#include "cache.h" 45#include "cache.h"
45 46
46static const struct file_operations v9fs_cached_file_operations; 47static const struct file_operations v9fs_cached_file_operations;
48static const struct file_operations v9fs_cached_file_operations_dotl;
47 49
48/** 50/**
49 * v9fs_file_open - open a file (or directory) 51 * v9fs_file_open - open a file (or directory)
@@ -92,6 +94,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
92 /* enable cached file options */ 94 /* enable cached file options */
93 if(file->f_op == &v9fs_file_operations) 95 if(file->f_op == &v9fs_file_operations)
94 file->f_op = &v9fs_cached_file_operations; 96 file->f_op = &v9fs_cached_file_operations;
97 else if (file->f_op == &v9fs_file_operations_dotl)
98 file->f_op = &v9fs_cached_file_operations_dotl;
95 99
96#ifdef CONFIG_9P_FSCACHE 100#ifdef CONFIG_9P_FSCACHE
97 v9fs_cache_inode_set_cookie(inode, file); 101 v9fs_cache_inode_set_cookie(inode, file);
@@ -130,6 +134,206 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
130 return res; 134 return res;
131} 135}
132 136
137static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
138{
139 struct p9_flock flock;
140 struct p9_fid *fid;
141 uint8_t status;
142 int res = 0;
143 unsigned char fl_type;
144
145 fid = filp->private_data;
146 BUG_ON(fid == NULL);
147
148 if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
149 BUG();
150
151 res = posix_lock_file_wait(filp, fl);
152 if (res < 0)
153 goto out;
154
155 /* convert posix lock to p9 tlock args */
156 memset(&flock, 0, sizeof(flock));
157 flock.type = fl->fl_type;
158 flock.start = fl->fl_start;
159 if (fl->fl_end == OFFSET_MAX)
160 flock.length = 0;
161 else
162 flock.length = fl->fl_end - fl->fl_start + 1;
163 flock.proc_id = fl->fl_pid;
164 flock.client_id = utsname()->nodename;
165 if (IS_SETLKW(cmd))
166 flock.flags = P9_LOCK_FLAGS_BLOCK;
167
168 /*
169 * if its a blocked request and we get P9_LOCK_BLOCKED as the status
170 * for lock request, keep on trying
171 */
172 for (;;) {
173 res = p9_client_lock_dotl(fid, &flock, &status);
174 if (res < 0)
175 break;
176
177 if (status != P9_LOCK_BLOCKED)
178 break;
179 if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
180 break;
181 schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
182 }
183
184 /* map 9p status to VFS status */
185 switch (status) {
186 case P9_LOCK_SUCCESS:
187 res = 0;
188 break;
189 case P9_LOCK_BLOCKED:
190 res = -EAGAIN;
191 break;
192 case P9_LOCK_ERROR:
193 case P9_LOCK_GRACE:
194 res = -ENOLCK;
195 break;
196 default:
197 BUG();
198 }
199
200 /*
201 * incase server returned error for lock request, revert
202 * it locally
203 */
204 if (res < 0 && fl->fl_type != F_UNLCK) {
205 fl_type = fl->fl_type;
206 fl->fl_type = F_UNLCK;
207 res = posix_lock_file_wait(filp, fl);
208 fl->fl_type = fl_type;
209 }
210out:
211 return res;
212}
213
214static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
215{
216 struct p9_getlock glock;
217 struct p9_fid *fid;
218 int res = 0;
219
220 fid = filp->private_data;
221 BUG_ON(fid == NULL);
222
223 posix_test_lock(filp, fl);
224 /*
225 * if we have a conflicting lock locally, no need to validate
226 * with server
227 */
228 if (fl->fl_type != F_UNLCK)
229 return res;
230
231 /* convert posix lock to p9 tgetlock args */
232 memset(&glock, 0, sizeof(glock));
233 glock.type = fl->fl_type;
234 glock.start = fl->fl_start;
235 if (fl->fl_end == OFFSET_MAX)
236 glock.length = 0;
237 else
238 glock.length = fl->fl_end - fl->fl_start + 1;
239 glock.proc_id = fl->fl_pid;
240 glock.client_id = utsname()->nodename;
241
242 res = p9_client_getlock_dotl(fid, &glock);
243 if (res < 0)
244 return res;
245 if (glock.type != F_UNLCK) {
246 fl->fl_type = glock.type;
247 fl->fl_start = glock.start;
248 if (glock.length == 0)
249 fl->fl_end = OFFSET_MAX;
250 else
251 fl->fl_end = glock.start + glock.length - 1;
252 fl->fl_pid = glock.proc_id;
253 } else
254 fl->fl_type = F_UNLCK;
255
256 return res;
257}
258
259/**
260 * v9fs_file_lock_dotl - lock a file (or directory)
261 * @filp: file to be locked
262 * @cmd: lock command
263 * @fl: file lock structure
264 *
265 */
266
267static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
268{
269 struct inode *inode = filp->f_path.dentry->d_inode;
270 int ret = -ENOLCK;
271
272 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
273 cmd, fl, filp->f_path.dentry->d_name.name);
274
275 /* No mandatory locks */
276 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
277 goto out_err;
278
279 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
280 filemap_write_and_wait(inode->i_mapping);
281 invalidate_mapping_pages(&inode->i_data, 0, -1);
282 }
283
284 if (IS_SETLK(cmd) || IS_SETLKW(cmd))
285 ret = v9fs_file_do_lock(filp, cmd, fl);
286 else if (IS_GETLK(cmd))
287 ret = v9fs_file_getlock(filp, fl);
288 else
289 ret = -EINVAL;
290out_err:
291 return ret;
292}
293
294/**
295 * v9fs_file_flock_dotl - lock a file
296 * @filp: file to be locked
297 * @cmd: lock command
298 * @fl: file lock structure
299 *
300 */
301
302static int v9fs_file_flock_dotl(struct file *filp, int cmd,
303 struct file_lock *fl)
304{
305 struct inode *inode = filp->f_path.dentry->d_inode;
306 int ret = -ENOLCK;
307
308 P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
309 cmd, fl, filp->f_path.dentry->d_name.name);
310
311 /* No mandatory locks */
312 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
313 goto out_err;
314
315 if (!(fl->fl_flags & FL_FLOCK))
316 goto out_err;
317
318 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
319 filemap_write_and_wait(inode->i_mapping);
320 invalidate_mapping_pages(&inode->i_data, 0, -1);
321 }
322 /* Convert flock to posix lock */
323 fl->fl_owner = (fl_owner_t)filp;
324 fl->fl_start = 0;
325 fl->fl_end = OFFSET_MAX;
326 fl->fl_flags |= FL_POSIX;
327 fl->fl_flags ^= FL_FLOCK;
328
329 if (IS_SETLK(cmd) | IS_SETLKW(cmd))
330 ret = v9fs_file_do_lock(filp, cmd, fl);
331 else
332 ret = -EINVAL;
333out_err:
334 return ret;
335}
336
133/** 337/**
134 * v9fs_file_readn - read from a file 338 * v9fs_file_readn - read from a file
135 * @filp: file pointer to read 339 * @filp: file pointer to read
@@ -219,7 +423,9 @@ static ssize_t
219v9fs_file_write(struct file *filp, const char __user * data, 423v9fs_file_write(struct file *filp, const char __user * data,
220 size_t count, loff_t * offset) 424 size_t count, loff_t * offset)
221{ 425{
222 int n, rsize, total = 0; 426 ssize_t retval;
427 size_t total = 0;
428 int n;
223 struct p9_fid *fid; 429 struct p9_fid *fid;
224 struct p9_client *clnt; 430 struct p9_client *clnt;
225 struct inode *inode = filp->f_path.dentry->d_inode; 431 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -232,14 +438,19 @@ v9fs_file_write(struct file *filp, const char __user * data,
232 fid = filp->private_data; 438 fid = filp->private_data;
233 clnt = fid->clnt; 439 clnt = fid->clnt;
234 440
235 rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ; 441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
236 444
237 do { 445 retval = -EINVAL;
238 if (count < rsize) 446 if ((ssize_t) count < 0)
239 rsize = count; 447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
240 451
241 n = p9_client_write(fid, NULL, data+total, origin+total, 452 do {
242 rsize); 453 n = p9_client_write(fid, NULL, data+total, origin+total, count);
243 if (n <= 0) 454 if (n <= 0)
244 break; 455 break;
245 count -= n; 456 count -= n;
@@ -258,9 +469,11 @@ v9fs_file_write(struct file *filp, const char __user * data,
258 } 469 }
259 470
260 if (n < 0) 471 if (n < 0)
261 return n; 472 retval = n;
262 473 else
263 return total; 474 retval = total;
475out:
476 return retval;
264} 477}
265 478
266static int v9fs_file_fsync(struct file *filp, int datasync) 479static int v9fs_file_fsync(struct file *filp, int datasync)
@@ -278,6 +491,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync)
278 return retval; 491 return retval;
279} 492}
280 493
494int v9fs_file_fsync_dotl(struct file *filp, int datasync)
495{
496 struct p9_fid *fid;
497 int retval;
498
499 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
500 filp, datasync);
501
502 fid = filp->private_data;
503
504 retval = p9_client_fsync(fid, datasync);
505 return retval;
506}
507
281static const struct file_operations v9fs_cached_file_operations = { 508static const struct file_operations v9fs_cached_file_operations = {
282 .llseek = generic_file_llseek, 509 .llseek = generic_file_llseek,
283 .read = do_sync_read, 510 .read = do_sync_read,
@@ -290,6 +517,19 @@ static const struct file_operations v9fs_cached_file_operations = {
290 .fsync = v9fs_file_fsync, 517 .fsync = v9fs_file_fsync,
291}; 518};
292 519
520static const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek,
522 .read = do_sync_read,
523 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write,
525 .open = v9fs_file_open,
526 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap,
530 .fsync = v9fs_file_fsync_dotl,
531};
532
293const struct file_operations v9fs_file_operations = { 533const struct file_operations v9fs_file_operations = {
294 .llseek = generic_file_llseek, 534 .llseek = generic_file_llseek,
295 .read = v9fs_file_read, 535 .read = v9fs_file_read,
@@ -307,7 +547,8 @@ const struct file_operations v9fs_file_operations_dotl = {
307 .write = v9fs_file_write, 547 .write = v9fs_file_write,
308 .open = v9fs_file_open, 548 .open = v9fs_file_open,
309 .release = v9fs_dir_release, 549 .release = v9fs_dir_release,
310 .lock = v9fs_file_lock, 550 .lock = v9fs_file_lock_dotl,
551 .flock = v9fs_file_flock_dotl,
311 .mmap = generic_file_readonly_mmap, 552 .mmap = generic_file_readonly_mmap,
312 .fsync = v9fs_file_fsync, 553 .fsync = v9fs_file_fsync_dotl,
313}; 554};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ef5905f7c8a3..34bf71b56542 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -36,6 +36,7 @@
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/xattr.h> 38#include <linux/xattr.h>
39#include <linux/posix_acl.h>
39#include <net/9p/9p.h> 40#include <net/9p/9p.h>
40#include <net/9p/client.h> 41#include <net/9p/client.h>
41 42
@@ -44,6 +45,7 @@
44#include "fid.h" 45#include "fid.h"
45#include "cache.h" 46#include "cache.h"
46#include "xattr.h" 47#include "xattr.h"
48#include "acl.h"
47 49
48static const struct inode_operations v9fs_dir_inode_operations; 50static const struct inode_operations v9fs_dir_inode_operations;
49static const struct inode_operations v9fs_dir_inode_operations_dotu; 51static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -53,6 +55,10 @@ static const struct inode_operations v9fs_file_inode_operations_dotl;
53static const struct inode_operations v9fs_symlink_inode_operations; 55static const struct inode_operations v9fs_symlink_inode_operations;
54static const struct inode_operations v9fs_symlink_inode_operations_dotl; 56static const struct inode_operations v9fs_symlink_inode_operations_dotl;
55 57
58static int
59v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
60 dev_t rdev);
61
56/** 62/**
57 * unixmode2p9mode - convert unix mode bits to plan 9 63 * unixmode2p9mode - convert unix mode bits to plan 9
58 * @v9ses: v9fs session information 64 * @v9ses: v9fs session information
@@ -500,6 +506,11 @@ v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
500 v9fs_vcookie_set_qid(ret, &st->qid); 506 v9fs_vcookie_set_qid(ret, &st->qid);
501 v9fs_cache_inode_get_cookie(ret); 507 v9fs_cache_inode_get_cookie(ret);
502#endif 508#endif
509 err = v9fs_get_acl(ret, fid);
510 if (err) {
511 iput(ret);
512 goto error;
513 }
503 kfree(st); 514 kfree(st);
504 return ret; 515 return ret;
505error: 516error:
@@ -553,13 +564,6 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
553 return retval; 564 return retval;
554} 565}
555 566
556static int
557v9fs_open_created(struct inode *inode, struct file *file)
558{
559 return 0;
560}
561
562
563/** 567/**
564 * v9fs_create - Create a file 568 * v9fs_create - Create a file
565 * @v9ses: session information 569 * @v9ses: session information
@@ -655,29 +659,37 @@ error:
655 */ 659 */
656 660
657static int 661static int
658v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, 662v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
659 struct nameidata *nd) 663 struct nameidata *nd)
660{ 664{
661 int err = 0; 665 int err = 0;
662 char *name = NULL; 666 char *name = NULL;
663 gid_t gid; 667 gid_t gid;
664 int flags; 668 int flags;
669 mode_t mode;
665 struct v9fs_session_info *v9ses; 670 struct v9fs_session_info *v9ses;
666 struct p9_fid *fid = NULL; 671 struct p9_fid *fid = NULL;
667 struct p9_fid *dfid, *ofid; 672 struct p9_fid *dfid, *ofid;
668 struct file *filp; 673 struct file *filp;
669 struct p9_qid qid; 674 struct p9_qid qid;
670 struct inode *inode; 675 struct inode *inode;
676 struct posix_acl *pacl = NULL, *dacl = NULL;
671 677
672 v9ses = v9fs_inode2v9ses(dir); 678 v9ses = v9fs_inode2v9ses(dir);
673 if (nd && nd->flags & LOOKUP_OPEN) 679 if (nd && nd->flags & LOOKUP_OPEN)
674 flags = nd->intent.open.flags - 1; 680 flags = nd->intent.open.flags - 1;
675 else 681 else {
676 flags = O_RDWR; 682 /*
683 * create call without LOOKUP_OPEN is due
684 * to mknod of regular files. So use mknod
685 * operation.
686 */
687 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
688 }
677 689
678 name = (char *) dentry->d_name.name; 690 name = (char *) dentry->d_name.name;
679 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " 691 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
680 "mode:0x%x\n", name, flags, mode); 692 "mode:0x%x\n", name, flags, omode);
681 693
682 dfid = v9fs_fid_lookup(dentry->d_parent); 694 dfid = v9fs_fid_lookup(dentry->d_parent);
683 if (IS_ERR(dfid)) { 695 if (IS_ERR(dfid)) {
@@ -695,6 +707,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
695 } 707 }
696 708
697 gid = v9fs_get_fsgid_for_create(dir); 709 gid = v9fs_get_fsgid_for_create(dir);
710
711 mode = omode;
712 /* Update mode based on ACL value */
713 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
714 if (err) {
715 P9_DPRINTK(P9_DEBUG_VFS,
716 "Failed to get acl values in creat %d\n", err);
717 goto error;
718 }
698 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); 719 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
699 if (err < 0) { 720 if (err < 0) {
700 P9_DPRINTK(P9_DEBUG_VFS, 721 P9_DPRINTK(P9_DEBUG_VFS,
@@ -702,46 +723,52 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
702 err); 723 err);
703 goto error; 724 goto error;
704 } 725 }
726 /* instantiate inode and assign the unopened fid to the dentry */
727 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
728 (nd && nd->flags & LOOKUP_OPEN)) {
729 fid = p9_client_walk(dfid, 1, &name, 1);
730 if (IS_ERR(fid)) {
731 err = PTR_ERR(fid);
732 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
733 err);
734 fid = NULL;
735 goto error;
736 }
705 737
706 /* No need to populate the inode if we are not opening the file AND 738 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
707 * not in cached mode. 739 if (IS_ERR(inode)) {
708 */ 740 err = PTR_ERR(inode);
709 if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) { 741 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
710 /* Not in cached mode. No need to populate inode with stat */ 742 err);
711 dentry->d_op = &v9fs_dentry_operations; 743 goto error;
712 p9_client_clunk(ofid); 744 }
713 d_instantiate(dentry, NULL);
714 return 0;
715 }
716
717 /* Now walk from the parent so we can get an unopened fid. */
718 fid = p9_client_walk(dfid, 1, &name, 1);
719 if (IS_ERR(fid)) {
720 err = PTR_ERR(fid);
721 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
722 fid = NULL;
723 goto error;
724 }
725
726 /* instantiate inode and assign the unopened fid to dentry */
727 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
728 if (IS_ERR(inode)) {
729 err = PTR_ERR(inode);
730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
731 goto error;
732 }
733 if (v9ses->cache)
734 dentry->d_op = &v9fs_cached_dentry_operations; 745 dentry->d_op = &v9fs_cached_dentry_operations;
735 else 746 d_instantiate(dentry, inode);
747 err = v9fs_fid_add(dentry, fid);
748 if (err < 0)
749 goto error;
750 /* The fid would get clunked via a dput */
751 fid = NULL;
752 } else {
753 /*
754 * Not in cached mode. No need to populate
755 * inode with stat. We need to get an inode
756 * so that we can set the acl with dentry
757 */
758 inode = v9fs_get_inode(dir->i_sb, mode);
759 if (IS_ERR(inode)) {
760 err = PTR_ERR(inode);
761 goto error;
762 }
736 dentry->d_op = &v9fs_dentry_operations; 763 dentry->d_op = &v9fs_dentry_operations;
737 d_instantiate(dentry, inode); 764 d_instantiate(dentry, inode);
738 err = v9fs_fid_add(dentry, fid); 765 }
739 if (err < 0) 766 /* Now set the ACL based on the default value */
740 goto error; 767 v9fs_set_create_acl(dentry, dacl, pacl);
741 768
742 /* if we are opening a file, assign the open fid to the file */ 769 /* if we are opening a file, assign the open fid to the file */
743 if (nd && nd->flags & LOOKUP_OPEN) { 770 if (nd && nd->flags & LOOKUP_OPEN) {
744 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 771 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
745 if (IS_ERR(filp)) { 772 if (IS_ERR(filp)) {
746 p9_client_clunk(ofid); 773 p9_client_clunk(ofid);
747 return PTR_ERR(filp); 774 return PTR_ERR(filp);
@@ -800,7 +827,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
800 827
801 /* if we are opening a file, assign the open fid to the file */ 828 /* if we are opening a file, assign the open fid to the file */
802 if (nd && nd->flags & LOOKUP_OPEN) { 829 if (nd && nd->flags & LOOKUP_OPEN) {
803 filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); 830 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
804 if (IS_ERR(filp)) { 831 if (IS_ERR(filp)) {
805 err = PTR_ERR(filp); 832 err = PTR_ERR(filp);
806 goto error; 833 goto error;
@@ -859,23 +886,28 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
859 * 886 *
860 */ 887 */
861 888
862static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry, 889static int v9fs_vfs_mkdir_dotl(struct inode *dir,
863 int mode) 890 struct dentry *dentry, int omode)
864{ 891{
865 int err; 892 int err;
866 struct v9fs_session_info *v9ses; 893 struct v9fs_session_info *v9ses;
867 struct p9_fid *fid = NULL, *dfid = NULL; 894 struct p9_fid *fid = NULL, *dfid = NULL;
868 gid_t gid; 895 gid_t gid;
869 char *name; 896 char *name;
897 mode_t mode;
870 struct inode *inode; 898 struct inode *inode;
871 struct p9_qid qid; 899 struct p9_qid qid;
872 struct dentry *dir_dentry; 900 struct dentry *dir_dentry;
901 struct posix_acl *dacl = NULL, *pacl = NULL;
873 902
874 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 903 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
875 err = 0; 904 err = 0;
876 v9ses = v9fs_inode2v9ses(dir); 905 v9ses = v9fs_inode2v9ses(dir);
877 906
878 mode |= S_IFDIR; 907 omode |= S_IFDIR;
908 if (dir->i_mode & S_ISGID)
909 omode |= S_ISGID;
910
879 dir_dentry = v9fs_dentry_from_dir_inode(dir); 911 dir_dentry = v9fs_dentry_from_dir_inode(dir);
880 dfid = v9fs_fid_lookup(dir_dentry); 912 dfid = v9fs_fid_lookup(dir_dentry);
881 if (IS_ERR(dfid)) { 913 if (IS_ERR(dfid)) {
@@ -886,11 +918,14 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
886 } 918 }
887 919
888 gid = v9fs_get_fsgid_for_create(dir); 920 gid = v9fs_get_fsgid_for_create(dir);
889 if (gid < 0) { 921 mode = omode;
890 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); 922 /* Update mode based on ACL value */
923 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
924 if (err) {
925 P9_DPRINTK(P9_DEBUG_VFS,
926 "Failed to get acl values in mkdir %d\n", err);
891 goto error; 927 goto error;
892 } 928 }
893
894 name = (char *) dentry->d_name.name; 929 name = (char *) dentry->d_name.name;
895 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); 930 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
896 if (err < 0) 931 if (err < 0)
@@ -920,7 +955,23 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
920 if (err < 0) 955 if (err < 0)
921 goto error; 956 goto error;
922 fid = NULL; 957 fid = NULL;
958 } else {
959 /*
960 * Not in cached mode. No need to populate
961 * inode with stat. We need to get an inode
962 * so that we can set the acl with dentry
963 */
964 inode = v9fs_get_inode(dir->i_sb, mode);
965 if (IS_ERR(inode)) {
966 err = PTR_ERR(inode);
967 goto error;
968 }
969 dentry->d_op = &v9fs_dentry_operations;
970 d_instantiate(dentry, inode);
923 } 971 }
972 /* Now set the ACL based on the default value */
973 v9fs_set_create_acl(dentry, dacl, pacl);
974
924error: 975error:
925 if (fid) 976 if (fid)
926 p9_client_clunk(fid); 977 p9_client_clunk(fid);
@@ -979,7 +1030,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
979 1030
980 result = v9fs_fid_add(dentry, fid); 1031 result = v9fs_fid_add(dentry, fid);
981 if (result < 0) 1032 if (result < 0)
982 goto error; 1033 goto error_iput;
983 1034
984inst_out: 1035inst_out:
985 if (v9ses->cache) 1036 if (v9ses->cache)
@@ -990,6 +1041,8 @@ inst_out:
990 d_add(dentry, inode); 1041 d_add(dentry, inode);
991 return NULL; 1042 return NULL;
992 1043
1044error_iput:
1045 iput(inode);
993error: 1046error:
994 p9_client_clunk(fid); 1047 p9_client_clunk(fid);
995 1048
@@ -1237,7 +1290,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1237 * 1290 *
1238 */ 1291 */
1239 1292
1240static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) 1293int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1241{ 1294{
1242 int retval; 1295 int retval;
1243 struct v9fs_session_info *v9ses; 1296 struct v9fs_session_info *v9ses;
@@ -1279,6 +1332,12 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
1279 1332
1280 setattr_copy(dentry->d_inode, iattr); 1333 setattr_copy(dentry->d_inode, iattr);
1281 mark_inode_dirty(dentry->d_inode); 1334 mark_inode_dirty(dentry->d_inode);
1335 if (iattr->ia_valid & ATTR_MODE) {
1336 /* We also want to update ACL when we update mode bits */
1337 retval = v9fs_acl_chmod(dentry);
1338 if (retval < 0)
1339 return retval;
1340 }
1282 return 0; 1341 return 0;
1283} 1342}
1284 1343
@@ -1473,7 +1532,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1473 if (IS_ERR(fid)) 1532 if (IS_ERR(fid))
1474 return PTR_ERR(fid); 1533 return PTR_ERR(fid);
1475 1534
1476 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) 1535 if (!v9fs_proto_dotu(v9ses))
1477 return -EBADF; 1536 return -EBADF;
1478 1537
1479 st = p9_client_stat(fid); 1538 st = p9_client_stat(fid);
@@ -1616,11 +1675,6 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1616 1675
1617 gid = v9fs_get_fsgid_for_create(dir); 1676 gid = v9fs_get_fsgid_for_create(dir);
1618 1677
1619 if (gid < 0) {
1620 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
1621 goto error;
1622 }
1623
1624 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ 1678 /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
1625 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); 1679 err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
1626 1680
@@ -1855,21 +1909,23 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1855 * 1909 *
1856 */ 1910 */
1857static int 1911static int
1858v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, 1912v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
1859 dev_t rdev) 1913 dev_t rdev)
1860{ 1914{
1861 int err; 1915 int err;
1862 char *name; 1916 char *name;
1917 mode_t mode;
1863 struct v9fs_session_info *v9ses; 1918 struct v9fs_session_info *v9ses;
1864 struct p9_fid *fid = NULL, *dfid = NULL; 1919 struct p9_fid *fid = NULL, *dfid = NULL;
1865 struct inode *inode; 1920 struct inode *inode;
1866 gid_t gid; 1921 gid_t gid;
1867 struct p9_qid qid; 1922 struct p9_qid qid;
1868 struct dentry *dir_dentry; 1923 struct dentry *dir_dentry;
1924 struct posix_acl *dacl = NULL, *pacl = NULL;
1869 1925
1870 P9_DPRINTK(P9_DEBUG_VFS, 1926 P9_DPRINTK(P9_DEBUG_VFS,
1871 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1927 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1872 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1928 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
1873 1929
1874 if (!new_valid_dev(rdev)) 1930 if (!new_valid_dev(rdev))
1875 return -EINVAL; 1931 return -EINVAL;
@@ -1885,11 +1941,14 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
1885 } 1941 }
1886 1942
1887 gid = v9fs_get_fsgid_for_create(dir); 1943 gid = v9fs_get_fsgid_for_create(dir);
1888 if (gid < 0) { 1944 mode = omode;
1889 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); 1945 /* Update mode based on ACL value */
1946 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
1947 if (err) {
1948 P9_DPRINTK(P9_DEBUG_VFS,
1949 "Failed to get acl values in mknod %d\n", err);
1890 goto error; 1950 goto error;
1891 } 1951 }
1892
1893 name = (char *) dentry->d_name.name; 1952 name = (char *) dentry->d_name.name;
1894 1953
1895 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); 1954 err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
@@ -1933,13 +1992,68 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
1933 dentry->d_op = &v9fs_dentry_operations; 1992 dentry->d_op = &v9fs_dentry_operations;
1934 d_instantiate(dentry, inode); 1993 d_instantiate(dentry, inode);
1935 } 1994 }
1936 1995 /* Now set the ACL based on the default value */
1996 v9fs_set_create_acl(dentry, dacl, pacl);
1937error: 1997error:
1938 if (fid) 1998 if (fid)
1939 p9_client_clunk(fid); 1999 p9_client_clunk(fid);
1940 return err; 2000 return err;
1941} 2001}
1942 2002
2003static int
2004v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
2005{
2006 int retval;
2007 struct p9_fid *fid;
2008 char *target = NULL;
2009
2010 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
2011 retval = -EPERM;
2012 fid = v9fs_fid_lookup(dentry);
2013 if (IS_ERR(fid))
2014 return PTR_ERR(fid);
2015
2016 retval = p9_client_readlink(fid, &target);
2017 if (retval < 0)
2018 return retval;
2019
2020 strncpy(buffer, target, buflen);
2021 P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
2022
2023 retval = strnlen(buffer, buflen);
2024 return retval;
2025}
2026
2027/**
2028 * v9fs_vfs_follow_link_dotl - follow a symlink path
2029 * @dentry: dentry for symlink
2030 * @nd: nameidata
2031 *
2032 */
2033
2034static void *
2035v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
2036{
2037 int len = 0;
2038 char *link = __getname();
2039
2040 P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
2041
2042 if (!link)
2043 link = ERR_PTR(-ENOMEM);
2044 else {
2045 len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
2046 if (len < 0) {
2047 __putname(link);
2048 link = ERR_PTR(len);
2049 } else
2050 link[min(len, PATH_MAX-1)] = 0;
2051 }
2052 nd_set_link(nd, link);
2053
2054 return NULL;
2055}
2056
1943static const struct inode_operations v9fs_dir_inode_operations_dotu = { 2057static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1944 .create = v9fs_vfs_create, 2058 .create = v9fs_vfs_create,
1945 .lookup = v9fs_vfs_lookup, 2059 .lookup = v9fs_vfs_lookup,
@@ -1970,7 +2084,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
1970 .getxattr = generic_getxattr, 2084 .getxattr = generic_getxattr,
1971 .removexattr = generic_removexattr, 2085 .removexattr = generic_removexattr,
1972 .listxattr = v9fs_listxattr, 2086 .listxattr = v9fs_listxattr,
1973 2087 .check_acl = v9fs_check_acl,
1974}; 2088};
1975 2089
1976static const struct inode_operations v9fs_dir_inode_operations = { 2090static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1997,6 +2111,7 @@ static const struct inode_operations v9fs_file_inode_operations_dotl = {
1997 .getxattr = generic_getxattr, 2111 .getxattr = generic_getxattr,
1998 .removexattr = generic_removexattr, 2112 .removexattr = generic_removexattr,
1999 .listxattr = v9fs_listxattr, 2113 .listxattr = v9fs_listxattr,
2114 .check_acl = v9fs_check_acl,
2000}; 2115};
2001 2116
2002static const struct inode_operations v9fs_symlink_inode_operations = { 2117static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -2008,8 +2123,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
2008}; 2123};
2009 2124
2010static const struct inode_operations v9fs_symlink_inode_operations_dotl = { 2125static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
2011 .readlink = generic_readlink, 2126 .readlink = v9fs_vfs_readlink_dotl,
2012 .follow_link = v9fs_vfs_follow_link, 2127 .follow_link = v9fs_vfs_follow_link_dotl,
2013 .put_link = v9fs_vfs_put_link, 2128 .put_link = v9fs_vfs_put_link,
2014 .getattr = v9fs_vfs_getattr_dotl, 2129 .getattr = v9fs_vfs_getattr_dotl,
2015 .setattr = v9fs_vfs_setattr_dotl, 2130 .setattr = v9fs_vfs_setattr_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 1d12ba0ed3db..c55c614500ad 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -39,6 +39,7 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/statfs.h> 41#include <linux/statfs.h>
42#include <linux/magic.h>
42#include <net/9p/9p.h> 43#include <net/9p/9p.h>
43#include <net/9p/client.h> 44#include <net/9p/client.h>
44 45
@@ -46,6 +47,7 @@
46#include "v9fs_vfs.h" 47#include "v9fs_vfs.h"
47#include "fid.h" 48#include "fid.h"
48#include "xattr.h" 49#include "xattr.h"
50#include "acl.h"
49 51
50static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl; 52static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
51 53
@@ -66,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
66 * v9fs_fill_super - populate superblock with info 68 * v9fs_fill_super - populate superblock with info
67 * @sb: superblock 69 * @sb: superblock
68 * @v9ses: session information 70 * @v9ses: session information
69 * @flags: flags propagated from v9fs_get_sb() 71 * @flags: flags propagated from v9fs_mount()
70 * 72 *
71 */ 73 */
72 74
@@ -88,22 +90,25 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
88 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
89 MS_NOATIME; 91 MS_NOATIME;
90 92
93#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
95 sb->s_flags |= MS_POSIXACL;
96#endif
97
91 save_mount_options(sb, data); 98 save_mount_options(sb, data);
92} 99}
93 100
94/** 101/**
95 * v9fs_get_sb - mount a superblock 102 * v9fs_mount - mount a superblock
96 * @fs_type: file system type 103 * @fs_type: file system type
97 * @flags: mount flags 104 * @flags: mount flags
98 * @dev_name: device name that was mounted 105 * @dev_name: device name that was mounted
99 * @data: mount options 106 * @data: mount options
100 * @mnt: mountpoint record to be instantiated
101 * 107 *
102 */ 108 */
103 109
104static int v9fs_get_sb(struct file_system_type *fs_type, int flags, 110static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
105 const char *dev_name, void *data, 111 const char *dev_name, void *data)
106 struct vfsmount *mnt)
107{ 112{
108 struct super_block *sb = NULL; 113 struct super_block *sb = NULL;
109 struct inode *inode = NULL; 114 struct inode *inode = NULL;
@@ -117,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
117 122
118 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 123 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
119 if (!v9ses) 124 if (!v9ses)
120 return -ENOMEM; 125 return ERR_PTR(-ENOMEM);
121 126
122 fid = v9fs_session_init(v9ses, dev_name, data); 127 fid = v9fs_session_init(v9ses, dev_name, data);
123 if (IS_ERR(fid)) { 128 if (IS_ERR(fid)) {
@@ -149,7 +154,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
149 goto release_sb; 154 goto release_sb;
150 } 155 }
151 sb->s_root = root; 156 sb->s_root = root;
152
153 if (v9fs_proto_dotl(v9ses)) { 157 if (v9fs_proto_dotl(v9ses)) {
154 struct p9_stat_dotl *st = NULL; 158 struct p9_stat_dotl *st = NULL;
155 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 159 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
@@ -174,19 +178,21 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
174 p9stat_free(st); 178 p9stat_free(st);
175 kfree(st); 179 kfree(st);
176 } 180 }
177 181 retval = v9fs_get_acl(inode, fid);
182 if (retval)
183 goto release_sb;
178 v9fs_fid_add(root, fid); 184 v9fs_fid_add(root, fid);
179 185
180 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 186 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
181 simple_set_mnt(mnt, sb); 187 return dget(sb->s_root);
182 return 0;
183 188
184clunk_fid: 189clunk_fid:
185 p9_client_clunk(fid); 190 p9_client_clunk(fid);
186close_session: 191close_session:
187 v9fs_session_close(v9ses); 192 v9fs_session_close(v9ses);
188 kfree(v9ses); 193 kfree(v9ses);
189 return retval; 194 return ERR_PTR(retval);
195
190release_sb: 196release_sb:
191 /* 197 /*
192 * we will do the session_close and root dentry release 198 * we will do the session_close and root dentry release
@@ -196,7 +202,7 @@ release_sb:
196 */ 202 */
197 p9_client_clunk(fid); 203 p9_client_clunk(fid);
198 deactivate_locked_super(sb); 204 deactivate_locked_super(sb);
199 return retval; 205 return ERR_PTR(retval);
200} 206}
201 207
202/** 208/**
@@ -249,7 +255,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
249 if (v9fs_proto_dotl(v9ses)) { 255 if (v9fs_proto_dotl(v9ses)) {
250 res = p9_client_statfs(fid, &rs); 256 res = p9_client_statfs(fid, &rs);
251 if (res == 0) { 257 if (res == 0) {
252 buf->f_type = rs.type; 258 buf->f_type = V9FS_MAGIC;
253 buf->f_bsize = rs.bsize; 259 buf->f_bsize = rs.bsize;
254 buf->f_blocks = rs.blocks; 260 buf->f_blocks = rs.blocks;
255 buf->f_bfree = rs.bfree; 261 buf->f_bfree = rs.bfree;
@@ -292,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
292 298
293struct file_system_type v9fs_fs_type = { 299struct file_system_type v9fs_fs_type = {
294 .name = "9p", 300 .name = "9p",
295 .get_sb = v9fs_get_sb, 301 .mount = v9fs_mount,
296 .kill_sb = v9fs_kill_super, 302 .kill_sb = v9fs_kill_super,
297 .owner = THIS_MODULE, 303 .owner = THIS_MODULE,
298 .fs_flags = FS_RENAME_DOES_D_MOVE, 304 .fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f88e5c2dc873..43ec7df84336 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -21,30 +21,13 @@
21#include "fid.h" 21#include "fid.h"
22#include "xattr.h" 22#include "xattr.h"
23 23
24/* 24ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
25 * v9fs_xattr_get() 25 void *buffer, size_t buffer_size)
26 *
27 * Copy an extended attribute into the buffer
28 * provided, or compute the buffer size required.
29 * Buffer is NULL to compute the size of the buffer required.
30 *
31 * Returns a negative error number on failure, or the number of bytes
32 * used / required on success.
33 */
34ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t buffer_size)
36{ 26{
37 ssize_t retval; 27 ssize_t retval;
38 int msize, read_count; 28 int msize, read_count;
39 u64 offset = 0, attr_size; 29 u64 offset = 0, attr_size;
40 struct p9_fid *fid, *attr_fid; 30 struct p9_fid *attr_fid;
41
42 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
43 __func__, name, buffer_size);
44
45 fid = v9fs_fid_lookup(dentry);
46 if (IS_ERR(fid))
47 return PTR_ERR(fid);
48 31
49 attr_fid = p9_client_xattrwalk(fid, name, &attr_size); 32 attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
50 if (IS_ERR(attr_fid)) { 33 if (IS_ERR(attr_fid)) {
@@ -88,6 +71,31 @@ error:
88 71
89} 72}
90 73
74
75/*
76 * v9fs_xattr_get()
77 *
78 * Copy an extended attribute into the buffer
79 * provided, or compute the buffer size required.
80 * Buffer is NULL to compute the size of the buffer required.
81 *
82 * Returns a negative error number on failure, or the number of bytes
83 * used / required on success.
84 */
85ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
86 void *buffer, size_t buffer_size)
87{
88 struct p9_fid *fid;
89
90 P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
91 __func__, name, buffer_size);
92 fid = v9fs_fid_lookup(dentry);
93 if (IS_ERR(fid))
94 return PTR_ERR(fid);
95
96 return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
97}
98
91/* 99/*
92 * v9fs_xattr_set() 100 * v9fs_xattr_set()
93 * 101 *
@@ -156,5 +164,9 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
156 164
157const struct xattr_handler *v9fs_xattr_handlers[] = { 165const struct xattr_handler *v9fs_xattr_handlers[] = {
158 &v9fs_xattr_user_handler, 166 &v9fs_xattr_user_handler,
167#ifdef CONFIG_9P_FS_POSIX_ACL
168 &v9fs_xattr_acl_access_handler,
169 &v9fs_xattr_acl_default_handler,
170#endif
159 NULL 171 NULL
160}; 172};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index 9ddf672ae5c4..eaa837c53bd5 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -15,10 +15,16 @@
15#define FS_9P_XATTR_H 15#define FS_9P_XATTR_H
16 16
17#include <linux/xattr.h> 17#include <linux/xattr.h>
18#include <net/9p/9p.h>
19#include <net/9p/client.h>
18 20
19extern const struct xattr_handler *v9fs_xattr_handlers[]; 21extern const struct xattr_handler *v9fs_xattr_handlers[];
20extern struct xattr_handler v9fs_xattr_user_handler; 22extern struct xattr_handler v9fs_xattr_user_handler;
23extern const struct xattr_handler v9fs_xattr_acl_access_handler;
24extern const struct xattr_handler v9fs_xattr_acl_default_handler;
21 25
26extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
27 void *, size_t);
22extern ssize_t v9fs_xattr_get(struct dentry *, const char *, 28extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
23 void *, size_t); 29 void *, size_t);
24extern int v9fs_xattr_set(struct dentry *, const char *, 30extern int v9fs_xattr_set(struct dentry *, const char *,
diff --git a/fs/Kconfig b/fs/Kconfig
index 97673c955484..771f457402d4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,7 +62,6 @@ source "fs/notify/Kconfig"
62 62
63source "fs/quota/Kconfig" 63source "fs/quota/Kconfig"
64 64
65source "fs/autofs/Kconfig"
66source "fs/autofs4/Kconfig" 65source "fs/autofs4/Kconfig"
67source "fs/fuse/Kconfig" 66source "fs/fuse/Kconfig"
68 67
@@ -234,7 +233,6 @@ config NFS_COMMON
234 default y 233 default y
235 234
236source "net/sunrpc/Kconfig" 235source "net/sunrpc/Kconfig"
237source "fs/smbfs/Kconfig"
238source "fs/ceph/Kconfig" 236source "fs/ceph/Kconfig"
239source "fs/cifs/Kconfig" 237source "fs/cifs/Kconfig"
240source "fs/ncpfs/Kconfig" 238source "fs/ncpfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 26956fcec917..a7f7cef0c0c8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -88,7 +88,6 @@ obj-$(CONFIG_NFSD) += nfsd/
88obj-$(CONFIG_LOCKD) += lockd/ 88obj-$(CONFIG_LOCKD) += lockd/
89obj-$(CONFIG_NLS) += nls/ 89obj-$(CONFIG_NLS) += nls/
90obj-$(CONFIG_SYSV_FS) += sysv/ 90obj-$(CONFIG_SYSV_FS) += sysv/
91obj-$(CONFIG_SMB_FS) += smbfs/
92obj-$(CONFIG_CIFS) += cifs/ 91obj-$(CONFIG_CIFS) += cifs/
93obj-$(CONFIG_NCP_FS) += ncpfs/ 92obj-$(CONFIG_NCP_FS) += ncpfs/
94obj-$(CONFIG_HPFS_FS) += hpfs/ 93obj-$(CONFIG_HPFS_FS) += hpfs/
@@ -101,7 +100,6 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
101obj-$(CONFIG_AFFS_FS) += affs/ 100obj-$(CONFIG_AFFS_FS) += affs/
102obj-$(CONFIG_ROMFS_FS) += romfs/ 101obj-$(CONFIG_ROMFS_FS) += romfs/
103obj-$(CONFIG_QNX4FS_FS) += qnx4/ 102obj-$(CONFIG_QNX4FS_FS) += qnx4/
104obj-$(CONFIG_AUTOFS_FS) += autofs/
105obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 103obj-$(CONFIG_AUTOFS4_FS) += autofs4/
106obj-$(CONFIG_ADFS_FS) += adfs/ 104obj-$(CONFIG_ADFS_FS) += adfs/
107obj-$(CONFIG_FUSE_FS) += fuse/ 105obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236f..959dbff2d42d 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
490 return -EINVAL; 490 return -EINVAL;
491} 491}
492 492
493static int adfs_get_sb(struct file_system_type *fs_type, 493static struct dentry *adfs_mount(struct file_system_type *fs_type,
494 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 494 int flags, const char *dev_name, void *data)
495{ 495{
496 return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, 496 return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
497 mnt);
498} 497}
499 498
500static struct file_system_type adfs_fs_type = { 499static struct file_system_type adfs_fs_type = {
501 .owner = THIS_MODULE, 500 .owner = THIS_MODULE,
502 .name = "adfs", 501 .name = "adfs",
503 .get_sb = adfs_get_sb, 502 .mount = adfs_mount,
504 .kill_sb = kill_block_super, 503 .kill_sb = kill_block_super,
505 .fs_flags = FS_REQUIRES_DEV, 504 .fs_flags = FS_REQUIRES_DEV,
506}; 505};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238a..0cf7f4384cbd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
573 return 0; 573 return 0;
574} 574}
575 575
576static int affs_get_sb(struct file_system_type *fs_type, 576static struct dentry *affs_mount(struct file_system_type *fs_type,
577 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 577 int flags, const char *dev_name, void *data)
578{ 578{
579 return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, 579 return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
580 mnt);
581} 580}
582 581
583static struct file_system_type affs_fs_type = { 582static struct file_system_type affs_fs_type = {
584 .owner = THIS_MODULE, 583 .owner = THIS_MODULE,
585 .name = "affs", 584 .name = "affs",
586 .get_sb = affs_get_sb, 585 .mount = affs_mount,
587 .kill_sb = kill_block_super, 586 .kill_sb = kill_block_super,
588 .fs_flags = FS_REQUIRES_DEV, 587 .fs_flags = FS_REQUIRES_DEV,
589}; 588};
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae0..27201cffece4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
30 30
31static void afs_i_init_once(void *foo); 31static void afs_i_init_once(void *foo);
32static int afs_get_sb(struct file_system_type *fs_type, 32static struct dentry *afs_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name, 33 int flags, const char *dev_name, void *data);
34 void *data, struct vfsmount *mnt);
35static struct inode *afs_alloc_inode(struct super_block *sb); 34static struct inode *afs_alloc_inode(struct super_block *sb);
36static void afs_put_super(struct super_block *sb); 35static void afs_put_super(struct super_block *sb);
37static void afs_destroy_inode(struct inode *inode); 36static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
40struct file_system_type afs_fs_type = { 39struct file_system_type afs_fs_type = {
41 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
42 .name = "afs", 41 .name = "afs",
43 .get_sb = afs_get_sb, 42 .mount = afs_mount,
44 .kill_sb = kill_anon_super, 43 .kill_sb = kill_anon_super,
45 .fs_flags = 0, 44 .fs_flags = 0,
46}; 45};
@@ -359,11 +358,8 @@ error:
359/* 358/*
360 * get an AFS superblock 359 * get an AFS superblock
361 */ 360 */
362static int afs_get_sb(struct file_system_type *fs_type, 361static struct dentry *afs_mount(struct file_system_type *fs_type,
363 int flags, 362 int flags, const char *dev_name, void *options)
364 const char *dev_name,
365 void *options,
366 struct vfsmount *mnt)
367{ 363{
368 struct afs_mount_params params; 364 struct afs_mount_params params;
369 struct super_block *sb; 365 struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
427 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 423 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
428 } 424 }
429 425
430 simple_set_mnt(mnt, sb);
431 afs_put_volume(params.volume); 426 afs_put_volume(params.volume);
432 afs_put_cell(params.cell); 427 afs_put_cell(params.cell);
433 kfree(new_opts); 428 kfree(new_opts);
434 _leave(" = 0 [%p]", sb); 429 _leave(" = 0 [%p]", sb);
435 return 0; 430 return dget(sb->s_root);
436 431
437error: 432error:
438 afs_put_volume(params.volume); 433 afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
440 key_put(params.key); 435 key_put(params.key);
441 kfree(new_opts); 436 kfree(new_opts);
442 _leave(" = %d", ret); 437 _leave(" = %d", ret);
443 return ret; 438 return ERR_PTR(ret);
444} 439}
445 440
446/* 441/*
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 5365527ca43f..57ce55b2564c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
26static struct inode *anon_inode_inode; 26static struct inode *anon_inode_inode;
27static const struct file_operations anon_inode_fops; 27static const struct file_operations anon_inode_fops;
28 28
29static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, 29static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
30 const char *dev_name, void *data, 30 int flags, const char *dev_name, void *data)
31 struct vfsmount *mnt)
32{ 31{
33 return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, 32 return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
34 mnt);
35} 33}
36 34
37/* 35/*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
45 43
46static struct file_system_type anon_inode_fs_type = { 44static struct file_system_type anon_inode_fs_type = {
47 .name = "anon_inodefs", 45 .name = "anon_inodefs",
48 .get_sb = anon_inodefs_get_sb, 46 .mount = anon_inodefs_mount,
49 .kill_sb = kill_anon_super, 47 .kill_sb = kill_anon_super,
50}; 48};
51static const struct dentry_operations anon_inodefs_dentry_operations = { 49static const struct dentry_operations anon_inodefs_dentry_operations = {
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
deleted file mode 100644
index 480e210c83ab..000000000000
--- a/fs/autofs/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
1config AUTOFS_FS
2 tristate "Kernel automounter support"
3 depends on BKL # unfixable, just use autofs4
4 help
5 The automounter is a tool to automatically mount remote file systems
6 on demand. This implementation is partially kernel-based to reduce
7 overhead in the already-mounted case; this is unlike the BSD
8 automounter (amd), which is a pure user space daemon.
9
10 To use the automounter you need the user-space tools from the autofs
11 package; you can find the location in <file:Documentation/Changes>.
12 You also want to answer Y to "NFS file system support", below.
13
14 If you want to use the newer version of the automounter with more
15 features, say N here and say Y to "Kernel automounter v4 support",
16 below.
17
18 To compile this support as a module, choose M here: the module will be
19 called autofs.
20
21 If you are not a part of a fairly large, distributed network, you
22 probably do not need an automounter, and can say N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
deleted file mode 100644
index 453a60f46d05..000000000000
--- a/fs/autofs/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the linux autofs-filesystem routines.
3#
4
5obj-$(CONFIG_AUTOFS_FS) += autofs.o
6
7autofs-objs := dirhash.o init.o inode.o root.o symlink.o waitq.o
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
deleted file mode 100644
index 901a3e67ec45..000000000000
--- a/fs/autofs/autofs_i.h
+++ /dev/null
@@ -1,165 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * linux/fs/autofs/autofs_i.h
4 *
5 * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/* Internal header file for autofs */
14
15#include <linux/auto_fs.h>
16
17/* This is the range of ioctl() numbers we claim as ours */
18#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
19#define AUTOFS_IOC_COUNT 32
20
21#include <linux/kernel.h>
22#include <linux/slab.h>
23#include <linux/time.h>
24#include <linux/string.h>
25#include <linux/wait.h>
26#include <linux/dcache.h>
27#include <linux/namei.h>
28#include <linux/mount.h>
29#include <linux/sched.h>
30
31#include <asm/current.h>
32#include <asm/uaccess.h>
33
34#ifdef DEBUG
35#define DPRINTK(D) (printk D)
36#else
37#define DPRINTK(D) ((void)0)
38#endif
39
40/*
41 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
42 * kernel will keep the negative response cached for up to the time given
43 * here, although the time can be shorter if the kernel throws the dcache
44 * entry away. This probably should be settable from user space.
45 */
46#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */
47
48/* Structures associated with the root directory hash table */
49
50#define AUTOFS_HASH_SIZE 67
51
52struct autofs_dir_ent {
53 int hash;
54 char *name;
55 int len;
56 ino_t ino;
57 struct dentry *dentry;
58 /* Linked list of entries */
59 struct autofs_dir_ent *next;
60 struct autofs_dir_ent **back;
61 /* The following entries are for the expiry system */
62 unsigned long last_usage;
63 struct list_head exp;
64};
65
66struct autofs_dirhash {
67 struct autofs_dir_ent *h[AUTOFS_HASH_SIZE];
68 struct list_head expiry_head;
69};
70
71struct autofs_wait_queue {
72 wait_queue_head_t queue;
73 struct autofs_wait_queue *next;
74 autofs_wqt_t wait_queue_token;
75 /* We use the following to see what we are waiting for */
76 int hash;
77 int len;
78 char *name;
79 /* This is for status reporting upon return */
80 int status;
81 int wait_ctr;
82};
83
84struct autofs_symlink {
85 char *data;
86 int len;
87 time_t mtime;
88};
89
90#define AUTOFS_MAX_SYMLINKS 256
91
92#define AUTOFS_ROOT_INO 1
93#define AUTOFS_FIRST_SYMLINK 2
94#define AUTOFS_FIRST_DIR_INO (AUTOFS_FIRST_SYMLINK+AUTOFS_MAX_SYMLINKS)
95
96#define AUTOFS_SYMLINK_BITMAP_LEN \
97 ((AUTOFS_MAX_SYMLINKS+((sizeof(long)*1)-1))/(sizeof(long)*8))
98
99#define AUTOFS_SBI_MAGIC 0x6d4a556d
100
101struct autofs_sb_info {
102 u32 magic;
103 struct file *pipe;
104 struct pid *oz_pgrp;
105 int catatonic;
106 struct super_block *sb;
107 unsigned long exp_timeout;
108 ino_t next_dir_ino;
109 struct autofs_wait_queue *queues; /* Wait queue pointer */
110 struct autofs_dirhash dirhash; /* Root directory hash */
111 struct autofs_symlink symlink[AUTOFS_MAX_SYMLINKS];
112 unsigned long symlink_bitmap[AUTOFS_SYMLINK_BITMAP_LEN];
113};
114
115static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
116{
117 return (struct autofs_sb_info *)(sb->s_fs_info);
118}
119
120/* autofs_oz_mode(): do we see the man behind the curtain? (The
121 processes which do manipulations for us in user space sees the raw
122 filesystem without "magic".) */
123
124static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
125 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
126}
127
128/* Hash operations */
129
130void autofs_initialize_hash(struct autofs_dirhash *);
131struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *,struct qstr *);
132void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
133void autofs_hash_delete(struct autofs_dir_ent *);
134struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
135void autofs_hash_dputall(struct autofs_dirhash *);
136void autofs_hash_nuke(struct autofs_sb_info *);
137
138/* Expiration-handling functions */
139
140void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *);
141struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt);
142
143/* Operations structures */
144
145extern const struct inode_operations autofs_root_inode_operations;
146extern const struct inode_operations autofs_symlink_inode_operations;
147extern const struct file_operations autofs_root_operations;
148
149/* Initializing function */
150
151int autofs_fill_super(struct super_block *, void *, int);
152void autofs_kill_sb(struct super_block *sb);
153struct inode *autofs_iget(struct super_block *, unsigned long);
154
155/* Queue management functions */
156
157int autofs_wait(struct autofs_sb_info *,struct qstr *);
158int autofs_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
159void autofs_catatonic_mode(struct autofs_sb_info *);
160
161#ifdef DEBUG
162void autofs_say(const char *name, int len);
163#else
164#define autofs_say(n,l) ((void)0)
165#endif
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
deleted file mode 100644
index e947915109e5..000000000000
--- a/fs/autofs/dirhash.c
+++ /dev/null
@@ -1,250 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/dirhash.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include "autofs_i.h"
14
15/* Functions for maintenance of expiry queue */
16
17static void autofs_init_usage(struct autofs_dirhash *dh,
18 struct autofs_dir_ent *ent)
19{
20 list_add_tail(&ent->exp, &dh->expiry_head);
21 ent->last_usage = jiffies;
22}
23
24static void autofs_delete_usage(struct autofs_dir_ent *ent)
25{
26 list_del(&ent->exp);
27}
28
29void autofs_update_usage(struct autofs_dirhash *dh,
30 struct autofs_dir_ent *ent)
31{
32 autofs_delete_usage(ent); /* Unlink from current position */
33 autofs_init_usage(dh,ent); /* Relink at queue tail */
34}
35
36struct autofs_dir_ent *autofs_expire(struct super_block *sb,
37 struct autofs_sb_info *sbi,
38 struct vfsmount *mnt)
39{
40 struct autofs_dirhash *dh = &sbi->dirhash;
41 struct autofs_dir_ent *ent;
42 unsigned long timeout = sbi->exp_timeout;
43
44 while (1) {
45 struct path path;
46 int umount_ok;
47
48 if ( list_empty(&dh->expiry_head) || sbi->catatonic )
49 return NULL; /* No entries */
50 /* We keep the list sorted by last_usage and want old stuff */
51 ent = list_entry(dh->expiry_head.next, struct autofs_dir_ent, exp);
52 if (jiffies - ent->last_usage < timeout)
53 break;
54 /* Move to end of list in case expiry isn't desirable */
55 autofs_update_usage(dh, ent);
56
57 /* Check to see that entry is expirable */
58 if ( ent->ino < AUTOFS_FIRST_DIR_INO )
59 return ent; /* Symlinks are always expirable */
60
61 /* Get the dentry for the autofs subdirectory */
62 path.dentry = ent->dentry;
63
64 if (!path.dentry) {
65 /* Should only happen in catatonic mode */
66 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
67 autofs_delete_usage(ent);
68 continue;
69 }
70
71 if (!path.dentry->d_inode) {
72 dput(path.dentry);
73 printk("autofs: negative dentry on expiry queue: %s\n",
74 ent->name);
75 autofs_delete_usage(ent);
76 continue;
77 }
78
79 /* Make sure entry is mounted and unused; note that dentry will
80 point to the mounted-on-top root. */
81 if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
82 !d_mountpoint(path.dentry)) {
83 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
84 continue;
85 }
86 path.mnt = mnt;
87 path_get(&path);
88 if (!follow_down(&path)) {
89 path_put(&path);
90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
91 continue;
92 }
93 while (d_mountpoint(path.dentry) && follow_down(&path))
94 ;
95 umount_ok = may_umount(path.mnt);
96 path_put(&path);
97
98 if (umount_ok) {
99 DPRINTK(("autofs: signaling expire on %s\n", ent->name));
100 return ent; /* Expirable! */
101 }
102 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
103 }
104 return NULL; /* No expirable entries */
105}
106
107void autofs_initialize_hash(struct autofs_dirhash *dh) {
108 memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *));
109 INIT_LIST_HEAD(&dh->expiry_head);
110}
111
112struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, struct qstr *name)
113{
114 struct autofs_dir_ent *dhn;
115
116 DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", name->hash));
117 autofs_say(name->name,name->len);
118
119 for ( dhn = dh->h[(unsigned) name->hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) {
120 if ( name->hash == dhn->hash &&
121 name->len == dhn->len &&
122 !memcmp(name->name, dhn->name, name->len) )
123 break;
124 }
125
126 return dhn;
127}
128
129void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent)
130{
131 struct autofs_dir_ent **dhnp;
132
133 DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash));
134 autofs_say(ent->name,ent->len);
135
136 autofs_init_usage(dh,ent);
137 if (ent->dentry)
138 dget(ent->dentry);
139
140 dhnp = &dh->h[(unsigned) ent->hash % AUTOFS_HASH_SIZE];
141 ent->next = *dhnp;
142 ent->back = dhnp;
143 *dhnp = ent;
144 if ( ent->next )
145 ent->next->back = &(ent->next);
146}
147
148void autofs_hash_delete(struct autofs_dir_ent *ent)
149{
150 *(ent->back) = ent->next;
151 if ( ent->next )
152 ent->next->back = ent->back;
153
154 autofs_delete_usage(ent);
155
156 if ( ent->dentry )
157 dput(ent->dentry);
158 kfree(ent->name);
159 kfree(ent);
160}
161
162/*
163 * Used by readdir(). We must validate "ptr", so we can't simply make it
164 * a pointer. Values below 0xffff are reserved; calling with any value
165 * <= 0x10000 will return the first entry found.
166 *
167 * "last" can be NULL or the value returned by the last search *if* we
168 * want the next sequential entry.
169 */
170struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh,
171 off_t *ptr, struct autofs_dir_ent *last)
172{
173 int bucket, ecount, i;
174 struct autofs_dir_ent *ent;
175
176 bucket = (*ptr >> 16) - 1;
177 ecount = *ptr & 0xffff;
178
179 if ( bucket < 0 ) {
180 bucket = ecount = 0;
181 }
182
183 DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount));
184
185 ent = last ? last->next : NULL;
186
187 if ( ent ) {
188 ecount++;
189 } else {
190 while ( bucket < AUTOFS_HASH_SIZE ) {
191 ent = dh->h[bucket];
192 for ( i = ecount ; ent && i ; i-- )
193 ent = ent->next;
194
195 if (ent) {
196 ecount++; /* Point to *next* entry */
197 break;
198 }
199
200 bucket++; ecount = 0;
201 }
202 }
203
204#ifdef DEBUG
205 if ( !ent )
206 printk("autofs_hash_enum: nothing found\n");
207 else {
208 printk("autofs_hash_enum: found hash %08x, name", ent->hash);
209 autofs_say(ent->name,ent->len);
210 }
211#endif
212
213 *ptr = ((bucket+1) << 16) + ecount;
214 return ent;
215}
216
217/* Iterate over all the ents, and remove all dentry pointers. Used on
218 entering catatonic mode, in order to make the filesystem unmountable. */
219void autofs_hash_dputall(struct autofs_dirhash *dh)
220{
221 int i;
222 struct autofs_dir_ent *ent;
223
224 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
225 for ( ent = dh->h[i] ; ent ; ent = ent->next ) {
226 if ( ent->dentry ) {
227 dput(ent->dentry);
228 ent->dentry = NULL;
229 }
230 }
231 }
232}
233
234/* Delete everything. This is used on filesystem destruction, so we
235 make no attempt to keep the pointers valid */
236void autofs_hash_nuke(struct autofs_sb_info *sbi)
237{
238 int i;
239 struct autofs_dir_ent *ent, *nent;
240
241 for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
242 for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
243 nent = ent->next;
244 if ( ent->dentry )
245 dput(ent->dentry);
246 kfree(ent->name);
247 kfree(ent);
248 }
249 }
250}
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
deleted file mode 100644
index cea5219b4f37..000000000000
--- a/fs/autofs/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/init.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include "autofs_i.h"
16
17static int autofs_get_sb(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
19{
20 return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
21}
22
23static struct file_system_type autofs_fs_type = {
24 .owner = THIS_MODULE,
25 .name = "autofs",
26 .get_sb = autofs_get_sb,
27 .kill_sb = autofs_kill_sb,
28};
29
30static int __init init_autofs_fs(void)
31{
32 return register_filesystem(&autofs_fs_type);
33}
34
35static void __exit exit_autofs_fs(void)
36{
37 unregister_filesystem(&autofs_fs_type);
38}
39
40module_init(init_autofs_fs);
41module_exit(exit_autofs_fs);
42
43#ifdef DEBUG
44void autofs_say(const char *name, int len)
45{
46 printk("(%d: ", len);
47 while ( len-- )
48 printk("%c", *name++);
49 printk(")\n");
50}
51#endif
52MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
deleted file mode 100644
index e1734f2d6e26..000000000000
--- a/fs/autofs/inode.c
+++ /dev/null
@@ -1,288 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/inode.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/kernel.h>
14#include <linux/mm.h>
15#include <linux/slab.h>
16#include <linux/file.h>
17#include <linux/parser.h>
18#include <linux/bitops.h>
19#include <linux/magic.h>
20#include "autofs_i.h"
21#include <linux/module.h>
22
23void autofs_kill_sb(struct super_block *sb)
24{
25 struct autofs_sb_info *sbi = autofs_sbi(sb);
26 unsigned int n;
27
28 /*
29 * In the event of a failure in get_sb_nodev the superblock
30 * info is not present so nothing else has been setup, so
31 * just call kill_anon_super when we are called from
32 * deactivate_super.
33 */
34 if (!sbi)
35 goto out_kill_sb;
36
37 if (!sbi->catatonic)
38 autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
39
40 put_pid(sbi->oz_pgrp);
41
42 autofs_hash_nuke(sbi);
43 for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
44 if (test_bit(n, sbi->symlink_bitmap))
45 kfree(sbi->symlink[n].data);
46 }
47
48 kfree(sb->s_fs_info);
49
50out_kill_sb:
51 DPRINTK(("autofs: shutting down\n"));
52 kill_anon_super(sb);
53}
54
55static const struct super_operations autofs_sops = {
56 .statfs = simple_statfs,
57 .show_options = generic_show_options,
58};
59
60enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
61
62static const match_table_t autofs_tokens = {
63 {Opt_fd, "fd=%u"},
64 {Opt_uid, "uid=%u"},
65 {Opt_gid, "gid=%u"},
66 {Opt_pgrp, "pgrp=%u"},
67 {Opt_minproto, "minproto=%u"},
68 {Opt_maxproto, "maxproto=%u"},
69 {Opt_err, NULL}
70};
71
72static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
73 pid_t *pgrp, int *minproto, int *maxproto)
74{
75 char *p;
76 substring_t args[MAX_OPT_ARGS];
77 int option;
78
79 *uid = current_uid();
80 *gid = current_gid();
81 *pgrp = task_pgrp_nr(current);
82
83 *minproto = *maxproto = AUTOFS_PROTO_VERSION;
84
85 *pipefd = -1;
86
87 if (!options)
88 return 1;
89
90 while ((p = strsep(&options, ",")) != NULL) {
91 int token;
92 if (!*p)
93 continue;
94
95 token = match_token(p, autofs_tokens, args);
96 switch (token) {
97 case Opt_fd:
98 if (match_int(&args[0], &option))
99 return 1;
100 *pipefd = option;
101 break;
102 case Opt_uid:
103 if (match_int(&args[0], &option))
104 return 1;
105 *uid = option;
106 break;
107 case Opt_gid:
108 if (match_int(&args[0], &option))
109 return 1;
110 *gid = option;
111 break;
112 case Opt_pgrp:
113 if (match_int(&args[0], &option))
114 return 1;
115 *pgrp = option;
116 break;
117 case Opt_minproto:
118 if (match_int(&args[0], &option))
119 return 1;
120 *minproto = option;
121 break;
122 case Opt_maxproto:
123 if (match_int(&args[0], &option))
124 return 1;
125 *maxproto = option;
126 break;
127 default:
128 return 1;
129 }
130 }
131 return (*pipefd < 0);
132}
133
134int autofs_fill_super(struct super_block *s, void *data, int silent)
135{
136 struct inode * root_inode;
137 struct dentry * root;
138 struct file * pipe;
139 int pipefd;
140 struct autofs_sb_info *sbi;
141 int minproto, maxproto;
142 pid_t pgid;
143
144 save_mount_options(s, data);
145
146 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
147 if (!sbi)
148 goto fail_unlock;
149 DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
150
151 s->s_fs_info = sbi;
152 sbi->magic = AUTOFS_SBI_MAGIC;
153 sbi->pipe = NULL;
154 sbi->catatonic = 1;
155 sbi->exp_timeout = 0;
156 autofs_initialize_hash(&sbi->dirhash);
157 sbi->queues = NULL;
158 memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
159 sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO;
160 s->s_blocksize = 1024;
161 s->s_blocksize_bits = 10;
162 s->s_magic = AUTOFS_SUPER_MAGIC;
163 s->s_op = &autofs_sops;
164 s->s_time_gran = 1;
165 sbi->sb = s;
166
167 root_inode = autofs_iget(s, AUTOFS_ROOT_INO);
168 if (IS_ERR(root_inode))
169 goto fail_free;
170 root = d_alloc_root(root_inode);
171 pipe = NULL;
172
173 if (!root)
174 goto fail_iput;
175
176 /* Can this call block? - WTF cares? s is locked. */
177 if (parse_options(data, &pipefd, &root_inode->i_uid,
178 &root_inode->i_gid, &pgid, &minproto,
179 &maxproto)) {
180 printk("autofs: called with bogus options\n");
181 goto fail_dput;
182 }
183
184 /* Couldn't this be tested earlier? */
185 if (minproto > AUTOFS_PROTO_VERSION ||
186 maxproto < AUTOFS_PROTO_VERSION) {
187 printk("autofs: kernel does not match daemon version\n");
188 goto fail_dput;
189 }
190
191 DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
192 sbi->oz_pgrp = find_get_pid(pgid);
193
194 if (!sbi->oz_pgrp) {
195 printk("autofs: could not find process group %d\n", pgid);
196 goto fail_dput;
197 }
198
199 pipe = fget(pipefd);
200
201 if (!pipe) {
202 printk("autofs: could not open pipe file descriptor\n");
203 goto fail_put_pid;
204 }
205
206 if (!pipe->f_op || !pipe->f_op->write)
207 goto fail_fput;
208 sbi->pipe = pipe;
209 sbi->catatonic = 0;
210
211 /*
212 * Success! Install the root dentry now to indicate completion.
213 */
214 s->s_root = root;
215 return 0;
216
217fail_fput:
218 printk("autofs: pipe file descriptor does not contain proper ops\n");
219 fput(pipe);
220fail_put_pid:
221 put_pid(sbi->oz_pgrp);
222fail_dput:
223 dput(root);
224 goto fail_free;
225fail_iput:
226 printk("autofs: get root dentry failed\n");
227 iput(root_inode);
228fail_free:
229 kfree(sbi);
230 s->s_fs_info = NULL;
231fail_unlock:
232 return -EINVAL;
233}
234
235struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
236{
237 unsigned int n;
238 struct autofs_sb_info *sbi = autofs_sbi(sb);
239 struct inode *inode;
240
241 inode = iget_locked(sb, ino);
242 if (!inode)
243 return ERR_PTR(-ENOMEM);
244 if (!(inode->i_state & I_NEW))
245 return inode;
246
247 /* Initialize to the default case (stub directory) */
248
249 inode->i_op = &simple_dir_inode_operations;
250 inode->i_fop = &simple_dir_operations;
251 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
252 inode->i_nlink = 2;
253 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
254
255 if (ino == AUTOFS_ROOT_INO) {
256 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
257 inode->i_op = &autofs_root_inode_operations;
258 inode->i_fop = &autofs_root_operations;
259 goto done;
260 }
261
262 inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
263 inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
264
265 if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
266 /* Symlink inode - should be in symlink list */
267 struct autofs_symlink *sl;
268
269 n = ino - AUTOFS_FIRST_SYMLINK;
270 if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
271 printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
272 goto done;
273 }
274
275 inode->i_op = &autofs_symlink_inode_operations;
276 sl = &sbi->symlink[n];
277 inode->i_private = sl;
278 inode->i_mode = S_IFLNK | S_IRWXUGO;
279 inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
280 inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
281 inode->i_size = sl->len;
282 inode->i_nlink = 1;
283 }
284
285done:
286 unlock_new_inode(inode);
287 return inode;
288}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
deleted file mode 100644
index 0c4ca81aeaeb..000000000000
--- a/fs/autofs/root.c
+++ /dev/null
@@ -1,645 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/root.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/capability.h>
14#include <linux/errno.h>
15#include <linux/stat.h>
16#include <linux/slab.h>
17#include <linux/param.h>
18#include <linux/time.h>
19#include <linux/compat.h>
20#include <linux/smp_lock.h>
21#include "autofs_i.h"
22
23static int autofs_root_readdir(struct file *,void *,filldir_t);
24static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *);
25static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
26static int autofs_root_unlink(struct inode *,struct dentry *);
27static int autofs_root_rmdir(struct inode *,struct dentry *);
28static int autofs_root_mkdir(struct inode *,struct dentry *,int);
29static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
30#ifdef CONFIG_COMPAT
31static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
32#endif
33
/*
 * File operations for the automount root directory: plain directory
 * reads plus the ioctl interface the automount daemon drives.
 */
const struct file_operations autofs_root_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= autofs_root_readdir,
	.unlocked_ioctl	= autofs_root_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= autofs_root_compat_ioctl,
#endif
};
43
/*
 * Inode operations for the automount root.  symlink/mkdir/rmdir/unlink
 * are restricted inside the handlers to the daemon (oz mode) or root.
 */
const struct inode_operations autofs_root_inode_operations = {
	.lookup		= autofs_root_lookup,
	.unlink		= autofs_root_unlink,
	.symlink	= autofs_root_symlink,
	.mkdir		= autofs_root_mkdir,
	.rmdir		= autofs_root_rmdir,
};
51
/*
 * readdir for the automount root: emits "." and ".." at positions 0/1,
 * then walks the directory hash.  Entries with a dentry that is not a
 * mountpoint are suppressed, so an in-progress (unmounted-on) entry is
 * not advertised.  Always returns 0; a filldir refusal just stops the
 * walk with f_pos left at the resume point.
 */
static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct autofs_dir_ent *ent = NULL;
	struct autofs_dirhash *dirhash;
	struct autofs_sb_info *sbi;
	struct inode * inode = filp->f_path.dentry->d_inode;
	off_t onr, nr;

	lock_kernel();	/* BKL guards the dirhash in this filesystem */

	sbi = autofs_sbi(inode->i_sb);
	dirhash = &sbi->dirhash;
	nr = filp->f_pos;

	switch(nr)
	{
	case 0:
		if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
			goto out;
		filp->f_pos = ++nr;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
			goto out;
		filp->f_pos = ++nr;
		/* fall through */
	default:
		/* onr remembers the position the entry was emitted at */
		while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
			if (!ent->dentry || d_mountpoint(ent->dentry)) {
				if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
					goto out;
				filp->f_pos = nr;
			}
		}
		break;
	}

out:
	unlock_kernel();
	return 0;
}
93
/*
 * Ask the automount daemon to create/mount the object behind @dentry
 * and attach its inode.  Called from revalidate with the BKL held.
 *
 * Returns 1 when the dentry is now usable (or deliberately negative /
 * still pending), 0 to make the VFS invalidate the dentry.
 */
static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, struct autofs_sb_info *sbi)
{
	struct inode * inode;
	struct autofs_dir_ent *ent;
	int status = 0;

	if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
		do {
			/* status != 0 only on the second and later passes,
			   i.e. after a daemon round-trip failed */
			if (status && dentry->d_inode) {
				if (status != -ENOENT)
					printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
				return 0; /* Try to get the kernel to invalidate this dentry */
			}

			/* Turn this into a real negative dentry? */
			if (status == -ENOENT) {
				dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
				dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
				return 1;
			} else if (status) {
				/* Return a negative dentry, but leave it "pending" */
				return 1;
			}
			/* First pass: block until the daemon replies */
			status = autofs_wait(sbi, &dentry->d_name);
		} while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
	}

	/* Abuse this field as a pointer to the directory entry, used to
	   find the expire list pointers */
	dentry->d_time = (unsigned long) ent;

	if (!dentry->d_inode) {
		inode = autofs_iget(sb, ent->ino);
		if (IS_ERR(inode)) {
			/* Failed, but leave pending for next time */
			return 1;
		}
		dentry->d_inode = inode;
	}

	/* If this is a directory that isn't a mount point, bitch at the
	   daemon and fix it in user space */
	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
		return !autofs_wait(sbi, &dentry->d_name);
	}

	/* We don't update the usages for the autofs daemon itself, this
	   is necessary for recursive autofs mounts */
	if (!autofs_oz_mode(sbi)) {
		autofs_update_usage(&sbi->dirhash,ent);
	}

	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
	return 1;
}
149
150
151/*
152 * Revalidate is called on every cache lookup. Some of those
153 * cache lookups may actually happen while the dentry is not
154 * yet completely filled in, and revalidate has to delay such
155 * lookups..
156 */
/*
 * d_revalidate for autofs dentries.  Returns 1 if the dentry may be
 * used, 0 to force a fresh lookup.  Pending and not-yet-mounted
 * directory dentries are handed to try_to_fill_dentry() unless the
 * caller is the daemon itself (oz mode), which must never block on
 * its own mounts.
 */
static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
{
	struct inode * dir;
	struct autofs_sb_info *sbi;
	struct autofs_dir_ent *ent;
	int res;

	lock_kernel();
	dir = dentry->d_parent->d_inode;
	sbi = autofs_sbi(dir->i_sb);

	/* Pending dentry */
	if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
		if (autofs_oz_mode(sbi))
			res = 1;
		else
			res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
		unlock_kernel();
		return res;
	}

	/* Negative dentry.. invalidate if "old" */
	/* d_time was set to jiffies + AUTOFS_NEGATIVE_TIMEOUT at creation;
	   unsigned wrap after expiry makes the difference exceed the bound */
	if (!dentry->d_inode) {
		unlock_kernel();
		return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
	}

	/* Check for a non-mountpoint directory */
	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
		if (autofs_oz_mode(sbi))
			res = 1;
		else
			res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
		unlock_kernel();
		return res;
	}

	/* Update the usage list */
	if (!autofs_oz_mode(sbi)) {
		/* d_time doubles as a pointer to the dir entry (see
		   try_to_fill_dentry) */
		ent = (struct autofs_dir_ent *) dentry->d_time;
		if (ent)
			autofs_update_usage(&sbi->dirhash,ent);
	}
	unlock_kernel();
	return 1;
}
203
/* Only revalidate is special; everything else uses VFS defaults */
static const struct dentry_operations autofs_dentry_operations = {
	.d_revalidate = autofs_revalidate,
};
207
/*
 * lookup in the automount root.  The dentry is added negative and
 * flagged DCACHE_AUTOFS_PENDING, then revalidate does the real work
 * (possibly blocking on the daemon) with i_mutex temporarily dropped.
 * Returns NULL on success (dentry instantiated or legitimately
 * negative), or an ERR_PTR.
 */
static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	struct autofs_sb_info *sbi;
	int oz_mode;

	DPRINTK(("autofs_root_lookup: name = "));
	lock_kernel();
	autofs_say(dentry->d_name.name,dentry->d_name.len);

	if (dentry->d_name.len > NAME_MAX) {
		unlock_kernel();
		return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
	}

	sbi = autofs_sbi(dir->i_sb);

	oz_mode = autofs_oz_mode(sbi);
	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
				"oz_mode = %d\n", task_pid_nr(current),
				task_pgrp_nr(current), sbi->catatonic,
				oz_mode));

	/*
	 * Mark the dentry incomplete, but add it. This is needed so
	 * that the VFS layer knows about the dentry, and we can count
	 * on catching any lookups through the revalidate.
	 *
	 * Let all the hard work be done by the revalidate function that
	 * needs to be able to do this anyway..
	 *
	 * We need to do this before we release the directory semaphore.
	 */
	dentry->d_op = &autofs_dentry_operations;
	dentry->d_flags |= DCACHE_AUTOFS_PENDING;
	d_add(dentry, NULL);

	/* Drop the parent's i_mutex so the daemon can operate on the
	   directory while we wait for it in revalidate */
	mutex_unlock(&dir->i_mutex);
	autofs_revalidate(dentry, nd);
	mutex_lock(&dir->i_mutex);

	/*
	 * If we are still pending, check if we had to handle
	 * a signal. If so we can force a restart..
	 */
	if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
		/* See if we were interrupted */
		if (signal_pending(current)) {
			sigset_t *sigset = &current->pending.signal;
			if (sigismember (sigset, SIGKILL) ||
			    sigismember (sigset, SIGQUIT) ||
			    sigismember (sigset, SIGINT)) {
				unlock_kernel();
				return ERR_PTR(-ERESTARTNOINTR);
			}
		}
	}
	unlock_kernel();

	/*
	 * If this dentry is unhashed, then we shouldn't honour this
	 * lookup even if the dentry is positive. Returning ENOENT here
	 * doesn't do the right thing for all system calls, but it should
	 * be OK for the operations we permit from an autofs.
	 */
	if (dentry->d_inode && d_unhashed(dentry))
		return ERR_PTR(-ENOENT);

	return NULL;
}
277
278static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
279{
280 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
281 struct autofs_dirhash *dh = &sbi->dirhash;
282 struct autofs_dir_ent *ent;
283 unsigned int n;
284 int slsize;
285 struct autofs_symlink *sl;
286 struct inode *inode;
287
288 DPRINTK(("autofs_root_symlink: %s <- ", symname));
289 autofs_say(dentry->d_name.name,dentry->d_name.len);
290
291 lock_kernel();
292 if (!autofs_oz_mode(sbi)) {
293 unlock_kernel();
294 return -EACCES;
295 }
296
297 if (autofs_hash_lookup(dh, &dentry->d_name)) {
298 unlock_kernel();
299 return -EEXIST;
300 }
301
302 n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
303 if (n >= AUTOFS_MAX_SYMLINKS) {
304 unlock_kernel();
305 return -ENOSPC;
306 }
307
308 set_bit(n,sbi->symlink_bitmap);
309 sl = &sbi->symlink[n];
310 sl->len = strlen(symname);
311 sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
312 if (!sl->data) {
313 clear_bit(n,sbi->symlink_bitmap);
314 unlock_kernel();
315 return -ENOSPC;
316 }
317
318 ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
319 if (!ent) {
320 kfree(sl->data);
321 clear_bit(n,sbi->symlink_bitmap);
322 unlock_kernel();
323 return -ENOSPC;
324 }
325
326 ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
327 if (!ent->name) {
328 kfree(sl->data);
329 kfree(ent);
330 clear_bit(n,sbi->symlink_bitmap);
331 unlock_kernel();
332 return -ENOSPC;
333 }
334
335 memcpy(sl->data,symname,slsize);
336 sl->mtime = get_seconds();
337
338 ent->ino = AUTOFS_FIRST_SYMLINK + n;
339 ent->hash = dentry->d_name.hash;
340 memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
341 ent->dentry = NULL; /* We don't keep the dentry for symlinks */
342
343 autofs_hash_insert(dh,ent);
344
345 inode = autofs_iget(dir->i_sb, ent->ino);
346 if (IS_ERR(inode))
347 return PTR_ERR(inode);
348
349 d_instantiate(dentry, inode);
350 unlock_kernel();
351 return 0;
352}
353
354/*
355 * NOTE!
356 *
357 * Normal filesystems would do a "d_delete()" to tell the VFS dcache
358 * that the file no longer exists. However, doing that means that the
359 * VFS layer can turn the dentry into a negative dentry, which we
360 * obviously do not want (we're dropping the entry not because it
361 * doesn't exist, but because it has timed out).
362 *
363 * Also see autofs_root_rmdir()..
364 */
/*
 * Remove a symlink entry.  Allowed for the daemon (oz mode) or
 * CAP_SYS_ADMIN.  Uses d_drop() rather than d_delete() -- see the
 * comment block above.
 */
static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
{
	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
	struct autofs_dirhash *dh = &sbi->dirhash;
	struct autofs_dir_ent *ent;
	unsigned int n;

	/* This allows root to remove symlinks */
	lock_kernel();
	if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
		unlock_kernel();
		return -EACCES;
	}

	ent = autofs_hash_lookup(dh, &dentry->d_name);
	if (!ent) {
		unlock_kernel();
		return -ENOENT;
	}

	/* Map the inode number back to a symlink slot; directory inos
	   land outside [0, AUTOFS_MAX_SYMLINKS) after the subtraction */
	n = ent->ino - AUTOFS_FIRST_SYMLINK;
	if (n >= AUTOFS_MAX_SYMLINKS) {
		unlock_kernel();
		return -EISDIR;	/* It's a directory, dummy */
	}
	if (!test_bit(n,sbi->symlink_bitmap)) {
		unlock_kernel();
		return -EINVAL;	/* Nonexistent symlink?  Shouldn't happen */
	}

	/* Clear the d_time back-pointer before the entry goes away */
	dentry->d_time = (unsigned long)(struct autofs_dirhash *)NULL;
	autofs_hash_delete(ent);
	clear_bit(n,sbi->symlink_bitmap);
	kfree(sbi->symlink[n].data);
	d_drop(dentry);

	unlock_kernel();
	return 0;
}
404
/*
 * Remove a directory entry created by autofs_root_mkdir().  Daemon
 * (oz mode) only.  Like unlink, d_drop() is used instead of d_delete()
 * so the dentry does not turn negative.
 */
static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
	struct autofs_dirhash *dh = &sbi->dirhash;
	struct autofs_dir_ent *ent;

	lock_kernel();
	if (!autofs_oz_mode(sbi)) {
		unlock_kernel();
		return -EACCES;
	}

	ent = autofs_hash_lookup(dh, &dentry->d_name);
	if (!ent) {
		unlock_kernel();
		return -ENOENT;
	}

	/* Directory inode numbers start at AUTOFS_FIRST_DIR_INO */
	if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) {
		unlock_kernel();
		return -ENOTDIR; /* Not a directory */
	}

	/* Should never trigger; mkdir stored this dentry in the entry */
	if (ent->dentry != dentry) {
		printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name);
	}

	dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL;
	autofs_hash_delete(ent);
	drop_nlink(dir);	/* undo mkdir's inc_nlink on the parent */
	d_drop(dentry);
	unlock_kernel();

	return 0;
}
440
441static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
444 struct autofs_dirhash *dh = &sbi->dirhash;
445 struct autofs_dir_ent *ent;
446 struct inode *inode;
447 ino_t ino;
448
449 lock_kernel();
450 if (!autofs_oz_mode(sbi)) {
451 unlock_kernel();
452 return -EACCES;
453 }
454
455 ent = autofs_hash_lookup(dh, &dentry->d_name);
456 if (ent) {
457 unlock_kernel();
458 return -EEXIST;
459 }
460
461 if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) {
462 printk("autofs: Out of inode numbers -- what the heck did you do??\n");
463 unlock_kernel();
464 return -ENOSPC;
465 }
466 ino = sbi->next_dir_ino++;
467
468 ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
469 if (!ent) {
470 unlock_kernel();
471 return -ENOSPC;
472 }
473
474 ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
475 if (!ent->name) {
476 kfree(ent);
477 unlock_kernel();
478 return -ENOSPC;
479 }
480
481 ent->hash = dentry->d_name.hash;
482 memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
483 ent->ino = ino;
484 ent->dentry = dentry;
485 autofs_hash_insert(dh,ent);
486
487 inc_nlink(dir);
488
489 inode = autofs_iget(dir->i_sb, ino);
490 if (IS_ERR(inode)) {
491 drop_nlink(dir);
492 return PTR_ERR(inode);
493 }
494
495 d_instantiate(dentry, inode);
496 unlock_kernel();
497
498 return 0;
499}
500
501/* Get/set timeout ioctl() operation */
502#ifdef CONFIG_COMPAT
/*
 * 32-bit compat variant of the get/set-timeout ioctl: reads the new
 * timeout (in seconds) from a 32-bit user word, writes back the old
 * one, and stores the new value in jiffies.  A value that would
 * overflow when scaled by HZ disables expiry (timeout 0).
 */
static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
					 unsigned int __user *p)
{
	unsigned long ntimeout;

	/* Read the new value before overwriting *p with the old one */
	if (get_user(ntimeout, p) ||
	    put_user(sbi->exp_timeout / HZ, p))
		return -EFAULT;

	if (ntimeout > UINT_MAX/HZ)
		sbi->exp_timeout = 0;	/* would overflow: disable expiry */
	else
		sbi->exp_timeout = ntimeout * HZ;

	return 0;
}
519#endif
520
/*
 * Get/set the expiry timeout: reads the new timeout (in seconds) from
 * user space, writes back the previous one, and stores the new value
 * in jiffies.  A value that would overflow when scaled by HZ disables
 * expiry (timeout 0).
 */
static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
				  unsigned long __user *p)
{
	unsigned long ntimeout;

	/* Read the new value before overwriting *p with the old one */
	if (get_user(ntimeout, p) ||
	    put_user(sbi->exp_timeout / HZ, p))
		return -EFAULT;

	if (ntimeout > ULONG_MAX/HZ)
		sbi->exp_timeout = 0;	/* would overflow: disable expiry */
	else
		sbi->exp_timeout = ntimeout * HZ;

	return 0;
}
537
/* Report the kernel-side autofs protocol version to the daemon */
static inline int autofs_get_protover(int __user *p)
{
	return put_user(AUTOFS_PROTO_VERSION, p);
}
543
544/* Perform an expiry operation */
/*
 * Run one expiry pass: pick an expirable entry (if any) and hand its
 * name back to the daemon as an autofs_packet_expire.  Returns -EAGAIN
 * when expiry is disabled or nothing is ripe, -EFAULT on a bad user
 * buffer, 0 on success.
 */
static inline int autofs_expire_run(struct super_block *sb,
				    struct autofs_sb_info *sbi,
				    struct vfsmount *mnt,
				    struct autofs_packet_expire __user *pkt_p)
{
	struct autofs_dir_ent *ent;
	struct autofs_packet_expire pkt;

	/* Zero the packet so no stack garbage leaks to user space */
	memset(&pkt,0,sizeof pkt);

	pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
	pkt.hdr.type = autofs_ptype_expire;

	if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt)))
		return -EAGAIN;

	/* NOTE(review): assumes ent->len fits pkt.name -- entry names
	   are bounded at NAME_MAX on creation; confirm against dirhash */
	pkt.len = ent->len;
	memcpy(pkt.name, ent->name, pkt.len);
	pkt.name[pkt.len] = '\0';

	if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
		return -EFAULT;

	return 0;
}
570
571/*
572 * ioctl()'s on the root directory is the chief method for the daemon to
573 * generate kernel reactions
574 */
/*
 * Dispatch an autofs root-directory ioctl.  Commands are restricted to
 * the daemon (oz mode) or CAP_SYS_ADMIN.  Caller holds the BKL (see
 * autofs_root_ioctl / autofs_root_compat_ioctl wrappers).
 */
static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
				unsigned int cmd, unsigned long arg)
{
	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
	void __user *argp = (void __user *)arg;

	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));

	/* Reject anything outside the autofs ioctl number range */
	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
	    _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
		return -ENOTTY;

	if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch(cmd) {
	case AUTOFS_IOC_READY:	/* Wait queue: go ahead and retry */
		return autofs_wait_release(sbi,(autofs_wqt_t)arg,0);
	case AUTOFS_IOC_FAIL:	/* Wait queue: fail with ENOENT */
		return autofs_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
	case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
		autofs_catatonic_mode(sbi);
		return 0;
	case AUTOFS_IOC_PROTOVER: /* Get protocol version */
		return autofs_get_protover(argp);
#ifdef CONFIG_COMPAT
	case AUTOFS_IOC_SETTIMEOUT32:
		return autofs_compat_get_set_timeout(sbi, argp);
#endif
	case AUTOFS_IOC_SETTIMEOUT:
		return autofs_get_set_timeout(sbi, argp);
	case AUTOFS_IOC_EXPIRE:
		return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
					 argp);
	default:
		return -ENOSYS;
	}

}
614
615static long autofs_root_ioctl(struct file *filp,
616 unsigned int cmd, unsigned long arg)
617{
618 int ret;
619
620 lock_kernel();
621 ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
622 filp, cmd, arg);
623 unlock_kernel();
624
625 return ret;
626}
627
628#ifdef CONFIG_COMPAT
629static long autofs_root_compat_ioctl(struct file *filp,
630 unsigned int cmd, unsigned long arg)
631{
632 struct inode *inode = filp->f_path.dentry->d_inode;
633 int ret;
634
635 lock_kernel();
636 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
637 ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
638 else
639 ret = autofs_do_root_ioctl(inode, filp, cmd,
640 (unsigned long)compat_ptr(arg));
641 unlock_kernel();
642
643 return ret;
644}
645#endif
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
deleted file mode 100644
index 7ce9cb2c9ce2..000000000000
--- a/fs/autofs/symlink.c
+++ /dev/null
@@ -1,26 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/symlink.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include "autofs_i.h"
14
15/* Nothing to release.. */
16static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
17{
18 char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
19 nd_set_link(nd, s);
20 return NULL;
21}
22
/* Symlink inodes: generic readlink over our table-backed follow_link */
const struct inode_operations autofs_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= autofs_follow_link
};
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
deleted file mode 100644
index be46805972f0..000000000000
--- a/fs/autofs/waitq.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/* -*- linux-c -*- --------------------------------------------------------- *
2 *
3 * linux/fs/autofs/waitq.c
4 *
5 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
6 *
7 * This file is part of the Linux kernel and is made available under
8 * the terms of the GNU General Public License, version 2, or at your
9 * option, any later version, incorporated herein by reference.
10 *
11 * ------------------------------------------------------------------------- */
12
13#include <linux/slab.h>
14#include <linux/time.h>
15#include <linux/signal.h>
16#include <linux/file.h>
17#include "autofs_i.h"
18
/* We make this a static variable rather than a part of the superblock; it
   is better if we don't reassign numbers easily even across filesystems.
   Tokens are handed out monotonically, starting at 1 (0 is never used). */
static autofs_wqt_t autofs_next_wait_queue = 1;

/* These are the signals we allow interrupting a pending mount */
#define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
25
/*
 * Enter catatonic mode: the daemon is gone, so fail every pending wait
 * with -ENOENT, close the daemon pipe and drop the dentry references
 * held by the dirhash.  New lookups will fail fast (see autofs_wait).
 */
void autofs_catatonic_mode(struct autofs_sb_info *sbi)
{
	struct autofs_wait_queue *wq, *nwq;

	DPRINTK(("autofs: entering catatonic mode\n"));

	sbi->catatonic = 1;
	wq = sbi->queues;
	sbi->queues = NULL;	/* Erase all wait queues */
	while ( wq ) {
		nwq = wq->next;
		wq->status = -ENOENT; /* Magic is gone - report failure */
		/* NULL name marks the queue as released before waking */
		kfree(wq->name);
		wq->name = NULL;
		wake_up(&wq->queue);
		wq = nwq;
	}
	fput(sbi->pipe);	/* Close the pipe */
	sbi->pipe = NULL;
	autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
}
47
/*
 * Write @bytes from kernel memory to the daemon pipe, suppressing any
 * SIGPIPE the write raises unless one was already pending.  Returns
 * nonzero if the write was short (i.e. failed).
 */
static int autofs_write(struct file *file, const void *addr, int bytes)
{
	unsigned long sigpipe, flags;
	mm_segment_t fs;
	const char *data = (const char *)addr;
	ssize_t wr = 0;

	/** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/

	/* Remember whether SIGPIPE was already pending */
	sigpipe = sigismember(&current->pending.signal, SIGPIPE);

	/* Save pointer to user space and point back to kernel space */
	fs = get_fs();
	set_fs(KERNEL_DS);

	while (bytes &&
	       (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
		data += wr;
		bytes -= wr;
	}

	set_fs(fs);

	/* Keep the currently executing process from receiving a
	   SIGPIPE unless it was already supposed to get one */
	if (wr == -EPIPE && !sigpipe) {
		spin_lock_irqsave(&current->sighand->siglock, flags);
		sigdelset(&current->pending.signal, SIGPIPE);
		recalc_sigpending();
		spin_unlock_irqrestore(&current->sighand->siglock, flags);
	}

	return (bytes > 0);
}
82
/*
 * Send a "missing" packet for @wq down the daemon pipe.  If the write
 * fails the daemon is assumed dead and the fs goes catatonic.
 */
static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq)
{
	struct autofs_packet_missing pkt;

	DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token));
	autofs_say(wq->name,wq->len);

	memset(&pkt,0,sizeof pkt);	/* For security reasons */

	pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
	pkt.hdr.type = autofs_ptype_missing;
	pkt.wait_queue_token = wq->wait_queue_token;
	pkt.len = wq->len;
	/* wq->name is stored without a NUL; add one for the packet */
	memcpy(pkt.name, wq->name, pkt.len);
	pkt.name[pkt.len] = '\0';

	if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) )
		autofs_catatonic_mode(sbi);
}
102
/*
 * Block until the daemon answers a "missing" request for @name.
 * Waiters for the same name share one queue entry; wait_ctr counts the
 * sharers (plus one reference held across the possibly-blocking
 * notify).  Returns the status the daemon reported, -EINTR if
 * interrupted, or -ENOENT in catatonic mode.
 */
int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
{
	struct autofs_wait_queue *wq;
	int status;

	/* In catatonic mode, we don't wait for nobody */
	if ( sbi->catatonic )
		return -ENOENT;

	/* We shouldn't be able to get here, but just in case */
	if ( name->len > NAME_MAX )
		return -ENOENT;

	/* Is someone already waiting on this name? */
	for ( wq = sbi->queues ; wq ; wq = wq->next ) {
		if ( wq->hash == name->hash &&
		     wq->len == name->len &&
		     wq->name && !memcmp(wq->name,name->name,name->len) )
			break;
	}

	if ( !wq ) {
		/* Create a new wait queue */
		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
		if ( !wq )
			return -ENOMEM;

		/* Name is stored without a NUL terminator (len-bounded) */
		wq->name = kmalloc(name->len,GFP_KERNEL);
		if ( !wq->name ) {
			kfree(wq);
			return -ENOMEM;
		}
		wq->wait_queue_token = autofs_next_wait_queue++;
		init_waitqueue_head(&wq->queue);
		wq->hash = name->hash;
		wq->len = name->len;
		wq->status = -EINTR; /* Status return if interrupted */
		memcpy(wq->name, name->name, name->len);
		wq->next = sbi->queues;
		sbi->queues = wq;

		/* autofs_notify_daemon() may block */
		wq->wait_ctr = 2;
		autofs_notify_daemon(sbi,wq);
	} else
		wq->wait_ctr++;

	/* wq->name is NULL if and only if the lock is already released */

	if ( sbi->catatonic ) {
		/* We might have slept, so check again for catatonic mode */
		wq->status = -ENOENT;
		kfree(wq->name);
		wq->name = NULL;
	}

	if ( wq->name ) {
		/* Block all but "shutdown" signals while waiting */
		sigset_t sigmask;

		siginitsetinv(&sigmask, SHUTDOWN_SIGS);
		sigprocmask(SIG_BLOCK, &sigmask, &sigmask);

		/* NOTE(review): interruptible_sleep_on() has a classic
		   wake-before-sleep race; kept as-is, flagged only */
		interruptible_sleep_on(&wq->queue);

		sigprocmask(SIG_SETMASK, &sigmask, NULL);
	} else {
		DPRINTK(("autofs_wait: skipped sleeping\n"));
	}

	status = wq->status;

	if ( ! --wq->wait_ctr )	/* Are we the last process to need status? */
		kfree(wq);

	return status;
}
179
180
/*
 * Complete the wait identified by @wait_queue_token with @status
 * (called from the READY/FAIL ioctls).  Unlinks the queue entry,
 * clears its name so late arrivals skip sleeping, and either frees it
 * or wakes the remaining waiters.  Returns -EINVAL for an unknown
 * token.
 */
int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
{
	struct autofs_wait_queue *wq, **wql;

	/* Walk via a pointer-to-link so unlinking needs no prev pointer */
	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
		if ( wq->wait_queue_token == wait_queue_token )
			break;
	}
	if ( !wq )
		return -EINVAL;

	*wql = wq->next;	/* Unlink from chain */
	kfree(wq->name);
	wq->name = NULL;	/* Do not wait on this queue */

	wq->status = status;

	if ( ! --wq->wait_ctr )	/* Is anyone still waiting for this guy? */
		kfree(wq);
	else
		wake_up(&wq->queue);

	return 0;
}
205
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd8957..c038727b4050 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include "autofs_i.h" 15#include "autofs_i.h"
16 16
17static int autofs_get_sb(struct file_system_type *fs_type, 17static struct dentry *autofs_mount(struct file_system_type *fs_type,
18 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 18 int flags, const char *dev_name, void *data)
19{ 19{
20 return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); 20 return mount_nodev(fs_type, flags, data, autofs4_fill_super);
21} 21}
22 22
23static struct file_system_type autofs_fs_type = { 23static struct file_system_type autofs_fs_type = {
24 .owner = THIS_MODULE, 24 .owner = THIS_MODULE,
25 .name = "autofs", 25 .name = "autofs",
26 .get_sb = autofs_get_sb, 26 .mount = autofs_mount,
27 .kill_sb = autofs4_kill_sb, 27 .kill_sb = autofs4_kill_sb,
28}; 28};
29 29
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d2824885..aa4e7c7ae3c6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
913 return 0; 913 return 0;
914} 914}
915 915
916static int 916static struct dentry *
917befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, 917befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
918 void *data, struct vfsmount *mnt) 918 void *data)
919{ 919{
920 return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, 920 return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
921 mnt);
922} 921}
923 922
924static struct file_system_type befs_fs_type = { 923static struct file_system_type befs_fs_type = {
925 .owner = THIS_MODULE, 924 .owner = THIS_MODULE,
926 .name = "befs", 925 .name = "befs",
927 .get_sb = befs_get_sb, 926 .mount = befs_mount,
928 .kill_sb = kill_block_super, 927 .kill_sb = kill_block_super,
929 .fs_flags = FS_REQUIRES_DEV, 928 .fs_flags = FS_REQUIRES_DEV,
930}; 929};
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a8..76db6d7d49bb 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
450 return ret; 450 return ret;
451} 451}
452 452
453static int bfs_get_sb(struct file_system_type *fs_type, 453static struct dentry *bfs_mount(struct file_system_type *fs_type,
454 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 454 int flags, const char *dev_name, void *data)
455{ 455{
456 return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); 456 return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
457} 457}
458 458
459static struct file_system_type bfs_fs_type = { 459static struct file_system_type bfs_fs_type = {
460 .owner = THIS_MODULE, 460 .owner = THIS_MODULE,
461 .name = "bfs", 461 .name = "bfs",
462 .get_sb = bfs_get_sb, 462 .mount = bfs_mount,
463 .kill_sb = kill_block_super, 463 .kill_sb = kill_block_super,
464 .fs_flags = FS_REQUIRES_DEV, 464 .fs_flags = FS_REQUIRES_DEV,
465}; 465};
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 29990f0eee0c..1befe2ec8186 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
706 return err; 706 return err;
707} 707}
708 708
709static int bm_get_sb(struct file_system_type *fs_type, 709static struct dentry *bm_mount(struct file_system_type *fs_type,
710 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 710 int flags, const char *dev_name, void *data)
711{ 711{
712 return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); 712 return mount_single(fs_type, flags, data, bm_fill_super);
713} 713}
714 714
715static struct linux_binfmt misc_format = { 715static struct linux_binfmt misc_format = {
@@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = {
720static struct file_system_type bm_fs_type = { 720static struct file_system_type bm_fs_type = {
721 .owner = THIS_MODULE, 721 .owner = THIS_MODULE,
722 .name = "binfmt_misc", 722 .name = "binfmt_misc",
723 .get_sb = bm_get_sb, 723 .mount = bm_mount,
724 .kill_sb = kill_litter_super, 724 .kill_sb = kill_litter_super,
725}; 725};
726 726
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b628a6ce..06e8ff12b97c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = {
464 .evict_inode = bdev_evict_inode, 464 .evict_inode = bdev_evict_inode,
465}; 465};
466 466
467static int bd_get_sb(struct file_system_type *fs_type, 467static struct dentry *bd_mount(struct file_system_type *fs_type,
468 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 468 int flags, const char *dev_name, void *data)
469{ 469{
470 return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); 470 return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
471} 471}
472 472
473static struct file_system_type bd_type = { 473static struct file_system_type bd_type = {
474 .name = "bdev", 474 .name = "bdev",
475 .get_sb = bd_get_sb, 475 .mount = bd_mount,
476 .kill_sb = kill_anon_super, 476 .kill_sb = kill_anon_super,
477}; 477};
478 478
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..7845d1f7d1d9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
163 */ 163 */
164static void end_compressed_bio_read(struct bio *bio, int err) 164static void end_compressed_bio_read(struct bio *bio, int err)
165{ 165{
166 struct extent_io_tree *tree;
167 struct compressed_bio *cb = bio->bi_private; 166 struct compressed_bio *cb = bio->bi_private;
168 struct inode *inode; 167 struct inode *inode;
169 struct page *page; 168 struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
187 /* ok, we're the last bio for this extent, lets start 186 /* ok, we're the last bio for this extent, lets start
188 * the decompression. 187 * the decompression.
189 */ 188 */
190 tree = &BTRFS_I(inode)->io_tree;
191 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 189 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
192 cb->start, 190 cb->start,
193 cb->orig_bio->bi_io_vec, 191 cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..9ac171599258 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
200 struct extent_buffer **cow_ret, u64 new_root_objectid) 200 struct extent_buffer **cow_ret, u64 new_root_objectid)
201{ 201{
202 struct extent_buffer *cow; 202 struct extent_buffer *cow;
203 u32 nritems;
204 int ret = 0; 203 int ret = 0;
205 int level; 204 int level;
206 struct btrfs_disk_key disk_key; 205 struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
210 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 209 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
211 210
212 level = btrfs_header_level(buf); 211 level = btrfs_header_level(buf);
213 nritems = btrfs_header_nritems(buf);
214 if (level == 0) 212 if (level == 0)
215 btrfs_item_key(buf, &disk_key, 0); 213 btrfs_item_key(buf, &disk_key, 0);
216 else 214 else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1008 int wret; 1006 int wret;
1009 int pslot; 1007 int pslot;
1010 int orig_slot = path->slots[level]; 1008 int orig_slot = path->slots[level];
1011 int err_on_enospc = 0;
1012 u64 orig_ptr; 1009 u64 orig_ptr;
1013 1010
1014 if (level == 0) 1011 if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1071 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1068 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1072 return 0; 1069 return 0;
1073 1070
1074 if (btrfs_header_nritems(mid) < 2) 1071 btrfs_header_nritems(mid);
1075 err_on_enospc = 1;
1076 1072
1077 left = read_node_slot(root, parent, pslot - 1); 1073 left = read_node_slot(root, parent, pslot - 1);
1078 if (left) { 1074 if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1103 wret = push_node_left(trans, root, left, mid, 1); 1099 wret = push_node_left(trans, root, left, mid, 1);
1104 if (wret < 0) 1100 if (wret < 0)
1105 ret = wret; 1101 ret = wret;
1106 if (btrfs_header_nritems(mid) < 2) 1102 btrfs_header_nritems(mid);
1107 err_on_enospc = 1;
1108 } 1103 }
1109 1104
1110 /* 1105 /*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1224 int wret; 1219 int wret;
1225 int pslot; 1220 int pslot;
1226 int orig_slot = path->slots[level]; 1221 int orig_slot = path->slots[level];
1227 u64 orig_ptr;
1228 1222
1229 if (level == 0) 1223 if (level == 0)
1230 return 1; 1224 return 1;
1231 1225
1232 mid = path->nodes[level]; 1226 mid = path->nodes[level];
1233 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1227 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1234 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1235 1228
1236 if (level < BTRFS_MAX_LEVEL - 1) 1229 if (level < BTRFS_MAX_LEVEL - 1)
1237 parent = path->nodes[level + 1]; 1230 parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1577 blocksize = btrfs_level_size(root, level - 1); 1570 blocksize = btrfs_level_size(root, level - 1);
1578 1571
1579 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1572 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1580 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1573 if (tmp) {
1581 /* 1574 if (btrfs_buffer_uptodate(tmp, 0)) {
1582 * we found an up to date block without sleeping, return 1575 if (btrfs_buffer_uptodate(tmp, gen)) {
1583 * right away 1576 /*
1584 */ 1577 * we found an up to date block without
1585 *eb_ret = tmp; 1578 * sleeping, return
1586 return 0; 1579 * right away
1580 */
1581 *eb_ret = tmp;
1582 return 0;
1583 }
1584 /* the pages were up to date, but we failed
1585 * the generation number check. Do a full
1586 * read for the generation number that is correct.
1587 * We must do this without dropping locks so
1588 * we can trust our generation number
1589 */
1590 free_extent_buffer(tmp);
1591 tmp = read_tree_block(root, blocknr, blocksize, gen);
1592 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1593 *eb_ret = tmp;
1594 return 0;
1595 }
1596 free_extent_buffer(tmp);
1597 btrfs_release_path(NULL, p);
1598 return -EIO;
1599 }
1587 } 1600 }
1588 1601
1589 /* 1602 /*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1596 btrfs_unlock_up_safe(p, level + 1); 1609 btrfs_unlock_up_safe(p, level + 1);
1597 btrfs_set_path_blocking(p); 1610 btrfs_set_path_blocking(p);
1598 1611
1599 if (tmp) 1612 free_extent_buffer(tmp);
1600 free_extent_buffer(tmp);
1601 if (p->reada) 1613 if (p->reada)
1602 reada_for_search(root, p, level, slot, key->objectid); 1614 reada_for_search(root, p, level, slot, key->objectid);
1603 1615
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2548{ 2560{
2549 struct btrfs_disk_key disk_key; 2561 struct btrfs_disk_key disk_key;
2550 struct extent_buffer *right = path->nodes[0]; 2562 struct extent_buffer *right = path->nodes[0];
2551 int slot;
2552 int i; 2563 int i;
2553 int push_space = 0; 2564 int push_space = 0;
2554 int push_items = 0; 2565 int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2560 u32 this_item_size; 2571 u32 this_item_size;
2561 u32 old_left_item_size; 2572 u32 old_left_item_size;
2562 2573
2563 slot = path->slots[1];
2564
2565 if (empty) 2574 if (empty)
2566 nr = min(right_nritems, max_slot); 2575 nr = min(right_nritems, max_slot);
2567 else 2576 else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3330{ 3339{
3331 int ret = 0; 3340 int ret = 0;
3332 int slot; 3341 int slot;
3333 int slot_orig;
3334 struct extent_buffer *leaf; 3342 struct extent_buffer *leaf;
3335 struct btrfs_item *item; 3343 struct btrfs_item *item;
3336 u32 nritems; 3344 u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3340 unsigned int size_diff; 3348 unsigned int size_diff;
3341 int i; 3349 int i;
3342 3350
3343 slot_orig = path->slots[0];
3344 leaf = path->nodes[0]; 3351 leaf = path->nodes[0];
3345 slot = path->slots[0]; 3352 slot = path->slots[0];
3346 3353
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3445{ 3452{
3446 int ret = 0; 3453 int ret = 0;
3447 int slot; 3454 int slot;
3448 int slot_orig;
3449 struct extent_buffer *leaf; 3455 struct extent_buffer *leaf;
3450 struct btrfs_item *item; 3456 struct btrfs_item *item;
3451 u32 nritems; 3457 u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3454 unsigned int old_size; 3460 unsigned int old_size;
3455 int i; 3461 int i;
3456 3462
3457 slot_orig = path->slots[0];
3458 leaf = path->nodes[0]; 3463 leaf = path->nodes[0];
3459 3464
3460 nritems = btrfs_header_nritems(leaf); 3465 nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3787 struct btrfs_key *cpu_key, u32 *data_size, 3792 struct btrfs_key *cpu_key, u32 *data_size,
3788 int nr) 3793 int nr)
3789{ 3794{
3790 struct extent_buffer *leaf;
3791 int ret = 0; 3795 int ret = 0;
3792 int slot; 3796 int slot;
3793 int i; 3797 int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3804 if (ret < 0) 3808 if (ret < 0)
3805 goto out; 3809 goto out;
3806 3810
3807 leaf = path->nodes[0];
3808 slot = path->slots[0]; 3811 slot = path->slots[0];
3809 BUG_ON(slot < 0); 3812 BUG_ON(slot < 0);
3810 3813
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..8db9234f6b41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
99 */ 99 */
100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
101 101
102/* For storing free space cache */
103#define BTRFS_FREE_SPACE_OBJECTID -11ULL
104
102/* dummy objectid represents multiple objectids */ 105/* dummy objectid represents multiple objectids */
103#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 106#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
104 107
@@ -265,6 +268,22 @@ struct btrfs_chunk {
265 /* additional stripes go here */ 268 /* additional stripes go here */
266} __attribute__ ((__packed__)); 269} __attribute__ ((__packed__));
267 270
271#define BTRFS_FREE_SPACE_EXTENT 1
272#define BTRFS_FREE_SPACE_BITMAP 2
273
274struct btrfs_free_space_entry {
275 __le64 offset;
276 __le64 bytes;
277 u8 type;
278} __attribute__ ((__packed__));
279
280struct btrfs_free_space_header {
281 struct btrfs_disk_key location;
282 __le64 generation;
283 __le64 num_entries;
284 __le64 num_bitmaps;
285} __attribute__ ((__packed__));
286
268static inline unsigned long btrfs_chunk_item_size(int num_stripes) 287static inline unsigned long btrfs_chunk_item_size(int num_stripes)
269{ 288{
270 BUG_ON(num_stripes == 0); 289 BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
365 384
366 char label[BTRFS_LABEL_SIZE]; 385 char label[BTRFS_LABEL_SIZE];
367 386
387 __le64 cache_generation;
388
368 /* future expansion */ 389 /* future expansion */
369 __le64 reserved[32]; 390 __le64 reserved[31];
370 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 391 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
371} __attribute__ ((__packed__)); 392} __attribute__ ((__packed__));
372 393
@@ -375,13 +396,15 @@ struct btrfs_super_block {
375 * ones specified below then we will fail to mount 396 * ones specified below then we will fail to mount
376 */ 397 */
377#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
378#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) 399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
400#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
379 401
380#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 402#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
381#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 403#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
382#define BTRFS_FEATURE_INCOMPAT_SUPP \ 404#define BTRFS_FEATURE_INCOMPAT_SUPP \
383 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 405 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
384 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) 406 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
407 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
385 408
386/* 409/*
387 * A leaf is full of items. offset and size tell us where to find 410 * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
675struct btrfs_space_info { 698struct btrfs_space_info {
676 u64 flags; 699 u64 flags;
677 700
678 u64 total_bytes; /* total bytes in the space */ 701 u64 total_bytes; /* total bytes in the space,
702 this doesn't take mirrors into account */
679 u64 bytes_used; /* total bytes used, 703 u64 bytes_used; /* total bytes used,
680 this does't take mirrors into account */ 704 this does't take mirrors into account */
681 u64 bytes_pinned; /* total bytes pinned, will be freed when the 705 u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
687 u64 bytes_may_use; /* number of bytes that may be used for 711 u64 bytes_may_use; /* number of bytes that may be used for
688 delalloc/allocations */ 712 delalloc/allocations */
689 u64 disk_used; /* total bytes used on disk */ 713 u64 disk_used; /* total bytes used on disk */
714 u64 disk_total; /* total bytes on disk, takes mirrors into
715 account */
690 716
691 int full; /* indicates that we cannot allocate any more 717 int full; /* indicates that we cannot allocate any more
692 chunks for this space */ 718 chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
750 BTRFS_CACHE_FINISHED = 2, 776 BTRFS_CACHE_FINISHED = 2,
751}; 777};
752 778
779enum btrfs_disk_cache_state {
780 BTRFS_DC_WRITTEN = 0,
781 BTRFS_DC_ERROR = 1,
782 BTRFS_DC_CLEAR = 2,
783 BTRFS_DC_SETUP = 3,
784 BTRFS_DC_NEED_WRITE = 4,
785};
786
753struct btrfs_caching_control { 787struct btrfs_caching_control {
754 struct list_head list; 788 struct list_head list;
755 struct mutex mutex; 789 struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
763 struct btrfs_key key; 797 struct btrfs_key key;
764 struct btrfs_block_group_item item; 798 struct btrfs_block_group_item item;
765 struct btrfs_fs_info *fs_info; 799 struct btrfs_fs_info *fs_info;
800 struct inode *inode;
766 spinlock_t lock; 801 spinlock_t lock;
767 u64 pinned; 802 u64 pinned;
768 u64 reserved; 803 u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
773 int extents_thresh; 808 int extents_thresh;
774 int free_extents; 809 int free_extents;
775 int total_bitmaps; 810 int total_bitmaps;
776 int ro; 811 int ro:1;
777 int dirty; 812 int dirty:1;
813 int iref:1;
814
815 int disk_cache_state;
778 816
779 /* cache tracking stuff */ 817 /* cache tracking stuff */
780 int cached; 818 int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
863 struct btrfs_transaction *running_transaction; 901 struct btrfs_transaction *running_transaction;
864 wait_queue_head_t transaction_throttle; 902 wait_queue_head_t transaction_throttle;
865 wait_queue_head_t transaction_wait; 903 wait_queue_head_t transaction_wait;
904 wait_queue_head_t transaction_blocked_wait;
866 wait_queue_head_t async_submit_wait; 905 wait_queue_head_t async_submit_wait;
867 906
868 struct btrfs_super_block super_copy; 907 struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
949 struct btrfs_workers endio_meta_workers; 988 struct btrfs_workers endio_meta_workers;
950 struct btrfs_workers endio_meta_write_workers; 989 struct btrfs_workers endio_meta_write_workers;
951 struct btrfs_workers endio_write_workers; 990 struct btrfs_workers endio_write_workers;
991 struct btrfs_workers endio_freespace_worker;
952 struct btrfs_workers submit_workers; 992 struct btrfs_workers submit_workers;
953 /* 993 /*
954 * fixup workers take dirty pages that didn't properly go through 994 * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
1192#define BTRFS_MOUNT_NOSSD (1 << 9) 1232#define BTRFS_MOUNT_NOSSD (1 << 9)
1193#define BTRFS_MOUNT_DISCARD (1 << 10) 1233#define BTRFS_MOUNT_DISCARD (1 << 10)
1194#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1234#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1235#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1236#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1237#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1195 1238
1196#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1239#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1197#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1240#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
1665 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1708 write_eb_member(eb, item, struct btrfs_dir_item, location, key);
1666} 1709}
1667 1710
1711BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
1712 num_entries, 64);
1713BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
1714 num_bitmaps, 64);
1715BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
1716 generation, 64);
1717
1718static inline void btrfs_free_space_key(struct extent_buffer *eb,
1719 struct btrfs_free_space_header *h,
1720 struct btrfs_disk_key *key)
1721{
1722 read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1723}
1724
1725static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
1726 struct btrfs_free_space_header *h,
1727 struct btrfs_disk_key *key)
1728{
1729 write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1730}
1731
1668/* struct btrfs_disk_key */ 1732/* struct btrfs_disk_key */
1669BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1733BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
1670 objectid, 64); 1734 objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1876 incompat_flags, 64); 1940 incompat_flags, 64);
1877BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1941BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
1878 csum_type, 16); 1942 csum_type, 16);
1943BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
1944 cache_generation, 64);
1879 1945
1880static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1946static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
1881{ 1947{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
1988 return file->f_path.dentry; 2054 return file->f_path.dentry;
1989} 2055}
1990 2056
2057static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2058{
2059 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
2060 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
2061}
2062
1991/* extent-tree.c */ 2063/* extent-tree.c */
1992void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2064void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1993int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2065int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
2079void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 2151void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
2080int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2152int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
2081 struct btrfs_root *root, 2153 struct btrfs_root *root,
2082 int num_items, int *retries); 2154 int num_items);
2083void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 2155void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
2084 struct btrfs_root *root); 2156 struct btrfs_root *root);
2085int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 2157int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
2100int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 2172int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
2101 struct btrfs_root *root, 2173 struct btrfs_root *root,
2102 struct btrfs_block_rsv *block_rsv, 2174 struct btrfs_block_rsv *block_rsv,
2103 u64 num_bytes, int *retries); 2175 u64 num_bytes);
2104int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 2176int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
2105 struct btrfs_root *root, 2177 struct btrfs_root *root,
2106 struct btrfs_block_rsv *block_rsv, 2178 struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
2115 struct btrfs_block_group_cache *cache); 2187 struct btrfs_block_group_cache *cache);
2116int btrfs_set_block_group_rw(struct btrfs_root *root, 2188int btrfs_set_block_group_rw(struct btrfs_root *root,
2117 struct btrfs_block_group_cache *cache); 2189 struct btrfs_block_group_cache *cache);
2190void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2118/* ctree.c */ 2191/* ctree.c */
2119int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2192int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2120 int level, int *slot); 2193 int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2373 u32 min_type); 2446 u32 min_type);
2374 2447
2375int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2448int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2376int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); 2449int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
2450 int sync);
2377int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2451int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2378 struct extent_state **cached_state); 2452 struct extent_state **cached_state);
2379int btrfs_writepages(struct address_space *mapping, 2453int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
2426int btrfs_prealloc_file_range(struct inode *inode, int mode, 2500int btrfs_prealloc_file_range(struct inode *inode, int mode,
2427 u64 start, u64 num_bytes, u64 min_size, 2501 u64 start, u64 num_bytes, u64 min_size,
2428 loff_t actual_len, u64 *alloc_hint); 2502 loff_t actual_len, u64 *alloc_hint);
2503int btrfs_prealloc_file_range_trans(struct inode *inode,
2504 struct btrfs_trans_handle *trans, int mode,
2505 u64 start, u64 num_bytes, u64 min_size,
2506 loff_t actual_len, u64 *alloc_hint);
2429extern const struct dentry_operations btrfs_dentry_operations; 2507extern const struct dentry_operations btrfs_dentry_operations;
2430 2508
2431/* ioctl.c */ 2509/* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa49..f0cad5ae5be7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
427 ret = btrfs_truncate_item(trans, root, path, 427 ret = btrfs_truncate_item(trans, root, path,
428 item_len - sub_item_len, 1); 428 item_len - sub_item_len, 1);
429 } 429 }
430 return 0; 430 return ret;
431} 431}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed0..fb827d0d7181 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
338 struct extent_io_tree *tree; 338 struct extent_io_tree *tree;
339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
340 u64 found_start; 340 u64 found_start;
341 int found_level;
342 unsigned long len; 341 unsigned long len;
343 struct extent_buffer *eb; 342 struct extent_buffer *eb;
344 int ret; 343 int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
369 WARN_ON(1); 368 WARN_ON(1);
370 goto err; 369 goto err;
371 } 370 }
372 found_level = btrfs_header_level(eb);
373
374 csum_tree_block(root, eb, 0); 371 csum_tree_block(root, eb, 0);
375err: 372err:
376 free_extent_buffer(eb); 373 free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
481 end_io_wq->work.flags = 0; 478 end_io_wq->work.flags = 0;
482 479
483 if (bio->bi_rw & REQ_WRITE) { 480 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 481 if (end_io_wq->metadata == 1)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 482 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 483 &end_io_wq->work);
484 else if (end_io_wq->metadata == 2)
485 btrfs_queue_worker(&fs_info->endio_freespace_worker,
486 &end_io_wq->work);
487 else 487 else
488 btrfs_queue_worker(&fs_info->endio_write_workers, 488 btrfs_queue_worker(&fs_info->endio_write_workers,
489 &end_io_wq->work); 489 &end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
497 } 497 }
498} 498}
499 499
500/*
501 * For the metadata arg you want
502 *
503 * 0 - if data
504 * 1 - if normal metadta
505 * 2 - if writing to the free space cache area
506 */
500int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 507int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
501 int metadata) 508 int metadata)
502{ 509{
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
533 540
534static void run_one_async_start(struct btrfs_work *work) 541static void run_one_async_start(struct btrfs_work *work)
535{ 542{
536 struct btrfs_fs_info *fs_info;
537 struct async_submit_bio *async; 543 struct async_submit_bio *async;
538 544
539 async = container_of(work, struct async_submit_bio, work); 545 async = container_of(work, struct async_submit_bio, work);
540 fs_info = BTRFS_I(async->inode)->root->fs_info;
541 async->submit_bio_start(async->inode, async->rw, async->bio, 546 async->submit_bio_start(async->inode, async->rw, async->bio,
542 async->mirror_num, async->bio_flags, 547 async->mirror_num, async->bio_flags,
543 async->bio_offset); 548 async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
850 u32 blocksize, u64 parent_transid) 855 u32 blocksize, u64 parent_transid)
851{ 856{
852 struct extent_buffer *buf = NULL; 857 struct extent_buffer *buf = NULL;
853 struct inode *btree_inode = root->fs_info->btree_inode;
854 struct extent_io_tree *io_tree;
855 int ret; 858 int ret;
856 859
857 io_tree = &BTRFS_I(btree_inode)->io_tree;
858
859 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 860 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
860 if (!buf) 861 if (!buf)
861 return NULL; 862 return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
1377 u64 start = 0; 1378 u64 start = 0;
1378 struct page *page; 1379 struct page *page;
1379 struct extent_io_tree *io_tree = NULL; 1380 struct extent_io_tree *io_tree = NULL;
1380 struct btrfs_fs_info *info = NULL;
1381 struct bio_vec *bvec; 1381 struct bio_vec *bvec;
1382 int i; 1382 int i;
1383 int ret; 1383 int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
1396 buf_len = page->private >> 2; 1396 buf_len = page->private >> 2;
1397 start = page_offset(page) + bvec->bv_offset; 1397 start = page_offset(page) + bvec->bv_offset;
1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1399 info = BTRFS_I(page->mapping->host)->root->fs_info;
1400 } 1399 }
1401 /* are we fully contained in this bio? */ 1400 /* are we fully contained in this bio? */
1402 if (buf_len <= length) 1401 if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1680 1679
1681 init_waitqueue_head(&fs_info->transaction_throttle); 1680 init_waitqueue_head(&fs_info->transaction_throttle);
1682 init_waitqueue_head(&fs_info->transaction_wait); 1681 init_waitqueue_head(&fs_info->transaction_wait);
1682 init_waitqueue_head(&fs_info->transaction_blocked_wait);
1683 init_waitqueue_head(&fs_info->async_submit_wait); 1683 init_waitqueue_head(&fs_info->async_submit_wait);
1684 1684
1685 __setup_root(4096, 4096, 4096, 4096, tree_root, 1685 __setup_root(4096, 4096, 4096, 4096, tree_root,
1686 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1686 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1687 1687
1688
1689 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1688 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1690 if (!bh) 1689 if (!bh)
1691 goto fail_iput; 1690 goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1775 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1776 fs_info->thread_pool_size, 1775 fs_info->thread_pool_size,
1777 &fs_info->generic_worker); 1776 &fs_info->generic_worker);
1777 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1778 1, &fs_info->generic_worker);
1778 1779
1779 /* 1780 /*
1780 * endios are largely parallel and should have a very 1781 * endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1795 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1796 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1796 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1797 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1797 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1798 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1799 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1798 1800
1799 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1801 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1800 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1802 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1993 if (!(sb->s_flags & MS_RDONLY)) { 1995 if (!(sb->s_flags & MS_RDONLY)) {
1994 down_read(&fs_info->cleanup_work_sem); 1996 down_read(&fs_info->cleanup_work_sem);
1995 btrfs_orphan_cleanup(fs_info->fs_root); 1997 btrfs_orphan_cleanup(fs_info->fs_root);
1998 btrfs_orphan_cleanup(fs_info->tree_root);
1996 up_read(&fs_info->cleanup_work_sem); 1999 up_read(&fs_info->cleanup_work_sem);
1997 } 2000 }
1998 2001
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2038 btrfs_stop_workers(&fs_info->endio_meta_workers);
2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2039 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2040 btrfs_stop_workers(&fs_info->endio_write_workers);
2041 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2038 btrfs_stop_workers(&fs_info->submit_workers); 2042 btrfs_stop_workers(&fs_info->submit_workers);
2039fail_iput: 2043fail_iput:
2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2044 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
2410 fs_info->closing = 1; 2414 fs_info->closing = 1;
2411 smp_mb(); 2415 smp_mb();
2412 2416
2417 btrfs_put_block_group_cache(fs_info);
2413 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2418 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2414 ret = btrfs_commit_super(root); 2419 ret = btrfs_commit_super(root);
2415 if (ret) 2420 if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
2456 btrfs_stop_workers(&fs_info->endio_meta_workers); 2461 btrfs_stop_workers(&fs_info->endio_meta_workers);
2457 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2462 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2458 btrfs_stop_workers(&fs_info->endio_write_workers); 2463 btrfs_stop_workers(&fs_info->endio_write_workers);
2464 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2459 btrfs_stop_workers(&fs_info->submit_workers); 2465 btrfs_stop_workers(&fs_info->submit_workers);
2460 2466
2461 btrfs_close_devices(fs_info->fs_devices); 2467 btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
242 return NULL; 242 return NULL;
243 } 243 }
244 244
245 /* We're loading it the fast way, so we don't have a caching_ctl. */
246 if (!cache->caching_ctl) {
247 spin_unlock(&cache->lock);
248 return NULL;
249 }
250
245 ctl = cache->caching_ctl; 251 ctl = cache->caching_ctl;
246 atomic_inc(&ctl->count); 252 atomic_inc(&ctl->count);
247 spin_unlock(&cache->lock); 253 spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
421 return 0; 427 return 0;
422} 428}
423 429
424static int cache_block_group(struct btrfs_block_group_cache *cache) 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans,
432 int load_cache_only)
425{ 433{
426 struct btrfs_fs_info *fs_info = cache->fs_info; 434 struct btrfs_fs_info *fs_info = cache->fs_info;
427 struct btrfs_caching_control *caching_ctl; 435 struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
432 if (cache->cached != BTRFS_CACHE_NO) 440 if (cache->cached != BTRFS_CACHE_NO)
433 return 0; 441 return 0;
434 442
443 /*
444 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking.
446 */
447 if (!trans->transaction->in_commit) {
448 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock);
451 return 0;
452 }
453 cache->cached = BTRFS_CACHE_STARTED;
454 spin_unlock(&cache->lock);
455
456 ret = load_free_space_cache(fs_info, cache);
457
458 spin_lock(&cache->lock);
459 if (ret == 1) {
460 cache->cached = BTRFS_CACHE_FINISHED;
461 cache->last_byte_to_unpin = (u64)-1;
462 } else {
463 cache->cached = BTRFS_CACHE_NO;
464 }
465 spin_unlock(&cache->lock);
466 if (ret == 1)
467 return 0;
468 }
469
470 if (load_cache_only)
471 return 0;
472
435 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 473 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
436 BUG_ON(!caching_ctl); 474 BUG_ON(!caching_ctl);
437 475
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
509 547
510 rcu_read_lock(); 548 rcu_read_lock();
511 list_for_each_entry_rcu(found, head, list) { 549 list_for_each_entry_rcu(found, head, list) {
512 if (found->flags == flags) { 550 if (found->flags & flags) {
513 rcu_read_unlock(); 551 rcu_read_unlock();
514 return found; 552 return found;
515 } 553 }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
542 return num; 580 return num;
543} 581}
544 582
583static u64 div_factor_fine(u64 num, int factor)
584{
585 if (factor == 100)
586 return num;
587 num *= factor;
588 do_div(num, 100);
589 return num;
590}
591
545u64 btrfs_find_block_group(struct btrfs_root *root, 592u64 btrfs_find_block_group(struct btrfs_root *root,
546 u64 search_start, u64 search_hint, int owner) 593 u64 search_start, u64 search_hint, int owner)
547{ 594{
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
2687 return cache; 2734 return cache;
2688} 2735}
2689 2736
2737static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2738 struct btrfs_trans_handle *trans,
2739 struct btrfs_path *path)
2740{
2741 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL;
2743 u64 alloc_hint = 0;
2744 int num_pages = 0;
2745 int retries = 0;
2746 int ret = 0;
2747
2748 /*
2749 * If this block group is smaller than 100 megs don't bother caching the
2750 * block group.
2751 */
2752 if (block_group->key.offset < (100 * 1024 * 1024)) {
2753 spin_lock(&block_group->lock);
2754 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2755 spin_unlock(&block_group->lock);
2756 return 0;
2757 }
2758
2759again:
2760 inode = lookup_free_space_inode(root, block_group, path);
2761 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2762 ret = PTR_ERR(inode);
2763 btrfs_release_path(root, path);
2764 goto out;
2765 }
2766
2767 if (IS_ERR(inode)) {
2768 BUG_ON(retries);
2769 retries++;
2770
2771 if (block_group->ro)
2772 goto out_free;
2773
2774 ret = create_free_space_inode(root, trans, block_group, path);
2775 if (ret)
2776 goto out_free;
2777 goto again;
2778 }
2779
2780 /*
2781 * We want to set the generation to 0, that way if anything goes wrong
2782 * from here on out we know not to trust this cache when we load up next
2783 * time.
2784 */
2785 BTRFS_I(inode)->generation = 0;
2786 ret = btrfs_update_inode(trans, root, inode);
2787 WARN_ON(ret);
2788
2789 if (i_size_read(inode) > 0) {
2790 ret = btrfs_truncate_free_space_cache(root, trans, path,
2791 inode);
2792 if (ret)
2793 goto out_put;
2794 }
2795
2796 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2798 spin_unlock(&block_group->lock);
2799 goto out_put;
2800 }
2801 spin_unlock(&block_group->lock);
2802
2803 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2804 if (!num_pages)
2805 num_pages = 1;
2806
2807 /*
2808 * Just to make absolutely sure we have enough space, we're going to
2809 * preallocate 12 pages worth of space for each block group. In
2810 * practice we ought to use at most 8, but we need extra space so we can
2811 * add our header and have a terminator between the extents and the
2812 * bitmaps.
2813 */
2814 num_pages *= 16;
2815 num_pages *= PAGE_CACHE_SIZE;
2816
2817 ret = btrfs_check_data_free_space(inode, num_pages);
2818 if (ret)
2819 goto out_put;
2820
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages,
2823 &alloc_hint);
2824 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put:
2826 iput(inode);
2827out_free:
2828 btrfs_release_path(root, path);
2829out:
2830 spin_lock(&block_group->lock);
2831 if (ret)
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock);
2836
2837 return ret;
2838}
2839
2690int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2840int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2691 struct btrfs_root *root) 2841 struct btrfs_root *root)
2692{ 2842{
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2699 if (!path) 2849 if (!path)
2700 return -ENOMEM; 2850 return -ENOMEM;
2701 2851
2852again:
2853 while (1) {
2854 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2855 while (cache) {
2856 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2857 break;
2858 cache = next_block_group(root, cache);
2859 }
2860 if (!cache) {
2861 if (last == 0)
2862 break;
2863 last = 0;
2864 continue;
2865 }
2866 err = cache_save_setup(cache, trans, path);
2867 last = cache->key.objectid + cache->key.offset;
2868 btrfs_put_block_group(cache);
2869 }
2870
2702 while (1) { 2871 while (1) {
2703 if (last == 0) { 2872 if (last == 0) {
2704 err = btrfs_run_delayed_refs(trans, root, 2873 err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2708 2877
2709 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2878 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2710 while (cache) { 2879 while (cache) {
2880 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2881 btrfs_put_block_group(cache);
2882 goto again;
2883 }
2884
2711 if (cache->dirty) 2885 if (cache->dirty)
2712 break; 2886 break;
2713 cache = next_block_group(root, cache); 2887 cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2719 continue; 2893 continue;
2720 } 2894 }
2721 2895
2896 if (cache->disk_cache_state == BTRFS_DC_SETUP)
2897 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2722 cache->dirty = 0; 2898 cache->dirty = 0;
2723 last = cache->key.objectid + cache->key.offset; 2899 last = cache->key.objectid + cache->key.offset;
2724 2900
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2727 btrfs_put_block_group(cache); 2903 btrfs_put_block_group(cache);
2728 } 2904 }
2729 2905
2906 while (1) {
2907 /*
2908 * I don't think this is needed since we're just marking our
2909 * preallocated extent as written, but just in case it can't
2910 * hurt.
2911 */
2912 if (last == 0) {
2913 err = btrfs_run_delayed_refs(trans, root,
2914 (unsigned long)-1);
2915 BUG_ON(err);
2916 }
2917
2918 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2919 while (cache) {
2920 /*
2921 * Really this shouldn't happen, but it could if we
2922 * couldn't write the entire preallocated extent and
2923 * splitting the extent resulted in a new block.
2924 */
2925 if (cache->dirty) {
2926 btrfs_put_block_group(cache);
2927 goto again;
2928 }
2929 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2930 break;
2931 cache = next_block_group(root, cache);
2932 }
2933 if (!cache) {
2934 if (last == 0)
2935 break;
2936 last = 0;
2937 continue;
2938 }
2939
2940 btrfs_write_out_cache(root, trans, cache, path);
2941
2942 /*
2943 * If we didn't have an error then the cache state is still
2944 * NEED_WRITE, so we can set it to WRITTEN.
2945 */
2946 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2947 cache->disk_cache_state = BTRFS_DC_WRITTEN;
2948 last = cache->key.objectid + cache->key.offset;
2949 btrfs_put_block_group(cache);
2950 }
2951
2730 btrfs_free_path(path); 2952 btrfs_free_path(path);
2731 return 0; 2953 return 0;
2732} 2954}
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2762 if (found) { 2984 if (found) {
2763 spin_lock(&found->lock); 2985 spin_lock(&found->lock);
2764 found->total_bytes += total_bytes; 2986 found->total_bytes += total_bytes;
2987 found->disk_total += total_bytes * factor;
2765 found->bytes_used += bytes_used; 2988 found->bytes_used += bytes_used;
2766 found->disk_used += bytes_used * factor; 2989 found->disk_used += bytes_used * factor;
2767 found->full = 0; 2990 found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2781 BTRFS_BLOCK_GROUP_SYSTEM | 3004 BTRFS_BLOCK_GROUP_SYSTEM |
2782 BTRFS_BLOCK_GROUP_METADATA); 3005 BTRFS_BLOCK_GROUP_METADATA);
2783 found->total_bytes = total_bytes; 3006 found->total_bytes = total_bytes;
3007 found->disk_total = total_bytes * factor;
2784 found->bytes_used = bytes_used; 3008 found->bytes_used = bytes_used;
2785 found->disk_used = bytes_used * factor; 3009 found->disk_used = bytes_used * factor;
2786 found->bytes_pinned = 0; 3010 found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
2882 struct btrfs_space_info *data_sinfo; 3106 struct btrfs_space_info *data_sinfo;
2883 struct btrfs_root *root = BTRFS_I(inode)->root; 3107 struct btrfs_root *root = BTRFS_I(inode)->root;
2884 u64 used; 3108 u64 used;
2885 int ret = 0, committed = 0; 3109 int ret = 0, committed = 0, alloc_chunk = 1;
2886 3110
2887 /* make sure bytes are sectorsize aligned */ 3111 /* make sure bytes are sectorsize aligned */
2888 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3112 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2889 3113
3114 if (root == root->fs_info->tree_root) {
3115 alloc_chunk = 0;
3116 committed = 1;
3117 }
3118
2890 data_sinfo = BTRFS_I(inode)->space_info; 3119 data_sinfo = BTRFS_I(inode)->space_info;
2891 if (!data_sinfo) 3120 if (!data_sinfo)
2892 goto alloc; 3121 goto alloc;
@@ -2905,7 +3134,7 @@ again:
2905 * if we don't have enough free bytes in this space then we need 3134 * if we don't have enough free bytes in this space then we need
2906 * to alloc a new chunk. 3135 * to alloc a new chunk.
2907 */ 3136 */
2908 if (!data_sinfo->full) { 3137 if (!data_sinfo->full && alloc_chunk) {
2909 u64 alloc_target; 3138 u64 alloc_target;
2910 3139
2911 data_sinfo->force_alloc = 1; 3140 data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
2997 rcu_read_unlock(); 3226 rcu_read_unlock();
2998} 3227}
2999 3228
3000static int should_alloc_chunk(struct btrfs_space_info *sinfo, 3229static int should_alloc_chunk(struct btrfs_root *root,
3001 u64 alloc_bytes) 3230 struct btrfs_space_info *sinfo, u64 alloc_bytes)
3002{ 3231{
3003 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3232 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3233 u64 thresh;
3004 3234
3005 if (sinfo->bytes_used + sinfo->bytes_reserved + 3235 if (sinfo->bytes_used + sinfo->bytes_reserved +
3006 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3236 alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
3010 alloc_bytes < div_factor(num_bytes, 8)) 3240 alloc_bytes < div_factor(num_bytes, 8))
3011 return 0; 3241 return 0;
3012 3242
3243 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3244 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3245
3246 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3247 return 0;
3248
3013 return 1; 3249 return 1;
3014} 3250}
3015 3251
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3041 goto out; 3277 goto out;
3042 } 3278 }
3043 3279
3044 if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { 3280 if (!force && !should_alloc_chunk(extent_root, space_info,
3281 alloc_bytes)) {
3045 spin_unlock(&space_info->lock); 3282 spin_unlock(&space_info->lock);
3046 goto out; 3283 goto out;
3047 } 3284 }
3048 spin_unlock(&space_info->lock); 3285 spin_unlock(&space_info->lock);
3049 3286
3050 /* 3287 /*
3288 * If we have mixed data/metadata chunks we want to make sure we keep
3289 * allocating mixed chunks instead of individual chunks.
3290 */
3291 if (btrfs_mixed_space_info(space_info))
3292 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3293
3294 /*
3051 * if we're doing a data chunk, go ahead and make sure that 3295 * if we're doing a data chunk, go ahead and make sure that
3052 * we keep a reasonable number of metadata chunks allocated in the 3296 * we keep a reasonable number of metadata chunks allocated in the
3053 * FS as well. 3297 * FS as well.
@@ -3072,55 +3316,25 @@ out:
3072 return ret; 3316 return ret;
3073} 3317}
3074 3318
3075static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3076 struct btrfs_root *root,
3077 struct btrfs_space_info *sinfo, u64 num_bytes)
3078{
3079 int ret;
3080 int end_trans = 0;
3081
3082 if (sinfo->full)
3083 return 0;
3084
3085 spin_lock(&sinfo->lock);
3086 ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
3087 spin_unlock(&sinfo->lock);
3088 if (!ret)
3089 return 0;
3090
3091 if (!trans) {
3092 trans = btrfs_join_transaction(root, 1);
3093 BUG_ON(IS_ERR(trans));
3094 end_trans = 1;
3095 }
3096
3097 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3098 num_bytes + 2 * 1024 * 1024,
3099 get_alloc_profile(root, sinfo->flags), 0);
3100
3101 if (end_trans)
3102 btrfs_end_transaction(trans, root);
3103
3104 return ret == 1 ? 1 : 0;
3105}
3106
3107/* 3319/*
3108 * shrink metadata reservation for delalloc 3320 * shrink metadata reservation for delalloc
3109 */ 3321 */
3110static int shrink_delalloc(struct btrfs_trans_handle *trans, 3322static int shrink_delalloc(struct btrfs_trans_handle *trans,
3111 struct btrfs_root *root, u64 to_reclaim) 3323 struct btrfs_root *root, u64 to_reclaim, int sync)
3112{ 3324{
3113 struct btrfs_block_rsv *block_rsv; 3325 struct btrfs_block_rsv *block_rsv;
3326 struct btrfs_space_info *space_info;
3114 u64 reserved; 3327 u64 reserved;
3115 u64 max_reclaim; 3328 u64 max_reclaim;
3116 u64 reclaimed = 0; 3329 u64 reclaimed = 0;
3117 int pause = 1; 3330 int pause = 1;
3118 int ret; 3331 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3119 3332
3120 block_rsv = &root->fs_info->delalloc_block_rsv; 3333 block_rsv = &root->fs_info->delalloc_block_rsv;
3121 spin_lock(&block_rsv->lock); 3334 space_info = block_rsv->space_info;
3122 reserved = block_rsv->reserved; 3335
3123 spin_unlock(&block_rsv->lock); 3336 smp_mb();
3337 reserved = space_info->bytes_reserved;
3124 3338
3125 if (reserved == 0) 3339 if (reserved == 0)
3126 return 0; 3340 return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3128 max_reclaim = min(reserved, to_reclaim); 3342 max_reclaim = min(reserved, to_reclaim);
3129 3343
3130 while (1) { 3344 while (1) {
3131 ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); 3345 /* have the flusher threads jump in and do some IO */
3132 if (!ret) { 3346 smp_mb();
3133 __set_current_state(TASK_INTERRUPTIBLE); 3347 nr_pages = min_t(unsigned long, nr_pages,
3134 schedule_timeout(pause); 3348 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3135 pause <<= 1; 3349 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3136 if (pause > HZ / 10)
3137 pause = HZ / 10;
3138 } else {
3139 pause = 1;
3140 }
3141 3350
3142 spin_lock(&block_rsv->lock); 3351 spin_lock(&space_info->lock);
3143 if (reserved > block_rsv->reserved) 3352 if (reserved > space_info->bytes_reserved)
3144 reclaimed = reserved - block_rsv->reserved; 3353 reclaimed += reserved - space_info->bytes_reserved;
3145 reserved = block_rsv->reserved; 3354 reserved = space_info->bytes_reserved;
3146 spin_unlock(&block_rsv->lock); 3355 spin_unlock(&space_info->lock);
3147 3356
3148 if (reserved == 0 || reclaimed >= max_reclaim) 3357 if (reserved == 0 || reclaimed >= max_reclaim)
3149 break; 3358 break;
3150 3359
3151 if (trans && trans->transaction->blocked) 3360 if (trans && trans->transaction->blocked)
3152 return -EAGAIN; 3361 return -EAGAIN;
3362
3363 __set_current_state(TASK_INTERRUPTIBLE);
3364 schedule_timeout(pause);
3365 pause <<= 1;
3366 if (pause > HZ / 10)
3367 pause = HZ / 10;
3368
3153 } 3369 }
3154 return reclaimed >= to_reclaim; 3370 return reclaimed >= to_reclaim;
3155} 3371}
3156 3372
3157static int should_retry_reserve(struct btrfs_trans_handle *trans, 3373/*
3158 struct btrfs_root *root, 3374 * Retries tells us how many times we've called reserve_metadata_bytes. The
3159 struct btrfs_block_rsv *block_rsv, 3375 * idea is if this is the first call (retries == 0) then we will add to our
3160 u64 num_bytes, int *retries) 3376 * reserved count if we can't make the allocation in order to hold our place
3377 * while we go and try and free up space. That way for retries > 1 we don't try
3378 * and add space, we just check to see if the amount of unused space is >= the
3379 * total space, meaning that our reservation is valid.
3380 *
3381 * However if we don't intend to retry this reservation, pass -1 as retries so
3382 * that it short circuits this logic.
3383 */
3384static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3385 struct btrfs_root *root,
3386 struct btrfs_block_rsv *block_rsv,
3387 u64 orig_bytes, int flush)
3161{ 3388{
3162 struct btrfs_space_info *space_info = block_rsv->space_info; 3389 struct btrfs_space_info *space_info = block_rsv->space_info;
3163 int ret; 3390 u64 unused;
3391 u64 num_bytes = orig_bytes;
3392 int retries = 0;
3393 int ret = 0;
3394 bool reserved = false;
3395 bool committed = false;
3164 3396
3165 if ((*retries) > 2) 3397again:
3166 return -ENOSPC; 3398 ret = -ENOSPC;
3399 if (reserved)
3400 num_bytes = 0;
3167 3401
3168 ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); 3402 spin_lock(&space_info->lock);
3169 if (ret) 3403 unused = space_info->bytes_used + space_info->bytes_reserved +
3170 return 1; 3404 space_info->bytes_pinned + space_info->bytes_readonly +
3405 space_info->bytes_may_use;
3171 3406
3172 if (trans && trans->transaction->in_commit) 3407 /*
3173 return -ENOSPC; 3408 * The idea here is that we've not already over-reserved the block group
3409 * then we can go ahead and save our reservation first and then start
3410 * flushing if we need to. Otherwise if we've already overcommitted
3411 * lets start flushing stuff first and then come back and try to make
3412 * our reservation.
3413 */
3414 if (unused <= space_info->total_bytes) {
3415 unused -= space_info->total_bytes;
3416 if (unused >= num_bytes) {
3417 if (!reserved)
3418 space_info->bytes_reserved += orig_bytes;
3419 ret = 0;
3420 } else {
3421 /*
3422 * Ok set num_bytes to orig_bytes since we aren't
3423 * overocmmitted, this way we only try and reclaim what
3424 * we need.
3425 */
3426 num_bytes = orig_bytes;
3427 }
3428 } else {
3429 /*
3430 * Ok we're over committed, set num_bytes to the overcommitted
3431 * amount plus the amount of bytes that we need for this
3432 * reservation.
3433 */
3434 num_bytes = unused - space_info->total_bytes +
3435 (orig_bytes * (retries + 1));
3436 }
3174 3437
3175 ret = shrink_delalloc(trans, root, num_bytes); 3438 /*
3176 if (ret) 3439 * Couldn't make our reservation, save our place so while we're trying
3177 return ret; 3440 * to reclaim space we can actually use it instead of somebody else
3441 * stealing it from us.
3442 */
3443 if (ret && !reserved) {
3444 space_info->bytes_reserved += orig_bytes;
3445 reserved = true;
3446 }
3178 3447
3179 spin_lock(&space_info->lock);
3180 if (space_info->bytes_pinned < num_bytes)
3181 ret = 1;
3182 spin_unlock(&space_info->lock); 3448 spin_unlock(&space_info->lock);
3183 if (ret)
3184 return -ENOSPC;
3185
3186 (*retries)++;
3187 3449
3188 if (trans) 3450 if (!ret)
3189 return -EAGAIN; 3451 return 0;
3190 3452
3191 trans = btrfs_join_transaction(root, 1); 3453 if (!flush)
3192 BUG_ON(IS_ERR(trans)); 3454 goto out;
3193 ret = btrfs_commit_transaction(trans, root);
3194 BUG_ON(ret);
3195 3455
3196 return 1; 3456 /*
3197} 3457 * We do synchronous shrinking since we don't actually unreserve
3458 * metadata until after the IO is completed.
3459 */
3460 ret = shrink_delalloc(trans, root, num_bytes, 1);
3461 if (ret > 0)
3462 return 0;
3463 else if (ret < 0)
3464 goto out;
3198 3465
3199static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, 3466 /*
3200 u64 num_bytes) 3467 * So if we were overcommitted it's possible that somebody else flushed
3201{ 3468 * out enough space and we simply didn't have enough space to reclaim,
3202 struct btrfs_space_info *space_info = block_rsv->space_info; 3469 * so go back around and try again.
3203 u64 unused; 3470 */
3204 int ret = -ENOSPC; 3471 if (retries < 2) {
3472 retries++;
3473 goto again;
3474 }
3205 3475
3206 spin_lock(&space_info->lock); 3476 spin_lock(&space_info->lock);
3207 unused = space_info->bytes_used + space_info->bytes_reserved + 3477 /*
3208 space_info->bytes_pinned + space_info->bytes_readonly; 3478 * Not enough space to be reclaimed, don't bother committing the
3479 * transaction.
3480 */
3481 if (space_info->bytes_pinned < orig_bytes)
3482 ret = -ENOSPC;
3483 spin_unlock(&space_info->lock);
3484 if (ret)
3485 goto out;
3209 3486
3210 if (unused < space_info->total_bytes) 3487 ret = -EAGAIN;
3211 unused = space_info->total_bytes - unused; 3488 if (trans || committed)
3212 else 3489 goto out;
3213 unused = 0;
3214 3490
3215 if (unused >= num_bytes) { 3491 ret = -ENOSPC;
3216 if (block_rsv->priority >= 10) { 3492 trans = btrfs_join_transaction(root, 1);
3217 space_info->bytes_reserved += num_bytes; 3493 if (IS_ERR(trans))
3218 ret = 0; 3494 goto out;
3219 } else { 3495 ret = btrfs_commit_transaction(trans, root);
3220 if ((unused + block_rsv->reserved) * 3496 if (!ret) {
3221 block_rsv->priority >= 3497 trans = NULL;
3222 (num_bytes + block_rsv->reserved) * 10) { 3498 committed = true;
3223 space_info->bytes_reserved += num_bytes; 3499 goto again;
3224 ret = 0; 3500 }
3225 } 3501
3226 } 3502out:
3503 if (reserved) {
3504 spin_lock(&space_info->lock);
3505 space_info->bytes_reserved -= orig_bytes;
3506 spin_unlock(&space_info->lock);
3227 } 3507 }
3228 spin_unlock(&space_info->lock);
3229 3508
3230 return ret; 3509 return ret;
3231} 3510}
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3327{ 3606{
3328 struct btrfs_block_rsv *block_rsv; 3607 struct btrfs_block_rsv *block_rsv;
3329 struct btrfs_fs_info *fs_info = root->fs_info; 3608 struct btrfs_fs_info *fs_info = root->fs_info;
3330 u64 alloc_target;
3331 3609
3332 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3610 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3333 if (!block_rsv) 3611 if (!block_rsv)
3334 return NULL; 3612 return NULL;
3335 3613
3336 btrfs_init_block_rsv(block_rsv); 3614 btrfs_init_block_rsv(block_rsv);
3337
3338 alloc_target = btrfs_get_alloc_profile(root, 0);
3339 block_rsv->space_info = __find_space_info(fs_info, 3615 block_rsv->space_info = __find_space_info(fs_info,
3340 BTRFS_BLOCK_GROUP_METADATA); 3616 BTRFS_BLOCK_GROUP_METADATA);
3341
3342 return block_rsv; 3617 return block_rsv;
3343} 3618}
3344 3619
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3369int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3644int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3370 struct btrfs_root *root, 3645 struct btrfs_root *root,
3371 struct btrfs_block_rsv *block_rsv, 3646 struct btrfs_block_rsv *block_rsv,
3372 u64 num_bytes, int *retries) 3647 u64 num_bytes)
3373{ 3648{
3374 int ret; 3649 int ret;
3375 3650
3376 if (num_bytes == 0) 3651 if (num_bytes == 0)
3377 return 0; 3652 return 0;
3378again: 3653
3379 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3654 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
3380 if (!ret) { 3655 if (!ret) {
3381 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3656 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3382 return 0; 3657 return 0;
3383 } 3658 }
3384 3659
3385 ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
3386 if (ret > 0)
3387 goto again;
3388
3389 return ret; 3660 return ret;
3390} 3661}
3391 3662
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3420 return 0; 3691 return 0;
3421 3692
3422 if (block_rsv->refill_used) { 3693 if (block_rsv->refill_used) {
3423 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3694 ret = reserve_metadata_bytes(trans, root, block_rsv,
3695 num_bytes, 0);
3424 if (!ret) { 3696 if (!ret) {
3425 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3697 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3426 return 0; 3698 return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3499 3771
3500 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3772 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3501 spin_lock(&sinfo->lock); 3773 spin_lock(&sinfo->lock);
3774 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
3775 data_used = 0;
3502 meta_used = sinfo->bytes_used; 3776 meta_used = sinfo->bytes_used;
3503 spin_unlock(&sinfo->lock); 3777 spin_unlock(&sinfo->lock);
3504 3778
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3526 block_rsv->size = num_bytes; 3800 block_rsv->size = num_bytes;
3527 3801
3528 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3802 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3529 sinfo->bytes_reserved + sinfo->bytes_readonly; 3803 sinfo->bytes_reserved + sinfo->bytes_readonly +
3804 sinfo->bytes_may_use;
3530 3805
3531 if (sinfo->total_bytes > num_bytes) { 3806 if (sinfo->total_bytes > num_bytes) {
3532 num_bytes = sinfo->total_bytes - num_bytes; 3807 num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3597 3872
3598int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3873int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3599 struct btrfs_root *root, 3874 struct btrfs_root *root,
3600 int num_items, int *retries) 3875 int num_items)
3601{ 3876{
3602 u64 num_bytes; 3877 u64 num_bytes;
3603 int ret; 3878 int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3607 3882
3608 num_bytes = calc_trans_metadata_size(root, num_items); 3883 num_bytes = calc_trans_metadata_size(root, num_items);
3609 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3884 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3610 num_bytes, retries); 3885 num_bytes);
3611 if (!ret) { 3886 if (!ret) {
3612 trans->bytes_reserved += num_bytes; 3887 trans->bytes_reserved += num_bytes;
3613 trans->block_rsv = &root->fs_info->trans_block_rsv; 3888 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3681 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3956 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3682 u64 to_reserve; 3957 u64 to_reserve;
3683 int nr_extents; 3958 int nr_extents;
3684 int retries = 0;
3685 int ret; 3959 int ret;
3686 3960
3687 if (btrfs_transaction_in_commit(root->fs_info)) 3961 if (btrfs_transaction_in_commit(root->fs_info))
3688 schedule_timeout(1); 3962 schedule_timeout(1);
3689 3963
3690 num_bytes = ALIGN(num_bytes, root->sectorsize); 3964 num_bytes = ALIGN(num_bytes, root->sectorsize);
3691again: 3965
3692 spin_lock(&BTRFS_I(inode)->accounting_lock); 3966 spin_lock(&BTRFS_I(inode)->accounting_lock);
3693 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3967 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3694 if (nr_extents > BTRFS_I(inode)->reserved_extents) { 3968 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
3698 nr_extents = 0; 3972 nr_extents = 0;
3699 to_reserve = 0; 3973 to_reserve = 0;
3700 } 3974 }
3975 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3701 3976
3702 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3977 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3703 ret = reserve_metadata_bytes(block_rsv, to_reserve); 3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3704 if (ret) { 3979 if (ret)
3705 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3706 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3707 &retries);
3708 if (ret > 0)
3709 goto again;
3710 return ret; 3980 return ret;
3711 }
3712 3981
3982 spin_lock(&BTRFS_I(inode)->accounting_lock);
3713 BTRFS_I(inode)->reserved_extents += nr_extents; 3983 BTRFS_I(inode)->reserved_extents += nr_extents;
3714 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3984 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3715 spin_unlock(&BTRFS_I(inode)->accounting_lock); 3985 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
3717 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3987 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3718 3988
3719 if (block_rsv->size > 512 * 1024 * 1024) 3989 if (block_rsv->size > 512 * 1024 * 1024)
3720 shrink_delalloc(NULL, root, to_reserve); 3990 shrink_delalloc(NULL, root, to_reserve, 0);
3721 3991
3722 return 0; 3992 return 0;
3723} 3993}
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3776 struct btrfs_root *root, 4046 struct btrfs_root *root,
3777 u64 bytenr, u64 num_bytes, int alloc) 4047 u64 bytenr, u64 num_bytes, int alloc)
3778{ 4048{
3779 struct btrfs_block_group_cache *cache; 4049 struct btrfs_block_group_cache *cache = NULL;
3780 struct btrfs_fs_info *info = root->fs_info; 4050 struct btrfs_fs_info *info = root->fs_info;
3781 int factor;
3782 u64 total = num_bytes; 4051 u64 total = num_bytes;
3783 u64 old_val; 4052 u64 old_val;
3784 u64 byte_in_group; 4053 u64 byte_in_group;
4054 int factor;
3785 4055
3786 /* block accounting for super block */ 4056 /* block accounting for super block */
3787 spin_lock(&info->delalloc_lock); 4057 spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3803 factor = 2; 4073 factor = 2;
3804 else 4074 else
3805 factor = 1; 4075 factor = 1;
4076 /*
4077 * If this block group has free space cache written out, we
4078 * need to make sure to load it if we are removing space. This
4079 * is because we need the unpinning stage to actually add the
4080 * space back to the block group, otherwise we will leak space.
4081 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1);
4084
3806 byte_in_group = bytenr - cache->key.objectid; 4085 byte_in_group = bytenr - cache->key.objectid;
3807 WARN_ON(byte_in_group > cache->key.offset); 4086 WARN_ON(byte_in_group > cache->key.offset);
3808 4087
3809 spin_lock(&cache->space_info->lock); 4088 spin_lock(&cache->space_info->lock);
3810 spin_lock(&cache->lock); 4089 spin_lock(&cache->lock);
4090
4091 if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
4092 cache->disk_cache_state < BTRFS_DC_CLEAR)
4093 cache->disk_cache_state = BTRFS_DC_CLEAR;
4094
3811 cache->dirty = 1; 4095 cache->dirty = 1;
3812 old_val = btrfs_block_group_used(&cache->item); 4096 old_val = btrfs_block_group_used(&cache->item);
3813 num_bytes = min(total, cache->key.offset - byte_in_group); 4097 num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4554 bool found_uncached_bg = false; 4838 bool found_uncached_bg = false;
4555 bool failed_cluster_refill = false; 4839 bool failed_cluster_refill = false;
4556 bool failed_alloc = false; 4840 bool failed_alloc = false;
4841 bool use_cluster = true;
4557 u64 ideal_cache_percent = 0; 4842 u64 ideal_cache_percent = 0;
4558 u64 ideal_cache_offset = 0; 4843 u64 ideal_cache_offset = 0;
4559 4844
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4568 return -ENOSPC; 4853 return -ENOSPC;
4569 } 4854 }
4570 4855
4856 /*
4857 * If the space info is for both data and metadata it means we have a
4858 * small filesystem and we can't use the clustering stuff.
4859 */
4860 if (btrfs_mixed_space_info(space_info))
4861 use_cluster = false;
4862
4571 if (orig_root->ref_cows || empty_size) 4863 if (orig_root->ref_cows || empty_size)
4572 allowed_chunk_alloc = 1; 4864 allowed_chunk_alloc = 1;
4573 4865
4574 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4866 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
4575 last_ptr = &root->fs_info->meta_alloc_cluster; 4867 last_ptr = &root->fs_info->meta_alloc_cluster;
4576 if (!btrfs_test_opt(root, SSD)) 4868 if (!btrfs_test_opt(root, SSD))
4577 empty_cluster = 64 * 1024; 4869 empty_cluster = 64 * 1024;
4578 } 4870 }
4579 4871
4580 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4872 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
4873 btrfs_test_opt(root, SSD)) {
4581 last_ptr = &root->fs_info->data_alloc_cluster; 4874 last_ptr = &root->fs_info->data_alloc_cluster;
4582 } 4875 }
4583 4876
@@ -4641,6 +4934,10 @@ have_block_group:
4641 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4642 u64 free_percent; 4935 u64 free_percent;
4643 4936
4937 ret = cache_block_group(block_group, trans, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group;
4940
4644 free_percent = btrfs_block_group_used(&block_group->item); 4941 free_percent = btrfs_block_group_used(&block_group->item);
4645 free_percent *= 100; 4942 free_percent *= 100;
4646 free_percent = div64_u64(free_percent, 4943 free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
4661 if (loop > LOOP_CACHING_NOWAIT || 4958 if (loop > LOOP_CACHING_NOWAIT ||
4662 (loop > LOOP_FIND_IDEAL && 4959 (loop > LOOP_FIND_IDEAL &&
4663 atomic_read(&space_info->caching_threads) < 2)) { 4960 atomic_read(&space_info->caching_threads) < 2)) {
4664 ret = cache_block_group(block_group); 4961 ret = cache_block_group(block_group, trans, 0);
4665 BUG_ON(ret); 4962 BUG_ON(ret);
4666 } 4963 }
4667 found_uncached_bg = true; 4964 found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5218 u64 num_bytes = ins->offset; 5515 u64 num_bytes = ins->offset;
5219 5516
5220 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5221 cache_block_group(block_group); 5518 cache_block_group(block_group, trans, 0);
5222 caching_ctl = get_caching_control(block_group); 5519 caching_ctl = get_caching_control(block_group);
5223 5520
5224 if (!caching_ctl) { 5521 if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5308 block_rsv = get_block_rsv(trans, root); 5605 block_rsv = get_block_rsv(trans, root);
5309 5606
5310 if (block_rsv->size == 0) { 5607 if (block_rsv->size == 0) {
5311 ret = reserve_metadata_bytes(block_rsv, blocksize); 5608 ret = reserve_metadata_bytes(trans, root, block_rsv,
5609 blocksize, 0);
5312 if (ret) 5610 if (ret)
5313 return ERR_PTR(ret); 5611 return ERR_PTR(ret);
5314 return block_rsv; 5612 return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5318 if (!ret) 5616 if (!ret)
5319 return block_rsv; 5617 return block_rsv;
5320 5618
5321 WARN_ON(1);
5322 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
5323 block_rsv->size, block_rsv->reserved,
5324 block_rsv->freed[0], block_rsv->freed[1]);
5325
5326 return ERR_PTR(-ENOSPC); 5619 return ERR_PTR(-ENOSPC);
5327} 5620}
5328 5621
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5421 u64 generation; 5714 u64 generation;
5422 u64 refs; 5715 u64 refs;
5423 u64 flags; 5716 u64 flags;
5424 u64 last = 0;
5425 u32 nritems; 5717 u32 nritems;
5426 u32 blocksize; 5718 u32 blocksize;
5427 struct btrfs_key key; 5719 struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
5489 generation); 5781 generation);
5490 if (ret) 5782 if (ret)
5491 break; 5783 break;
5492 last = bytenr + blocksize;
5493 nread++; 5784 nread++;
5494 } 5785 }
5495 wc->reada_slot = slot; 5786 wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
7813 return ret; 8104 return ret;
7814} 8105}
7815 8106
8107void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
8108{
8109 struct btrfs_block_group_cache *block_group;
8110 u64 last = 0;
8111
8112 while (1) {
8113 struct inode *inode;
8114
8115 block_group = btrfs_lookup_first_block_group(info, last);
8116 while (block_group) {
8117 spin_lock(&block_group->lock);
8118 if (block_group->iref)
8119 break;
8120 spin_unlock(&block_group->lock);
8121 block_group = next_block_group(info->tree_root,
8122 block_group);
8123 }
8124 if (!block_group) {
8125 if (last == 0)
8126 break;
8127 last = 0;
8128 continue;
8129 }
8130
8131 inode = block_group->inode;
8132 block_group->iref = 0;
8133 block_group->inode = NULL;
8134 spin_unlock(&block_group->lock);
8135 iput(inode);
8136 last = block_group->key.objectid + block_group->key.offset;
8137 btrfs_put_block_group(block_group);
8138 }
8139}
8140
7816int btrfs_free_block_groups(struct btrfs_fs_info *info) 8141int btrfs_free_block_groups(struct btrfs_fs_info *info)
7817{ 8142{
7818 struct btrfs_block_group_cache *block_group; 8143 struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7896 struct btrfs_key key; 8221 struct btrfs_key key;
7897 struct btrfs_key found_key; 8222 struct btrfs_key found_key;
7898 struct extent_buffer *leaf; 8223 struct extent_buffer *leaf;
8224 int need_clear = 0;
8225 u64 cache_gen;
7899 8226
7900 root = info->extent_root; 8227 root = info->extent_root;
7901 key.objectid = 0; 8228 key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7905 if (!path) 8232 if (!path)
7906 return -ENOMEM; 8233 return -ENOMEM;
7907 8234
8235 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
8236 if (cache_gen != 0 &&
8237 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
8238 need_clear = 1;
8239 if (btrfs_test_opt(root, CLEAR_CACHE))
8240 need_clear = 1;
8241 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
8242 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
8243
7908 while (1) { 8244 while (1) {
7909 ret = find_first_block_group(root, path, &key); 8245 ret = find_first_block_group(root, path, &key);
7910 if (ret > 0) 8246 if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7927 INIT_LIST_HEAD(&cache->list); 8263 INIT_LIST_HEAD(&cache->list);
7928 INIT_LIST_HEAD(&cache->cluster_list); 8264 INIT_LIST_HEAD(&cache->cluster_list);
7929 8265
8266 if (need_clear)
8267 cache->disk_cache_state = BTRFS_DC_CLEAR;
8268
7930 /* 8269 /*
7931 * we only want to have 32k of ram per block group for keeping 8270 * we only want to have 32k of ram per block group for keeping
7932 * track of free space, and if we pass 1/2 of that we want to 8271 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8031 cache->key.offset = size; 8370 cache->key.offset = size;
8032 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8371 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8033 cache->sectorsize = root->sectorsize; 8372 cache->sectorsize = root->sectorsize;
8373 cache->fs_info = root->fs_info;
8034 8374
8035 /* 8375 /*
8036 * we only want to have 32k of ram per block group for keeping track 8376 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8087 struct btrfs_path *path; 8427 struct btrfs_path *path;
8088 struct btrfs_block_group_cache *block_group; 8428 struct btrfs_block_group_cache *block_group;
8089 struct btrfs_free_cluster *cluster; 8429 struct btrfs_free_cluster *cluster;
8430 struct btrfs_root *tree_root = root->fs_info->tree_root;
8090 struct btrfs_key key; 8431 struct btrfs_key key;
8432 struct inode *inode;
8091 int ret; 8433 int ret;
8434 int factor;
8092 8435
8093 root = root->fs_info->extent_root; 8436 root = root->fs_info->extent_root;
8094 8437
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8097 BUG_ON(!block_group->ro); 8440 BUG_ON(!block_group->ro);
8098 8441
8099 memcpy(&key, &block_group->key, sizeof(key)); 8442 memcpy(&key, &block_group->key, sizeof(key));
8443 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8444 BTRFS_BLOCK_GROUP_RAID1 |
8445 BTRFS_BLOCK_GROUP_RAID10))
8446 factor = 2;
8447 else
8448 factor = 1;
8100 8449
8101 /* make sure this block group isn't part of an allocation cluster */ 8450 /* make sure this block group isn't part of an allocation cluster */
8102 cluster = &root->fs_info->data_alloc_cluster; 8451 cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8116 path = btrfs_alloc_path(); 8465 path = btrfs_alloc_path();
8117 BUG_ON(!path); 8466 BUG_ON(!path);
8118 8467
8468 inode = lookup_free_space_inode(root, block_group, path);
8469 if (!IS_ERR(inode)) {
8470 btrfs_orphan_add(trans, inode);
8471 clear_nlink(inode);
8472 /* One for the block groups ref */
8473 spin_lock(&block_group->lock);
8474 if (block_group->iref) {
8475 block_group->iref = 0;
8476 block_group->inode = NULL;
8477 spin_unlock(&block_group->lock);
8478 iput(inode);
8479 } else {
8480 spin_unlock(&block_group->lock);
8481 }
8482 /* One for our lookup ref */
8483 iput(inode);
8484 }
8485
8486 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
8487 key.offset = block_group->key.objectid;
8488 key.type = 0;
8489
8490 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
8491 if (ret < 0)
8492 goto out;
8493 if (ret > 0)
8494 btrfs_release_path(tree_root, path);
8495 if (ret == 0) {
8496 ret = btrfs_del_item(trans, tree_root, path);
8497 if (ret)
8498 goto out;
8499 btrfs_release_path(tree_root, path);
8500 }
8501
8119 spin_lock(&root->fs_info->block_group_cache_lock); 8502 spin_lock(&root->fs_info->block_group_cache_lock);
8120 rb_erase(&block_group->cache_node, 8503 rb_erase(&block_group->cache_node,
8121 &root->fs_info->block_group_cache_tree); 8504 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8137 spin_lock(&block_group->space_info->lock); 8520 spin_lock(&block_group->space_info->lock);
8138 block_group->space_info->total_bytes -= block_group->key.offset; 8521 block_group->space_info->total_bytes -= block_group->key.offset;
8139 block_group->space_info->bytes_readonly -= block_group->key.offset; 8522 block_group->space_info->bytes_readonly -= block_group->key.offset;
8523 block_group->space_info->disk_total -= block_group->key.offset * factor;
8140 spin_unlock(&block_group->space_info->lock); 8524 spin_unlock(&block_group->space_info->lock);
8141 8525
8526 memcpy(&key, &block_group->key, sizeof(key));
8527
8142 btrfs_clear_space_info_full(root->fs_info); 8528 btrfs_clear_space_info_full(root->fs_info);
8143 8529
8144 btrfs_put_block_group(block_group); 8530 btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..eac10e3260a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
104 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping, gfp_t mask)
105{ 105{
106 tree->state = RB_ROOT; 106 tree->state = RB_ROOT;
107 tree->buffer = RB_ROOT; 107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
108 tree->ops = NULL; 108 tree->ops = NULL;
109 tree->dirty_bytes = 0; 109 tree->dirty_bytes = 0;
110 spin_lock_init(&tree->lock); 110 spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
235 return ret; 235 return ret;
236} 236}
237 237
238static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
239 u64 offset, struct rb_node *node)
240{
241 struct rb_root *root = &tree->buffer;
242 struct rb_node **p = &root->rb_node;
243 struct rb_node *parent = NULL;
244 struct extent_buffer *eb;
245
246 while (*p) {
247 parent = *p;
248 eb = rb_entry(parent, struct extent_buffer, rb_node);
249
250 if (offset < eb->start)
251 p = &(*p)->rb_left;
252 else if (offset > eb->start)
253 p = &(*p)->rb_right;
254 else
255 return eb;
256 }
257
258 rb_link_node(node, parent, p);
259 rb_insert_color(node, root);
260 return NULL;
261}
262
263static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
264 u64 offset)
265{
266 struct rb_root *root = &tree->buffer;
267 struct rb_node *n = root->rb_node;
268 struct extent_buffer *eb;
269
270 while (n) {
271 eb = rb_entry(n, struct extent_buffer, rb_node);
272 if (offset < eb->start)
273 n = n->rb_left;
274 else if (offset > eb->start)
275 n = n->rb_right;
276 else
277 return eb;
278 }
279 return NULL;
280}
281
282static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, 238static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
283 struct extent_state *other) 239 struct extent_state *other)
284{ 240{
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1901 struct page *page = bvec->bv_page; 1857 struct page *page = bvec->bv_page;
1902 struct extent_io_tree *tree = bio->bi_private; 1858 struct extent_io_tree *tree = bio->bi_private;
1903 u64 start; 1859 u64 start;
1904 u64 end;
1905 1860
1906 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1861 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1907 end = start + bvec->bv_len - 1;
1908 1862
1909 bio->bi_private = NULL; 1863 bio->bi_private = NULL;
1910 1864
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2204 u64 last_byte = i_size_read(inode); 2158 u64 last_byte = i_size_read(inode);
2205 u64 block_start; 2159 u64 block_start;
2206 u64 iosize; 2160 u64 iosize;
2207 u64 unlock_start;
2208 sector_t sector; 2161 sector_t sector;
2209 struct extent_state *cached_state = NULL; 2162 struct extent_state *cached_state = NULL;
2210 struct extent_map *em; 2163 struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2329 if (tree->ops && tree->ops->writepage_end_io_hook) 2282 if (tree->ops && tree->ops->writepage_end_io_hook)
2330 tree->ops->writepage_end_io_hook(page, start, 2283 tree->ops->writepage_end_io_hook(page, start,
2331 page_end, NULL, 1); 2284 page_end, NULL, 1);
2332 unlock_start = page_end + 1;
2333 goto done; 2285 goto done;
2334 } 2286 }
2335 2287
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2340 if (tree->ops && tree->ops->writepage_end_io_hook) 2292 if (tree->ops && tree->ops->writepage_end_io_hook)
2341 tree->ops->writepage_end_io_hook(page, cur, 2293 tree->ops->writepage_end_io_hook(page, cur,
2342 page_end, NULL, 1); 2294 page_end, NULL, 1);
2343 unlock_start = page_end + 1;
2344 break; 2295 break;
2345 } 2296 }
2346 em = epd->get_extent(inode, page, pg_offset, cur, 2297 em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2387 2338
2388 cur += iosize; 2339 cur += iosize;
2389 pg_offset += iosize; 2340 pg_offset += iosize;
2390 unlock_start = cur;
2391 continue; 2341 continue;
2392 } 2342 }
2393 /* leave this out until we have a page_mkwrite call */ 2343 /* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2473 pgoff_t index; 2423 pgoff_t index;
2474 pgoff_t end; /* Inclusive */ 2424 pgoff_t end; /* Inclusive */
2475 int scanned = 0; 2425 int scanned = 0;
2476 int range_whole = 0;
2477 2426
2478 pagevec_init(&pvec, 0); 2427 pagevec_init(&pvec, 0);
2479 if (wbc->range_cyclic) { 2428 if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2482 } else { 2431 } else {
2483 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2432 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2484 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2433 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2485 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2486 range_whole = 1;
2487 scanned = 1; 2434 scanned = 1;
2488 } 2435 }
2489retry: 2436retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
2823 NULL, 1, 2770 NULL, 1,
2824 end_bio_extent_preparewrite, 0, 2771 end_bio_extent_preparewrite, 0,
2825 0, 0); 2772 0, 0);
2773 if (ret && !err)
2774 err = ret;
2826 iocount++; 2775 iocount++;
2827 block_start = block_start + iosize; 2776 block_start = block_start + iosize;
2828 } else { 2777 } else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
3104 kmem_cache_free(extent_buffer_cache, eb); 3053 kmem_cache_free(extent_buffer_cache, eb);
3105} 3054}
3106 3055
3056/*
3057 * Helper for releasing extent buffer page.
3058 */
3059static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3060 unsigned long start_idx)
3061{
3062 unsigned long index;
3063 struct page *page;
3064
3065 if (!eb->first_page)
3066 return;
3067
3068 index = num_extent_pages(eb->start, eb->len);
3069 if (start_idx >= index)
3070 return;
3071
3072 do {
3073 index--;
3074 page = extent_buffer_page(eb, index);
3075 if (page)
3076 page_cache_release(page);
3077 } while (index != start_idx);
3078}
3079
3080/*
3081 * Helper for releasing the extent buffer.
3082 */
3083static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3084{
3085 btrfs_release_extent_buffer_page(eb, 0);
3086 __free_extent_buffer(eb);
3087}
3088
3107struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3089struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3108 u64 start, unsigned long len, 3090 u64 start, unsigned long len,
3109 struct page *page0, 3091 struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3117 struct page *p; 3099 struct page *p;
3118 struct address_space *mapping = tree->mapping; 3100 struct address_space *mapping = tree->mapping;
3119 int uptodate = 1; 3101 int uptodate = 1;
3102 int ret;
3120 3103
3121 spin_lock(&tree->buffer_lock); 3104 rcu_read_lock();
3122 eb = buffer_search(tree, start); 3105 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3123 if (eb) { 3106 if (eb && atomic_inc_not_zero(&eb->refs)) {
3124 atomic_inc(&eb->refs); 3107 rcu_read_unlock();
3125 spin_unlock(&tree->buffer_lock);
3126 mark_page_accessed(eb->first_page); 3108 mark_page_accessed(eb->first_page);
3127 return eb; 3109 return eb;
3128 } 3110 }
3129 spin_unlock(&tree->buffer_lock); 3111 rcu_read_unlock();
3130 3112
3131 eb = __alloc_extent_buffer(tree, start, len, mask); 3113 eb = __alloc_extent_buffer(tree, start, len, mask);
3132 if (!eb) 3114 if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 if (uptodate) 3147 if (uptodate)
3166 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3148 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3167 3149
3150 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3151 if (ret)
3152 goto free_eb;
3153
3168 spin_lock(&tree->buffer_lock); 3154 spin_lock(&tree->buffer_lock);
3169 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3155 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
3170 if (exists) { 3156 if (ret == -EEXIST) {
3157 exists = radix_tree_lookup(&tree->buffer,
3158 start >> PAGE_CACHE_SHIFT);
3171 /* add one reference for the caller */ 3159 /* add one reference for the caller */
3172 atomic_inc(&exists->refs); 3160 atomic_inc(&exists->refs);
3173 spin_unlock(&tree->buffer_lock); 3161 spin_unlock(&tree->buffer_lock);
3162 radix_tree_preload_end();
3174 goto free_eb; 3163 goto free_eb;
3175 } 3164 }
3176 /* add one reference for the tree */ 3165 /* add one reference for the tree */
3177 atomic_inc(&eb->refs); 3166 atomic_inc(&eb->refs);
3178 spin_unlock(&tree->buffer_lock); 3167 spin_unlock(&tree->buffer_lock);
3168 radix_tree_preload_end();
3179 return eb; 3169 return eb;
3180 3170
3181free_eb: 3171free_eb:
3182 if (!atomic_dec_and_test(&eb->refs)) 3172 if (!atomic_dec_and_test(&eb->refs))
3183 return exists; 3173 return exists;
3184 for (index = 1; index < i; index++) 3174 btrfs_release_extent_buffer(eb);
3185 page_cache_release(extent_buffer_page(eb, index));
3186 page_cache_release(extent_buffer_page(eb, 0));
3187 __free_extent_buffer(eb);
3188 return exists; 3175 return exists;
3189} 3176}
3190 3177
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3194{ 3181{
3195 struct extent_buffer *eb; 3182 struct extent_buffer *eb;
3196 3183
3197 spin_lock(&tree->buffer_lock); 3184 rcu_read_lock();
3198 eb = buffer_search(tree, start); 3185 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3199 if (eb) 3186 if (eb && atomic_inc_not_zero(&eb->refs)) {
3200 atomic_inc(&eb->refs); 3187 rcu_read_unlock();
3201 spin_unlock(&tree->buffer_lock);
3202
3203 if (eb)
3204 mark_page_accessed(eb->first_page); 3188 mark_page_accessed(eb->first_page);
3189 return eb;
3190 }
3191 rcu_read_unlock();
3205 3192
3206 return eb; 3193 return NULL;
3207} 3194}
3208 3195
3209void free_extent_buffer(struct extent_buffer *eb) 3196void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3833 } 3820 }
3834} 3821}
3835 3822
3823static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
3824{
3825 struct extent_buffer *eb =
3826 container_of(head, struct extent_buffer, rcu_head);
3827
3828 btrfs_release_extent_buffer(eb);
3829}
3830
3836int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3831int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3837{ 3832{
3838 u64 start = page_offset(page); 3833 u64 start = page_offset(page);
3839 struct extent_buffer *eb; 3834 struct extent_buffer *eb;
3840 int ret = 1; 3835 int ret = 1;
3841 unsigned long i;
3842 unsigned long num_pages;
3843 3836
3844 spin_lock(&tree->buffer_lock); 3837 spin_lock(&tree->buffer_lock);
3845 eb = buffer_search(tree, start); 3838 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3846 if (!eb) 3839 if (!eb)
3847 goto out; 3840 goto out;
3848 3841
3849 if (atomic_read(&eb->refs) > 1) { 3842 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3850 ret = 0; 3843 ret = 0;
3851 goto out; 3844 goto out;
3852 } 3845 }
3853 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3846
3847 /*
3848 * set @eb->refs to 0 if it is already 1, and then release the @eb.
3849 * Or go back.
3850 */
3851 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
3854 ret = 0; 3852 ret = 0;
3855 goto out; 3853 goto out;
3856 } 3854 }
3857 /* at this point we can safely release the extent buffer */ 3855
3858 num_pages = num_extent_pages(eb->start, eb->len); 3856 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3859 for (i = 0; i < num_pages; i++)
3860 page_cache_release(extent_buffer_page(eb, i));
3861 rb_erase(&eb->rb_node, &tree->buffer);
3862 __free_extent_buffer(eb);
3863out: 3857out:
3864 spin_unlock(&tree->buffer_lock); 3858 spin_unlock(&tree->buffer_lock);
3859
3860 /* at this point we can safely release the extent buffer */
3861 if (atomic_read(&eb->refs) == 0)
3862 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
3865 return ret; 3863 return ret;
3866} 3864}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
85 85
86struct extent_io_tree { 86struct extent_io_tree {
87 struct rb_root state; 87 struct rb_root state;
88 struct rb_root buffer; 88 struct radix_tree_root buffer;
89 struct address_space *mapping; 89 struct address_space *mapping;
90 u64 dirty_bytes; 90 u64 dirty_bytes;
91 spinlock_t lock; 91 spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
123 unsigned long bflags; 123 unsigned long bflags;
124 atomic_t refs; 124 atomic_t refs;
125 struct list_head leak_list; 125 struct list_head leak_list;
126 struct rb_node rb_node; 126 struct rcu_head rcu_head;
127 127
128 /* the spinlock is used to protect most operations */ 128 /* the spinlock is used to protect most operations */
129 spinlock_t lock; 129 spinlock_t lock;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d6451..23cb8da3ff66 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
335 goto out; 335 goto out;
336 } 336 }
337 if (IS_ERR(rb_node)) { 337 if (IS_ERR(rb_node)) {
338 em = ERR_PTR(PTR_ERR(rb_node)); 338 em = ERR_CAST(rb_node);
339 goto out; 339 goto out;
340 } 340 }
341 em = rb_entry(rb_node, struct extent_map, rb_node); 341 em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
384 goto out; 384 goto out;
385 } 385 }
386 if (IS_ERR(rb_node)) { 386 if (IS_ERR(rb_node)) {
387 em = ERR_PTR(PTR_ERR(rb_node)); 387 em = ERR_CAST(rb_node);
388 goto out; 388 goto out;
389 } 389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node); 390 em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..22ee0dc2e6b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h"
26 27
27#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
28#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
29 30
31static void recalculate_thresholds(struct btrfs_block_group_cache
32 *block_group);
33static int link_free_space(struct btrfs_block_group_cache *block_group,
34 struct btrfs_free_space *info);
35
36struct inode *lookup_free_space_inode(struct btrfs_root *root,
37 struct btrfs_block_group_cache
38 *block_group, struct btrfs_path *path)
39{
40 struct btrfs_key key;
41 struct btrfs_key location;
42 struct btrfs_disk_key disk_key;
43 struct btrfs_free_space_header *header;
44 struct extent_buffer *leaf;
45 struct inode *inode = NULL;
46 int ret;
47
48 spin_lock(&block_group->lock);
49 if (block_group->inode)
50 inode = igrab(block_group->inode);
51 spin_unlock(&block_group->lock);
52 if (inode)
53 return inode;
54
55 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
56 key.offset = block_group->key.objectid;
57 key.type = 0;
58
59 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
60 if (ret < 0)
61 return ERR_PTR(ret);
62 if (ret > 0) {
63 btrfs_release_path(root, path);
64 return ERR_PTR(-ENOENT);
65 }
66
67 leaf = path->nodes[0];
68 header = btrfs_item_ptr(leaf, path->slots[0],
69 struct btrfs_free_space_header);
70 btrfs_free_space_key(leaf, header, &disk_key);
71 btrfs_disk_key_to_cpu(&location, &disk_key);
72 btrfs_release_path(root, path);
73
74 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
75 if (!inode)
76 return ERR_PTR(-ENOENT);
77 if (IS_ERR(inode))
78 return inode;
79 if (is_bad_inode(inode)) {
80 iput(inode);
81 return ERR_PTR(-ENOENT);
82 }
83
84 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode);
87 block_group->iref = 1;
88 }
89 spin_unlock(&block_group->lock);
90
91 return inode;
92}
93
94int create_free_space_inode(struct btrfs_root *root,
95 struct btrfs_trans_handle *trans,
96 struct btrfs_block_group_cache *block_group,
97 struct btrfs_path *path)
98{
99 struct btrfs_key key;
100 struct btrfs_disk_key disk_key;
101 struct btrfs_free_space_header *header;
102 struct btrfs_inode_item *inode_item;
103 struct extent_buffer *leaf;
104 u64 objectid;
105 int ret;
106
107 ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
108 if (ret < 0)
109 return ret;
110
111 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
112 if (ret)
113 return ret;
114
115 leaf = path->nodes[0];
116 inode_item = btrfs_item_ptr(leaf, path->slots[0],
117 struct btrfs_inode_item);
118 btrfs_item_key(leaf, &disk_key, path->slots[0]);
119 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
120 sizeof(*inode_item));
121 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
122 btrfs_set_inode_size(leaf, inode_item, 0);
123 btrfs_set_inode_nbytes(leaf, inode_item, 0);
124 btrfs_set_inode_uid(leaf, inode_item, 0);
125 btrfs_set_inode_gid(leaf, inode_item, 0);
126 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
127 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
128 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
129 btrfs_set_inode_nlink(leaf, inode_item, 1);
130 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
131 btrfs_set_inode_block_group(leaf, inode_item,
132 block_group->key.objectid);
133 btrfs_mark_buffer_dirty(leaf);
134 btrfs_release_path(root, path);
135
136 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
137 key.offset = block_group->key.objectid;
138 key.type = 0;
139
140 ret = btrfs_insert_empty_item(trans, root, path, &key,
141 sizeof(struct btrfs_free_space_header));
142 if (ret < 0) {
143 btrfs_release_path(root, path);
144 return ret;
145 }
146 leaf = path->nodes[0];
147 header = btrfs_item_ptr(leaf, path->slots[0],
148 struct btrfs_free_space_header);
149 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
150 btrfs_set_free_space_key(leaf, header, &disk_key);
151 btrfs_mark_buffer_dirty(leaf);
152 btrfs_release_path(root, path);
153
154 return 0;
155}
156
157int btrfs_truncate_free_space_cache(struct btrfs_root *root,
158 struct btrfs_trans_handle *trans,
159 struct btrfs_path *path,
160 struct inode *inode)
161{
162 loff_t oldsize;
163 int ret = 0;
164
165 trans->block_rsv = root->orphan_block_rsv;
166 ret = btrfs_block_rsv_check(trans, root,
167 root->orphan_block_rsv,
168 0, 5);
169 if (ret)
170 return ret;
171
172 oldsize = i_size_read(inode);
173 btrfs_i_size_write(inode, 0);
174 truncate_pagecache(inode, oldsize, 0);
175
176 /*
177 * We don't need an orphan item because truncating the free space cache
178 * will never be split across transactions.
179 */
180 ret = btrfs_truncate_inode_items(trans, root, inode,
181 0, BTRFS_EXTENT_DATA_KEY);
182 if (ret) {
183 WARN_ON(1);
184 return ret;
185 }
186
187 return btrfs_update_inode(trans, root, inode);
188}
189
190static int readahead_cache(struct inode *inode)
191{
192 struct file_ra_state *ra;
193 unsigned long last_index;
194
195 ra = kzalloc(sizeof(*ra), GFP_NOFS);
196 if (!ra)
197 return -ENOMEM;
198
199 file_ra_state_init(ra, inode->i_mapping);
200 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
201
202 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
203
204 kfree(ra);
205
206 return 0;
207}
208
/*
 * Populate a block group's in-memory free space tree from its on-disk
 * free space cache inode.
 *
 * Returns 1 if the cache was successfully loaded, 0 if it was absent,
 * stale, or could not be trusted (in which case the caller must fall
 * back to scanning the extent tree).  On corruption the block group is
 * marked BTRFS_DC_CLEAR so the cache gets rewritten.
 *
 * On-disk layout (as read here): page 0 starts with one u32 checksum
 * per page followed by a u64 generation, then entries; subsequent
 * pages hold either packed entries or one full-page bitmap each.
 */
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct page *page;
	struct btrfs_path *path;
	u32 *checksums = NULL, *crc;
	char *disk_crcs = NULL;
	struct btrfs_key key;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u32 cur_crc = ~(u32)0;
	pgoff_t index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int ret = 0;

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	smp_mb();
	if (fs_info->closing)
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	INIT_LIST_HEAD(&bitmaps);

	/* Allocation failure is treated as "no cache", not an error. */
	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode)) {
		btrfs_free_path(path);
		goto out;
	}

	/* Look up the free space header item for this block group. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		btrfs_free_path(path);
		goto out;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_free_path(path);

	/*
	 * Generation mismatch means the cache was not written out in the
	 * same transaction as the header item — it is stale, ignore it.
	 */
	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu) for "
		       "block group %llu\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation,
		       (unsigned long long)block_group->key.objectid);
		goto out;
	}

	if (!num_entries)
		goto out;

	/* Setup everything for doing checksumming */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!checksums)
		goto out;
	/* Page 0 data starts after the checksum array and the generation. */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
	if (!disk_crcs)
		goto out;

	/* Best-effort readahead; a failure here is not fatal. */
	ret = readahead_cache(inode);
	if (ret) {
		ret = 0;
		goto out;
	}

	/* One iteration per page until all entries and bitmaps are read. */
	while (1) {
		struct btrfs_free_space_entry *entry;
		struct btrfs_free_space *e;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;
		int need_loop = 0;

		if (!num_entries && !num_bitmaps)
			break;

		/* Page 0 carries the checksum/generation preamble. */
		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			ret = 0;
			goto free_cache;
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache: %llu\n",
				       (unsigned long long)
				       block_group->key.objectid);
				goto free_cache;
			}
		}
		addr = kmap(page);

		if (index == 0) {
			u64 *gen;

			/*
			 * Stash the on-disk checksum array and re-verify the
			 * generation embedded in the data itself.
			 */
			memcpy(disk_crcs, addr, first_page_offset);
			gen = addr + (sizeof(u32) * num_checksums);
			if (*gen != BTRFS_I(inode)->generation) {
				printk(KERN_ERR "btrfs: space cache generation"
				       " (%llu) does not match inode (%llu) "
				       "for block group %llu\n",
				       (unsigned long long)*gen,
				       (unsigned long long)
				       BTRFS_I(inode)->generation,
				       (unsigned long long)
				       block_group->key.objectid);
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}
			crc = (u32 *)disk_crcs;
		}
		entry = addr + start_offset;

		/* First lets check our crc before we do anything fun */
		cur_crc = ~(u32)0;
		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
					  PAGE_CACHE_SIZE - start_offset);
		btrfs_csum_final(cur_crc, (char *)&cur_crc);
		if (cur_crc != *crc) {
			printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
			       "block group %llu\n", index,
			       (unsigned long long)block_group->key.objectid);
			kunmap(page);
			unlock_page(page);
			page_cache_release(page);
			goto free_cache;
		}
		crc++;

		/* Consume as many packed entries as fit in this page. */
		while (1) {
			if (!num_entries)
				break;

			need_loop = 1;
			e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
			if (!e) {
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			e->offset = le64_to_cpu(entry->offset);
			e->bytes = le64_to_cpu(entry->bytes);
			/* A zero-length entry means the cache is corrupt. */
			if (!e->bytes) {
				kunmap(page);
				kfree(e);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
				spin_lock(&block_group->tree_lock);
				ret = link_free_space(block_group, e);
				spin_unlock(&block_group->tree_lock);
				BUG_ON(ret);
			} else {
				/*
				 * Bitmap entry: the bitmap payload lives in a
				 * later page; queue it on the local list to be
				 * filled in when that page is reached.
				 * NOTE(review): link_free_space's return is
				 * not checked on this branch, unlike the
				 * extent branch above — worth confirming.
				 */
				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
				if (!e->bitmap) {
					kunmap(page);
					kfree(e);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				spin_lock(&block_group->tree_lock);
				ret = link_free_space(block_group, e);
				block_group->total_bitmaps++;
				recalculate_thresholds(block_group);
				spin_unlock(&block_group->tree_lock);
				list_add_tail(&e->list, &bitmaps);
			}

			num_entries--;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}

		/*
		 * We read an entry out of this page, we need to move on to the
		 * next page.
		 */
		if (need_loop) {
			kunmap(page);
			goto next;
		}

		/*
		 * We add the bitmaps at the end of the entries in order that
		 * the bitmap entries are added to the cache.
		 */
		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
		list_del_init(&e->list);
		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
		kunmap(page);
		num_bitmaps--;
next:
		unlock_page(page);
		page_cache_release(page);
		index++;
	}

	ret = 1;
out:
	kfree(checksums);
	kfree(disk_crcs);
	iput(inode);
	return ret;

free_cache:
	/* This cache is bogus, make sure it gets cleared */
	spin_lock(&block_group->lock);
	block_group->disk_cache_state = BTRFS_DC_CLEAR;
	spin_unlock(&block_group->lock);
	btrfs_remove_free_space_cache(block_group);
	goto out;
}
483
/*
 * Serialize a block group's in-memory free space tree into its free
 * space cache inode.
 *
 * Returns 1 if the cache was written, 0 if it was skipped or the write
 * failed (in which case the block group is marked BTRFS_DC_ERROR and
 * its cached pages are invalidated).
 *
 * Write layout: page 0 starts with a u32 checksum per page followed by
 * the transaction id (generation), then extent/bitmap entries; bitmap
 * payloads each occupy a full page after the entry pages; remaining
 * pages are zeroed.  Every page gets a crc32 checksum, all of which
 * are written into page 0 at the end.
 */
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 bytes = 0;
	u32 *crc, *checksums;
	pgoff_t index = 0, last_index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int entries = 0;
	int bitmaps = 0;
	int ret = 0;

	/* The cache inodes all live in the tree root. */
	root = root->fs_info->tree_root;

	INIT_LIST_HEAD(&bitmap_list);

	/* Only write the cache once it has been fully set up. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	if (!i_size_read(inode)) {
		iput(inode);
		return 0;
	}

	/* Flush any previous contents before rewriting the cache. */
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	/* We need a checksum per page. */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!crc) {
		iput(inode);
		return 0;
	}

	/* Since the first page has all of our checksums and our generation we
	 * need to calculate the offset into the page that we can start writing
	 * our entries.
	 */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

	/* Nothing to write if the free space tree is empty. */
	node = rb_first(&block_group->free_space_offset);
	if (!node)
		goto out_free;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point. Just putting this here so people
	 * know and don't freak out.
	 */
	while (index <= last_index) {
		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			pgoff_t i = 0;

			/*
			 * Undo: unlock and drop BOTH refs (grab + find) on
			 * every page locked so far.
			 */
			while (i < index) {
				page = find_get_page(inode->i_mapping, i);
				unlock_page(page);
				page_cache_release(page);
				page_cache_release(page);
				i++;
			}
			goto out_free;
		}
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;

		/* Page 0 reserves room for checksums + generation. */
		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		entry = addr + start_offset;

		memset(addr, 0, PAGE_CACHE_SIZE);
		/* Pack entries into this page until it is full. */
		while (1) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				/* Bitmap payloads are written after entries. */
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node)
				break;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}
		/* Checksum the page's payload and remember it for page 0. */
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
				       PAGE_CACHE_SIZE - start_offset);
		kunmap(page);

		btrfs_csum_final(*crc, (char *)crc);
		crc++;

		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);

		/*
		 * We need to release our reference we got for grab_cache_page,
		 * except for the first page which will hold our checksums, we
		 * do that below.
		 */
		if (index != 0) {
			unlock_page(page);
			page_cache_release(page);
		}

		/* Drop the ref taken by find_get_page above. */
		page_cache_release(page);

		index++;
	} while (node);

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		page = find_get_page(inode->i_mapping, index);

		/* Each bitmap fills an entire page. */
		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
		kunmap(page);
		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		/* Drop both refs: grab_cache_page's and find_get_page's. */
		page_cache_release(page);
		page_cache_release(page);
		list_del_init(&entry->list);
		index++;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index <= last_index) {
		void *addr;

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);

	/* Write the checksums and trans id to the first page */
	{
		void *addr;
		u64 *gen;

		/* Page 0 still holds its grab_cache_page lock and ref. */
		page = find_get_page(inode->i_mapping, 0);

		addr = kmap(page);
		memcpy(addr, checksums, sizeof(u32) * num_checksums);
		gen = addr + (sizeof(u32) * num_checksums);
		*gen = trans->transid;
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
	}
	BTRFS_I(inode)->generation = trans->transid;

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	filemap_write_and_wait(inode->i_mapping);

	/* Update the free space header item to match what we just wrote. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = 0;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out_free;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		/*
		 * Exact key not found: check whether the previous item is
		 * actually our header; otherwise treat the write as failed.
		 */
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != block_group->key.objectid) {
			ret = 0;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(root, path);
			goto out_free;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	ret = 1;

out_free:
	/* ret == 0 means the write failed: poison the cache state. */
	if (ret == 0) {
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		BTRFS_I(inode)->generation = 0;
	}
	kfree(checksums);
	btrfs_update_inode(trans, root, inode);
	iput(inode);
	return ret;
}
780
30static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 781static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
31 u64 offset) 782 u64 offset)
32{ 783{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path);
33int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path);
37
38int btrfs_truncate_free_space_cache(struct btrfs_root *root,
39 struct btrfs_trans_handle *trans,
40 struct btrfs_path *path,
41 struct inode *inode);
42int load_free_space_cache(struct btrfs_fs_info *fs_info,
43 struct btrfs_block_group_cache *block_group);
44int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path);
30int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
31 u64 bytenr, u64 size); 49 u64 bytenr, u64 size);
32int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce0..558cac2dfa54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
319 struct btrfs_root *root = BTRFS_I(inode)->root; 319 struct btrfs_root *root = BTRFS_I(inode)->root;
320 struct btrfs_trans_handle *trans; 320 struct btrfs_trans_handle *trans;
321 u64 num_bytes; 321 u64 num_bytes;
322 u64 orig_start;
323 u64 disk_num_bytes;
324 u64 blocksize = root->sectorsize; 322 u64 blocksize = root->sectorsize;
325 u64 actual_end; 323 u64 actual_end;
326 u64 isize = i_size_read(inode); 324 u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
335 int i; 333 int i;
336 int will_compress; 334 int will_compress;
337 335
338 orig_start = start;
339
340 actual_end = min_t(u64, isize, end + 1); 336 actual_end = min_t(u64, isize, end + 1);
341again: 337again:
342 will_compress = 0; 338 will_compress = 0;
@@ -371,7 +367,6 @@ again:
371 total_compressed = min(total_compressed, max_uncompressed); 367 total_compressed = min(total_compressed, max_uncompressed);
372 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 368 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
373 num_bytes = max(blocksize, num_bytes); 369 num_bytes = max(blocksize, num_bytes);
374 disk_num_bytes = num_bytes;
375 total_in = 0; 370 total_in = 0;
376 ret = 0; 371 ret = 0;
377 372
@@ -467,7 +462,6 @@ again:
467 if (total_compressed >= total_in) { 462 if (total_compressed >= total_in) {
468 will_compress = 0; 463 will_compress = 0;
469 } else { 464 } else {
470 disk_num_bytes = total_compressed;
471 num_bytes = total_in; 465 num_bytes = total_in;
472 } 466 }
473 } 467 }
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
757 u64 disk_num_bytes; 751 u64 disk_num_bytes;
758 u64 cur_alloc_size; 752 u64 cur_alloc_size;
759 u64 blocksize = root->sectorsize; 753 u64 blocksize = root->sectorsize;
760 u64 actual_end;
761 u64 isize = i_size_read(inode);
762 struct btrfs_key ins; 754 struct btrfs_key ins;
763 struct extent_map *em; 755 struct extent_map *em;
764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 756 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
765 int ret = 0; 757 int ret = 0;
766 758
759 BUG_ON(root == root->fs_info->tree_root);
767 trans = btrfs_join_transaction(root, 1); 760 trans = btrfs_join_transaction(root, 1);
768 BUG_ON(!trans); 761 BUG_ON(!trans);
769 btrfs_set_trans_block_group(trans, inode); 762 btrfs_set_trans_block_group(trans, inode);
770 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 763 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
771 764
772 actual_end = min_t(u64, isize, end + 1);
773
774 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 765 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
775 num_bytes = max(blocksize, num_bytes); 766 num_bytes = max(blocksize, num_bytes);
776 disk_num_bytes = num_bytes; 767 disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1035 int type; 1026 int type;
1036 int nocow; 1027 int nocow;
1037 int check_prev = 1; 1028 int check_prev = 1;
1029 bool nolock = false;
1038 1030
1039 path = btrfs_alloc_path(); 1031 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1032 BUG_ON(!path);
1041 trans = btrfs_join_transaction(root, 1); 1033 if (root == root->fs_info->tree_root) {
1034 nolock = true;
1035 trans = btrfs_join_transaction_nolock(root, 1);
1036 } else {
1037 trans = btrfs_join_transaction(root, 1);
1038 }
1042 BUG_ON(!trans); 1039 BUG_ON(!trans);
1043 1040
1044 cow_start = (u64)-1; 1041 cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
1211 BUG_ON(ret); 1208 BUG_ON(ret);
1212 } 1209 }
1213 1210
1214 ret = btrfs_end_transaction(trans, root); 1211 if (nolock) {
1215 BUG_ON(ret); 1212 ret = btrfs_end_transaction_nolock(trans, root);
1213 BUG_ON(ret);
1214 } else {
1215 ret = btrfs_end_transaction(trans, root);
1216 BUG_ON(ret);
1217 }
1216 btrfs_free_path(path); 1218 btrfs_free_path(path);
1217 return 0; 1219 return 0;
1218} 1220}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1291 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1292 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start; 1293 u64 len = state->end + 1 - state->start;
1294 int do_list = (root->root_key.objectid !=
1295 BTRFS_ROOT_TREE_OBJECTID);
1292 1296
1293 if (*bits & EXTENT_FIRST_DELALLOC) 1297 if (*bits & EXTENT_FIRST_DELALLOC)
1294 *bits &= ~EXTENT_FIRST_DELALLOC; 1298 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1298 spin_lock(&root->fs_info->delalloc_lock); 1302 spin_lock(&root->fs_info->delalloc_lock);
1299 BTRFS_I(inode)->delalloc_bytes += len; 1303 BTRFS_I(inode)->delalloc_bytes += len;
1300 root->fs_info->delalloc_bytes += len; 1304 root->fs_info->delalloc_bytes += len;
1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1305 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1306 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1303 &root->fs_info->delalloc_inodes); 1307 &root->fs_info->delalloc_inodes);
1304 } 1308 }
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1325 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1326 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start; 1327 u64 len = state->end + 1 - state->start;
1328 int do_list = (root->root_key.objectid !=
1329 BTRFS_ROOT_TREE_OBJECTID);
1324 1330
1325 if (*bits & EXTENT_FIRST_DELALLOC) 1331 if (*bits & EXTENT_FIRST_DELALLOC)
1326 *bits &= ~EXTENT_FIRST_DELALLOC; 1332 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1330 if (*bits & EXTENT_DO_ACCOUNTING) 1336 if (*bits & EXTENT_DO_ACCOUNTING)
1331 btrfs_delalloc_release_metadata(inode, len); 1337 btrfs_delalloc_release_metadata(inode, len);
1332 1338
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1339 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1340 && do_list)
1334 btrfs_free_reserved_data_space(inode, len); 1341 btrfs_free_reserved_data_space(inode, len);
1335 1342
1336 spin_lock(&root->fs_info->delalloc_lock); 1343 spin_lock(&root->fs_info->delalloc_lock);
1337 root->fs_info->delalloc_bytes -= len; 1344 root->fs_info->delalloc_bytes -= len;
1338 BTRFS_I(inode)->delalloc_bytes -= len; 1345 BTRFS_I(inode)->delalloc_bytes -= len;
1339 1346
1340 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1347 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1348 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1349 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1343 } 1350 }
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1372 1379
1373 if (map_length < length + size) 1380 if (map_length < length + size)
1374 return 1; 1381 return 1;
1375 return 0; 1382 return ret;
1376} 1383}
1377 1384
1378/* 1385/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1426 1433
1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1434 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1428 1435
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1436 if (root == root->fs_info->tree_root)
1437 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1438 else
1439 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1440 BUG_ON(ret);
1431 1441
1432 if (!(rw & REQ_WRITE)) { 1442 if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1662 struct extent_state *cached_state = NULL; 1672 struct extent_state *cached_state = NULL;
1663 int compressed = 0; 1673 int compressed = 0;
1664 int ret; 1674 int ret;
1675 bool nolock = false;
1665 1676
1666 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1677 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1667 end - start + 1); 1678 end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1669 return 0; 1680 return 0;
1670 BUG_ON(!ordered_extent); 1681 BUG_ON(!ordered_extent);
1671 1682
1683 nolock = (root == root->fs_info->tree_root);
1684
1672 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1685 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1673 BUG_ON(!list_empty(&ordered_extent->list)); 1686 BUG_ON(!list_empty(&ordered_extent->list));
1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1687 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1675 if (!ret) { 1688 if (!ret) {
1676 trans = btrfs_join_transaction(root, 1); 1689 if (nolock)
1690 trans = btrfs_join_transaction_nolock(root, 1);
1691 else
1692 trans = btrfs_join_transaction(root, 1);
1693 BUG_ON(!trans);
1677 btrfs_set_trans_block_group(trans, inode); 1694 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1695 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1679 ret = btrfs_update_inode(trans, root, inode); 1696 ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1686 ordered_extent->file_offset + ordered_extent->len - 1, 1703 ordered_extent->file_offset + ordered_extent->len - 1,
1687 0, &cached_state, GFP_NOFS); 1704 0, &cached_state, GFP_NOFS);
1688 1705
1689 trans = btrfs_join_transaction(root, 1); 1706 if (nolock)
1707 trans = btrfs_join_transaction_nolock(root, 1);
1708 else
1709 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode); 1710 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1692 1712
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1700 ordered_extent->len); 1720 ordered_extent->len);
1701 BUG_ON(ret); 1721 BUG_ON(ret);
1702 } else { 1722 } else {
1723 BUG_ON(root == root->fs_info->tree_root);
1703 ret = insert_reserved_file_extent(trans, inode, 1724 ret = insert_reserved_file_extent(trans, inode,
1704 ordered_extent->file_offset, 1725 ordered_extent->file_offset,
1705 ordered_extent->start, 1726 ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1724 ret = btrfs_update_inode(trans, root, inode); 1745 ret = btrfs_update_inode(trans, root, inode);
1725 BUG_ON(ret); 1746 BUG_ON(ret);
1726out: 1747out:
1727 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1748 if (nolock) {
1728 if (trans) 1749 if (trans)
1729 btrfs_end_transaction(trans, root); 1750 btrfs_end_transaction_nolock(trans, root);
1751 } else {
1752 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1753 if (trans)
1754 btrfs_end_transaction(trans, root);
1755 }
1756
1730 /* once for us */ 1757 /* once for us */
1731 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1732 /* once for the tree */ 1759 /* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2237{ 2264{
2238 struct btrfs_path *path; 2265 struct btrfs_path *path;
2239 struct extent_buffer *leaf; 2266 struct extent_buffer *leaf;
2240 struct btrfs_item *item;
2241 struct btrfs_key key, found_key; 2267 struct btrfs_key key, found_key;
2242 struct btrfs_trans_handle *trans; 2268 struct btrfs_trans_handle *trans;
2243 struct inode *inode; 2269 struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2275 2301
2276 /* pull out the item */ 2302 /* pull out the item */
2277 leaf = path->nodes[0]; 2303 leaf = path->nodes[0];
2278 item = btrfs_item_nr(leaf, path->slots[0]);
2279 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2304 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2280 2305
2281 /* make sure the item matches what we want */ 2306 /* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2651 2676
2652 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2677 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2653 dir, index); 2678 dir, index);
2654 BUG_ON(ret); 2679 if (ret == -ENOENT)
2680 ret = 0;
2655err: 2681err:
2656 btrfs_free_path(path); 2682 btrfs_free_path(path);
2657 if (ret) 2683 if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
2672{ 2698{
2673 struct extent_buffer *eb; 2699 struct extent_buffer *eb;
2674 int level; 2700 int level;
2675 int ret;
2676 u64 refs = 1; 2701 u64 refs = 1;
2702 int uninitialized_var(ret);
2677 2703
2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2704 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2679 if (!path->nodes[level]) 2705 if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
2686 if (refs > 1) 2712 if (refs > 1)
2687 return 1; 2713 return 1;
2688 } 2714 }
2689 return 0; 2715 return ret; /* XXX callers? */
2690} 2716}
2691 2717
2692/* 2718/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3196 3222
3197 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3223 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3198 3224
3199 if (root->ref_cows) 3225 if (root->ref_cows || root == root->fs_info->tree_root)
3200 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3226 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3201 3227
3202 path = btrfs_alloc_path(); 3228 path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
3344 } else { 3370 } else {
3345 break; 3371 break;
3346 } 3372 }
3347 if (found_extent && root->ref_cows) { 3373 if (found_extent && (root->ref_cows ||
3374 root == root->fs_info->tree_root)) {
3348 btrfs_set_path_blocking(path); 3375 btrfs_set_path_blocking(path);
3349 ret = btrfs_free_extent(trans, root, extent_start, 3376 ret = btrfs_free_extent(trans, root, extent_start,
3350 extent_num_bytes, 0, 3377 extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
3675 int ret; 3702 int ret;
3676 3703
3677 truncate_inode_pages(&inode->i_data, 0); 3704 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) 3705 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3706 root == root->fs_info->tree_root))
3679 goto no_delete; 3707 goto no_delete;
3680 3708
3681 if (is_bad_inode(inode)) { 3709 if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
3888 } 3916 }
3889 spin_unlock(&root->inode_lock); 3917 spin_unlock(&root->inode_lock);
3890 3918
3891 if (empty && btrfs_root_refs(&root->root_item) == 0) { 3919 /*
3920 * Free space cache has inodes in the tree root, but the tree root has a
3921 * root_refs of 0, so this could end up dropping the tree root as a
3922 * snapshot, so we need the extra !root->fs_info->tree_root check to
3923 * make sure we don't drop it.
3924 */
3925 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
3926 root != root->fs_info->tree_root) {
3892 synchronize_srcu(&root->fs_info->subvol_srcu); 3927 synchronize_srcu(&root->fs_info->subvol_srcu);
3893 spin_lock(&root->inode_lock); 3928 spin_lock(&root->inode_lock);
3894 empty = RB_EMPTY_ROOT(&root->inode_tree); 3929 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4282 struct btrfs_root *root = BTRFS_I(inode)->root; 4317 struct btrfs_root *root = BTRFS_I(inode)->root;
4283 struct btrfs_trans_handle *trans; 4318 struct btrfs_trans_handle *trans;
4284 int ret = 0; 4319 int ret = 0;
4320 bool nolock = false;
4285 4321
4286 if (BTRFS_I(inode)->dummy_inode) 4322 if (BTRFS_I(inode)->dummy_inode)
4287 return 0; 4323 return 0;
4288 4324
4325 smp_mb();
4326 nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
4327
4289 if (wbc->sync_mode == WB_SYNC_ALL) { 4328 if (wbc->sync_mode == WB_SYNC_ALL) {
4290 trans = btrfs_join_transaction(root, 1); 4329 if (nolock)
4330 trans = btrfs_join_transaction_nolock(root, 1);
4331 else
4332 trans = btrfs_join_transaction(root, 1);
4291 btrfs_set_trans_block_group(trans, inode); 4333 btrfs_set_trans_block_group(trans, inode);
4292 ret = btrfs_commit_transaction(trans, root); 4334 if (nolock)
4335 ret = btrfs_end_transaction_nolock(trans, root);
4336 else
4337 ret = btrfs_commit_transaction(trans, root);
4293 } 4338 }
4294 return ret; 4339 return ret;
4295} 4340}
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5645 struct btrfs_root *root = BTRFS_I(inode)->root; 5690 struct btrfs_root *root = BTRFS_I(inode)->root;
5646 struct btrfs_dio_private *dip; 5691 struct btrfs_dio_private *dip;
5647 struct bio_vec *bvec = bio->bi_io_vec; 5692 struct bio_vec *bvec = bio->bi_io_vec;
5648 u64 start;
5649 int skip_sum; 5693 int skip_sum;
5650 int write = rw & REQ_WRITE; 5694 int write = rw & REQ_WRITE;
5651 int ret = 0; 5695 int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5671 dip->inode = inode; 5715 dip->inode = inode;
5672 dip->logical_offset = file_offset; 5716 dip->logical_offset = file_offset;
5673 5717
5674 start = dip->logical_offset;
5675 dip->bytes = 0; 5718 dip->bytes = 0;
5676 do { 5719 do {
5677 dip->bytes += bvec->bv_len; 5720 dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
6308 spin_unlock(&root->fs_info->ordered_extent_lock); 6351 spin_unlock(&root->fs_info->ordered_extent_lock);
6309 } 6352 }
6310 6353
6354 if (root == root->fs_info->tree_root) {
6355 struct btrfs_block_group_cache *block_group;
6356
6357 block_group = btrfs_lookup_block_group(root->fs_info,
6358 BTRFS_I(inode)->block_group);
6359 if (block_group && block_group->inode == inode) {
6360 spin_lock(&block_group->lock);
6361 block_group->inode = NULL;
6362 spin_unlock(&block_group->lock);
6363 btrfs_put_block_group(block_group);
6364 } else if (block_group) {
6365 btrfs_put_block_group(block_group);
6366 }
6367 }
6368
6311 spin_lock(&root->orphan_lock); 6369 spin_lock(&root->orphan_lock);
6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6370 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6371 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
6340{ 6398{
6341 struct btrfs_root *root = BTRFS_I(inode)->root; 6399 struct btrfs_root *root = BTRFS_I(inode)->root;
6342 6400
6343 if (btrfs_root_refs(&root->root_item) == 0) 6401 if (btrfs_root_refs(&root->root_item) == 0 &&
6402 root != root->fs_info->tree_root)
6344 return 1; 6403 return 1;
6345 else 6404 else
6346 return generic_drop_inode(inode); 6405 return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
6609 return 0; 6668 return 0;
6610} 6669}
6611 6670
6612int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) 6671int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
6672 int sync)
6613{ 6673{
6614 struct btrfs_inode *binode; 6674 struct btrfs_inode *binode;
6615 struct inode *inode = NULL; 6675 struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6631 spin_unlock(&root->fs_info->delalloc_lock); 6691 spin_unlock(&root->fs_info->delalloc_lock);
6632 6692
6633 if (inode) { 6693 if (inode) {
6634 write_inode_now(inode, 0); 6694 if (sync) {
6695 filemap_write_and_wait(inode->i_mapping);
6696 /*
6697 * We have to do this because compression doesn't
6698 * actually set PG_writeback until it submits the pages
6699 * for IO, which happens in an async thread, so we could
6700 * race and not actually wait for any writeback pages
6701 * because they've not been submitted yet. Technically
6702 * this could still be the case for the ordered stuff
6703 * since the async thread may not have started to do its
6704 * work yet. If this becomes the case then we need to
6705 * figure out a way to make sure that in writepage we
6706 * wait for any async pages to be submitted before
6707 * returning so that fdatawait does what its supposed to
6708 * do.
6709 */
6710 btrfs_wait_ordered_range(inode, 0, (u64)-1);
6711 } else {
6712 filemap_flush(inode->i_mapping);
6713 }
6635 if (delay_iput) 6714 if (delay_iput)
6636 btrfs_add_delayed_iput(inode); 6715 btrfs_add_delayed_iput(inode);
6637 else 6716 else
@@ -6757,27 +6836,33 @@ out_unlock:
6757 return err; 6836 return err;
6758} 6837}
6759 6838
6760int btrfs_prealloc_file_range(struct inode *inode, int mode, 6839static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6761 u64 start, u64 num_bytes, u64 min_size, 6840 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint) 6841 loff_t actual_len, u64 *alloc_hint,
6842 struct btrfs_trans_handle *trans)
6763{ 6843{
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root = BTRFS_I(inode)->root; 6844 struct btrfs_root *root = BTRFS_I(inode)->root;
6766 struct btrfs_key ins; 6845 struct btrfs_key ins;
6767 u64 cur_offset = start; 6846 u64 cur_offset = start;
6768 int ret = 0; 6847 int ret = 0;
6848 bool own_trans = true;
6769 6849
6850 if (trans)
6851 own_trans = false;
6770 while (num_bytes > 0) { 6852 while (num_bytes > 0) {
6771 trans = btrfs_start_transaction(root, 3); 6853 if (own_trans) {
6772 if (IS_ERR(trans)) { 6854 trans = btrfs_start_transaction(root, 3);
6773 ret = PTR_ERR(trans); 6855 if (IS_ERR(trans)) {
6774 break; 6856 ret = PTR_ERR(trans);
6857 break;
6858 }
6775 } 6859 }
6776 6860
6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6861 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1); 6862 0, *alloc_hint, (u64)-1, &ins, 1);
6779 if (ret) { 6863 if (ret) {
6780 btrfs_end_transaction(trans, root); 6864 if (own_trans)
6865 btrfs_end_transaction(trans, root);
6781 break; 6866 break;
6782 } 6867 }
6783 6868
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
6810 ret = btrfs_update_inode(trans, root, inode); 6895 ret = btrfs_update_inode(trans, root, inode);
6811 BUG_ON(ret); 6896 BUG_ON(ret);
6812 6897
6813 btrfs_end_transaction(trans, root); 6898 if (own_trans)
6899 btrfs_end_transaction(trans, root);
6814 } 6900 }
6815 return ret; 6901 return ret;
6816} 6902}
6817 6903
6904int btrfs_prealloc_file_range(struct inode *inode, int mode,
6905 u64 start, u64 num_bytes, u64 min_size,
6906 loff_t actual_len, u64 *alloc_hint)
6907{
6908 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6909 min_size, actual_len, alloc_hint,
6910 NULL);
6911}
6912
6913int btrfs_prealloc_file_range_trans(struct inode *inode,
6914 struct btrfs_trans_handle *trans, int mode,
6915 u64 start, u64 num_bytes, u64 min_size,
6916 loff_t actual_len, u64 *alloc_hint)
6917{
6918 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6919 min_size, actual_len, alloc_hint, trans);
6920}
6921
6818static long btrfs_fallocate(struct inode *inode, int mode, 6922static long btrfs_fallocate(struct inode *inode, int mode,
6819 loff_t offset, loff_t len) 6923 loff_t offset, loff_t len)
6820{ 6924{
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58dbe..463d91b4dd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
224 224
225static noinline int create_subvol(struct btrfs_root *root, 225static noinline int create_subvol(struct btrfs_root *root,
226 struct dentry *dentry, 226 struct dentry *dentry,
227 char *name, int namelen) 227 char *name, int namelen,
228 u64 *async_transid)
228{ 229{
229 struct btrfs_trans_handle *trans; 230 struct btrfs_trans_handle *trans;
230 struct btrfs_key key; 231 struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
338 339
339 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
340fail: 341fail:
341 err = btrfs_commit_transaction(trans, root); 342 if (async_transid) {
343 *async_transid = trans->transid;
344 err = btrfs_commit_transaction_async(trans, root, 1);
345 } else {
346 err = btrfs_commit_transaction(trans, root);
347 }
342 if (err && !ret) 348 if (err && !ret)
343 ret = err; 349 ret = err;
344 return ret; 350 return ret;
345} 351}
346 352
347static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) 353static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
354 char *name, int namelen, u64 *async_transid)
348{ 355{
349 struct inode *inode; 356 struct inode *inode;
350 struct btrfs_pending_snapshot *pending_snapshot; 357 struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
373 380
374 list_add(&pending_snapshot->list, 381 list_add(&pending_snapshot->list,
375 &trans->transaction->pending_snapshots); 382 &trans->transaction->pending_snapshots);
376 ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); 383 if (async_transid) {
384 *async_transid = trans->transid;
385 ret = btrfs_commit_transaction_async(trans,
386 root->fs_info->extent_root, 1);
387 } else {
388 ret = btrfs_commit_transaction(trans,
389 root->fs_info->extent_root);
390 }
377 BUG_ON(ret); 391 BUG_ON(ret);
378 392
379 ret = pending_snapshot->error; 393 ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
395 return ret; 409 return ret;
396} 410}
397 411
412/* copy of check_sticky in fs/namei.c()
413* It's inline, so penalty for filesystems that don't use sticky bit is
414* minimal.
415*/
416static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
417{
418 uid_t fsuid = current_fsuid();
419
420 if (!(dir->i_mode & S_ISVTX))
421 return 0;
422 if (inode->i_uid == fsuid)
423 return 0;
424 if (dir->i_uid == fsuid)
425 return 0;
426 return !capable(CAP_FOWNER);
427}
428
429/* copy of may_delete in fs/namei.c()
430 * Check whether we can remove a link victim from directory dir, check
431 * whether the type of victim is right.
432 * 1. We can't do it if dir is read-only (done in permission())
433 * 2. We should have write and exec permissions on dir
434 * 3. We can't remove anything from append-only dir
435 * 4. We can't do anything with immutable dir (done in permission())
436 * 5. If the sticky bit on dir is set we should either
437 * a. be owner of dir, or
438 * b. be owner of victim, or
439 * c. have CAP_FOWNER capability
440 * 6. If the victim is append-only or immutable we can't do antyhing with
441 * links pointing to it.
442 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
443 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
444 * 9. We can't remove a root or mountpoint.
445 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
446 * nfs_async_unlink().
447 */
448
449static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
450{
451 int error;
452
453 if (!victim->d_inode)
454 return -ENOENT;
455
456 BUG_ON(victim->d_parent->d_inode != dir);
457 audit_inode_child(victim, dir);
458
459 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
460 if (error)
461 return error;
462 if (IS_APPEND(dir))
463 return -EPERM;
464 if (btrfs_check_sticky(dir, victim->d_inode)||
465 IS_APPEND(victim->d_inode)||
466 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
467 return -EPERM;
468 if (isdir) {
469 if (!S_ISDIR(victim->d_inode->i_mode))
470 return -ENOTDIR;
471 if (IS_ROOT(victim))
472 return -EBUSY;
473 } else if (S_ISDIR(victim->d_inode->i_mode))
474 return -EISDIR;
475 if (IS_DEADDIR(dir))
476 return -ENOENT;
477 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
478 return -EBUSY;
479 return 0;
480}
481
398/* copy of may_create in fs/namei.c() */ 482/* copy of may_create in fs/namei.c() */
399static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 483static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
400{ 484{
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
412 */ 496 */
413static noinline int btrfs_mksubvol(struct path *parent, 497static noinline int btrfs_mksubvol(struct path *parent,
414 char *name, int namelen, 498 char *name, int namelen,
415 struct btrfs_root *snap_src) 499 struct btrfs_root *snap_src,
500 u64 *async_transid)
416{ 501{
417 struct inode *dir = parent->dentry->d_inode; 502 struct inode *dir = parent->dentry->d_inode;
418 struct dentry *dentry; 503 struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
443 goto out_up_read; 528 goto out_up_read;
444 529
445 if (snap_src) { 530 if (snap_src) {
446 error = create_snapshot(snap_src, dentry); 531 error = create_snapshot(snap_src, dentry,
532 name, namelen, async_transid);
447 } else { 533 } else {
448 error = create_subvol(BTRFS_I(dir)->root, dentry, 534 error = create_subvol(BTRFS_I(dir)->root, dentry,
449 name, namelen); 535 name, namelen, async_transid);
450 } 536 }
451 if (!error) 537 if (!error)
452 fsnotify_mkdir(dir, dentry); 538 fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
708 char *sizestr; 794 char *sizestr;
709 char *devstr = NULL; 795 char *devstr = NULL;
710 int ret = 0; 796 int ret = 0;
711 int namelen;
712 int mod = 0; 797 int mod = 0;
713 798
714 if (root->fs_info->sb->s_flags & MS_RDONLY) 799 if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
722 return PTR_ERR(vol_args); 807 return PTR_ERR(vol_args);
723 808
724 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 809 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
725 namelen = strlen(vol_args->name);
726 810
727 mutex_lock(&root->fs_info->volume_mutex); 811 mutex_lock(&root->fs_info->volume_mutex);
728 sizestr = vol_args->name; 812 sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
801 return ret; 885 return ret;
802} 886}
803 887
804static noinline int btrfs_ioctl_snap_create(struct file *file, 888static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
805 void __user *arg, int subvol) 889 char *name,
890 unsigned long fd,
891 int subvol,
892 u64 *transid)
806{ 893{
807 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 894 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
808 struct btrfs_ioctl_vol_args *vol_args;
809 struct file *src_file; 895 struct file *src_file;
810 int namelen; 896 int namelen;
811 int ret = 0; 897 int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
813 if (root->fs_info->sb->s_flags & MS_RDONLY) 899 if (root->fs_info->sb->s_flags & MS_RDONLY)
814 return -EROFS; 900 return -EROFS;
815 901
816 vol_args = memdup_user(arg, sizeof(*vol_args)); 902 namelen = strlen(name);
817 if (IS_ERR(vol_args)) 903 if (strchr(name, '/')) {
818 return PTR_ERR(vol_args);
819
820 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
821 namelen = strlen(vol_args->name);
822 if (strchr(vol_args->name, '/')) {
823 ret = -EINVAL; 904 ret = -EINVAL;
824 goto out; 905 goto out;
825 } 906 }
826 907
827 if (subvol) { 908 if (subvol) {
828 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 909 ret = btrfs_mksubvol(&file->f_path, name, namelen,
829 NULL); 910 NULL, transid);
830 } else { 911 } else {
831 struct inode *src_inode; 912 struct inode *src_inode;
832 src_file = fget(vol_args->fd); 913 src_file = fget(fd);
833 if (!src_file) { 914 if (!src_file) {
834 ret = -EINVAL; 915 ret = -EINVAL;
835 goto out; 916 goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
843 fput(src_file); 924 fput(src_file);
844 goto out; 925 goto out;
845 } 926 }
846 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 927 ret = btrfs_mksubvol(&file->f_path, name, namelen,
847 BTRFS_I(src_inode)->root); 928 BTRFS_I(src_inode)->root,
929 transid);
848 fput(src_file); 930 fput(src_file);
849 } 931 }
850out: 932out:
933 return ret;
934}
935
936static noinline int btrfs_ioctl_snap_create(struct file *file,
937 void __user *arg, int subvol,
938 int async)
939{
940 struct btrfs_ioctl_vol_args *vol_args = NULL;
941 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
942 char *name;
943 u64 fd;
944 u64 transid = 0;
945 int ret;
946
947 if (async) {
948 async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
949 if (IS_ERR(async_vol_args))
950 return PTR_ERR(async_vol_args);
951
952 name = async_vol_args->name;
953 fd = async_vol_args->fd;
954 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
955 } else {
956 vol_args = memdup_user(arg, sizeof(*vol_args));
957 if (IS_ERR(vol_args))
958 return PTR_ERR(vol_args);
959 name = vol_args->name;
960 fd = vol_args->fd;
961 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
962 }
963
964 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
965 subvol, &transid);
966
967 if (!ret && async) {
968 if (copy_to_user(arg +
969 offsetof(struct btrfs_ioctl_async_vol_args,
970 transid), &transid, sizeof(transid)))
971 return -EFAULT;
972 }
973
851 kfree(vol_args); 974 kfree(vol_args);
975 kfree(async_vol_args);
976
852 return ret; 977 return ret;
853} 978}
854 979
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
1073 if (!capable(CAP_SYS_ADMIN)) 1198 if (!capable(CAP_SYS_ADMIN))
1074 return -EPERM; 1199 return -EPERM;
1075 1200
1076 args = kmalloc(sizeof(*args), GFP_KERNEL); 1201 args = memdup_user(argp, sizeof(*args));
1077 if (!args) 1202 if (IS_ERR(args))
1078 return -ENOMEM; 1203 return PTR_ERR(args);
1079 1204
1080 if (copy_from_user(args, argp, sizeof(*args))) {
1081 kfree(args);
1082 return -EFAULT;
1083 }
1084 inode = fdentry(file)->d_inode; 1205 inode = fdentry(file)->d_inode;
1085 ret = search_ioctl(inode, args); 1206 ret = search_ioctl(inode, args);
1086 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1207 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1188 if (!capable(CAP_SYS_ADMIN)) 1309 if (!capable(CAP_SYS_ADMIN))
1189 return -EPERM; 1310 return -EPERM;
1190 1311
1191 args = kmalloc(sizeof(*args), GFP_KERNEL); 1312 args = memdup_user(argp, sizeof(*args));
1192 if (!args) 1313 if (IS_ERR(args))
1193 return -ENOMEM; 1314 return PTR_ERR(args);
1194 1315
1195 if (copy_from_user(args, argp, sizeof(*args))) {
1196 kfree(args);
1197 return -EFAULT;
1198 }
1199 inode = fdentry(file)->d_inode; 1316 inode = fdentry(file)->d_inode;
1200 1317
1201 if (args->treeid == 0) 1318 if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1227 int ret; 1344 int ret;
1228 int err = 0; 1345 int err = 0;
1229 1346
1230 if (!capable(CAP_SYS_ADMIN))
1231 return -EPERM;
1232
1233 vol_args = memdup_user(arg, sizeof(*vol_args)); 1347 vol_args = memdup_user(arg, sizeof(*vol_args));
1234 if (IS_ERR(vol_args)) 1348 if (IS_ERR(vol_args))
1235 return PTR_ERR(vol_args); 1349 return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1259 } 1373 }
1260 1374
1261 inode = dentry->d_inode; 1375 inode = dentry->d_inode;
1376 dest = BTRFS_I(inode)->root;
1377 if (!capable(CAP_SYS_ADMIN)){
1378 /*
1379 * Regular user. Only allow this with a special mount
1380 * option, when the user has write+exec access to the
1381 * subvol root, and when rmdir(2) would have been
1382 * allowed.
1383 *
1384 * Note that this is _not_ check that the subvol is
1385 * empty or doesn't contain data that we wouldn't
1386 * otherwise be able to delete.
1387 *
1388 * Users who want to delete empty subvols should try
1389 * rmdir(2).
1390 */
1391 err = -EPERM;
1392 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
1393 goto out_dput;
1394
1395 /*
1396 * Do not allow deletion if the parent dir is the same
1397 * as the dir to be deleted. That means the ioctl
1398 * must be called on the dentry referencing the root
1399 * of the subvol, not a random directory contained
1400 * within it.
1401 */
1402 err = -EINVAL;
1403 if (root == dest)
1404 goto out_dput;
1405
1406 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
1407 if (err)
1408 goto out_dput;
1409
1410 /* check if subvolume may be deleted by a non-root user */
1411 err = btrfs_may_delete(dir, dentry, 1);
1412 if (err)
1413 goto out_dput;
1414 }
1415
1262 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1416 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
1263 err = -EINVAL; 1417 err = -EINVAL;
1264 goto out_dput; 1418 goto out_dput;
1265 } 1419 }
1266 1420
1267 dest = BTRFS_I(inode)->root;
1268
1269 mutex_lock(&inode->i_mutex); 1421 mutex_lock(&inode->i_mutex);
1270 err = d_invalidate(dentry); 1422 err = d_invalidate(dentry);
1271 if (err) 1423 if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1304 BUG_ON(ret); 1456 BUG_ON(ret);
1305 } 1457 }
1306 1458
1307 ret = btrfs_commit_transaction(trans, root); 1459 ret = btrfs_end_transaction(trans, root);
1308 BUG_ON(ret); 1460 BUG_ON(ret);
1309 inode->i_flags |= S_DEAD; 1461 inode->i_flags |= S_DEAD;
1310out_up_write: 1462out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1502 path->reada = 2; 1654 path->reada = 2;
1503 1655
1504 if (inode < src) { 1656 if (inode < src) {
1505 mutex_lock(&inode->i_mutex); 1657 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1506 mutex_lock(&src->i_mutex); 1658 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
1507 } else { 1659 } else {
1508 mutex_lock(&src->i_mutex); 1660 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
1509 mutex_lock(&inode->i_mutex); 1661 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1510 } 1662 }
1511 1663
1512 /* determine range to clone */ 1664 /* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1530 while (1) { 1682 while (1) {
1531 struct btrfs_ordered_extent *ordered; 1683 struct btrfs_ordered_extent *ordered;
1532 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1684 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1533 ordered = btrfs_lookup_first_ordered_extent(inode, off+len); 1685 ordered = btrfs_lookup_first_ordered_extent(src, off+len);
1534 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) 1686 if (!ordered &&
1687 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
1688 EXTENT_DELALLOC, 0, NULL))
1535 break; 1689 break;
1536 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1690 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1537 if (ordered) 1691 if (ordered)
1538 btrfs_put_ordered_extent(ordered); 1692 btrfs_put_ordered_extent(ordered);
1539 btrfs_wait_ordered_range(src, off, off+len); 1693 btrfs_wait_ordered_range(src, off, len);
1540 } 1694 }
1541 1695
1542 /* clone data */ 1696 /* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1605 } 1759 }
1606 btrfs_release_path(root, path); 1760 btrfs_release_path(root, path);
1607 1761
1608 if (key.offset + datal < off || 1762 if (key.offset + datal <= off ||
1609 key.offset >= off+len) 1763 key.offset >= off+len)
1610 goto next; 1764 goto next;
1611 1765
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1879 return 0; 2033 return 0;
1880} 2034}
1881 2035
2036static void get_block_group_info(struct list_head *groups_list,
2037 struct btrfs_ioctl_space_info *space)
2038{
2039 struct btrfs_block_group_cache *block_group;
2040
2041 space->total_bytes = 0;
2042 space->used_bytes = 0;
2043 space->flags = 0;
2044 list_for_each_entry(block_group, groups_list, list) {
2045 space->flags = block_group->flags;
2046 space->total_bytes += block_group->key.offset;
2047 space->used_bytes +=
2048 btrfs_block_group_used(&block_group->item);
2049 }
2050}
2051
1882long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2052long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1883{ 2053{
1884 struct btrfs_ioctl_space_args space_args; 2054 struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1887 struct btrfs_ioctl_space_info *dest_orig; 2057 struct btrfs_ioctl_space_info *dest_orig;
1888 struct btrfs_ioctl_space_info *user_dest; 2058 struct btrfs_ioctl_space_info *user_dest;
1889 struct btrfs_space_info *info; 2059 struct btrfs_space_info *info;
2060 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2061 BTRFS_BLOCK_GROUP_SYSTEM,
2062 BTRFS_BLOCK_GROUP_METADATA,
2063 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2064 int num_types = 4;
1890 int alloc_size; 2065 int alloc_size;
1891 int ret = 0; 2066 int ret = 0;
1892 int slot_count = 0; 2067 int slot_count = 0;
2068 int i, c;
1893 2069
1894 if (copy_from_user(&space_args, 2070 if (copy_from_user(&space_args,
1895 (struct btrfs_ioctl_space_args __user *)arg, 2071 (struct btrfs_ioctl_space_args __user *)arg,
1896 sizeof(space_args))) 2072 sizeof(space_args)))
1897 return -EFAULT; 2073 return -EFAULT;
1898 2074
1899 /* first we count slots */ 2075 for (i = 0; i < num_types; i++) {
1900 rcu_read_lock(); 2076 struct btrfs_space_info *tmp;
1901 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) 2077
1902 slot_count++; 2078 info = NULL;
1903 rcu_read_unlock(); 2079 rcu_read_lock();
2080 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2081 list) {
2082 if (tmp->flags == types[i]) {
2083 info = tmp;
2084 break;
2085 }
2086 }
2087 rcu_read_unlock();
2088
2089 if (!info)
2090 continue;
2091
2092 down_read(&info->groups_sem);
2093 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2094 if (!list_empty(&info->block_groups[c]))
2095 slot_count++;
2096 }
2097 up_read(&info->groups_sem);
2098 }
1904 2099
1905 /* space_slots == 0 means they are asking for a count */ 2100 /* space_slots == 0 means they are asking for a count */
1906 if (space_args.space_slots == 0) { 2101 if (space_args.space_slots == 0) {
1907 space_args.total_spaces = slot_count; 2102 space_args.total_spaces = slot_count;
1908 goto out; 2103 goto out;
1909 } 2104 }
2105
2106 slot_count = min_t(int, space_args.space_slots, slot_count);
2107
1910 alloc_size = sizeof(*dest) * slot_count; 2108 alloc_size = sizeof(*dest) * slot_count;
2109
1911 /* we generally have at most 6 or so space infos, one for each raid 2110 /* we generally have at most 6 or so space infos, one for each raid
1912 * level. So, a whole page should be more than enough for everyone 2111 * level. So, a whole page should be more than enough for everyone
1913 */ 2112 */
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1921 dest_orig = dest; 2120 dest_orig = dest;
1922 2121
1923 /* now we have a buffer to copy into */ 2122 /* now we have a buffer to copy into */
1924 rcu_read_lock(); 2123 for (i = 0; i < num_types; i++) {
1925 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { 2124 struct btrfs_space_info *tmp;
1926 /* make sure we don't copy more than we allocated 2125
1927 * in our buffer 2126 info = NULL;
1928 */ 2127 rcu_read_lock();
1929 if (slot_count == 0) 2128 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
1930 break; 2129 list) {
1931 slot_count--; 2130 if (tmp->flags == types[i]) {
1932 2131 info = tmp;
1933 /* make sure userland has enough room in their buffer */ 2132 break;
1934 if (space_args.total_spaces >= space_args.space_slots) 2133 }
1935 break; 2134 }
2135 rcu_read_unlock();
1936 2136
1937 space.flags = info->flags; 2137 if (!info)
1938 space.total_bytes = info->total_bytes; 2138 continue;
1939 space.used_bytes = info->bytes_used; 2139 down_read(&info->groups_sem);
1940 memcpy(dest, &space, sizeof(space)); 2140 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
1941 dest++; 2141 if (!list_empty(&info->block_groups[c])) {
1942 space_args.total_spaces++; 2142 get_block_group_info(&info->block_groups[c],
2143 &space);
2144 memcpy(dest, &space, sizeof(space));
2145 dest++;
2146 space_args.total_spaces++;
2147 }
2148 }
2149 up_read(&info->groups_sem);
1943 } 2150 }
1944 rcu_read_unlock();
1945 2151
1946 user_dest = (struct btrfs_ioctl_space_info *) 2152 user_dest = (struct btrfs_ioctl_space_info *)
1947 (arg + sizeof(struct btrfs_ioctl_space_args)); 2153 (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
1984 return 0; 2190 return 0;
1985} 2191}
1986 2192
2193static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
2194{
2195 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2196 struct btrfs_trans_handle *trans;
2197 u64 transid;
2198
2199 trans = btrfs_start_transaction(root, 0);
2200 transid = trans->transid;
2201 btrfs_commit_transaction_async(trans, root, 0);
2202
2203 if (argp)
2204 if (copy_to_user(argp, &transid, sizeof(transid)))
2205 return -EFAULT;
2206 return 0;
2207}
2208
2209static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2210{
2211 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2212 u64 transid;
2213
2214 if (argp) {
2215 if (copy_from_user(&transid, argp, sizeof(transid)))
2216 return -EFAULT;
2217 } else {
2218 transid = 0; /* current trans */
2219 }
2220 return btrfs_wait_for_commit(root, transid);
2221}
2222
1987long btrfs_ioctl(struct file *file, unsigned int 2223long btrfs_ioctl(struct file *file, unsigned int
1988 cmd, unsigned long arg) 2224 cmd, unsigned long arg)
1989{ 2225{
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
1998 case FS_IOC_GETVERSION: 2234 case FS_IOC_GETVERSION:
1999 return btrfs_ioctl_getversion(file, argp); 2235 return btrfs_ioctl_getversion(file, argp);
2000 case BTRFS_IOC_SNAP_CREATE: 2236 case BTRFS_IOC_SNAP_CREATE:
2001 return btrfs_ioctl_snap_create(file, argp, 0); 2237 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2238 case BTRFS_IOC_SNAP_CREATE_ASYNC:
2239 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2002 case BTRFS_IOC_SUBVOL_CREATE: 2240 case BTRFS_IOC_SUBVOL_CREATE:
2003 return btrfs_ioctl_snap_create(file, argp, 1); 2241 return btrfs_ioctl_snap_create(file, argp, 1, 0);
2004 case BTRFS_IOC_SNAP_DESTROY: 2242 case BTRFS_IOC_SNAP_DESTROY:
2005 return btrfs_ioctl_snap_destroy(file, argp); 2243 return btrfs_ioctl_snap_destroy(file, argp);
2006 case BTRFS_IOC_DEFAULT_SUBVOL: 2244 case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2034 case BTRFS_IOC_SYNC: 2272 case BTRFS_IOC_SYNC:
2035 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2273 btrfs_sync_fs(file->f_dentry->d_sb, 1);
2036 return 0; 2274 return 0;
2275 case BTRFS_IOC_START_SYNC:
2276 return btrfs_ioctl_start_sync(file, argp);
2277 case BTRFS_IOC_WAIT_SYNC:
2278 return btrfs_ioctl_wait_sync(file, argp);
2037 } 2279 }
2038 2280
2039 return -ENOTTY; 2281 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517f..17c99ebdf960 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
22 22
23#define BTRFS_IOCTL_MAGIC 0x94 23#define BTRFS_IOCTL_MAGIC 0x94
24#define BTRFS_VOL_NAME_MAX 255 24#define BTRFS_VOL_NAME_MAX 255
25#define BTRFS_PATH_NAME_MAX 4087
26 25
27/* this should be 4k */ 26/* this should be 4k */
27#define BTRFS_PATH_NAME_MAX 4087
28struct btrfs_ioctl_vol_args { 28struct btrfs_ioctl_vol_args {
29 __s64 fd; 29 __s64 fd;
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079
34struct btrfs_ioctl_async_vol_args {
35 __s64 fd;
36 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
38};
39
33#define BTRFS_INO_LOOKUP_PATH_MAX 4080 40#define BTRFS_INO_LOOKUP_PATH_MAX 4080
34struct btrfs_ioctl_ino_lookup_args { 41struct btrfs_ioctl_ino_lookup_args {
35 __u64 treeid; 42 __u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
178#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 185#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
179#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 186#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
180 struct btrfs_ioctl_space_args) 187 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args)
181#endif 192#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5add..f4621f6deca1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
526{ 526{
527 u64 end; 527 u64 end;
528 u64 orig_end; 528 u64 orig_end;
529 u64 wait_end;
530 struct btrfs_ordered_extent *ordered; 529 struct btrfs_ordered_extent *ordered;
531 int found; 530 int found;
532 531
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
537 if (orig_end > INT_LIMIT(loff_t)) 536 if (orig_end > INT_LIMIT(loff_t))
538 orig_end = INT_LIMIT(loff_t); 537 orig_end = INT_LIMIT(loff_t);
539 } 538 }
540 wait_end = orig_end;
541again: 539again:
542 /* start IO across the range first to instantiate any delalloc 540 /* start IO across the range first to instantiate any delalloc
543 * extents 541 * extents
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..045c9c2b2d7e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
29#include "locking.h" 29#include "locking.h"
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h"
32 33
33/* 34/*
34 * backref_node, mapping_node and tree_block start with this 35 * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
178 u64 search_start; 179 u64 search_start;
179 u64 extents_found; 180 u64 extents_found;
180 181
181 int block_rsv_retries;
182
183 unsigned int stage:8; 182 unsigned int stage:8;
184 unsigned int create_reloc_tree:1; 183 unsigned int create_reloc_tree:1;
185 unsigned int merge_reloc_tree:1; 184 unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2133 LIST_HEAD(reloc_roots); 2132 LIST_HEAD(reloc_roots);
2134 u64 num_bytes = 0; 2133 u64 num_bytes = 0;
2135 int ret; 2134 int ret;
2136 int retries = 0;
2137 2135
2138 mutex_lock(&root->fs_info->trans_mutex); 2136 mutex_lock(&root->fs_info->trans_mutex);
2139 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2137 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
2143 if (!err) { 2141 if (!err) {
2144 num_bytes = rc->merging_rsv_size; 2142 num_bytes = rc->merging_rsv_size;
2145 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2143 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
2146 num_bytes, &retries); 2144 num_bytes);
2147 if (ret) 2145 if (ret)
2148 err = ret; 2146 err = ret;
2149 } 2147 }
@@ -2155,7 +2153,6 @@ again:
2155 btrfs_end_transaction(trans, rc->extent_root); 2153 btrfs_end_transaction(trans, rc->extent_root);
2156 btrfs_block_rsv_release(rc->extent_root, 2154 btrfs_block_rsv_release(rc->extent_root,
2157 rc->block_rsv, num_bytes); 2155 rc->block_rsv, num_bytes);
2158 retries = 0;
2159 goto again; 2156 goto again;
2160 } 2157 }
2161 } 2158 }
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2405 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2402 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2406 2403
2407 trans->block_rsv = rc->block_rsv; 2404 trans->block_rsv = rc->block_rsv;
2408 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, 2405 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
2409 &rc->block_rsv_retries);
2410 if (ret) { 2406 if (ret) {
2411 if (ret == -EAGAIN) 2407 if (ret == -EAGAIN)
2412 rc->commit_transaction = 1; 2408 rc->commit_transaction = 1;
2413 return ret; 2409 return ret;
2414 } 2410 }
2415 2411
2416 rc->block_rsv_retries = 0;
2417 return 0; 2412 return 0;
2418} 2413}
2419 2414
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
3099 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3094 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3100 ret = get_ref_objectid_v0(rc, path, extent_key, 3095 ret = get_ref_objectid_v0(rc, path, extent_key,
3101 &ref_owner, NULL); 3096 &ref_owner, NULL);
3097 if (ret < 0)
3098 return ret;
3102 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3099 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3103 level = (int)ref_owner; 3100 level = (int)ref_owner;
3104 /* FIXME: get real generation */ 3101 /* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
3191 return ret; 3188 return ret;
3192} 3189}
3193 3190
3191static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3192 struct inode *inode, u64 ino)
3193{
3194 struct btrfs_key key;
3195 struct btrfs_path *path;
3196 struct btrfs_root *root = fs_info->tree_root;
3197 struct btrfs_trans_handle *trans;
3198 unsigned long nr;
3199 int ret = 0;
3200
3201 if (inode)
3202 goto truncate;
3203
3204 key.objectid = ino;
3205 key.type = BTRFS_INODE_ITEM_KEY;
3206 key.offset = 0;
3207
3208 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3209 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
3210 if (inode && !IS_ERR(inode))
3211 iput(inode);
3212 return -ENOENT;
3213 }
3214
3215truncate:
3216 path = btrfs_alloc_path();
3217 if (!path) {
3218 ret = -ENOMEM;
3219 goto out;
3220 }
3221
3222 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) {
3224 btrfs_free_path(path);
3225 goto out;
3226 }
3227
3228 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3229
3230 btrfs_free_path(path);
3231 nr = trans->blocks_used;
3232 btrfs_end_transaction(trans, root);
3233 btrfs_btree_balance_dirty(root, nr);
3234out:
3235 iput(inode);
3236 return ret;
3237}
3238
3194/* 3239/*
3195 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3240 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3196 * this function scans fs tree to find blocks reference the data extent 3241 * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
3217 int counted; 3262 int counted;
3218 int ret; 3263 int ret;
3219 3264
3220 path = btrfs_alloc_path();
3221 if (!path)
3222 return -ENOMEM;
3223
3224 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3265 ref_root = btrfs_extent_data_ref_root(leaf, ref);
3225 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3266 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3226 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3267 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3227 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3268 ref_count = btrfs_extent_data_ref_count(leaf, ref);
3228 3269
3270 /*
3271 * This is an extent belonging to the free space cache, lets just delete
3272 * it and redo the search.
3273 */
3274 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3275 ret = delete_block_group_cache(rc->extent_root->fs_info,
3276 NULL, ref_objectid);
3277 if (ret != -ENOENT)
3278 return ret;
3279 ret = 0;
3280 }
3281
3282 path = btrfs_alloc_path();
3283 if (!path)
3284 return -ENOMEM;
3285
3229 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3286 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3230 if (IS_ERR(root)) { 3287 if (IS_ERR(root)) {
3231 err = PTR_ERR(root); 3288 err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3554 * is no reservation in transaction handle. 3611 * is no reservation in transaction handle.
3555 */ 3612 */
3556 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3613 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
3557 rc->extent_root->nodesize * 256, 3614 rc->extent_root->nodesize * 256);
3558 &rc->block_rsv_retries);
3559 if (ret) 3615 if (ret)
3560 return ret; 3616 return ret;
3561 3617
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
3567 rc->extents_found = 0; 3623 rc->extents_found = 0;
3568 rc->nodes_relocated = 0; 3624 rc->nodes_relocated = 0;
3569 rc->merging_rsv_size = 0; 3625 rc->merging_rsv_size = 0;
3570 rc->block_rsv_retries = 0;
3571 3626
3572 rc->create_reloc_tree = 1; 3627 rc->create_reloc_tree = 1;
3573 set_reloc_control(rc); 3628 set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3860{ 3915{
3861 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3916 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3862 struct reloc_control *rc; 3917 struct reloc_control *rc;
3918 struct inode *inode;
3919 struct btrfs_path *path;
3863 int ret; 3920 int ret;
3864 int rw = 0; 3921 int rw = 0;
3865 int err = 0; 3922 int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3882 rw = 1; 3939 rw = 1;
3883 } 3940 }
3884 3941
3942 path = btrfs_alloc_path();
3943 if (!path) {
3944 err = -ENOMEM;
3945 goto out;
3946 }
3947
3948 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
3949 path);
3950 btrfs_free_path(path);
3951
3952 if (!IS_ERR(inode))
3953 ret = delete_block_group_cache(fs_info, inode, 0);
3954 else
3955 ret = PTR_ERR(inode);
3956
3957 if (ret && ret != -ENOENT) {
3958 err = ret;
3959 goto out;
3960 }
3961
3885 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3962 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3886 if (IS_ERR(rc->data_inode)) { 3963 if (IS_ERR(rc->data_inode)) {
3887 err = PTR_ERR(rc->data_inode); 3964 err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4143 btrfs_add_ordered_sum(inode, ordered, sums); 4220 btrfs_add_ordered_sum(inode, ordered, sums);
4144 } 4221 }
4145 btrfs_put_ordered_extent(ordered); 4222 btrfs_put_ordered_extent(ordered);
4146 return 0; 4223 return ret;
4147} 4224}
4148 4225
4149void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4226void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c8..6a1086e83ffc 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) 181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
182{ 182{
183 struct btrfs_root *dead_root; 183 struct btrfs_root *dead_root;
184 struct btrfs_item *item;
185 struct btrfs_root_item *ri; 184 struct btrfs_root_item *ri;
186 struct btrfs_key key; 185 struct btrfs_key key;
187 struct btrfs_key found_key; 186 struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
214 nritems = btrfs_header_nritems(leaf); 213 nritems = btrfs_header_nritems(leaf);
215 slot = path->slots[0]; 214 slot = path->slots[0];
216 } 215 }
217 item = btrfs_item_nr(leaf, slot);
218 btrfs_item_key_to_cpu(leaf, &key, slot); 216 btrfs_item_key_to_cpu(leaf, &key, slot);
219 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) 217 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
220 goto next; 218 goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f5..8299a25ffc8f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
61 61
62 ret = close_ctree(root); 62 ret = close_ctree(root);
63 sb->s_fs_info = NULL; 63 sb->s_fs_info = NULL;
64
65 (void)ret; /* FIXME: need to fix VFS to return error? */
64} 66}
65 67
66enum { 68enum {
@@ -68,7 +70,8 @@ enum {
68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 72 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
71 Opt_discard, Opt_err, 73 Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
74 Opt_user_subvol_rm_allowed,
72}; 75};
73 76
74static match_table_t tokens = { 77static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 95 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 96 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 97 {Opt_discard, "discard"},
98 {Opt_space_cache, "space_cache"},
99 {Opt_clear_cache, "clear_cache"},
100 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
95 {Opt_err, NULL}, 101 {Opt_err, NULL},
96}; 102};
97 103
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
235 case Opt_discard: 241 case Opt_discard:
236 btrfs_set_opt(info->mount_opt, DISCARD); 242 btrfs_set_opt(info->mount_opt, DISCARD);
237 break; 243 break;
244 case Opt_space_cache:
245 printk(KERN_INFO "btrfs: enabling disk space caching\n");
246 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
247 case Opt_clear_cache:
248 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
249 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
250 break;
251 case Opt_user_subvol_rm_allowed:
252 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
253 break;
238 case Opt_err: 254 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option " 255 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p); 256 "'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
380find_root: 396find_root:
381 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 397 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
382 if (IS_ERR(new_root)) 398 if (IS_ERR(new_root))
383 return ERR_PTR(PTR_ERR(new_root)); 399 return ERR_CAST(new_root);
384 400
385 if (btrfs_root_refs(&new_root->root_item) == 0) 401 if (btrfs_root_refs(&new_root->root_item) == 0)
386 return ERR_PTR(-ENOENT); 402 return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
436{ 452{
437 struct inode *inode; 453 struct inode *inode;
438 struct dentry *root_dentry; 454 struct dentry *root_dentry;
439 struct btrfs_super_block *disk_super;
440 struct btrfs_root *tree_root; 455 struct btrfs_root *tree_root;
441 struct btrfs_key key; 456 struct btrfs_key key;
442 int err; 457 int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
458 return PTR_ERR(tree_root); 473 return PTR_ERR(tree_root);
459 } 474 }
460 sb->s_fs_info = tree_root; 475 sb->s_fs_info = tree_root;
461 disk_super = &tree_root->fs_info->super_copy;
462 476
463 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 477 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
464 key.type = BTRFS_INODE_ITEM_KEY; 478 key.type = BTRFS_INODE_ITEM_KEY;
@@ -560,8 +574,8 @@ static int btrfs_test_super(struct super_block *s, void *data)
560 * Note: This is based on get_sb_bdev from fs/super.c with a few additions 574 * Note: This is based on get_sb_bdev from fs/super.c with a few additions
561 * for multiple device setup. Make sure to keep it in sync. 575 * for multiple device setup. Make sure to keep it in sync.
562 */ 576 */
563static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 577static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
564 const char *dev_name, void *data, struct vfsmount *mnt) 578 const char *dev_name, void *data)
565{ 579{
566 struct block_device *bdev = NULL; 580 struct block_device *bdev = NULL;
567 struct super_block *s; 581 struct super_block *s;
@@ -571,7 +585,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
571 char *subvol_name = NULL; 585 char *subvol_name = NULL;
572 u64 subvol_objectid = 0; 586 u64 subvol_objectid = 0;
573 int error = 0; 587 int error = 0;
574 int found = 0;
575 588
576 if (!(flags & MS_RDONLY)) 589 if (!(flags & MS_RDONLY))
577 mode |= FMODE_WRITE; 590 mode |= FMODE_WRITE;
@@ -580,7 +593,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
580 &subvol_name, &subvol_objectid, 593 &subvol_name, &subvol_objectid,
581 &fs_devices); 594 &fs_devices);
582 if (error) 595 if (error)
583 return error; 596 return ERR_PTR(error);
584 597
585 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices); 598 error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
586 if (error) 599 if (error)
@@ -607,7 +620,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
607 goto error_close_devices; 620 goto error_close_devices;
608 } 621 }
609 622
610 found = 1;
611 btrfs_close_devices(fs_devices); 623 btrfs_close_devices(fs_devices);
612 } else { 624 } else {
613 char b[BDEVNAME_SIZE]; 625 char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
629 if (IS_ERR(root)) { 641 if (IS_ERR(root)) {
630 error = PTR_ERR(root); 642 error = PTR_ERR(root);
631 deactivate_locked_super(s); 643 deactivate_locked_super(s);
632 goto error; 644 goto error_free_subvol_name;
633 } 645 }
634 /* if they gave us a subvolume name bind mount into that */ 646 /* if they gave us a subvolume name bind mount into that */
635 if (strcmp(subvol_name, ".")) { 647 if (strcmp(subvol_name, ".")) {
@@ -643,24 +655,21 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
643 deactivate_locked_super(s); 655 deactivate_locked_super(s);
644 error = PTR_ERR(new_root); 656 error = PTR_ERR(new_root);
645 dput(root); 657 dput(root);
646 goto error_close_devices; 658 goto error_free_subvol_name;
647 } 659 }
648 if (!new_root->d_inode) { 660 if (!new_root->d_inode) {
649 dput(root); 661 dput(root);
650 dput(new_root); 662 dput(new_root);
651 deactivate_locked_super(s); 663 deactivate_locked_super(s);
652 error = -ENXIO; 664 error = -ENXIO;
653 goto error_close_devices; 665 goto error_free_subvol_name;
654 } 666 }
655 dput(root); 667 dput(root);
656 root = new_root; 668 root = new_root;
657 } 669 }
658 670
659 mnt->mnt_sb = s;
660 mnt->mnt_root = root;
661
662 kfree(subvol_name); 671 kfree(subvol_name);
663 return 0; 672 return root;
664 673
665error_s: 674error_s:
666 error = PTR_ERR(s); 675 error = PTR_ERR(s);
@@ -668,8 +677,7 @@ error_close_devices:
668 btrfs_close_devices(fs_devices); 677 btrfs_close_devices(fs_devices);
669error_free_subvol_name: 678error_free_subvol_name:
670 kfree(subvol_name); 679 kfree(subvol_name);
671error: 680 return ERR_PTR(error);
672 return error;
673} 681}
674 682
675static int btrfs_remount(struct super_block *sb, int *flags, char *data) 683static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
716 struct list_head *head = &root->fs_info->space_info; 724 struct list_head *head = &root->fs_info->space_info;
717 struct btrfs_space_info *found; 725 struct btrfs_space_info *found;
718 u64 total_used = 0; 726 u64 total_used = 0;
727 u64 total_used_data = 0;
719 int bits = dentry->d_sb->s_blocksize_bits; 728 int bits = dentry->d_sb->s_blocksize_bits;
720 __be32 *fsid = (__be32 *)root->fs_info->fsid; 729 __be32 *fsid = (__be32 *)root->fs_info->fsid;
721 730
722 rcu_read_lock(); 731 rcu_read_lock();
723 list_for_each_entry_rcu(found, head, list) 732 list_for_each_entry_rcu(found, head, list) {
733 if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
734 BTRFS_BLOCK_GROUP_SYSTEM))
735 total_used_data += found->disk_total;
736 else
737 total_used_data += found->disk_used;
724 total_used += found->disk_used; 738 total_used += found->disk_used;
739 }
725 rcu_read_unlock(); 740 rcu_read_unlock();
726 741
727 buf->f_namelen = BTRFS_NAME_LEN; 742 buf->f_namelen = BTRFS_NAME_LEN;
728 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 743 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
729 buf->f_bfree = buf->f_blocks - (total_used >> bits); 744 buf->f_bfree = buf->f_blocks - (total_used >> bits);
730 buf->f_bavail = buf->f_bfree; 745 buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
731 buf->f_bsize = dentry->d_sb->s_blocksize; 746 buf->f_bsize = dentry->d_sb->s_blocksize;
732 buf->f_type = BTRFS_SUPER_MAGIC; 747 buf->f_type = BTRFS_SUPER_MAGIC;
733 748
@@ -746,7 +761,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
746static struct file_system_type btrfs_fs_type = { 761static struct file_system_type btrfs_fs_type = {
747 .owner = THIS_MODULE, 762 .owner = THIS_MODULE,
748 .name = "btrfs", 763 .name = "btrfs",
749 .get_sb = btrfs_get_sb, 764 .mount = btrfs_mount,
750 .kill_sb = kill_anon_super, 765 .kill_sb = kill_anon_super,
751 .fs_flags = FS_REQUIRES_DEV, 766 .fs_flags = FS_REQUIRES_DEV,
752}; 767};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..1fffbc017bdf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
163 TRANS_START, 163 TRANS_START,
164 TRANS_JOIN, 164 TRANS_JOIN,
165 TRANS_USERSPACE, 165 TRANS_USERSPACE,
166 TRANS_JOIN_NOLOCK,
166}; 167};
167 168
168static int may_wait_transaction(struct btrfs_root *root, int type) 169static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
179{ 180{
180 struct btrfs_trans_handle *h; 181 struct btrfs_trans_handle *h;
181 struct btrfs_transaction *cur_trans; 182 struct btrfs_transaction *cur_trans;
182 int retries = 0;
183 int ret; 183 int ret;
184again: 184again:
185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
186 if (!h) 186 if (!h)
187 return ERR_PTR(-ENOMEM); 187 return ERR_PTR(-ENOMEM);
188 188
189 mutex_lock(&root->fs_info->trans_mutex); 189 if (type != TRANS_JOIN_NOLOCK)
190 mutex_lock(&root->fs_info->trans_mutex);
190 if (may_wait_transaction(root, type)) 191 if (may_wait_transaction(root, type))
191 wait_current_trans(root); 192 wait_current_trans(root);
192 193
@@ -195,7 +196,8 @@ again:
195 196
196 cur_trans = root->fs_info->running_transaction; 197 cur_trans = root->fs_info->running_transaction;
197 cur_trans->use_count++; 198 cur_trans->use_count++;
198 mutex_unlock(&root->fs_info->trans_mutex); 199 if (type != TRANS_JOIN_NOLOCK)
200 mutex_unlock(&root->fs_info->trans_mutex);
199 201
200 h->transid = cur_trans->transid; 202 h->transid = cur_trans->transid;
201 h->transaction = cur_trans; 203 h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
212 } 214 }
213 215
214 if (num_items > 0) { 216 if (num_items > 0) {
215 ret = btrfs_trans_reserve_metadata(h, root, num_items, 217 ret = btrfs_trans_reserve_metadata(h, root, num_items);
216 &retries);
217 if (ret == -EAGAIN) { 218 if (ret == -EAGAIN) {
218 btrfs_commit_transaction(h, root); 219 btrfs_commit_transaction(h, root);
219 goto again; 220 goto again;
@@ -224,9 +225,11 @@ again:
224 } 225 }
225 } 226 }
226 227
227 mutex_lock(&root->fs_info->trans_mutex); 228 if (type != TRANS_JOIN_NOLOCK)
229 mutex_lock(&root->fs_info->trans_mutex);
228 record_root_in_trans(h, root); 230 record_root_in_trans(h, root);
229 mutex_unlock(&root->fs_info->trans_mutex); 231 if (type != TRANS_JOIN_NOLOCK)
232 mutex_unlock(&root->fs_info->trans_mutex);
230 233
231 if (!current->journal_info && type != TRANS_USERSPACE) 234 if (!current->journal_info && type != TRANS_USERSPACE)
232 current->journal_info = h; 235 current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
244 return start_transaction(root, 0, TRANS_JOIN); 247 return start_transaction(root, 0, TRANS_JOIN);
245} 248}
246 249
/*
 * Join the running transaction without taking fs_info->trans_mutex
 * (start_transaction() skips the mutex for TRANS_JOIN_NOLOCK).
 * num_blocks is unused; it is kept for signature parity with the other
 * join/start helpers.
 */
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}
255
247struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 256struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
248 int num_blocks) 257 int num_blocks)
249{ 258{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
270 return 0; 279 return 0;
271} 280}
272 281
/*
 * Wait for a transaction commit to finish.
 *
 * @transid != 0: wait for that specific transaction.  If it has already
 * committed (transid <= last_trans_committed) return 0 immediately; if
 * it is not on the trans_list, return -EINVAL.
 *
 * @transid == 0: wait for the newest transaction that is committing; if
 * nothing is in commit, return 0 immediately.
 *
 * Takes trans_mutex; drops it while sleeping in wait_for_commit(), and
 * pins the transaction with use_count so it cannot be freed meanwhile.
 */
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret;

	mutex_lock(&root->fs_info->trans_mutex);

	ret = 0;
	if (transid) {
		/* already committed — nothing to wait for */
		if (transid <= root->fs_info->last_trans_committed)
			goto out_unlock;

		/* find specified transaction */
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				break;
			}
			/* list appears ordered by transid; we passed it */
			if (t->transid > transid)
				break;
		}
		ret = -EINVAL;
		if (!cur_trans)
			goto out_unlock;  /* bad transid */
	} else {
		/* find newest transaction that is committing | committed */
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->in_commit) {
				if (t->commit_done)
					goto out_unlock;
				cur_trans = t;
				break;
			}
		}
		if (!cur_trans)
			goto out_unlock;  /* nothing committing|committed */
	}

	/* pin the transaction across the unlocked wait below */
	cur_trans->use_count++;
	mutex_unlock(&root->fs_info->trans_mutex);

	wait_for_commit(root, cur_trans);

	mutex_lock(&root->fs_info->trans_mutex);
	put_transaction(cur_trans);
	ret = 0;
out_unlock:
	mutex_unlock(&root->fs_info->trans_mutex);
	return ret;
}
333
273#if 0 334#if 0
274/* 335/*
275 * rate limit against the drop_snapshot code. This helps to slow down new 336 * rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
348} 409}
349 410
350static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 411static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
351 struct btrfs_root *root, int throttle) 412 struct btrfs_root *root, int throttle, int lock)
352{ 413{
353 struct btrfs_transaction *cur_trans = trans->transaction; 414 struct btrfs_transaction *cur_trans = trans->transaction;
354 struct btrfs_fs_info *info = root->fs_info; 415 struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
376 437
377 btrfs_trans_release_metadata(trans, root); 438 btrfs_trans_release_metadata(trans, root);
378 439
379 if (!root->fs_info->open_ioctl_trans && 440 if (lock && !root->fs_info->open_ioctl_trans &&
380 should_end_transaction(trans, root)) 441 should_end_transaction(trans, root))
381 trans->transaction->blocked = 1; 442 trans->transaction->blocked = 1;
382 443
383 if (cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
384 if (throttle) 445 if (throttle)
385 return btrfs_commit_transaction(trans, root); 446 return btrfs_commit_transaction(trans, root);
386 else 447 else
387 wake_up_process(info->transaction_kthread); 448 wake_up_process(info->transaction_kthread);
388 } 449 }
389 450
390 mutex_lock(&info->trans_mutex); 451 if (lock)
452 mutex_lock(&info->trans_mutex);
391 WARN_ON(cur_trans != info->running_transaction); 453 WARN_ON(cur_trans != info->running_transaction);
392 WARN_ON(cur_trans->num_writers < 1); 454 WARN_ON(cur_trans->num_writers < 1);
393 cur_trans->num_writers--; 455 cur_trans->num_writers--;
394 456
457 smp_mb();
395 if (waitqueue_active(&cur_trans->writer_wait)) 458 if (waitqueue_active(&cur_trans->writer_wait))
396 wake_up(&cur_trans->writer_wait); 459 wake_up(&cur_trans->writer_wait);
397 put_transaction(cur_trans); 460 put_transaction(cur_trans);
398 mutex_unlock(&info->trans_mutex); 461 if (lock)
462 mutex_unlock(&info->trans_mutex);
399 463
400 if (current->journal_info == trans) 464 if (current->journal_info == trans)
401 current->journal_info = NULL; 465 current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
411int btrfs_end_transaction(struct btrfs_trans_handle *trans, 475int btrfs_end_transaction(struct btrfs_trans_handle *trans,
412 struct btrfs_root *root) 476 struct btrfs_root *root)
413{ 477{
414 return __btrfs_end_transaction(trans, root, 0); 478 return __btrfs_end_transaction(trans, root, 0, 1);
415} 479}
416 480
417int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 481int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root) 482 struct btrfs_root *root)
419{ 483{
420 return __btrfs_end_transaction(trans, root, 1); 484 return __btrfs_end_transaction(trans, root, 1, 1);
485}
486
487int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root)
489{
490 return __btrfs_end_transaction(trans, root, 0, 0);
421} 491}
422 492
423/* 493/*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
836 struct extent_buffer *tmp; 906 struct extent_buffer *tmp;
837 struct extent_buffer *old; 907 struct extent_buffer *old;
838 int ret; 908 int ret;
839 int retries = 0;
840 u64 to_reserve = 0; 909 u64 to_reserve = 0;
841 u64 index = 0; 910 u64 index = 0;
842 u64 objectid; 911 u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
858 927
859 if (to_reserve > 0) { 928 if (to_reserve > 0) {
860 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 929 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
861 to_reserve, &retries); 930 to_reserve);
862 if (ret) { 931 if (ret) {
863 pending->error = ret; 932 pending->error = ret;
864 goto fail; 933 goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
966 super->root = root_item->bytenr; 1035 super->root = root_item->bytenr;
967 super->generation = root_item->generation; 1036 super->generation = root_item->generation;
968 super->root_level = root_item->level; 1037 super->root_level = root_item->level;
1038 if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1039 super->cache_generation = root_item->generation;
969} 1040}
970 1041
971int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1042int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
988 return ret; 1059 return ret;
989} 1060}
990 1061
1062/*
1063 * wait for the current transaction commit to start and block subsequent
1064 * transaction joins
1065 */
1066static void wait_current_trans_commit_start(struct btrfs_root *root,
1067 struct btrfs_transaction *trans)
1068{
1069 DEFINE_WAIT(wait);
1070
1071 if (trans->in_commit)
1072 return;
1073
1074 while (1) {
1075 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1076 TASK_UNINTERRUPTIBLE);
1077 if (trans->in_commit) {
1078 finish_wait(&root->fs_info->transaction_blocked_wait,
1079 &wait);
1080 break;
1081 }
1082 mutex_unlock(&root->fs_info->trans_mutex);
1083 schedule();
1084 mutex_lock(&root->fs_info->trans_mutex);
1085 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1086 }
1087}
1088
1089/*
1090 * wait for the current transaction to start and then become unblocked.
1091 * caller holds ref.
1092 */
1093static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1094 struct btrfs_transaction *trans)
1095{
1096 DEFINE_WAIT(wait);
1097
1098 if (trans->commit_done || (trans->in_commit && !trans->blocked))
1099 return;
1100
1101 while (1) {
1102 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1103 TASK_UNINTERRUPTIBLE);
1104 if (trans->commit_done ||
1105 (trans->in_commit && !trans->blocked)) {
1106 finish_wait(&root->fs_info->transaction_wait,
1107 &wait);
1108 break;
1109 }
1110 mutex_unlock(&root->fs_info->trans_mutex);
1111 schedule();
1112 mutex_lock(&root->fs_info->trans_mutex);
1113 finish_wait(&root->fs_info->transaction_wait,
1114 &wait);
1115 }
1116}
1117
1118/*
1119 * commit transactions asynchronously. once btrfs_commit_transaction_async
1120 * returns, any subsequent transaction will not be allowed to join.
1121 */
1122struct btrfs_async_commit {
1123 struct btrfs_trans_handle *newtrans;
1124 struct btrfs_root *root;
1125 struct delayed_work work;
1126};
1127
1128static void do_async_commit(struct work_struct *work)
1129{
1130 struct btrfs_async_commit *ac =
1131 container_of(work, struct btrfs_async_commit, work.work);
1132
1133 btrfs_commit_transaction(ac->newtrans, ac->root);
1134 kfree(ac);
1135}
1136
1137int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1138 struct btrfs_root *root,
1139 int wait_for_unblock)
1140{
1141 struct btrfs_async_commit *ac;
1142 struct btrfs_transaction *cur_trans;
1143
1144 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1145 BUG_ON(!ac);
1146
1147 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1148 ac->root = root;
1149 ac->newtrans = btrfs_join_transaction(root, 0);
1150
1151 /* take transaction reference */
1152 mutex_lock(&root->fs_info->trans_mutex);
1153 cur_trans = trans->transaction;
1154 cur_trans->use_count++;
1155 mutex_unlock(&root->fs_info->trans_mutex);
1156
1157 btrfs_end_transaction(trans, root);
1158 schedule_delayed_work(&ac->work, 0);
1159
1160 /* wait for transaction to start and unblock */
1161 mutex_lock(&root->fs_info->trans_mutex);
1162 if (wait_for_unblock)
1163 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1164 else
1165 wait_current_trans_commit_start(root, cur_trans);
1166 put_transaction(cur_trans);
1167 mutex_unlock(&root->fs_info->trans_mutex);
1168
1169 return 0;
1170}
1171
1172/*
1173 * btrfs_transaction state sequence:
1174 * in_commit = 0, blocked = 0 (initial)
1175 * in_commit = 1, blocked = 1
1176 * blocked = 0
1177 * commit_done = 1
1178 */
991int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1179int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
992 struct btrfs_root *root) 1180 struct btrfs_root *root)
993{ 1181{
994 unsigned long joined = 0; 1182 unsigned long joined = 0;
995 unsigned long timeout = 1;
996 struct btrfs_transaction *cur_trans; 1183 struct btrfs_transaction *cur_trans;
997 struct btrfs_transaction *prev_trans = NULL; 1184 struct btrfs_transaction *prev_trans = NULL;
998 DEFINE_WAIT(wait); 1185 DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1039 1226
1040 trans->transaction->in_commit = 1; 1227 trans->transaction->in_commit = 1;
1041 trans->transaction->blocked = 1; 1228 trans->transaction->blocked = 1;
1229 wake_up(&root->fs_info->transaction_blocked_wait);
1230
1042 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1231 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1043 prev_trans = list_entry(cur_trans->list.prev, 1232 prev_trans = list_entry(cur_trans->list.prev,
1044 struct btrfs_transaction, list); 1233 struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1063 snap_pending = 1; 1252 snap_pending = 1;
1064 1253
1065 WARN_ON(cur_trans != trans->transaction); 1254 WARN_ON(cur_trans != trans->transaction);
1066 if (cur_trans->num_writers > 1)
1067 timeout = MAX_SCHEDULE_TIMEOUT;
1068 else if (should_grow)
1069 timeout = 1;
1070
1071 mutex_unlock(&root->fs_info->trans_mutex); 1255 mutex_unlock(&root->fs_info->trans_mutex);
1072 1256
1073 if (flush_on_commit || snap_pending) { 1257 if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1089 TASK_UNINTERRUPTIBLE); 1273 TASK_UNINTERRUPTIBLE);
1090 1274
1091 smp_mb(); 1275 smp_mb();
1092 if (cur_trans->num_writers > 1 || should_grow) 1276 if (cur_trans->num_writers > 1)
1093 schedule_timeout(timeout); 1277 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1278 else if (should_grow)
1279 schedule_timeout(1);
1094 1280
1095 mutex_lock(&root->fs_info->trans_mutex); 1281 mutex_lock(&root->fs_info->trans_mutex);
1096 finish_wait(&cur_trans->writer_wait, &wait); 1282 finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..f104b57ad4ef 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
87 87
88int btrfs_end_transaction(struct btrfs_trans_handle *trans, 88int btrfs_end_transaction(struct btrfs_trans_handle *trans,
89 struct btrfs_root *root); 89 struct btrfs_root *root);
90int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root);
90struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 92struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
91 int num_items); 93 int num_items);
92struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 94struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
93 int num_blocks); 95 int num_blocks);
96struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
97 int num_blocks);
94struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 98struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
95 int num_blocks); 99 int num_blocks);
100int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
96int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 101int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root); 102 struct btrfs_root *root);
98int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 103int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
104int btrfs_clean_old_snapshots(struct btrfs_root *root); 109int btrfs_clean_old_snapshots(struct btrfs_root *root);
105int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 110int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
106 struct btrfs_root *root); 111 struct btrfs_root *root);
112int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root,
114 int wait_for_unblock);
107int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 115int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 116 struct btrfs_root *root);
109int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed7..992ab425599d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
36 int ret = 0; 36 int ret = 0;
37 int wret; 37 int wret;
38 int level; 38 int level;
39 int orig_level;
40 int is_extent = 0; 39 int is_extent = 0;
41 int next_key_ret = 0; 40 int next_key_ret = 0;
42 u64 last_ret = 0; 41 u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
64 return -ENOMEM; 63 return -ENOMEM;
65 64
66 level = btrfs_header_level(root->node); 65 level = btrfs_header_level(root->node);
67 orig_level = level;
68 66
69 if (level == 0) 67 if (level == 0)
70 goto out; 68 goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9c..a29f19384a27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
786{ 786{
787 struct inode *dir; 787 struct inode *dir;
788 int ret; 788 int ret;
789 struct btrfs_key location;
790 struct btrfs_inode_ref *ref; 789 struct btrfs_inode_ref *ref;
791 struct btrfs_dir_item *di; 790 struct btrfs_dir_item *di;
792 struct inode *inode; 791 struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
795 unsigned long ref_ptr; 794 unsigned long ref_ptr;
796 unsigned long ref_end; 795 unsigned long ref_end;
797 796
798 location.objectid = key->objectid;
799 location.type = BTRFS_INODE_ITEM_KEY;
800 location.offset = 0;
801
802 /* 797 /*
803 * it is possible that we didn't log all the parent directories 798 * it is possible that we didn't log all the parent directories
804 * for a given inode. If we don't find the dir, just don't 799 * for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1583 struct btrfs_path *path; 1578 struct btrfs_path *path;
1584 struct btrfs_root *root = wc->replay_dest; 1579 struct btrfs_root *root = wc->replay_dest;
1585 struct btrfs_key key; 1580 struct btrfs_key key;
1586 u32 item_size;
1587 int level; 1581 int level;
1588 int i; 1582 int i;
1589 int ret; 1583 int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1601 nritems = btrfs_header_nritems(eb); 1595 nritems = btrfs_header_nritems(eb);
1602 for (i = 0; i < nritems; i++) { 1596 for (i = 0; i < nritems; i++) {
1603 btrfs_item_key_to_cpu(eb, &key, i); 1597 btrfs_item_key_to_cpu(eb, &key, i);
1604 item_size = btrfs_item_size_nr(eb, i);
1605 1598
1606 /* inode keys are done during the first stage */ 1599 /* inode keys are done during the first stage */
1607 if (key.type == BTRFS_INODE_ITEM_KEY && 1600 if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1668 struct walk_control *wc) 1661 struct walk_control *wc)
1669{ 1662{
1670 u64 root_owner; 1663 u64 root_owner;
1671 u64 root_gen;
1672 u64 bytenr; 1664 u64 bytenr;
1673 u64 ptr_gen; 1665 u64 ptr_gen;
1674 struct extent_buffer *next; 1666 struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1698 1690
1699 parent = path->nodes[*level]; 1691 parent = path->nodes[*level];
1700 root_owner = btrfs_header_owner(parent); 1692 root_owner = btrfs_header_owner(parent);
1701 root_gen = btrfs_header_generation(parent);
1702 1693
1703 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1704 1695
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1749 struct walk_control *wc) 1740 struct walk_control *wc)
1750{ 1741{
1751 u64 root_owner; 1742 u64 root_owner;
1752 u64 root_gen;
1753 int i; 1743 int i;
1754 int slot; 1744 int slot;
1755 int ret; 1745 int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1757 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1747 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1758 slot = path->slots[i]; 1748 slot = path->slots[i];
1759 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1749 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
1760 struct extent_buffer *node;
1761 node = path->nodes[i];
1762 path->slots[i]++; 1750 path->slots[i]++;
1763 *level = i; 1751 *level = i;
1764 WARN_ON(*level == 0); 1752 WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1771 parent = path->nodes[*level + 1]; 1759 parent = path->nodes[*level + 1];
1772 1760
1773 root_owner = btrfs_header_owner(parent); 1761 root_owner = btrfs_header_owner(parent);
1774 root_gen = btrfs_header_generation(parent);
1775 wc->process_func(root, path->nodes[*level], wc, 1762 wc->process_func(root, path->nodes[*level], wc,
1776 btrfs_header_generation(path->nodes[*level])); 1763 btrfs_header_generation(path->nodes[*level]));
1777 if (wc->free) { 1764 if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
2273 } 2260 }
2274 btrfs_end_log_trans(root); 2261 btrfs_end_log_trans(root);
2275 2262
2276 return 0; 2263 return err;
2277} 2264}
2278 2265
2279/* see comments for btrfs_del_dir_entries_in_log */ 2266/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2729 struct btrfs_key max_key; 2716 struct btrfs_key max_key;
2730 struct btrfs_root *log = root->log_root; 2717 struct btrfs_root *log = root->log_root;
2731 struct extent_buffer *src = NULL; 2718 struct extent_buffer *src = NULL;
2732 u32 size;
2733 int err = 0; 2719 int err = 0;
2734 int ret; 2720 int ret;
2735 int nritems; 2721 int nritems;
@@ -2793,7 +2779,6 @@ again:
2793 break; 2779 break;
2794 2780
2795 src = path->nodes[0]; 2781 src = path->nodes[0];
2796 size = btrfs_item_size_nr(src, path->slots[0]);
2797 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 2782 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
2798 ins_nr++; 2783 ins_nr++;
2799 goto next_slot; 2784 goto next_slot;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e2..cc04dc1445d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1898 u64 size_to_free; 1898 u64 size_to_free;
1899 struct btrfs_path *path; 1899 struct btrfs_path *path;
1900 struct btrfs_key key; 1900 struct btrfs_key key;
1901 struct btrfs_chunk *chunk;
1902 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; 1901 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1903 struct btrfs_trans_handle *trans; 1902 struct btrfs_trans_handle *trans;
1904 struct btrfs_key found_key; 1903 struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1962 if (found_key.objectid != key.objectid) 1961 if (found_key.objectid != key.objectid)
1963 break; 1962 break;
1964 1963
1965 chunk = btrfs_item_ptr(path->nodes[0],
1966 path->slots[0],
1967 struct btrfs_chunk);
1968 /* chunk zero is special */ 1964 /* chunk zero is special */
1969 if (found_key.offset == 0) 1965 if (found_key.offset == 0)
1970 break; 1966 break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
3031 } 3027 }
3032 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 3028 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
3033 dev = multi->stripes[dev_nr].dev; 3029 dev = multi->stripes[dev_nr].dev;
3034 BUG_ON(rw == WRITE && !dev->writeable); 3030 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
3035 if (dev && dev->bdev) {
3036 bio->bi_bdev = dev->bdev; 3031 bio->bi_bdev = dev->bdev;
3037 if (async_submit) 3032 if (async_submit)
3038 schedule_bio(root, dev, rw, bio); 3033 schedule_bio(root, dev, rw, bio);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb215878..698fdd2c739c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
178 struct inode *inode = dentry->d_inode; 178 struct inode *inode = dentry->d_inode;
179 struct btrfs_root *root = BTRFS_I(inode)->root; 179 struct btrfs_root *root = BTRFS_I(inode)->root;
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct btrfs_item *item;
182 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
183 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
184 int ret = 0, slot, advance; 183 int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
234 } 233 }
235 advance = 1; 234 advance = 1;
236 235
237 item = btrfs_item_nr(leaf, slot);
238 btrfs_item_key_to_cpu(leaf, &found_key, slot); 236 btrfs_item_key_to_cpu(leaf, &found_key, slot);
239 237
240 /* check to make sure this item is what we want */ 238 /* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa239..b9cd5445f71c 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
199 int nr_pages = 0; 199 int nr_pages = 0;
200 struct page *in_page = NULL; 200 struct page *in_page = NULL;
201 struct page *out_page = NULL; 201 struct page *out_page = NULL;
202 int out_written = 0;
203 int in_read = 0;
204 unsigned long bytes_left; 202 unsigned long bytes_left;
205 203
206 *out_pages = 0; 204 *out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235 233
236 out_written = 0;
237 in_read = 0;
238
239 while (workspace->def_strm.total_in < len) { 234 while (workspace->def_strm.total_in < len) {
240 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); 235 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 if (ret != Z_OK) { 236 if (ret != Z_OK) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e0421891..08b460ae0539 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
635/* 635/*
636 * mount: join the ceph cluster, and open root directory. 636 * mount: join the ceph cluster, and open root directory.
637 */ 637 */
638static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, 638static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
639 const char *path) 639 const char *path)
640{ 640{
641 int err; 641 int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
678 } 678 }
679 } 679 }
680 680
681 mnt->mnt_root = root;
682 mnt->mnt_sb = fsc->sb;
683
684 fsc->mount_state = CEPH_MOUNT_MOUNTED; 681 fsc->mount_state = CEPH_MOUNT_MOUNTED;
685 dout("mount success\n"); 682 dout("mount success\n");
686 err = 0; 683 mutex_unlock(&fsc->client->mount_mutex);
684 return root;
687 685
688out: 686out:
689 mutex_unlock(&fsc->client->mount_mutex); 687 mutex_unlock(&fsc->client->mount_mutex);
690 return err; 688 return ERR_PTR(err);
691 689
692fail: 690fail:
693 if (first) { 691 if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
777 return err; 775 return err;
778} 776}
779 777
780static int ceph_get_sb(struct file_system_type *fs_type, 778static struct dentry *ceph_mount(struct file_system_type *fs_type,
781 int flags, const char *dev_name, void *data, 779 int flags, const char *dev_name, void *data)
782 struct vfsmount *mnt)
783{ 780{
784 struct super_block *sb; 781 struct super_block *sb;
785 struct ceph_fs_client *fsc; 782 struct ceph_fs_client *fsc;
783 struct dentry *res;
786 int err; 784 int err;
787 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 785 int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
788 const char *path = NULL; 786 const char *path = NULL;
789 struct ceph_mount_options *fsopt = NULL; 787 struct ceph_mount_options *fsopt = NULL;
790 struct ceph_options *opt = NULL; 788 struct ceph_options *opt = NULL;
791 789
792 dout("ceph_get_sb\n"); 790 dout("ceph_mount\n");
793 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); 791 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
794 if (err < 0) 792 if (err < 0) {
793 res = ERR_PTR(err);
795 goto out_final; 794 goto out_final;
795 }
796 796
797 /* create client (which we may/may not use) */ 797 /* create client (which we may/may not use) */
798 fsc = create_fs_client(fsopt, opt); 798 fsc = create_fs_client(fsopt, opt);
799 if (IS_ERR(fsc)) { 799 if (IS_ERR(fsc)) {
800 err = PTR_ERR(fsc); 800 res = ERR_CAST(fsc);
801 kfree(fsopt); 801 kfree(fsopt);
802 kfree(opt); 802 kfree(opt);
803 goto out_final; 803 goto out_final;
804 } 804 }
805 805
806 err = ceph_mdsc_init(fsc); 806 err = ceph_mdsc_init(fsc);
807 if (err < 0) 807 if (err < 0) {
808 res = ERR_PTR(err);
808 goto out; 809 goto out;
810 }
809 811
810 if (ceph_test_opt(fsc->client, NOSHARE)) 812 if (ceph_test_opt(fsc->client, NOSHARE))
811 compare_super = NULL; 813 compare_super = NULL;
812 sb = sget(fs_type, compare_super, ceph_set_super, fsc); 814 sb = sget(fs_type, compare_super, ceph_set_super, fsc);
813 if (IS_ERR(sb)) { 815 if (IS_ERR(sb)) {
814 err = PTR_ERR(sb); 816 res = ERR_CAST(sb);
815 goto out; 817 goto out;
816 } 818 }
817 819
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
823 } else { 825 } else {
824 dout("get_sb using new client %p\n", fsc); 826 dout("get_sb using new client %p\n", fsc);
825 err = ceph_register_bdi(sb, fsc); 827 err = ceph_register_bdi(sb, fsc);
826 if (err < 0) 828 if (err < 0) {
829 res = ERR_PTR(err);
827 goto out_splat; 830 goto out_splat;
831 }
828 } 832 }
829 833
830 err = ceph_mount(fsc, mnt, path); 834 res = ceph_real_mount(fsc, path);
831 if (err < 0) 835 if (IS_ERR(res))
832 goto out_splat; 836 goto out_splat;
833 dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, 837 dout("root %p inode %p ino %llx.%llx\n", res,
834 mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode)); 838 res->d_inode, ceph_vinop(res->d_inode));
835 return 0; 839 return res;
836 840
837out_splat: 841out_splat:
838 ceph_mdsc_close_sessions(fsc->mdsc); 842 ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
843 ceph_mdsc_destroy(fsc); 847 ceph_mdsc_destroy(fsc);
844 destroy_fs_client(fsc); 848 destroy_fs_client(fsc);
845out_final: 849out_final:
846 dout("ceph_get_sb fail %d\n", err); 850 dout("ceph_mount fail %ld\n", PTR_ERR(res));
847 return err; 851 return res;
848} 852}
849 853
850static void ceph_kill_sb(struct super_block *s) 854static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
860static struct file_system_type ceph_fs_type = { 864static struct file_system_type ceph_fs_type = {
861 .owner = THIS_MODULE, 865 .owner = THIS_MODULE,
862 .name = "ceph", 866 .name = "ceph",
863 .get_sb = ceph_get_sb, 867 .mount = ceph_mount,
864 .kill_sb = ceph_kill_sb, 868 .kill_sb = ceph_kill_sb,
865 .fs_flags = FS_RENAME_DOES_D_MOVE, 869 .fs_flags = FS_RENAME_DOES_D_MOVE,
866}; 870};
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0ed213970ced 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,9 @@ config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)" 2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO
6 select CRYPTO_MD5
7 select CRYPTO_ARC4
5 help 8 help
6 This is the client VFS module for the Common Internet File System 9 This is the client VFS module for the Common Internet File System
7 (CIFS) protocol which is the successor to the Server Message Block 10 (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294cf..f856732161ab 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24); 43 unsigned char *p24);
44 44
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 const struct session_key *key, char *signature) 46 struct TCP_Server_Info *server, char *signature)
47{ 47{
48 struct MD5Context context; 48 int rc;
49 49
50 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) 50 if (cifs_pdu == NULL || signature == NULL || server == NULL)
51 return -EINVAL; 51 return -EINVAL;
52 52
53 cifs_MD5_init(&context); 53 if (!server->secmech.sdescmd5) {
54 cifs_MD5_update(&context, (char *)&key->data, key->len); 54 cERROR(1, "%s: Can't generate signature\n", __func__);
55 cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 55 return -1;
56 }
57
58 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
59 if (rc) {
60 cERROR(1, "%s: Oould not init md5\n", __func__);
61 return rc;
62 }
63
64 crypto_shash_update(&server->secmech.sdescmd5->shash,
65 server->session_key.response, server->session_key.len);
66
67 crypto_shash_update(&server->secmech.sdescmd5->shash,
68 cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
69
70 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
56 71
57 cifs_MD5_final(signature, &context);
58 return 0; 72 return 0;
59} 73}
60 74
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
79 server->sequence_number++; 93 server->sequence_number++;
80 spin_unlock(&GlobalMid_Lock); 94 spin_unlock(&GlobalMid_Lock);
81 95
82 rc = cifs_calculate_signature(cifs_pdu, &server->session_key, 96 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
83 smb_signature);
84 if (rc) 97 if (rc)
85 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 98 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
86 else 99 else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
90} 103}
91 104
92static int cifs_calc_signature2(const struct kvec *iov, int n_vec, 105static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
93 const struct session_key *key, char *signature) 106 struct TCP_Server_Info *server, char *signature)
94{ 107{
95 struct MD5Context context;
96 int i; 108 int i;
109 int rc;
97 110
98 if ((iov == NULL) || (signature == NULL) || (key == NULL)) 111 if (iov == NULL || signature == NULL || server == NULL)
99 return -EINVAL; 112 return -EINVAL;
100 113
101 cifs_MD5_init(&context); 114 if (!server->secmech.sdescmd5) {
102 cifs_MD5_update(&context, (char *)&key->data, key->len); 115 cERROR(1, "%s: Can't generate signature\n", __func__);
116 return -1;
117 }
118
119 rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
120 if (rc) {
121 cERROR(1, "%s: Oould not init md5\n", __func__);
122 return rc;
123 }
124
125 crypto_shash_update(&server->secmech.sdescmd5->shash,
126 server->session_key.response, server->session_key.len);
127
103 for (i = 0; i < n_vec; i++) { 128 for (i = 0; i < n_vec; i++) {
104 if (iov[i].iov_len == 0) 129 if (iov[i].iov_len == 0)
105 continue; 130 continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
112 if (i == 0) { 137 if (i == 0) {
113 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ 138 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
114 break; /* nothing to sign or corrupt header */ 139 break; /* nothing to sign or corrupt header */
115 cifs_MD5_update(&context, iov[0].iov_base+4, 140 crypto_shash_update(&server->secmech.sdescmd5->shash,
116 iov[0].iov_len-4); 141 iov[i].iov_base + 4, iov[i].iov_len - 4);
117 } else 142 } else
118 cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); 143 crypto_shash_update(&server->secmech.sdescmd5->shash,
144 iov[i].iov_base, iov[i].iov_len);
119 } 145 }
120 146
121 cifs_MD5_final(signature, &context); 147 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
122 148
123 return 0; 149 return rc;
124} 150}
125 151
126
127int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 152int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
128 __u32 *pexpected_response_sequence_number) 153 __u32 *pexpected_response_sequence_number)
129{ 154{
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
146 server->sequence_number++; 171 server->sequence_number++;
147 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
148 173
149 rc = cifs_calc_signature2(iov, n_vec, &server->session_key, 174 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
150 smb_signature);
151 if (rc) 175 if (rc)
152 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 176 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
153 else 177 else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
157} 181}
158 182
159int cifs_verify_signature(struct smb_hdr *cifs_pdu, 183int cifs_verify_signature(struct smb_hdr *cifs_pdu,
160 const struct session_key *session_key, 184 struct TCP_Server_Info *server,
161 __u32 expected_sequence_number) 185 __u32 expected_sequence_number)
162{ 186{
163 unsigned int rc; 187 unsigned int rc;
164 char server_response_sig[8]; 188 char server_response_sig[8];
165 char what_we_think_sig_should_be[20]; 189 char what_we_think_sig_should_be[20];
166 190
167 if (cifs_pdu == NULL || session_key == NULL) 191 if (cifs_pdu == NULL || server == NULL)
168 return -EINVAL; 192 return -EINVAL;
169 193
170 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 194 if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
193 cpu_to_le32(expected_sequence_number); 217 cpu_to_le32(expected_sequence_number);
194 cifs_pdu->Signature.Sequence.Reserved = 0; 218 cifs_pdu->Signature.Sequence.Reserved = 0;
195 219
196 rc = cifs_calculate_signature(cifs_pdu, session_key, 220 rc = cifs_calculate_signature(cifs_pdu, server,
197 what_we_think_sig_should_be); 221 what_we_think_sig_should_be);
198 222
199 if (rc) 223 if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 233
210} 234}
211 235
212/* We fill in key by putting in 40 byte array which was allocated by caller */ 236/* first calculate 24 bytes ntlm response and then 16 byte session key */
213int cifs_calculate_session_key(struct session_key *key, const char *rn, 237int setup_ntlm_response(struct cifsSesInfo *ses)
214 const char *password)
215{ 238{
216 char temp_key[16]; 239 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
217 if ((key == NULL) || (rn == NULL)) 240 char temp_key[CIFS_SESS_KEY_SIZE];
241
242 if (!ses)
218 return -EINVAL; 243 return -EINVAL;
219 244
220 E_md4hash(password, temp_key); 245 ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
221 mdfour(key->data.ntlm, temp_key, 16); 246 if (!ses->auth_key.response) {
222 memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE); 247 cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
223 key->len = 40; 248 return -ENOMEM;
249 }
250 ses->auth_key.len = temp_len;
251
252 SMBNTencrypt(ses->password, ses->server->cryptkey,
253 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
254
255 E_md4hash(ses->password, temp_key);
256 mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
257
224 return 0; 258 return 0;
225} 259}
226 260
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
294 * two times the unicode length of a server name + 328 * two times the unicode length of a server name +
295 * size of a timestamp (which is 8 bytes). 329 * size of a timestamp (which is 8 bytes).
296 */ 330 */
297 ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; 331 ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
298 ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL); 332 ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
299 if (!ses->tiblob) { 333 if (!ses->auth_key.response) {
300 ses->tilen = 0; 334 ses->auth_key.len = 0;
301 cERROR(1, "Challenge target info allocation failure"); 335 cERROR(1, "Challenge target info allocation failure");
302 return -ENOMEM; 336 return -ENOMEM;
303 } 337 }
304 338
305 blobptr = ses->tiblob; 339 blobptr = ses->auth_key.response;
306 attrptr = (struct ntlmssp2_name *) blobptr; 340 attrptr = (struct ntlmssp2_name *) blobptr;
307 341
308 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); 342 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
357 * about target string i.e. for some, just user name might suffice. 391 * about target string i.e. for some, just user name might suffice.
358 */ 392 */
359static int 393static int
360find_domain_name(struct cifsSesInfo *ses) 394find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
361{ 395{
362 unsigned int attrsize; 396 unsigned int attrsize;
363 unsigned int type; 397 unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
366 unsigned char *blobend; 400 unsigned char *blobend;
367 struct ntlmssp2_name *attrptr; 401 struct ntlmssp2_name *attrptr;
368 402
369 if (!ses->tilen || !ses->tiblob) 403 if (!ses->auth_key.len || !ses->auth_key.response)
370 return 0; 404 return 0;
371 405
372 blobptr = ses->tiblob; 406 blobptr = ses->auth_key.response;
373 blobend = ses->tiblob + ses->tilen; 407 blobend = blobptr + ses->auth_key.len;
374 408
375 while (blobptr + onesize < blobend) { 409 while (blobptr + onesize < blobend) {
376 attrptr = (struct ntlmssp2_name *) blobptr; 410 attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
386 if (!attrsize) 420 if (!attrsize)
387 break; 421 break;
388 if (!ses->domainName) { 422 if (!ses->domainName) {
389 struct nls_table *default_nls;
390 ses->domainName = 423 ses->domainName =
391 kmalloc(attrsize + 1, GFP_KERNEL); 424 kmalloc(attrsize + 1, GFP_KERNEL);
392 if (!ses->domainName) 425 if (!ses->domainName)
393 return -ENOMEM; 426 return -ENOMEM;
394 default_nls = load_nls_default();
395 cifs_from_ucs2(ses->domainName, 427 cifs_from_ucs2(ses->domainName,
396 (__le16 *)blobptr, attrsize, attrsize, 428 (__le16 *)blobptr, attrsize, attrsize,
397 default_nls, false); 429 nls_cp, false);
398 unload_nls(default_nls);
399 break; 430 break;
400 } 431 }
401 } 432 }
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
405 return 0; 436 return 0;
406} 437}
407 438
408static int calc_ntlmv2_hash(struct cifsSesInfo *ses, 439static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
409 const struct nls_table *nls_cp) 440 const struct nls_table *nls_cp)
410{ 441{
411 int rc = 0; 442 int rc = 0;
412 int len; 443 int len;
413 char nt_hash[16]; 444 char nt_hash[CIFS_NTHASH_SIZE];
414 struct HMACMD5Context *pctxt;
415 wchar_t *user; 445 wchar_t *user;
416 wchar_t *domain; 446 wchar_t *domain;
447 wchar_t *server;
417 448
418 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); 449 if (!ses->server->secmech.sdeschmacmd5) {
419 450 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
420 if (pctxt == NULL) 451 return -1;
421 return -ENOMEM; 452 }
422 453
423 /* calculate md4 hash of password */ 454 /* calculate md4 hash of password */
424 E_md4hash(ses->password, nt_hash); 455 E_md4hash(ses->password, nt_hash);
425 456
426 /* convert Domainname to unicode and uppercase */ 457 crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
427 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); 458 CIFS_NTHASH_SIZE);
459
460 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
461 if (rc) {
462 cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
463 return rc;
464 }
428 465
429 /* convert ses->userName to unicode and uppercase */ 466 /* convert ses->userName to unicode and uppercase */
430 len = strlen(ses->userName); 467 len = strlen(ses->userName);
431 user = kmalloc(2 + (len * 2), GFP_KERNEL); 468 user = kmalloc(2 + (len * 2), GFP_KERNEL);
432 if (user == NULL) 469 if (user == NULL) {
470 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
471 rc = -ENOMEM;
433 goto calc_exit_2; 472 goto calc_exit_2;
473 }
434 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 474 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
435 UniStrupr(user); 475 UniStrupr(user);
436 hmac_md5_update((char *)user, 2*len, pctxt); 476
477 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
478 (char *)user, 2 * len);
437 479
438 /* convert ses->domainName to unicode and uppercase */ 480 /* convert ses->domainName to unicode and uppercase */
439 if (ses->domainName) { 481 if (ses->domainName) {
440 len = strlen(ses->domainName); 482 len = strlen(ses->domainName);
441 483
442 domain = kmalloc(2 + (len * 2), GFP_KERNEL); 484 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
443 if (domain == NULL) 485 if (domain == NULL) {
486 cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
487 rc = -ENOMEM;
444 goto calc_exit_1; 488 goto calc_exit_1;
489 }
445 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, 490 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
446 nls_cp); 491 nls_cp);
447 /* the following line was removed since it didn't work well 492 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
448 with lower cased domain name that passed as an option. 493 (char *)domain, 2 * len);
449 Maybe converting the domain name earlier makes sense */
450 /* UniStrupr(domain); */
451
452 hmac_md5_update((char *)domain, 2*len, pctxt);
453
454 kfree(domain); 494 kfree(domain);
495 } else if (ses->serverName) {
496 len = strlen(ses->serverName);
497
498 server = kmalloc(2 + (len * 2), GFP_KERNEL);
499 if (server == NULL) {
500 cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
501 rc = -ENOMEM;
502 goto calc_exit_1;
503 }
504 len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
505 nls_cp);
506 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
507 (char *)server, 2 * len);
508 kfree(server);
455 } 509 }
510
511 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
512 ntlmv2_hash);
513
456calc_exit_1: 514calc_exit_1:
457 kfree(user); 515 kfree(user);
458calc_exit_2: 516calc_exit_2:
459 /* BB FIXME what about bytes 24 through 40 of the signing key? 517 return rc;
460 compare with the NTLM example */ 518}
461 hmac_md5_final(ses->ntlmv2_hash, pctxt); 519
520static int
521CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
522{
523 int rc;
524 unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
525
526 if (!ses->server->secmech.sdeschmacmd5) {
527 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
528 return -1;
529 }
530
531 crypto_shash_setkey(ses->server->secmech.hmacmd5,
532 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
533
534 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
535 if (rc) {
536 cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
537 return rc;
538 }
539
540 if (ses->server->secType == RawNTLMSSP)
541 memcpy(ses->auth_key.response + offset,
542 ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
543 else
544 memcpy(ses->auth_key.response + offset,
545 ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
546 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
547 ses->auth_key.response + offset, ses->auth_key.len - offset);
548
549 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
550 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
462 551
463 kfree(pctxt);
464 return rc; 552 return rc;
465} 553}
466 554
555
467int 556int
468setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, 557setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
469 const struct nls_table *nls_cp)
470{ 558{
471 int rc; 559 int rc;
472 struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; 560 int baselen;
473 struct HMACMD5Context context; 561 unsigned int tilen;
474 562 struct ntlmv2_resp *buf;
475 buf->blob_signature = cpu_to_le32(0x00000101); 563 char ntlmv2_hash[16];
476 buf->reserved = 0; 564 unsigned char *tiblob = NULL; /* target info blob */
477 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
478 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
479 buf->reserved2 = 0;
480 565
481 if (ses->server->secType == RawNTLMSSP) { 566 if (ses->server->secType == RawNTLMSSP) {
482 if (!ses->domainName) { 567 if (!ses->domainName) {
483 rc = find_domain_name(ses); 568 rc = find_domain_name(ses, nls_cp);
484 if (rc) { 569 if (rc) {
485 cERROR(1, "error %d finding domain name", rc); 570 cERROR(1, "error %d finding domain name", rc);
486 goto setup_ntlmv2_rsp_ret; 571 goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
490 rc = build_avpair_blob(ses, nls_cp); 575 rc = build_avpair_blob(ses, nls_cp);
491 if (rc) { 576 if (rc) {
492 cERROR(1, "error %d building av pair blob", rc); 577 cERROR(1, "error %d building av pair blob", rc);
493 return rc; 578 goto setup_ntlmv2_rsp_ret;
494 } 579 }
495 } 580 }
496 581
497 /* calculate buf->ntlmv2_hash */ 582 baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
498 rc = calc_ntlmv2_hash(ses, nls_cp); 583 tilen = ses->auth_key.len;
584 tiblob = ses->auth_key.response;
585
586 ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
587 if (!ses->auth_key.response) {
588 rc = ENOMEM;
589 ses->auth_key.len = 0;
590 cERROR(1, "%s: Can't allocate auth blob", __func__);
591 goto setup_ntlmv2_rsp_ret;
592 }
593 ses->auth_key.len += baselen;
594
595 buf = (struct ntlmv2_resp *)
596 (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
597 buf->blob_signature = cpu_to_le32(0x00000101);
598 buf->reserved = 0;
599 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
600 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
601 buf->reserved2 = 0;
602
603 memcpy(ses->auth_key.response + baselen, tiblob, tilen);
604
605 /* calculate ntlmv2_hash */
606 rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
499 if (rc) { 607 if (rc) {
500 cERROR(1, "could not get v2 hash rc %d", rc); 608 cERROR(1, "could not get v2 hash rc %d", rc);
501 goto setup_ntlmv2_rsp_ret; 609 goto setup_ntlmv2_rsp_ret;
502 } 610 }
503 CalcNTLMv2_response(ses, resp_buf); 611
612 /* calculate first part of the client response (CR1) */
613 rc = CalcNTLMv2_response(ses, ntlmv2_hash);
614 if (rc) {
615 cERROR(1, "Could not calculate CR1 rc: %d", rc);
616 goto setup_ntlmv2_rsp_ret;
617 }
504 618
505 /* now calculate the session key for NTLMv2 */ 619 /* now calculate the session key for NTLMv2 */
506 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 620 crypto_shash_setkey(ses->server->secmech.hmacmd5,
507 hmac_md5_update(resp_buf, 16, &context); 621 ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
508 hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context); 622
623 rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
624 if (rc) {
625 cERROR(1, "%s: Could not init hmacmd5\n", __func__);
626 goto setup_ntlmv2_rsp_ret;
627 }
509 628
510 memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf, 629 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
511 sizeof(struct ntlmv2_resp)); 630 ses->auth_key.response + CIFS_SESS_KEY_SIZE,
512 ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp); 631 CIFS_HMAC_MD5_HASH_SIZE);
513 632
514 return 0; 633 rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
634 ses->auth_key.response);
515 635
516setup_ntlmv2_rsp_ret: 636setup_ntlmv2_rsp_ret:
517 kfree(ses->tiblob); 637 kfree(tiblob);
518 ses->tiblob = NULL;
519 ses->tilen = 0;
520 638
521 return rc; 639 return rc;
522} 640}
523 641
524void CalcNTLMv2_response(const struct cifsSesInfo *ses, 642int
525 char *v2_session_response) 643calc_seckey(struct cifsSesInfo *ses)
526{ 644{
527 struct HMACMD5Context context; 645 int rc;
528 /* rest of v2 struct already generated */ 646 struct crypto_blkcipher *tfm_arc4;
529 memcpy(v2_session_response + 8, ses->cryptKey, 8); 647 struct scatterlist sgin, sgout;
530 hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); 648 struct blkcipher_desc desc;
649 unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
650
651 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
652
653 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
654 if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
655 cERROR(1, "could not allocate crypto API arc4\n");
656 return PTR_ERR(tfm_arc4);
657 }
531 658
532 hmac_md5_update(v2_session_response+8, 659 desc.tfm = tfm_arc4;
533 sizeof(struct ntlmv2_resp) - 8, &context);
534 660
535 if (ses->tilen) 661 crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
536 hmac_md5_update(ses->tiblob, ses->tilen, &context); 662 CIFS_SESS_KEY_SIZE);
537 663
538 hmac_md5_final(v2_session_response, &context); 664 sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
539/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ 665 sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
666
667 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
668 if (rc) {
669 cERROR(1, "could not encrypt session key rc: %d\n", rc);
670 crypto_free_blkcipher(tfm_arc4);
671 return rc;
672 }
673
674 /* make secondary_key/nonce as session key */
675 memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
676 /* and make len as that of session key only */
677 ses->auth_key.len = CIFS_SESS_KEY_SIZE;
678
679 crypto_free_blkcipher(tfm_arc4);
680
681 return 0;
682}
683
684void
685cifs_crypto_shash_release(struct TCP_Server_Info *server)
686{
687 if (server->secmech.md5)
688 crypto_free_shash(server->secmech.md5);
689
690 if (server->secmech.hmacmd5)
691 crypto_free_shash(server->secmech.hmacmd5);
692
693 kfree(server->secmech.sdeschmacmd5);
694
695 kfree(server->secmech.sdescmd5);
696}
697
698int
699cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
700{
701 int rc;
702 unsigned int size;
703
704 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
705 if (!server->secmech.hmacmd5 ||
706 IS_ERR(server->secmech.hmacmd5)) {
707 cERROR(1, "could not allocate crypto hmacmd5\n");
708 return PTR_ERR(server->secmech.hmacmd5);
709 }
710
711 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
712 if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
713 cERROR(1, "could not allocate crypto md5\n");
714 rc = PTR_ERR(server->secmech.md5);
715 goto crypto_allocate_md5_fail;
716 }
717
718 size = sizeof(struct shash_desc) +
719 crypto_shash_descsize(server->secmech.hmacmd5);
720 server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
721 if (!server->secmech.sdeschmacmd5) {
722 cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
723 rc = -ENOMEM;
724 goto crypto_allocate_hmacmd5_sdesc_fail;
725 }
726 server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
727 server->secmech.sdeschmacmd5->shash.flags = 0x0;
728
729
730 size = sizeof(struct shash_desc) +
731 crypto_shash_descsize(server->secmech.md5);
732 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
733 if (!server->secmech.sdescmd5) {
734 cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
735 rc = -ENOMEM;
736 goto crypto_allocate_md5_sdesc_fail;
737 }
738 server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
739 server->secmech.sdescmd5->shash.flags = 0x0;
740
741 return 0;
742
743crypto_allocate_md5_sdesc_fail:
744 kfree(server->secmech.sdeschmacmd5);
745
746crypto_allocate_hmacmd5_sdesc_fail:
747 crypto_free_shash(server->secmech.md5);
748
749crypto_allocate_md5_fail:
750 crypto_free_shash(server->secmech.hmacmd5);
751
752 return rc;
540} 753}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f210..75c4eaa79588 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -318,7 +318,6 @@ cifs_alloc_inode(struct super_block *sb)
318 return NULL; 318 return NULL;
319 cifs_inode->cifsAttrs = 0x20; /* default */ 319 cifs_inode->cifsAttrs = 0x20; /* default */
320 cifs_inode->time = 0; 320 cifs_inode->time = 0;
321 cifs_inode->write_behind_rc = 0;
322 /* Until the file is open and we have gotten oplock 321 /* Until the file is open and we have gotten oplock
323 info back from the server, can not assume caching of 322 info back from the server, can not assume caching of
324 file data or metadata */ 323 file data or metadata */
@@ -545,9 +544,9 @@ static const struct super_operations cifs_super_ops = {
545#endif 544#endif
546}; 545};
547 546
548static int 547static struct dentry *
549cifs_get_sb(struct file_system_type *fs_type, 548cifs_do_mount(struct file_system_type *fs_type,
550 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 549 int flags, const char *dev_name, void *data)
551{ 550{
552 int rc; 551 int rc;
553 struct super_block *sb; 552 struct super_block *sb;
@@ -557,18 +556,17 @@ cifs_get_sb(struct file_system_type *fs_type,
557 cFYI(1, "Devname: %s flags: %d ", dev_name, flags); 556 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
558 557
559 if (IS_ERR(sb)) 558 if (IS_ERR(sb))
560 return PTR_ERR(sb); 559 return ERR_CAST(sb);
561 560
562 sb->s_flags = flags; 561 sb->s_flags = flags;
563 562
564 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 563 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
565 if (rc) { 564 if (rc) {
566 deactivate_locked_super(sb); 565 deactivate_locked_super(sb);
567 return rc; 566 return ERR_PTR(rc);
568 } 567 }
569 sb->s_flags |= MS_ACTIVE; 568 sb->s_flags |= MS_ACTIVE;
570 simple_set_mnt(mnt, sb); 569 return dget(sb->s_root);
571 return 0;
572} 570}
573 571
574static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 572static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +632,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
634struct file_system_type cifs_fs_type = { 632struct file_system_type cifs_fs_type = {
635 .owner = THIS_MODULE, 633 .owner = THIS_MODULE,
636 .name = "cifs", 634 .name = "cifs",
637 .get_sb = cifs_get_sb, 635 .mount = cifs_do_mount,
638 .kill_sb = kill_anon_super, 636 .kill_sb = kill_anon_super,
639 /* .fs_flags */ 637 /* .fs_flags */
640}; 638};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b42..897b2b2b28b5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
112extern const struct export_operations cifs_export_ops; 112extern const struct export_operations cifs_export_ops;
113#endif /* EXPERIMENTAL */ 113#endif /* EXPERIMENTAL */
114 114
115#define CIFS_VERSION "1.67" 115#define CIFS_VERSION "1.68"
116#endif /* _CIFSFS_H */ 116#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f24..f259e4d7612d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include "cifs_fs_sb.h" 26#include "cifs_fs_sb.h"
27#include "cifsacl.h" 27#include "cifsacl.h"
28#include <crypto/internal/hash.h>
29#include <linux/scatterlist.h>
30
28/* 31/*
29 * The sizes of various internal tables and strings 32 * The sizes of various internal tables and strings
30 */ 33 */
@@ -74,7 +77,7 @@
74 * CIFS vfs client Status information (based on what we know.) 77 * CIFS vfs client Status information (based on what we know.)
75 */ 78 */
76 79
77 /* associated with each tcp and smb session */ 80/* associated with each tcp and smb session */
78enum statusEnum { 81enum statusEnum {
79 CifsNew = 0, 82 CifsNew = 0,
80 CifsGood, 83 CifsGood,
@@ -99,14 +102,29 @@ enum protocolEnum {
99 102
100struct session_key { 103struct session_key {
101 unsigned int len; 104 unsigned int len;
102 union { 105 char *response;
103 char ntlm[CIFS_SESS_KEY_SIZE + 16]; 106};
104 char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */ 107
105 struct { 108/* crypto security descriptor definition */
106 char key[16]; 109struct sdesc {
107 struct ntlmv2_resp resp; 110 struct shash_desc shash;
108 } ntlmv2; 111 char ctx[];
109 } data; 112};
113
114/* crypto hashing related structure/fields, not specific to a sec mech */
115struct cifs_secmech {
116 struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
117 struct crypto_shash *md5; /* md5 hash function */
118 struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
119 struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
120};
121
122/* per smb session structure/fields */
123struct ntlmssp_auth {
124 __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
125 __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
126 unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
127 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
110}; 128};
111 129
112struct cifs_cred { 130struct cifs_cred {
@@ -179,12 +197,14 @@ struct TCP_Server_Info {
179 int capabilities; /* allow selective disabling of caps by smb sess */ 197 int capabilities; /* allow selective disabling of caps by smb sess */
180 int timeAdj; /* Adjust for difference in server time zone in sec */ 198 int timeAdj; /* Adjust for difference in server time zone in sec */
181 __u16 CurrentMid; /* multiplex id - rotating counter */ 199 __u16 CurrentMid; /* multiplex id - rotating counter */
200 char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
182 /* 16th byte of RFC1001 workstation name is always null */ 201 /* 16th byte of RFC1001 workstation name is always null */
183 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 202 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
184 __u32 sequence_number; /* needed for CIFS PDU signature */ 203 __u32 sequence_number; /* needed for CIFS PDU signature */
185 struct session_key session_key; 204 struct session_key session_key;
186 unsigned long lstrp; /* when we got last response from this server */ 205 unsigned long lstrp; /* when we got last response from this server */
187 u16 dialect; /* dialect index that server chose */ 206 u16 dialect; /* dialect index that server chose */
207 struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
188 /* extended security flavors that server supports */ 208 /* extended security flavors that server supports */
189 bool sec_kerberos; /* supports plain Kerberos */ 209 bool sec_kerberos; /* supports plain Kerberos */
190 bool sec_mskerberos; /* supports legacy MS Kerberos */ 210 bool sec_mskerberos; /* supports legacy MS Kerberos */
@@ -222,11 +242,8 @@ struct cifsSesInfo {
222 char userName[MAX_USERNAME_SIZE + 1]; 242 char userName[MAX_USERNAME_SIZE + 1];
223 char *domainName; 243 char *domainName;
224 char *password; 244 char *password;
225 char cryptKey[CIFS_CRYPTO_KEY_SIZE];
226 struct session_key auth_key; 245 struct session_key auth_key;
227 char ntlmv2_hash[16]; 246 struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
228 unsigned int tilen; /* length of the target info blob */
229 unsigned char *tiblob; /* target info blob in challenge response */
230 bool need_reconnect:1; /* connection reset, uid now invalid */ 247 bool need_reconnect:1; /* connection reset, uid now invalid */
231}; 248};
232/* no more than one of the following three session flags may be set */ 249/* no more than one of the following three session flags may be set */
@@ -395,16 +412,19 @@ struct cifsFileInfo {
395 struct list_head llist; /* list of byte range locks we have. */ 412 struct list_head llist; /* list of byte range locks we have. */
396 bool invalidHandle:1; /* file closed via session abend */ 413 bool invalidHandle:1; /* file closed via session abend */
397 bool oplock_break_cancelled:1; 414 bool oplock_break_cancelled:1;
398 atomic_t count; /* reference count */ 415 int count; /* refcount protected by cifs_file_list_lock */
399 struct mutex fh_mutex; /* prevents reopen race after dead ses*/ 416 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
400 struct cifs_search_info srch_inf; 417 struct cifs_search_info srch_inf;
401 struct work_struct oplock_break; /* work for oplock breaks */ 418 struct work_struct oplock_break; /* work for oplock breaks */
402}; 419};
403 420
404/* Take a reference on the file private data */ 421/*
422 * Take a reference on the file private data. Must be called with
423 * cifs_file_list_lock held.
424 */
405static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) 425static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
406{ 426{
407 atomic_inc(&cifs_file->count); 427 ++cifs_file->count;
408} 428}
409 429
410void cifsFileInfo_put(struct cifsFileInfo *cifs_file); 430void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +437,6 @@ struct cifsInodeInfo {
417 struct list_head lockList; 437 struct list_head lockList;
418 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 438 /* BB add in lists for dirty pages i.e. write caching info for oplock */
419 struct list_head openFileList; 439 struct list_head openFileList;
420 int write_behind_rc;
421 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 440 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
422 unsigned long time; /* jiffies of last update/check of inode */ 441 unsigned long time; /* jiffies of last update/check of inode */
423 bool clientCanCacheRead:1; /* read oplock */ 442 bool clientCanCacheRead:1; /* read oplock */
@@ -668,7 +687,7 @@ require use of the stronger protocol */
668 * GlobalMid_Lock protects: 687 * GlobalMid_Lock protects:
669 * list operations on pending_mid_q and oplockQ 688 * list operations on pending_mid_q and oplockQ
670 * updates to XID counters, multiplex id and SMB sequence numbers 689 * updates to XID counters, multiplex id and SMB sequence numbers
671 * GlobalSMBSesLock protects: 690 * cifs_file_list_lock protects:
672 * list operations on tcp and SMB session lists and tCon lists 691 * list operations on tcp and SMB session lists and tCon lists
673 * f_owner.lock protects certain per file struct operations 692 * f_owner.lock protects certain per file struct operations
674 * mapping->page_lock protects certain per page operations 693 * mapping->page_lock protects certain per page operations
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4c..de36b09763a8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
131#define CIFS_CRYPTO_KEY_SIZE (8) 131#define CIFS_CRYPTO_KEY_SIZE (8)
132 132
133/* 133/*
134 * Size of the ntlm client response
135 */
136#define CIFS_AUTH_RESP_SIZE (24)
137
138/*
134 * Size of the session key (crypto key encrypted with the password 139 * Size of the session key (crypto key encrypted with the password
135 */ 140 */
136#define CIFS_SESS_KEY_SIZE (24) 141#define CIFS_SESS_KEY_SIZE (16)
142
143#define CIFS_CLIENT_CHALLENGE_SIZE (8)
144#define CIFS_SERVER_CHALLENGE_SIZE (8)
145#define CIFS_HMAC_MD5_HASH_SIZE (16)
146#define CIFS_CPHTXT_SIZE (16)
147#define CIFS_NTHASH_SIZE (16)
137 148
138/* 149/*
139 * Maximum user name length 150 * Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7ba..edb6d90efdf2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -362,13 +362,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
363 __u32 *); 363 __u32 *);
364extern int cifs_verify_signature(struct smb_hdr *, 364extern int cifs_verify_signature(struct smb_hdr *,
365 const struct session_key *session_key, 365 struct TCP_Server_Info *server,
366 __u32 expected_sequence_number); 366 __u32 expected_sequence_number);
367extern int cifs_calculate_session_key(struct session_key *key, const char *rn, 367extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
368 const char *pass); 368extern int setup_ntlm_response(struct cifsSesInfo *);
369extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); 369extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
370extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 370extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
371 const struct nls_table *); 371extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
372extern int calc_seckey(struct cifsSesInfo *);
373
372#ifdef CONFIG_CIFS_WEAK_PW_HASH 374#ifdef CONFIG_CIFS_WEAK_PW_HASH
373extern void calc_lanman_hash(const char *password, const char *cryptkey, 375extern void calc_lanman_hash(const char *password, const char *cryptkey,
374 bool encrypt, char *lnm_session_key); 376 bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b15..2f2632b6df5a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
503 503
504 if (rsp->EncryptionKeyLength == 504 if (rsp->EncryptionKeyLength ==
505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { 505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
506 memcpy(ses->cryptKey, rsp->EncryptionKey, 506 memcpy(ses->server->cryptkey, rsp->EncryptionKey,
507 CIFS_CRYPTO_KEY_SIZE); 507 CIFS_CRYPTO_KEY_SIZE);
508 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 508 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
509 rc = -EIO; /* need cryptkey unless plain text */ 509 rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 574 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
575 server->timeAdj *= 60; 575 server->timeAdj *= 60;
576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 576 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
577 memcpy(ses->cryptKey, pSMBr->u.EncryptionKey, 577 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
578 CIFS_CRYPTO_KEY_SIZE); 578 CIFS_CRYPTO_KEY_SIZE);
579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 579 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
580 && (pSMBr->EncryptionKeyLength == 0)) { 580 && (pSMBr->EncryptionKeyLength == 0)) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb58..9eb327defa1d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -175,6 +175,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
175 } 175 }
176 server->sequence_number = 0; 176 server->sequence_number = 0;
177 server->session_estab = false; 177 server->session_estab = false;
178 kfree(server->session_key.response);
179 server->session_key.response = NULL;
180 server->session_key.len = 0;
178 181
179 spin_lock(&GlobalMid_Lock); 182 spin_lock(&GlobalMid_Lock);
180 list_for_each(tmp, &server->pending_mid_q) { 183 list_for_each(tmp, &server->pending_mid_q) {
@@ -1064,7 +1067,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1064 } 1067 }
1065 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1068 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1066 value, strlen(value)); 1069 value, strlen(value));
1067 if (i < 0) { 1070 if (i == 0) {
1068 printk(KERN_WARNING "CIFS: Could not parse" 1071 printk(KERN_WARNING "CIFS: Could not parse"
1069 " srcaddr: %s\n", 1072 " srcaddr: %s\n",
1070 value); 1073 value);
@@ -1560,8 +1563,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1560 server->tcpStatus = CifsExiting; 1563 server->tcpStatus = CifsExiting;
1561 spin_unlock(&GlobalMid_Lock); 1564 spin_unlock(&GlobalMid_Lock);
1562 1565
1566 cifs_crypto_shash_release(server);
1563 cifs_fscache_release_client_cookie(server); 1567 cifs_fscache_release_client_cookie(server);
1564 1568
1569 kfree(server->session_key.response);
1570 server->session_key.response = NULL;
1571 server->session_key.len = 0;
1572
1565 task = xchg(&server->tsk, NULL); 1573 task = xchg(&server->tsk, NULL);
1566 if (task) 1574 if (task)
1567 force_sig(SIGKILL, task); 1575 force_sig(SIGKILL, task);
@@ -1614,10 +1622,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1614 goto out_err; 1622 goto out_err;
1615 } 1623 }
1616 1624
1625 rc = cifs_crypto_shash_allocate(tcp_ses);
1626 if (rc) {
1627 cERROR(1, "could not setup hash structures rc %d", rc);
1628 goto out_err;
1629 }
1630
1617 tcp_ses->hostname = extract_hostname(volume_info->UNC); 1631 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1618 if (IS_ERR(tcp_ses->hostname)) { 1632 if (IS_ERR(tcp_ses->hostname)) {
1619 rc = PTR_ERR(tcp_ses->hostname); 1633 rc = PTR_ERR(tcp_ses->hostname);
1620 goto out_err; 1634 goto out_err_crypto_release;
1621 } 1635 }
1622 1636
1623 tcp_ses->noblocksnd = volume_info->noblocksnd; 1637 tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1675,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1661 } 1675 }
1662 if (rc < 0) { 1676 if (rc < 0) {
1663 cERROR(1, "Error connecting to socket. Aborting operation"); 1677 cERROR(1, "Error connecting to socket. Aborting operation");
1664 goto out_err; 1678 goto out_err_crypto_release;
1665 } 1679 }
1666 1680
1667 /* 1681 /*
@@ -1675,7 +1689,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1675 rc = PTR_ERR(tcp_ses->tsk); 1689 rc = PTR_ERR(tcp_ses->tsk);
1676 cERROR(1, "error %d create cifsd thread", rc); 1690 cERROR(1, "error %d create cifsd thread", rc);
1677 module_put(THIS_MODULE); 1691 module_put(THIS_MODULE);
1678 goto out_err; 1692 goto out_err_crypto_release;
1679 } 1693 }
1680 1694
1681 /* thread spawned, put it on the list */ 1695 /* thread spawned, put it on the list */
@@ -1687,6 +1701,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1687 1701
1688 return tcp_ses; 1702 return tcp_ses;
1689 1703
1704out_err_crypto_release:
1705 cifs_crypto_shash_release(tcp_ses);
1706
1690out_err: 1707out_err:
1691 if (tcp_ses) { 1708 if (tcp_ses) {
1692 if (!IS_ERR(tcp_ses->hostname)) 1709 if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1818,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1801 if (ses == NULL) 1818 if (ses == NULL)
1802 goto get_ses_fail; 1819 goto get_ses_fail;
1803 1820
1804 ses->tilen = 0;
1805 ses->tiblob = NULL;
1806 /* new SMB session uses our server ref */ 1821 /* new SMB session uses our server ref */
1807 ses->server = server; 1822 ses->server = server;
1808 if (server->addr.sockAddr6.sin6_family == AF_INET6) 1823 if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1838,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1823 goto get_ses_fail; 1838 goto get_ses_fail;
1824 } 1839 }
1825 if (volume_info->domainname) { 1840 if (volume_info->domainname) {
1826 int len = strlen(volume_info->domainname); 1841 ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
1827 ses->domainName = kmalloc(len + 1, GFP_KERNEL); 1842 if (!ses->domainName)
1828 if (ses->domainName) 1843 goto get_ses_fail;
1829 strcpy(ses->domainName, volume_info->domainname);
1830 } 1844 }
1831 ses->cred_uid = volume_info->cred_uid; 1845 ses->cred_uid = volume_info->cred_uid;
1832 ses->linux_uid = volume_info->linux_uid; 1846 ses->linux_uid = volume_info->linux_uid;
@@ -2985,13 +2999,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2985#ifdef CONFIG_CIFS_WEAK_PW_HASH 2999#ifdef CONFIG_CIFS_WEAK_PW_HASH
2986 if ((global_secflags & CIFSSEC_MAY_LANMAN) && 3000 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
2987 (ses->server->secType == LANMAN)) 3001 (ses->server->secType == LANMAN))
2988 calc_lanman_hash(tcon->password, ses->cryptKey, 3002 calc_lanman_hash(tcon->password, ses->server->cryptkey,
2989 ses->server->secMode & 3003 ses->server->secMode &
2990 SECMODE_PW_ENCRYPT ? true : false, 3004 SECMODE_PW_ENCRYPT ? true : false,
2991 bcc_ptr); 3005 bcc_ptr);
2992 else 3006 else
2993#endif /* CIFS_WEAK_PW_HASH */ 3007#endif /* CIFS_WEAK_PW_HASH */
2994 SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr); 3008 SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
2995 3009
2996 bcc_ptr += CIFS_SESS_KEY_SIZE; 3010 bcc_ptr += CIFS_SESS_KEY_SIZE;
2997 if (ses->capabilities & CAP_UNICODE) { 3011 if (ses->capabilities & CAP_UNICODE) {
@@ -3178,10 +3192,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3178 } else { 3192 } else {
3179 mutex_lock(&ses->server->srv_mutex); 3193 mutex_lock(&ses->server->srv_mutex);
3180 if (!server->session_estab) { 3194 if (!server->session_estab) {
3181 memcpy(&server->session_key.data, 3195 server->session_key.response = ses->auth_key.response;
3182 &ses->auth_key.data, ses->auth_key.len);
3183 server->session_key.len = ses->auth_key.len; 3196 server->session_key.len = ses->auth_key.len;
3184 ses->server->session_estab = true; 3197 server->sequence_number = 0x2;
3198 server->session_estab = true;
3199 ses->auth_key.response = NULL;
3185 } 3200 }
3186 mutex_unlock(&server->srv_mutex); 3201 mutex_unlock(&server->srv_mutex);
3187 3202
@@ -3192,6 +3207,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3192 spin_unlock(&GlobalMid_Lock); 3207 spin_unlock(&GlobalMid_Lock);
3193 } 3208 }
3194 3209
3210 kfree(ses->auth_key.response);
3211 ses->auth_key.response = NULL;
3212 ses->auth_key.len = 0;
3213 kfree(ses->ntlmssp);
3214 ses->ntlmssp = NULL;
3215
3195 return rc; 3216 return rc;
3196} 3217}
3197 3218
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 45af003865d2..ae82159cf7fa 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
131 /* BB no need to lock inode until after invalidate 131 /* BB no need to lock inode until after invalidate
132 since namei code should already have it locked? */ 132 since namei code should already have it locked? */
133 rc = filemap_write_and_wait(inode->i_mapping); 133 rc = filemap_write_and_wait(inode->i_mapping);
134 if (rc != 0) 134 mapping_set_error(inode->i_mapping, rc);
135 pCifsInode->write_behind_rc = rc;
136 } 135 }
137 cFYI(1, "invalidating remote inode since open detected it " 136 cFYI(1, "invalidating remote inode since open detected it "
138 "changed"); 137 "changed");
@@ -232,6 +231,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
232 if (pCifsFile == NULL) 231 if (pCifsFile == NULL)
233 return pCifsFile; 232 return pCifsFile;
234 233
234 pCifsFile->count = 1;
235 pCifsFile->netfid = fileHandle; 235 pCifsFile->netfid = fileHandle;
236 pCifsFile->pid = current->tgid; 236 pCifsFile->pid = current->tgid;
237 pCifsFile->uid = current_fsuid(); 237 pCifsFile->uid = current_fsuid();
@@ -242,7 +242,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
242 mutex_init(&pCifsFile->fh_mutex); 242 mutex_init(&pCifsFile->fh_mutex);
243 mutex_init(&pCifsFile->lock_mutex); 243 mutex_init(&pCifsFile->lock_mutex);
244 INIT_LIST_HEAD(&pCifsFile->llist); 244 INIT_LIST_HEAD(&pCifsFile->llist);
245 atomic_set(&pCifsFile->count, 1);
246 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); 245 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
247 246
248 spin_lock(&cifs_file_list_lock); 247 spin_lock(&cifs_file_list_lock);
@@ -267,7 +266,8 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
267 266
268/* 267/*
269 * Release a reference on the file private data. This may involve closing 268 * Release a reference on the file private data. This may involve closing
270 * the filehandle out on the server. 269 * the filehandle out on the server. Must be called without holding
270 * cifs_file_list_lock.
271 */ 271 */
272void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 272void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
273{ 273{
@@ -276,7 +276,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
276 struct cifsLockInfo *li, *tmp; 276 struct cifsLockInfo *li, *tmp;
277 277
278 spin_lock(&cifs_file_list_lock); 278 spin_lock(&cifs_file_list_lock);
279 if (!atomic_dec_and_test(&cifs_file->count)) { 279 if (--cifs_file->count > 0) {
280 spin_unlock(&cifs_file_list_lock); 280 spin_unlock(&cifs_file_list_lock);
281 return; 281 return;
282 } 282 }
@@ -605,8 +605,7 @@ reopen_success:
605 605
606 if (can_flush) { 606 if (can_flush) {
607 rc = filemap_write_and_wait(inode->i_mapping); 607 rc = filemap_write_and_wait(inode->i_mapping);
608 if (rc != 0) 608 mapping_set_error(inode->i_mapping, rc);
609 CIFS_I(inode)->write_behind_rc = rc;
610 609
611 pCifsInode->clientCanCacheAll = false; 610 pCifsInode->clientCanCacheAll = false;
612 pCifsInode->clientCanCacheRead = false; 611 pCifsInode->clientCanCacheRead = false;
@@ -1353,6 +1352,7 @@ static int cifs_writepages(struct address_space *mapping,
1353 if (!experimEnabled && tcon->ses->server->secMode & 1352 if (!experimEnabled && tcon->ses->server->secMode &
1354 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 1353 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1355 cifsFileInfo_put(open_file); 1354 cifsFileInfo_put(open_file);
1355 kfree(iov);
1356 return generic_writepages(mapping, wbc); 1356 return generic_writepages(mapping, wbc);
1357 } 1357 }
1358 cifsFileInfo_put(open_file); 1358 cifsFileInfo_put(open_file);
@@ -1478,12 +1478,7 @@ retry:
1478 if (rc || bytes_written < bytes_to_write) { 1478 if (rc || bytes_written < bytes_to_write) {
1479 cERROR(1, "Write2 ret %d, wrote %d", 1479 cERROR(1, "Write2 ret %d, wrote %d",
1480 rc, bytes_written); 1480 rc, bytes_written);
1481 /* BB what if continued retry is 1481 mapping_set_error(mapping, rc);
1482 requested via mount flags? */
1483 if (rc == -ENOSPC)
1484 set_bit(AS_ENOSPC, &mapping->flags);
1485 else
1486 set_bit(AS_EIO, &mapping->flags);
1487 } else { 1482 } else {
1488 cifs_stats_bytes_written(tcon, bytes_written); 1483 cifs_stats_bytes_written(tcon, bytes_written);
1489 } 1484 }
@@ -1628,11 +1623,10 @@ int cifs_fsync(struct file *file, int datasync)
1628 1623
1629 rc = filemap_write_and_wait(inode->i_mapping); 1624 rc = filemap_write_and_wait(inode->i_mapping);
1630 if (rc == 0) { 1625 if (rc == 0) {
1631 rc = CIFS_I(inode)->write_behind_rc; 1626 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1632 CIFS_I(inode)->write_behind_rc = 0; 1627
1633 tcon = tlink_tcon(smbfile->tlink); 1628 tcon = tlink_tcon(smbfile->tlink);
1634 if (!rc && tcon && smbfile && 1629 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1635 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1636 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1630 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1637 } 1631 }
1638 1632
@@ -1677,21 +1671,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
1677 struct inode *inode = file->f_path.dentry->d_inode; 1671 struct inode *inode = file->f_path.dentry->d_inode;
1678 int rc = 0; 1672 int rc = 0;
1679 1673
1680 /* Rather than do the steps manually: 1674 if (file->f_mode & FMODE_WRITE)
1681 lock the inode for writing 1675 rc = filemap_write_and_wait(inode->i_mapping);
1682 loop through pages looking for write behind data (dirty pages)
1683 coalesce into contiguous 16K (or smaller) chunks to write to server
1684 send to server (prefer in parallel)
1685 deal with writebehind errors
1686 unlock inode for writing
1687 filemapfdatawrite appears easier for the time being */
1688
1689 rc = filemap_fdatawrite(inode->i_mapping);
1690 /* reset wb rc if we were able to write out dirty pages */
1691 if (!rc) {
1692 rc = CIFS_I(inode)->write_behind_rc;
1693 CIFS_I(inode)->write_behind_rc = 0;
1694 }
1695 1676
1696 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc); 1677 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1697 1678
@@ -2270,7 +2251,7 @@ void cifs_oplock_break(struct work_struct *work)
2270 oplock_break); 2251 oplock_break);
2271 struct inode *inode = cfile->dentry->d_inode; 2252 struct inode *inode = cfile->dentry->d_inode;
2272 struct cifsInodeInfo *cinode = CIFS_I(inode); 2253 struct cifsInodeInfo *cinode = CIFS_I(inode);
2273 int rc, waitrc = 0; 2254 int rc = 0;
2274 2255
2275 if (inode && S_ISREG(inode->i_mode)) { 2256 if (inode && S_ISREG(inode->i_mode)) {
2276 if (cinode->clientCanCacheRead) 2257 if (cinode->clientCanCacheRead)
@@ -2279,13 +2260,10 @@ void cifs_oplock_break(struct work_struct *work)
2279 break_lease(inode, O_WRONLY); 2260 break_lease(inode, O_WRONLY);
2280 rc = filemap_fdatawrite(inode->i_mapping); 2261 rc = filemap_fdatawrite(inode->i_mapping);
2281 if (cinode->clientCanCacheRead == 0) { 2262 if (cinode->clientCanCacheRead == 0) {
2282 waitrc = filemap_fdatawait(inode->i_mapping); 2263 rc = filemap_fdatawait(inode->i_mapping);
2264 mapping_set_error(inode->i_mapping, rc);
2283 invalidate_remote_inode(inode); 2265 invalidate_remote_inode(inode);
2284 } 2266 }
2285 if (!rc)
2286 rc = waitrc;
2287 if (rc)
2288 cinode->write_behind_rc = rc;
2289 cFYI(1, "Oplock flush inode %p rc %d", inode, rc); 2267 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2290 } 2268 }
2291 2269
@@ -2304,7 +2282,7 @@ void cifs_oplock_break(struct work_struct *work)
2304 /* 2282 /*
2305 * We might have kicked in before is_valid_oplock_break() 2283 * We might have kicked in before is_valid_oplock_break()
2306 * finished grabbing reference for us. Make sure it's done by 2284 * finished grabbing reference for us. Make sure it's done by
2307 * waiting for GlobalSMSSeslock. 2285 * waiting for cifs_file_list_lock.
2308 */ 2286 */
2309 spin_lock(&cifs_file_list_lock); 2287 spin_lock(&cifs_file_list_lock);
2310 spin_unlock(&cifs_file_list_lock); 2288 spin_unlock(&cifs_file_list_lock);
@@ -2312,6 +2290,7 @@ void cifs_oplock_break(struct work_struct *work)
2312 cifs_oplock_break_put(cfile); 2290 cifs_oplock_break_put(cfile);
2313} 2291}
2314 2292
2293/* must be called while holding cifs_file_list_lock */
2315void cifs_oplock_break_get(struct cifsFileInfo *cfile) 2294void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2316{ 2295{
2317 cifs_sb_active(cfile->dentry->d_sb); 2296 cifs_sb_active(cfile->dentry->d_sb);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698a..39869c3c3efb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1682,8 +1682,7 @@ cifs_invalidate_mapping(struct inode *inode)
1682 /* write back any cached data */ 1682 /* write back any cached data */
1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1683 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1684 rc = filemap_write_and_wait(inode->i_mapping); 1684 rc = filemap_write_and_wait(inode->i_mapping);
1685 if (rc) 1685 mapping_set_error(inode->i_mapping, rc);
1686 cifs_i->write_behind_rc = rc;
1687 } 1686 }
1688 invalidate_remote_inode(inode); 1687 invalidate_remote_inode(inode);
1689 cifs_fscache_reset_inode_cookie(inode); 1688 cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1942,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1943 * the flush returns error? 1942 * the flush returns error?
1944 */ 1943 */
1945 rc = filemap_write_and_wait(inode->i_mapping); 1944 rc = filemap_write_and_wait(inode->i_mapping);
1946 if (rc != 0) { 1945 mapping_set_error(inode->i_mapping, rc);
1947 cifsInode->write_behind_rc = rc; 1946 rc = 0;
1948 rc = 0;
1949 }
1950 1947
1951 if (attrs->ia_valid & ATTR_SIZE) { 1948 if (attrs->ia_valid & ATTR_SIZE) {
1952 rc = cifs_set_file_size(inode, attrs, xid, full_path); 1949 rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2084,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087 * the flush returns error? 2084 * the flush returns error?
2088 */ 2085 */
2089 rc = filemap_write_and_wait(inode->i_mapping); 2086 rc = filemap_write_and_wait(inode->i_mapping);
2090 if (rc != 0) { 2087 mapping_set_error(inode->i_mapping, rc);
2091 cifsInode->write_behind_rc = rc; 2088 rc = 0;
2092 rc = 0;
2093 }
2094 2089
2095 if (attrs->ia_valid & ATTR_SIZE) { 2090 if (attrs->ia_valid & ATTR_SIZE) {
2096 rc = cifs_set_file_size(inode, attrs, xid, full_path); 2091 rc = cifs_set_file_size(inode, attrs, xid, full_path);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a6803..c4e296fe3518 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -577,7 +577,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
577 * cifs_oplock_break_put() can't be called 577 * cifs_oplock_break_put() can't be called
578 * from here. Get reference after queueing 578 * from here. Get reference after queueing
579 * succeeded. cifs_oplock_break() will 579 * succeeded. cifs_oplock_break() will
580 * synchronize using GlobalSMSSeslock. 580 * synchronize using cifs_file_list_lock.
581 */ 581 */
582 if (queue_work(system_nrt_wq, 582 if (queue_work(system_nrt_wq,
583 &netfile->oplock_break)) 583 &netfile->oplock_break))
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd96592..7b01d3f6eed6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include "cifs_spnego.h" 33#include "cifs_spnego.h"
34 34
35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
36 unsigned char *p24);
37
38/* 35/*
39 * Checks if this is the first smb session to be reconnected after 36 * Checks if this is the first smb session to be reconnected after
40 * the socket has been reestablished (so we know whether to use vc 0). 37 * the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
402 return -EINVAL; 399 return -EINVAL;
403 } 400 }
404 401
405 memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); 402 memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
406 /* BB we could decode pblob->NegotiateFlags; some may be useful */ 403 /* BB we could decode pblob->NegotiateFlags; some may be useful */
407 /* In particular we can examine sign flags */ 404 /* In particular we can examine sign flags */
408 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
409 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 406 we must set the MIC field of the AUTHENTICATE_MESSAGE */
410 407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
411 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
412 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
413 ses->tilen = tilen; 410 if (tilen) {
414 if (ses->tilen) { 411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
415 ses->tiblob = kmalloc(tilen, GFP_KERNEL); 412 if (!ses->auth_key.response) {
416 if (!ses->tiblob) {
417 cERROR(1, "Challenge target info allocation failure"); 413 cERROR(1, "Challenge target info allocation failure");
418 ses->tilen = 0;
419 return -ENOMEM; 414 return -ENOMEM;
420 } 415 }
421 memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen); 416 memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
417 ses->auth_key.len = tilen;
422 } 418 }
423 419
424 return 0; 420 return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
443 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
444 NTLMSSP_NEGOTIATE_NTLM; 440 NTLMSSP_NEGOTIATE_NTLM;
445 if (ses->server->secMode & 441 if (ses->server->secMode &
446 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
447 flags |= NTLMSSP_NEGOTIATE_SIGN; 443 flags |= NTLMSSP_NEGOTIATE_SIGN;
448 if (ses->server->secMode & SECMODE_SIGN_REQUIRED) 444 if (!ses->server->session_estab)
449 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 445 flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
446 NTLMSSP_NEGOTIATE_EXTENDED_SEC;
447 }
450 448
451 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 449 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
452 450
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
469 const struct nls_table *nls_cp) 467 const struct nls_table *nls_cp)
470{ 468{
471 int rc; 469 int rc;
472 unsigned int size;
473 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 470 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
474 __u32 flags; 471 __u32 flags;
475 unsigned char *tmp; 472 unsigned char *tmp;
476 struct ntlmv2_resp ntlmv2_response = {};
477 473
478 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 474 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
479 sec_blob->MessageType = NtLmAuthenticate; 475 sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
497 sec_blob->LmChallengeResponse.MaximumLength = 0; 493 sec_blob->LmChallengeResponse.MaximumLength = 0;
498 494
499 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); 495 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
500 rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); 496 rc = setup_ntlmv2_rsp(ses, nls_cp);
501 if (rc) { 497 if (rc) {
502 cERROR(1, "Error %d during NTLMSSP authentication", rc); 498 cERROR(1, "Error %d during NTLMSSP authentication", rc);
503 goto setup_ntlmv2_ret; 499 goto setup_ntlmv2_ret;
504 } 500 }
505 size = sizeof(struct ntlmv2_resp); 501 memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
506 memcpy(tmp, (char *)&ntlmv2_response, size); 502 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
507 tmp += size; 503 tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
508 if (ses->tilen > 0) {
509 memcpy(tmp, ses->tiblob, ses->tilen);
510 tmp += ses->tilen;
511 }
512 504
513 sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen); 505 sec_blob->NtChallengeResponse.Length =
506 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
514 sec_blob->NtChallengeResponse.MaximumLength = 507 sec_blob->NtChallengeResponse.MaximumLength =
515 cpu_to_le16(size + ses->tilen); 508 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
516 kfree(ses->tiblob);
517 ses->tiblob = NULL;
518 ses->tilen = 0;
519 509
520 if (ses->domainName == NULL) { 510 if (ses->domainName == NULL) {
521 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 511 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
554 sec_blob->WorkstationName.MaximumLength = 0; 544 sec_blob->WorkstationName.MaximumLength = 0;
555 tmp += 2; 545 tmp += 2;
556 546
557 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); 547 if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
558 sec_blob->SessionKey.Length = 0; 548 !calc_seckey(ses)) {
559 sec_blob->SessionKey.MaximumLength = 0; 549 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
550 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
551 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
552 sec_blob->SessionKey.MaximumLength =
553 cpu_to_le16(CIFS_CPHTXT_SIZE);
554 tmp += CIFS_CPHTXT_SIZE;
555 } else {
556 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
557 sec_blob->SessionKey.Length = 0;
558 sec_blob->SessionKey.MaximumLength = 0;
559 }
560 560
561setup_ntlmv2_ret: 561setup_ntlmv2_ret:
562 *buflen = tmp - pbuffer; 562 *buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
600 return -EINVAL; 600 return -EINVAL;
601 601
602 type = ses->server->secType; 602 type = ses->server->secType;
603
604 cFYI(1, "sess setup type %d", type); 603 cFYI(1, "sess setup type %d", type);
604 if (type == RawNTLMSSP) {
605 /* if memory allocation is successful, caller of this function
606 * frees it.
607 */
608 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
609 if (!ses->ntlmssp)
610 return -ENOMEM;
611 }
612
605ssetup_ntlmssp_authenticate: 613ssetup_ntlmssp_authenticate:
606 if (phase == NtLmChallenge) 614 if (phase == NtLmChallenge)
607 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 615 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
666 /* no capabilities flags in old lanman negotiation */ 674 /* no capabilities flags in old lanman negotiation */
667 675
668 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 676 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
669 /* BB calculate hash with password */
670 /* and copy into bcc */
671 677
672 calc_lanman_hash(ses->password, ses->cryptKey, 678 /* Calculate hash with password and copy into bcc_ptr.
679 * Encryption Key (stored as in cryptkey) gets used if the
680 * security mode bit in Negottiate Protocol response states
681 * to use challenge/response method (i.e. Password bit is 1).
682 */
683
684 calc_lanman_hash(ses->password, ses->server->cryptkey,
673 ses->server->secMode & SECMODE_PW_ENCRYPT ? 685 ses->server->secMode & SECMODE_PW_ENCRYPT ?
674 true : false, lnm_session_key); 686 true : false, lnm_session_key);
675 687
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
687 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 699 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
688#endif 700#endif
689 } else if (type == NTLM) { 701 } else if (type == NTLM) {
690 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
691
692 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 702 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
693 pSMB->req_no_secext.CaseInsensitivePasswordLength = 703 pSMB->req_no_secext.CaseInsensitivePasswordLength =
694 cpu_to_le16(CIFS_SESS_KEY_SIZE); 704 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
695 pSMB->req_no_secext.CaseSensitivePasswordLength = 705 pSMB->req_no_secext.CaseSensitivePasswordLength =
696 cpu_to_le16(CIFS_SESS_KEY_SIZE); 706 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
707
708 /* calculate ntlm response and session key */
709 rc = setup_ntlm_response(ses);
710 if (rc) {
711 cERROR(1, "Error %d during NTLM authentication", rc);
712 goto ssetup_exit;
713 }
697 714
698 /* calculate session key */ 715 /* copy ntlm response */
699 SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key); 716 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
717 CIFS_AUTH_RESP_SIZE);
718 bcc_ptr += CIFS_AUTH_RESP_SIZE;
719 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
720 CIFS_AUTH_RESP_SIZE);
721 bcc_ptr += CIFS_AUTH_RESP_SIZE;
700 722
701 cifs_calculate_session_key(&ses->auth_key,
702 ntlm_session_key, ses->password);
703 /* copy session key */
704 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
705 bcc_ptr += CIFS_SESS_KEY_SIZE;
706 memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
707 bcc_ptr += CIFS_SESS_KEY_SIZE;
708 if (ses->capabilities & CAP_UNICODE) { 723 if (ses->capabilities & CAP_UNICODE) {
709 /* unicode strings must be word aligned */ 724 /* unicode strings must be word aligned */
710 if (iov[0].iov_len % 2) { 725 if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
715 } else 730 } else
716 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 731 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
717 } else if (type == NTLMv2) { 732 } else if (type == NTLMv2) {
718 char *v2_sess_key =
719 kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
720
721 /* BB FIXME change all users of v2_sess_key to
722 struct ntlmv2_resp */
723
724 if (v2_sess_key == NULL) {
725 rc = -ENOMEM;
726 goto ssetup_exit;
727 }
728
729 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 733 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
730 734
731 /* LM2 password would be here if we supported it */ 735 /* LM2 password would be here if we supported it */
732 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; 736 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
733 /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
734 737
735 /* calculate session key */ 738 /* calculate nlmv2 response and session key */
736 rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 739 rc = setup_ntlmv2_rsp(ses, nls_cp);
737 if (rc) { 740 if (rc) {
738 cERROR(1, "Error %d during NTLMv2 authentication", rc); 741 cERROR(1, "Error %d during NTLMv2 authentication", rc);
739 kfree(v2_sess_key);
740 goto ssetup_exit; 742 goto ssetup_exit;
741 } 743 }
742 memcpy(bcc_ptr, (char *)v2_sess_key, 744 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
743 sizeof(struct ntlmv2_resp)); 745 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
744 bcc_ptr += sizeof(struct ntlmv2_resp); 746 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
745 kfree(v2_sess_key); 747
746 /* set case sensitive password length after tilen may get 748 /* set case sensitive password length after tilen may get
747 * assigned, tilen is 0 otherwise. 749 * assigned, tilen is 0 otherwise.
748 */ 750 */
749 pSMB->req_no_secext.CaseSensitivePasswordLength = 751 pSMB->req_no_secext.CaseSensitivePasswordLength =
750 cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen); 752 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
751 if (ses->tilen > 0) {
752 memcpy(bcc_ptr, ses->tiblob, ses->tilen);
753 bcc_ptr += ses->tilen;
754 /* we never did allocate ses->domainName to free */
755 kfree(ses->tiblob);
756 ses->tiblob = NULL;
757 ses->tilen = 0;
758 }
759 753
760 if (ses->capabilities & CAP_UNICODE) { 754 if (ses->capabilities & CAP_UNICODE) {
761 if (iov[0].iov_len % 2) { 755 if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
768 } else if (type == Kerberos) { 762 } else if (type == Kerberos) {
769#ifdef CONFIG_CIFS_UPCALL 763#ifdef CONFIG_CIFS_UPCALL
770 struct cifs_spnego_msg *msg; 764 struct cifs_spnego_msg *msg;
765
771 spnego_key = cifs_get_spnego_key(ses); 766 spnego_key = cifs_get_spnego_key(ses);
772 if (IS_ERR(spnego_key)) { 767 if (IS_ERR(spnego_key)) {
773 rc = PTR_ERR(spnego_key); 768 rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
785 rc = -EKEYREJECTED; 780 rc = -EKEYREJECTED;
786 goto ssetup_exit; 781 goto ssetup_exit;
787 } 782 }
788 /* bail out if key is too long */ 783
789 if (msg->sesskey_len > 784 ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
790 sizeof(ses->auth_key.data.krb5)) { 785 if (!ses->auth_key.response) {
791 cERROR(1, "Kerberos signing key too long (%u bytes)", 786 cERROR(1, "Kerberos can't allocate (%u bytes) memory",
792 msg->sesskey_len); 787 msg->sesskey_len);
793 rc = -EOVERFLOW; 788 rc = -ENOMEM;
794 goto ssetup_exit; 789 goto ssetup_exit;
795 } 790 }
791 memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
796 ses->auth_key.len = msg->sesskey_len; 792 ses->auth_key.len = msg->sesskey_len;
797 memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len); 793
798 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 794 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
799 capabilities |= CAP_EXTENDED_SECURITY; 795 capabilities |= CAP_EXTENDED_SECURITY;
800 pSMB->req.Capabilities = cpu_to_le32(capabilities); 796 pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
897 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); 893 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
898 /* SMB request buf freed in SendReceive2 */ 894 /* SMB request buf freed in SendReceive2 */
899 895
900 cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
901
902 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 896 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
903 smb_buf = (struct smb_hdr *)iov[0].iov_base; 897 smb_buf = (struct smb_hdr *)iov[0].iov_base;
904 898
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb4..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
544 SECMODE_SIGN_ENABLED))) { 544 SECMODE_SIGN_ENABLED))) {
545 rc = cifs_verify_signature(midQ->resp_buf, 545 rc = cifs_verify_signature(midQ->resp_buf,
546 &ses->server->session_key, 546 ses->server,
547 midQ->sequence_number+1); 547 midQ->sequence_number+1);
548 if (rc) { 548 if (rc) {
549 cERROR(1, "Unexpected SMB signature"); 549 cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
732 SECMODE_SIGN_ENABLED))) { 732 SECMODE_SIGN_ENABLED))) {
733 rc = cifs_verify_signature(out_buf, 733 rc = cifs_verify_signature(out_buf,
734 &ses->server->session_key, 734 ses->server,
735 midQ->sequence_number+1); 735 midQ->sequence_number+1);
736 if (rc) { 736 if (rc) {
737 cERROR(1, "Unexpected SMB signature"); 737 cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
982 SECMODE_SIGN_ENABLED))) { 982 SECMODE_SIGN_ENABLED))) {
983 rc = cifs_verify_signature(out_buf, 983 rc = cifs_verify_signature(out_buf,
984 &ses->server->session_key, 984 ses->server,
985 midQ->sequence_number+1); 985 midQ->sequence_number+1);
986 if (rc) { 986 if (rc) {
987 cERROR(1, "Unexpected SMB signature"); 987 cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96ca348..5ea57c8c7f97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
306 306
307/* init_coda: used by filesystems.c to register coda */ 307/* init_coda: used by filesystems.c to register coda */
308 308
309static int coda_get_sb(struct file_system_type *fs_type, 309static struct dentry *coda_mount(struct file_system_type *fs_type,
310 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 310 int flags, const char *dev_name, void *data)
311{ 311{
312 return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); 312 return mount_nodev(fs_type, flags, data, coda_fill_super);
313} 313}
314 314
315struct file_system_type coda_fs_type = { 315struct file_system_type coda_fs_type = {
316 .owner = THIS_MODULE, 316 .owner = THIS_MODULE,
317 .name = "coda", 317 .name = "coda",
318 .get_sb = coda_get_sb, 318 .mount = coda_mount,
319 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
320 .fs_flags = FS_BINARY_MOUNTDATA, 320 .fs_flags = FS_BINARY_MOUNTDATA,
321}; 321};
diff --git a/fs/compat.c b/fs/compat.c
index f03abdadc401..c580c322fa6b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -29,8 +29,6 @@
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/ioctl.h> 30#include <linux/ioctl.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/smb.h>
33#include <linux/smb_mount.h>
34#include <linux/ncp_mount.h> 32#include <linux/ncp_mount.h>
35#include <linux/nfs4_mount.h> 33#include <linux/nfs4_mount.h>
36#include <linux/syscalls.h> 34#include <linux/syscalls.h>
@@ -51,6 +49,7 @@
51#include <linux/eventpoll.h> 49#include <linux/eventpoll.h>
52#include <linux/fs_struct.h> 50#include <linux/fs_struct.h>
53#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/pagemap.h>
54 53
55#include <asm/uaccess.h> 54#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 55#include <asm/mmu_context.h>
@@ -608,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
608 /* 607 /*
609 * Single unix specification: 608 * Single unix specification:
610 * We should -EINVAL if an element length is not >= 0 and fitting an 609 * We should -EINVAL if an element length is not >= 0 and fitting an
611 * ssize_t. The total length is fitting an ssize_t 610 * ssize_t.
612 * 611 *
613 * Be careful here because iov_len is a size_t not an ssize_t 612 * In Linux, the total length is limited to MAX_RW_COUNT, there is
613 * no overflow possibility.
614 */ 614 */
615 tot_len = 0; 615 tot_len = 0;
616 ret = -EINVAL; 616 ret = -EINVAL;
617 for (seg = 0; seg < nr_segs; seg++) { 617 for (seg = 0; seg < nr_segs; seg++) {
618 compat_ssize_t tmp = tot_len;
619 compat_uptr_t buf; 618 compat_uptr_t buf;
620 compat_ssize_t len; 619 compat_ssize_t len;
621 620
@@ -626,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
626 } 625 }
627 if (len < 0) /* size_t not fitting in compat_ssize_t .. */ 626 if (len < 0) /* size_t not fitting in compat_ssize_t .. */
628 goto out; 627 goto out;
629 tot_len += len;
630 if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
631 goto out;
632 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { 628 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
633 ret = -EFAULT; 629 ret = -EFAULT;
634 goto out; 630 goto out;
635 } 631 }
632 if (len > MAX_RW_COUNT - tot_len)
633 len = MAX_RW_COUNT - tot_len;
634 tot_len += len;
636 iov->iov_base = compat_ptr(buf); 635 iov->iov_base = compat_ptr(buf);
637 iov->iov_len = (compat_size_t) len; 636 iov->iov_len = (compat_size_t) len;
638 uvector++; 637 uvector++;
@@ -745,30 +744,6 @@ static void *do_ncp_super_data_conv(void *raw_data)
745 return raw_data; 744 return raw_data;
746} 745}
747 746
748struct compat_smb_mount_data {
749 compat_int_t version;
750 __compat_uid_t mounted_uid;
751 __compat_uid_t uid;
752 __compat_gid_t gid;
753 compat_mode_t file_mode;
754 compat_mode_t dir_mode;
755};
756
757static void *do_smb_super_data_conv(void *raw_data)
758{
759 struct smb_mount_data *s = raw_data;
760 struct compat_smb_mount_data *c_s = raw_data;
761
762 if (c_s->version != SMB_MOUNT_OLDVERSION)
763 goto out;
764 s->dir_mode = c_s->dir_mode;
765 s->file_mode = c_s->file_mode;
766 s->gid = c_s->gid;
767 s->uid = c_s->uid;
768 s->mounted_uid = c_s->mounted_uid;
769 out:
770 return raw_data;
771}
772 747
773struct compat_nfs_string { 748struct compat_nfs_string {
774 compat_uint_t len; 749 compat_uint_t len;
@@ -835,7 +810,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
835 return 0; 810 return 0;
836} 811}
837 812
838#define SMBFS_NAME "smbfs"
839#define NCPFS_NAME "ncpfs" 813#define NCPFS_NAME "ncpfs"
840#define NFS4_NAME "nfs4" 814#define NFS4_NAME "nfs4"
841 815
@@ -870,9 +844,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name,
870 retval = -EINVAL; 844 retval = -EINVAL;
871 845
872 if (kernel_type && data_page) { 846 if (kernel_type && data_page) {
873 if (!strcmp(kernel_type, SMBFS_NAME)) { 847 if (!strcmp(kernel_type, NCPFS_NAME)) {
874 do_smb_super_data_conv((void *)data_page);
875 } else if (!strcmp(kernel_type, NCPFS_NAME)) {
876 do_ncp_super_data_conv((void *)data_page); 848 do_ncp_super_data_conv((void *)data_page);
877 } else if (!strcmp(kernel_type, NFS4_NAME)) { 849 } else if (!strcmp(kernel_type, NFS4_NAME)) {
878 if (do_nfs4_super_data_conv((void *) data_page)) 850 if (do_nfs4_super_data_conv((void *) data_page))
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d0ad09d57789..410ed188faa1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -46,7 +46,6 @@
46#include <linux/videodev.h> 46#include <linux/videodev.h>
47#include <linux/netdevice.h> 47#include <linux/netdevice.h>
48#include <linux/raw.h> 48#include <linux/raw.h>
49#include <linux/smb_fs.h>
50#include <linux/blkdev.h> 49#include <linux/blkdev.h>
51#include <linux/elevator.h> 50#include <linux/elevator.h>
52#include <linux/rtc.h> 51#include <linux/rtc.h>
@@ -558,25 +557,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
558 557
559#endif /* CONFIG_BLOCK */ 558#endif /* CONFIG_BLOCK */
560 559
561static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
562 compat_uid_t __user *argp)
563{
564 mm_segment_t old_fs = get_fs();
565 __kernel_uid_t kuid;
566 int err;
567
568 cmd = SMB_IOC_GETMOUNTUID;
569
570 set_fs(KERNEL_DS);
571 err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
572 set_fs(old_fs);
573
574 if (err >= 0)
575 err = put_user(kuid, argp);
576
577 return err;
578}
579
580/* Bluetooth ioctls */ 560/* Bluetooth ioctls */
581#define HCIUARTSETPROTO _IOW('U', 200, int) 561#define HCIUARTSETPROTO _IOW('U', 200, int)
582#define HCIUARTGETPROTO _IOR('U', 201, int) 562#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -1199,8 +1179,9 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
1199COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) 1179COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
1200COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1180COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
1201COMPATIBLE_IOCTL(OSS_GETVERSION) 1181COMPATIBLE_IOCTL(OSS_GETVERSION)
1202/* SMB ioctls which do not need any translations */ 1182/* Raw devices */
1203COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) 1183COMPATIBLE_IOCTL(RAW_SETBIND)
1184COMPATIBLE_IOCTL(RAW_GETBIND)
1204/* Watchdog */ 1185/* Watchdog */
1205COMPATIBLE_IOCTL(WDIOC_GETSUPPORT) 1186COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
1206COMPATIBLE_IOCTL(WDIOC_GETSTATUS) 1187COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -1458,10 +1439,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1458 case MTIOCPOS32: 1439 case MTIOCPOS32:
1459 return mt_ioctl_trans(fd, cmd, argp); 1440 return mt_ioctl_trans(fd, cmd, argp);
1460#endif 1441#endif
1461 /* One SMB ioctl needs translations. */
1462#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1463 case SMB_IOC_GETMOUNTUID_32:
1464 return do_smb_getmountuid(fd, cmd, argp);
1465 /* Serial */ 1442 /* Serial */
1466 case TIOCGSERIAL: 1443 case TIOCGSERIAL:
1467 case TIOCSSERIAL: 1444 case TIOCSSERIAL:
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2d..7d3607febe1c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
104 return 0; 104 return 0;
105} 105}
106 106
107static int configfs_get_sb(struct file_system_type *fs_type, 107static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
108 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 108 int flags, const char *dev_name, void *data)
109{ 109{
110 return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); 110 return mount_single(fs_type, flags, data, configfs_fill_super);
111} 111}
112 112
113static struct file_system_type configfs_fs_type = { 113static struct file_system_type configfs_fs_type = {
114 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
115 .name = "configfs", 115 .name = "configfs",
116 .get_sb = configfs_get_sb, 116 .mount = configfs_do_mount,
117 .kill_sb = kill_litter_super, 117 .kill_sb = kill_litter_super,
118}; 118};
119 119
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d33..32fd5fe9ca0e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
533 .statfs = cramfs_statfs, 533 .statfs = cramfs_statfs,
534}; 534};
535 535
536static int cramfs_get_sb(struct file_system_type *fs_type, 536static struct dentry *cramfs_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 537 int flags, const char *dev_name, void *data)
538{ 538{
539 return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, 539 return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
540 mnt);
541} 540}
542 541
543static struct file_system_type cramfs_fs_type = { 542static struct file_system_type cramfs_fs_type = {
544 .owner = THIS_MODULE, 543 .owner = THIS_MODULE,
545 .name = "cramfs", 544 .name = "cramfs",
546 .get_sb = cramfs_get_sb, 545 .mount = cramfs_mount,
547 .kill_sb = kill_block_super, 546 .kill_sb = kill_block_super,
548 .fs_flags = FS_REQUIRES_DEV, 547 .fs_flags = FS_REQUIRES_DEV,
549}; 548};
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a4ed8380e98a..37a8ca7c1222 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
135 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 135 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
136} 136}
137 137
138static int debug_get_sb(struct file_system_type *fs_type, 138static struct dentry *debug_mount(struct file_system_type *fs_type,
139 int flags, const char *dev_name, 139 int flags, const char *dev_name,
140 void *data, struct vfsmount *mnt) 140 void *data)
141{ 141{
142 return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); 142 return mount_single(fs_type, flags, data, debug_fill_super);
143} 143}
144 144
145static struct file_system_type debug_fs_type = { 145static struct file_system_type debug_fs_type = {
146 .owner = THIS_MODULE, 146 .owner = THIS_MODULE,
147 .name = "debugfs", 147 .name = "debugfs",
148 .get_sb = debug_get_sb, 148 .mount = debug_mount,
149 .kill_sb = kill_litter_super, 149 .kill_sb = kill_litter_super,
150}; 150};
151 151
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b5235..1bb547c9cad6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
331} 331}
332 332
333/* 333/*
334 * devpts_get_sb() 334 * devpts_mount()
335 * 335 *
336 * If the '-o newinstance' mount option was specified, mount a new 336 * If the '-o newinstance' mount option was specified, mount a new
337 * (private) instance of devpts. PTYs created in this instance are 337 * (private) instance of devpts. PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
345 * semantics in devpts while preserving backward compatibility of the 345 * semantics in devpts while preserving backward compatibility of the
346 * current 'single-namespace' semantics. i.e all mounts of devpts 346 * current 'single-namespace' semantics. i.e all mounts of devpts
347 * without the 'newinstance' mount option should bind to the initial 347 * without the 'newinstance' mount option should bind to the initial
348 * kernel mount, like get_sb_single(). 348 * kernel mount, like mount_single().
349 * 349 *
350 * Mounts with 'newinstance' option create a new, private namespace. 350 * Mounts with 'newinstance' option create a new, private namespace.
351 * 351 *
352 * NOTE: 352 * NOTE:
353 * 353 *
354 * For single-mount semantics, devpts cannot use get_sb_single(), 354 * For single-mount semantics, devpts cannot use mount_single(),
355 * because get_sb_single()/sget() find and use the super-block from 355 * because mount_single()/sget() find and use the super-block from
356 * the most recent mount of devpts. But that recent mount may be a 356 * the most recent mount of devpts. But that recent mount may be a
357 * 'newinstance' mount and get_sb_single() would pick the newinstance 357 * 'newinstance' mount and mount_single() would pick the newinstance
358 * super-block instead of the initial super-block. 358 * super-block instead of the initial super-block.
359 */ 359 */
360static int devpts_get_sb(struct file_system_type *fs_type, 360static struct dentry *devpts_mount(struct file_system_type *fs_type,
361 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 361 int flags, const char *dev_name, void *data)
362{ 362{
363 int error; 363 int error;
364 struct pts_mount_opts opts; 364 struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
366 366
367 error = parse_mount_options(data, PARSE_MOUNT, &opts); 367 error = parse_mount_options(data, PARSE_MOUNT, &opts);
368 if (error) 368 if (error)
369 return error; 369 return ERR_PTR(error);
370 370
371 if (opts.newinstance) 371 if (opts.newinstance)
372 s = sget(fs_type, NULL, set_anon_super, NULL); 372 s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); 374 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
375 375
376 if (IS_ERR(s)) 376 if (IS_ERR(s))
377 return PTR_ERR(s); 377 return ERR_CAST(s);
378 378
379 if (!s->s_root) { 379 if (!s->s_root) {
380 s->s_flags = flags; 380 s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
390 if (error) 390 if (error)
391 goto out_undo_sget; 391 goto out_undo_sget;
392 392
393 simple_set_mnt(mnt, s); 393 return dget(s->s_root);
394
395 return 0;
396 394
397out_undo_sget: 395out_undo_sget:
398 deactivate_locked_super(s); 396 deactivate_locked_super(s);
399 return error; 397 return ERR_PTR(error);
400} 398}
401 399
402#else 400#else
@@ -404,10 +402,10 @@ out_undo_sget:
404 * This supports only the legacy single-instance semantics (no 402 * This supports only the legacy single-instance semantics (no
405 * multiple-instance semantics) 403 * multiple-instance semantics)
406 */ 404 */
407static int devpts_get_sb(struct file_system_type *fs_type, int flags, 405static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
408 const char *dev_name, void *data, struct vfsmount *mnt) 406 const char *dev_name, void *data)
409{ 407{
410 return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); 408 return mount_single(fs_type, flags, data, devpts_fill_super);
411} 409}
412#endif 410#endif
413 411
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
421 419
422static struct file_system_type devpts_fs_type = { 420static struct file_system_type devpts_fs_type = {
423 .name = "devpts", 421 .name = "devpts",
424 .get_sb = devpts_get_sb, 422 .mount = devpts_mount,
425 .kill_sb = devpts_kill_sb, 423 .kill_sb = devpts_kill_sb,
426}; 424};
427 425
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b959429..413a3c48f0bb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010 377#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020 378#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040 379#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
380#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY 0x00000080
380 u32 flags; 381 u32 flags;
381 struct list_head global_auth_tok_list; 382 struct list_head global_auth_tok_list;
382 struct mutex global_auth_tok_list_mutex; 383 struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc94203380..9d1a22d62765 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/xattr.h>
35#include <asm/unaligned.h> 36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
70 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 71 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
71 struct dentry *dentry_save; 72 struct dentry *dentry_save;
72 struct vfsmount *vfsmount_save; 73 struct vfsmount *vfsmount_save;
74 unsigned int flags_save;
73 int rc; 75 int rc;
74 76
75 dentry_save = nd->path.dentry; 77 dentry_save = nd->path.dentry;
76 vfsmount_save = nd->path.mnt; 78 vfsmount_save = nd->path.mnt;
79 flags_save = nd->flags;
77 nd->path.dentry = lower_dentry; 80 nd->path.dentry = lower_dentry;
78 nd->path.mnt = lower_mnt; 81 nd->path.mnt = lower_mnt;
82 nd->flags &= ~LOOKUP_OPEN;
79 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
80 nd->path.dentry = dentry_save; 84 nd->path.dentry = dentry_save;
81 nd->path.mnt = vfsmount_save; 85 nd->path.mnt = vfsmount_save;
86 nd->flags = flags_save;
82 return rc; 87 return rc;
83} 88}
84 89
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1108 rc = -EOPNOTSUPP; 1113 rc = -EOPNOTSUPP;
1109 goto out; 1114 goto out;
1110 } 1115 }
1111 mutex_lock(&lower_dentry->d_inode->i_mutex); 1116
1112 rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, 1117 rc = vfs_setxattr(lower_dentry, name, value, size, flags);
1113 size, flags);
1114 mutex_unlock(&lower_dentry->d_inode->i_mutex);
1115out: 1118out:
1116 return rc; 1119 return rc;
1117} 1120}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea4..b1f6858a5223 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
446 */ 446 */
447static int 447static int
448ecryptfs_find_auth_tok_for_sig( 448ecryptfs_find_auth_tok_for_sig(
449 struct key **auth_tok_key,
449 struct ecryptfs_auth_tok **auth_tok, 450 struct ecryptfs_auth_tok **auth_tok,
450 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 451 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
451 char *sig) 452 char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
453 struct ecryptfs_global_auth_tok *global_auth_tok; 454 struct ecryptfs_global_auth_tok *global_auth_tok;
454 int rc = 0; 455 int rc = 0;
455 456
457 (*auth_tok_key) = NULL;
456 (*auth_tok) = NULL; 458 (*auth_tok) = NULL;
457 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 459 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
458 mount_crypt_stat, sig)) { 460 mount_crypt_stat, sig)) {
459 struct key *auth_tok_key;
460 461
461 rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, 462 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
463 * mount_crypt_stat structure, we prevent to use auth toks that
464 * are not inserted through the ecryptfs_add_global_auth_tok
465 * function.
466 */
467 if (mount_crypt_stat->flags
468 & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
469 return -EINVAL;
470
471 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
462 sig); 472 sig);
463 } else 473 } else
464 (*auth_tok) = global_auth_tok->global_auth_tok; 474 (*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
509 char *filename, size_t filename_size) 519 char *filename, size_t filename_size)
510{ 520{
511 struct ecryptfs_write_tag_70_packet_silly_stack *s; 521 struct ecryptfs_write_tag_70_packet_silly_stack *s;
522 struct key *auth_tok_key = NULL;
512 int rc = 0; 523 int rc = 0;
513 524
514 s = kmalloc(sizeof(*s), GFP_KERNEL); 525 s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
606 } 617 }
607 dest[s->i++] = s->cipher_code; 618 dest[s->i++] = s->cipher_code;
608 rc = ecryptfs_find_auth_tok_for_sig( 619 rc = ecryptfs_find_auth_tok_for_sig(
620 &auth_tok_key,
609 &s->auth_tok, mount_crypt_stat, 621 &s->auth_tok, mount_crypt_stat,
610 mount_crypt_stat->global_default_fnek_sig); 622 mount_crypt_stat->global_default_fnek_sig);
611 if (rc) { 623 if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
753out_unlock: 765out_unlock:
754 mutex_unlock(s->tfm_mutex); 766 mutex_unlock(s->tfm_mutex);
755out: 767out:
768 if (auth_tok_key)
769 key_put(auth_tok_key);
756 kfree(s); 770 kfree(s);
757 return rc; 771 return rc;
758} 772}
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
798 char *data, size_t max_packet_size) 812 char *data, size_t max_packet_size)
799{ 813{
800 struct ecryptfs_parse_tag_70_packet_silly_stack *s; 814 struct ecryptfs_parse_tag_70_packet_silly_stack *s;
815 struct key *auth_tok_key = NULL;
801 int rc = 0; 816 int rc = 0;
802 817
803 (*packet_size) = 0; 818 (*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
910 * >= ECRYPTFS_MAX_IV_BYTES. */ 925 * >= ECRYPTFS_MAX_IV_BYTES. */
911 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); 926 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
912 s->desc.info = s->iv; 927 s->desc.info = s->iv;
913 rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat, 928 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
929 &s->auth_tok, mount_crypt_stat,
914 s->fnek_sig_hex); 930 s->fnek_sig_hex);
915 if (rc) { 931 if (rc) {
916 printk(KERN_ERR "%s: Error attempting to find auth tok for " 932 printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
986 (*filename_size) = 0; 1002 (*filename_size) = 0;
987 (*filename) = NULL; 1003 (*filename) = NULL;
988 } 1004 }
1005 if (auth_tok_key)
1006 key_put(auth_tok_key);
989 kfree(s); 1007 kfree(s);
990 return rc; 1008 return rc;
991} 1009}
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1557 ECRYPTFS_VERSION_MAJOR, 1575 ECRYPTFS_VERSION_MAJOR,
1558 ECRYPTFS_VERSION_MINOR); 1576 ECRYPTFS_VERSION_MINOR);
1559 rc = -EINVAL; 1577 rc = -EINVAL;
1560 goto out; 1578 goto out_release_key;
1561 } 1579 }
1562 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD 1580 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
1563 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) { 1581 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
1564 printk(KERN_ERR "Invalid auth_tok structure " 1582 printk(KERN_ERR "Invalid auth_tok structure "
1565 "returned from key query\n"); 1583 "returned from key query\n");
1566 rc = -EINVAL; 1584 rc = -EINVAL;
1567 goto out; 1585 goto out_release_key;
1586 }
1587out_release_key:
1588 if (rc) {
1589 key_put(*auth_tok_key);
1590 (*auth_tok_key) = NULL;
1568 } 1591 }
1569out: 1592out:
1570 return rc; 1593 return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1688 struct ecryptfs_auth_tok_list_item *auth_tok_list_item; 1711 struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
1689 size_t tag_11_contents_size; 1712 size_t tag_11_contents_size;
1690 size_t tag_11_packet_size; 1713 size_t tag_11_packet_size;
1714 struct key *auth_tok_key = NULL;
1691 int rc = 0; 1715 int rc = 0;
1692 1716
1693 INIT_LIST_HEAD(&auth_tok_list); 1717 INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1784 * just one will be sufficient to decrypt to get the FEK. */ 1808 * just one will be sufficient to decrypt to get the FEK. */
1785find_next_matching_auth_tok: 1809find_next_matching_auth_tok:
1786 found_auth_tok = 0; 1810 found_auth_tok = 0;
1811 if (auth_tok_key) {
1812 key_put(auth_tok_key);
1813 auth_tok_key = NULL;
1814 }
1787 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { 1815 list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
1788 candidate_auth_tok = &auth_tok_list_item->auth_tok; 1816 candidate_auth_tok = &auth_tok_list_item->auth_tok;
1789 if (unlikely(ecryptfs_verbosity > 0)) { 1817 if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
1800 rc = -EINVAL; 1828 rc = -EINVAL;
1801 goto out_wipe_list; 1829 goto out_wipe_list;
1802 } 1830 }
1803 ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, 1831 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
1832 &matching_auth_tok,
1804 crypt_stat->mount_crypt_stat, 1833 crypt_stat->mount_crypt_stat,
1805 candidate_auth_tok_sig); 1834 candidate_auth_tok_sig);
1806 if (matching_auth_tok) { 1835 if (!rc) {
1807 found_auth_tok = 1; 1836 found_auth_tok = 1;
1808 goto found_matching_auth_tok; 1837 goto found_matching_auth_tok;
1809 } 1838 }
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
1866out_wipe_list: 1895out_wipe_list:
1867 wipe_auth_tok_list(&auth_tok_list); 1896 wipe_auth_tok_list(&auth_tok_list);
1868out: 1897out:
1898 if (auth_tok_key)
1899 key_put(auth_tok_key);
1869 return rc; 1900 return rc;
1870} 1901}
1871 1902
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb20..a9dbd62518e6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
212 ecryptfs_opt_err };
212 213
213static const match_table_t tokens = { 214static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 215 {ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 224 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 225 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, 226 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
227 {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
226 {ecryptfs_opt_err, NULL} 228 {ecryptfs_opt_err, NULL}
227}; 229};
228 230
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
406 case ecryptfs_opt_unlink_sigs: 408 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS; 409 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break; 410 break;
411 case ecryptfs_opt_mount_auth_tok_only:
412 mount_crypt_stat->flags |=
413 ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
414 break;
409 case ecryptfs_opt_err: 415 case ecryptfs_opt_err:
410 default: 416 default:
411 printk(KERN_WARNING 417 printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
540 * ecryptfs_interpose to perform most of the linking 546 * ecryptfs_interpose to perform most of the linking
541 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) 547 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
542 */ 548 */
543static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 549static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
544 const char *dev_name, void *raw_data, 550 const char *dev_name, void *raw_data)
545 struct vfsmount *mnt)
546{ 551{
547 struct super_block *s; 552 struct super_block *s;
548 struct ecryptfs_sb_info *sbi; 553 struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
607 err = "Reading sb failed"; 612 err = "Reading sb failed";
608 goto out; 613 goto out;
609 } 614 }
610 simple_set_mnt(mnt, s); 615 return dget(s->s_root);
611 return 0;
612 616
613out: 617out:
614 if (sbi) { 618 if (sbi) {
@@ -616,7 +620,7 @@ out:
616 kmem_cache_free(ecryptfs_sb_info_cache, sbi); 620 kmem_cache_free(ecryptfs_sb_info_cache, sbi);
617 } 621 }
618 printk(KERN_ERR "%s; rc = [%d]\n", err, rc); 622 printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
619 return rc; 623 return ERR_PTR(rc);
620} 624}
621 625
622/** 626/**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
639static struct file_system_type ecryptfs_fs_type = { 643static struct file_system_type ecryptfs_fs_type = {
640 .owner = THIS_MODULE, 644 .owner = THIS_MODULE,
641 .name = "ecryptfs", 645 .name = "ecryptfs",
642 .get_sb = ecryptfs_get_sb, 646 .mount = ecryptfs_mount,
643 .kill_sb = ecryptfs_kill_block_super, 647 .kill_sb = ecryptfs_kill_block_super,
644 .fs_flags = 0 648 .fs_flags = 0
645}; 649};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa9..253732382d37 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -180,6 +180,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
180 seq_printf(m, ",ecryptfs_encrypted_view"); 180 seq_printf(m, ",ecryptfs_encrypted_view");
181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) 181 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
182 seq_printf(m, ",ecryptfs_unlink_sigs"); 182 seq_printf(m, ",ecryptfs_unlink_sigs");
183 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
184 seq_printf(m, ",ecryptfs_mount_auth_tok_only");
183 185
184 return 0; 186 return 0;
185} 187}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f04942810818..5073a07652cc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); 20static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
21static int efs_fill_super(struct super_block *s, void *d, int silent); 21static int efs_fill_super(struct super_block *s, void *d, int silent);
22 22
23static int efs_get_sb(struct file_system_type *fs_type, 23static struct dentry *efs_mount(struct file_system_type *fs_type,
24 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 24 int flags, const char *dev_name, void *data)
25{ 25{
26 return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); 26 return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
27} 27}
28 28
29static struct file_system_type efs_fs_type = { 29static struct file_system_type efs_fs_type = {
30 .owner = THIS_MODULE, 30 .owner = THIS_MODULE,
31 .name = "efs", 31 .name = "efs",
32 .get_sb = efs_get_sb, 32 .mount = efs_mount,
33 .kill_sb = kill_block_super, 33 .kill_sb = kill_block_super,
34 .fs_flags = FS_REQUIRES_DEV, 34 .fs_flags = FS_REQUIRES_DEV,
35}; 35};
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..79c3ae6e0456 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
659/* 659/*
660 * Set up the superblock (calls exofs_fill_super eventually) 660 * Set up the superblock (calls exofs_fill_super eventually)
661 */ 661 */
662static int exofs_get_sb(struct file_system_type *type, 662static struct dentry *exofs_mount(struct file_system_type *type,
663 int flags, const char *dev_name, 663 int flags, const char *dev_name,
664 void *data, struct vfsmount *mnt) 664 void *data)
665{ 665{
666 struct exofs_mountopt opts; 666 struct exofs_mountopt opts;
667 int ret; 667 int ret;
668 668
669 ret = parse_options(data, &opts); 669 ret = parse_options(data, &opts);
670 if (ret) 670 if (ret)
671 return ret; 671 return ERR_PTR(ret);
672 672
673 opts.dev_name = dev_name; 673 opts.dev_name = dev_name;
674 return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); 674 return mount_nodev(type, flags, &opts, exofs_fill_super);
675} 675}
676 676
677/* 677/*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
809static struct file_system_type exofs_type = { 809static struct file_system_type exofs_type = {
810 .owner = THIS_MODULE, 810 .owner = THIS_MODULE,
811 .name = "exofs", 811 .name = "exofs",
812 .get_sb = exofs_get_sb, 812 .mount = exofs_mount,
813 .kill_sb = generic_shutdown_super, 813 .kill_sb = generic_shutdown_super,
814}; 814};
815 815
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c6c684b44ea1..0d06f4e75699 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -646,10 +646,9 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
646 return here; 646 return here;
647} 647}
648 648
649/* 649/**
650 * ext2_try_to_allocate() 650 * ext2_try_to_allocate()
651 * @sb: superblock 651 * @sb: superblock
652 * @handle: handle to this transaction
653 * @group: given allocation block group 652 * @group: given allocation block group
654 * @bitmap_bh: bufferhead holds the block bitmap 653 * @bitmap_bh: bufferhead holds the block bitmap
655 * @grp_goal: given target block within the group 654 * @grp_goal: given target block within the group
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0901320671da..d89e0b6a2d78 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1356 return 0; 1356 return 0;
1357} 1357}
1358 1358
1359static int ext2_get_sb(struct file_system_type *fs_type, 1359static struct dentry *ext2_mount(struct file_system_type *fs_type,
1360 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1360 int flags, const char *dev_name, void *data)
1361{ 1361{
1362 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); 1362 return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
1363} 1363}
1364 1364
1365#ifdef CONFIG_QUOTA 1365#ifdef CONFIG_QUOTA
@@ -1473,7 +1473,7 @@ out:
1473static struct file_system_type ext2_fs_type = { 1473static struct file_system_type ext2_fs_type = {
1474 .owner = THIS_MODULE, 1474 .owner = THIS_MODULE,
1475 .name = "ext2", 1475 .name = "ext2",
1476 .get_sb = ext2_get_sb, 1476 .mount = ext2_mount,
1477 .kill_sb = kill_block_super, 1477 .kill_sb = kill_block_super,
1478 .fs_flags = FS_REQUIRES_DEV, 1478 .fs_flags = FS_REQUIRES_DEV,
1479}; 1479};
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 4a32511f4ded..b3db22649426 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -792,9 +792,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
792 if (here < 0) 792 if (here < 0)
793 here = 0; 793 here = 0;
794 794
795 p = ((char *)bh->b_data) + (here >> 3); 795 p = bh->b_data + (here >> 3);
796 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 796 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
797 next = (r - ((char *)bh->b_data)) << 3; 797 next = (r - bh->b_data) << 3;
798 798
799 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) 799 if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
800 return next; 800 return next;
@@ -810,8 +810,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
810 810
811/** 811/**
812 * claim_block() 812 * claim_block()
813 * @lock: the spin lock for this block group
813 * @block: the free block (group relative) to allocate 814 * @block: the free block (group relative) to allocate
814 * @bh: the bufferhead containts the block group bitmap 815 * @bh: the buffer_head contains the block group bitmap
815 * 816 *
816 * We think we can allocate this block in this bitmap. Try to set the bit. 817 * We think we can allocate this block in this bitmap. Try to set the bit.
817 * If that succeeds then check that nobody has allocated and then freed the 818 * If that succeeds then check that nobody has allocated and then freed the
@@ -956,9 +957,11 @@ fail_access:
956 * but we will shift to the place where start_block is, 957 * but we will shift to the place where start_block is,
957 * then start from there, when looking for a reservable space. 958 * then start from there, when looking for a reservable space.
958 * 959 *
959 * @size: the target new reservation window size 960 * @my_rsv: the reservation window
960 * 961 *
961 * @group_first_block: the first block we consider to start 962 * @sb: the super block
963 *
964 * @start_block: the first block we consider to start
962 * the real search from 965 * the real search from
963 * 966 *
964 * @last_block: 967 * @last_block:
@@ -1084,7 +1087,7 @@ static int find_next_reservable_window(
1084 * 1087 *
1085 * failed: we failed to find a reservation window in this group 1088 * failed: we failed to find a reservation window in this group
1086 * 1089 *
1087 * @rsv: the reservation 1090 * @my_rsv: the reservation window
1088 * 1091 *
1089 * @grp_goal: The goal (group-relative). It is where the search for a 1092 * @grp_goal: The goal (group-relative). It is where the search for a
1090 * free reservable space should start from. 1093 * free reservable space should start from.
@@ -1273,8 +1276,8 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1273 * @group: given allocation block group 1276 * @group: given allocation block group
1274 * @bitmap_bh: bufferhead holds the block bitmap 1277 * @bitmap_bh: bufferhead holds the block bitmap
1275 * @grp_goal: given target block within the group 1278 * @grp_goal: given target block within the group
1276 * @count: target number of blocks to allocate
1277 * @my_rsv: reservation window 1279 * @my_rsv: reservation window
1280 * @count: target number of blocks to allocate
1278 * @errp: pointer to store the error code 1281 * @errp: pointer to store the error code
1279 * 1282 *
1280 * This is the main function used to allocate a new block and its reservation 1283 * This is the main function used to allocate a new block and its reservation
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4ab72db3559e..9724aef22460 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -570,9 +570,14 @@ got:
570 ei->i_state_flags = 0; 570 ei->i_state_flags = 0;
571 ext3_set_inode_state(inode, EXT3_STATE_NEW); 571 ext3_set_inode_state(inode, EXT3_STATE_NEW);
572 572
573 ei->i_extra_isize = 573 /* See comment in ext3_iget for explanation */
574 (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? 574 if (ino >= EXT3_FIRST_INO(sb) + 1 &&
575 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; 575 EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
576 ei->i_extra_isize =
577 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
578 } else {
579 ei->i_extra_isize = 0;
580 }
576 581
577 ret = inode; 582 ret = inode;
578 dquot_initialize(inode); 583 dquot_initialize(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad05353040a1..a9580617edd2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -498,7 +498,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
498} 498}
499 499
500/** 500/**
501 * ext3_blks_to_allocate: Look up the block map and count the number 501 * ext3_blks_to_allocate - Look up the block map and count the number
502 * of direct blocks need to be allocated for the given branch. 502 * of direct blocks need to be allocated for the given branch.
503 * 503 *
504 * @branch: chain of indirect blocks 504 * @branch: chain of indirect blocks
@@ -536,14 +536,18 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
536} 536}
537 537
538/** 538/**
539 * ext3_alloc_blocks: multiple allocate blocks needed for a branch 539 * ext3_alloc_blocks - multiple allocate blocks needed for a branch
540 * @handle: handle for this transaction
541 * @inode: owner
542 * @goal: preferred place for allocation
540 * @indirect_blks: the number of blocks need to allocate for indirect 543 * @indirect_blks: the number of blocks need to allocate for indirect
541 * blocks 544 * blocks
542 * 545 * @blks: number of blocks need to allocated for direct blocks
543 * @new_blocks: on return it will store the new block numbers for 546 * @new_blocks: on return it will store the new block numbers for
544 * the indirect blocks(if needed) and the first direct block, 547 * the indirect blocks(if needed) and the first direct block,
545 * @blks: on return it will store the total number of allocated 548 * @err: here we store the error value
546 * direct blocks 549 *
550 * return the number of direct blocks allocated
547 */ 551 */
548static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, 552static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
549 ext3_fsblk_t goal, int indirect_blks, int blks, 553 ext3_fsblk_t goal, int indirect_blks, int blks,
@@ -598,9 +602,11 @@ failed_out:
598 602
599/** 603/**
600 * ext3_alloc_branch - allocate and set up a chain of blocks. 604 * ext3_alloc_branch - allocate and set up a chain of blocks.
605 * @handle: handle for this transaction
601 * @inode: owner 606 * @inode: owner
602 * @indirect_blks: number of allocated indirect blocks 607 * @indirect_blks: number of allocated indirect blocks
603 * @blks: number of allocated direct blocks 608 * @blks: number of allocated direct blocks
609 * @goal: preferred place for allocation
604 * @offsets: offsets (in the blocks) to store the pointers to next. 610 * @offsets: offsets (in the blocks) to store the pointers to next.
605 * @branch: place to store the chain in. 611 * @branch: place to store the chain in.
606 * 612 *
@@ -700,10 +706,9 @@ failed:
700 706
701/** 707/**
702 * ext3_splice_branch - splice the allocated branch onto inode. 708 * ext3_splice_branch - splice the allocated branch onto inode.
709 * @handle: handle for this transaction
703 * @inode: owner 710 * @inode: owner
704 * @block: (logical) number of block we are adding 711 * @block: (logical) number of block we are adding
705 * @chain: chain of indirect blocks (with a missing link - see
706 * ext3_alloc_branch)
707 * @where: location of missing link 712 * @where: location of missing link
708 * @num: number of indirect blocks we are adding 713 * @num: number of indirect blocks we are adding
709 * @blks: number of direct blocks we are adding 714 * @blks: number of direct blocks we are adding
@@ -2530,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
2530 */ 2535 */
2531 } else { 2536 } else {
2532 /* Shared branch grows from an indirect block */ 2537 /* Shared branch grows from an indirect block */
2533 BUFFER_TRACE(partial->bh, "get_write_access");
2534 ext3_free_branches(handle, inode, partial->bh, 2538 ext3_free_branches(handle, inode, partial->bh,
2535 partial->p, 2539 partial->p,
2536 partial->p+1, (chain+n-1) - partial); 2540 partial->p+1, (chain+n-1) - partial);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0ccd7b12b73c..e746d30b1232 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -977,7 +977,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
977 o_blocks_count = le32_to_cpu(es->s_blocks_count); 977 o_blocks_count = le32_to_cpu(es->s_blocks_count);
978 978
979 if (test_opt(sb, DEBUG)) 979 if (test_opt(sb, DEBUG))
980 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", 980 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
981 " upto "E3FSBLK" blocks\n",
981 o_blocks_count, n_blocks_count); 982 o_blocks_count, n_blocks_count);
982 983
983 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 984 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -985,7 +986,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
985 986
986 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 987 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
987 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 988 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
988 " too large to resize to %lu blocks safely\n", 989 " too large to resize to "E3FSBLK" blocks safely\n",
989 sb->s_id, n_blocks_count); 990 sb->s_id, n_blocks_count);
990 if (sizeof(sector_t) < 8) 991 if (sizeof(sector_t) < 8)
991 ext3_warning(sb, __func__, 992 ext3_warning(sb, __func__,
@@ -1065,11 +1066,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1065 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1066 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1067 mutex_unlock(&EXT3_SB(sb)->s_resize_lock); 1068 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1068 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, 1069 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1069 o_blocks_count + add); 1070 o_blocks_count, o_blocks_count + add);
1070 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1071 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, 1072 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1072 o_blocks_count + add); 1073 o_blocks_count, o_blocks_count + add);
1073 if ((err = ext3_journal_stop(handle))) 1074 if ((err = ext3_journal_stop(handle)))
1074 goto exit_put; 1075 goto exit_put;
1075 if (test_opt(sb, DEBUG)) 1076 if (test_opt(sb, DEBUG))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 377768009106..2fedaf8b5012 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1301,9 +1301,9 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1301 ext3_msg(sb, KERN_WARNING, 1301 ext3_msg(sb, KERN_WARNING,
1302 "warning: mounting fs with errors, " 1302 "warning: mounting fs with errors, "
1303 "running e2fsck is recommended"); 1303 "running e2fsck is recommended");
1304 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1304 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1305 le16_to_cpu(es->s_mnt_count) >= 1305 le16_to_cpu(es->s_mnt_count) >=
1306 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1306 le16_to_cpu(es->s_max_mnt_count))
1307 ext3_msg(sb, KERN_WARNING, 1307 ext3_msg(sb, KERN_WARNING,
1308 "warning: maximal mount count reached, " 1308 "warning: maximal mount count reached, "
1309 "running e2fsck is recommended"); 1309 "running e2fsck is recommended");
@@ -1320,7 +1320,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1320 valid forever! :) */ 1320 valid forever! :) */
1321 es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 1321 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1322#endif 1322#endif
1323 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1323 if (!le16_to_cpu(es->s_max_mnt_count))
1324 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1324 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1325 le16_add_cpu(&es->s_mnt_count, 1); 1325 le16_add_cpu(&es->s_mnt_count, 1);
1326 es->s_mtime = cpu_to_le32(get_seconds()); 1326 es->s_mtime = cpu_to_le32(get_seconds());
@@ -1647,7 +1647,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1647 * Note: s_es must be initialized as soon as possible because 1647 * Note: s_es must be initialized as soon as possible because
1648 * some ext3 macro-instructions depend on its value 1648 * some ext3 macro-instructions depend on its value
1649 */ 1649 */
1650 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 1650 es = (struct ext3_super_block *) (bh->b_data + offset);
1651 sbi->s_es = es; 1651 sbi->s_es = es;
1652 sb->s_magic = le16_to_cpu(es->s_magic); 1652 sb->s_magic = le16_to_cpu(es->s_magic);
1653 if (sb->s_magic != EXT3_SUPER_MAGIC) 1653 if (sb->s_magic != EXT3_SUPER_MAGIC)
@@ -1758,7 +1758,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1758 "error: can't read superblock on 2nd try"); 1758 "error: can't read superblock on 2nd try");
1759 goto failed_mount; 1759 goto failed_mount;
1760 } 1760 }
1761 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1761 es = (struct ext3_super_block *)(bh->b_data + offset);
1762 sbi->s_es = es; 1762 sbi->s_es = es;
1763 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1763 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1764 ext3_msg(sb, KERN_ERR, 1764 ext3_msg(sb, KERN_ERR,
@@ -1857,13 +1857,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1857 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - 1857 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1858 le32_to_cpu(es->s_first_data_block) - 1) 1858 le32_to_cpu(es->s_first_data_block) - 1)
1859 / EXT3_BLOCKS_PER_GROUP(sb)) + 1; 1859 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1860 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1860 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1861 EXT3_DESC_PER_BLOCK(sb);
1862 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1861 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1863 GFP_KERNEL); 1862 GFP_KERNEL);
1864 if (sbi->s_group_desc == NULL) { 1863 if (sbi->s_group_desc == NULL) {
1865 ext3_msg(sb, KERN_ERR, 1864 ext3_msg(sb, KERN_ERR,
1866 "error: not enough memory"); 1865 "error: not enough memory");
1866 ret = -ENOMEM;
1867 goto failed_mount; 1867 goto failed_mount;
1868 } 1868 }
1869 1869
@@ -1951,6 +1951,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1951 } 1951 }
1952 if (err) { 1952 if (err) {
1953 ext3_msg(sb, KERN_ERR, "error: insufficient memory"); 1953 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1954 ret = err;
1954 goto failed_mount3; 1955 goto failed_mount3;
1955 } 1956 }
1956 1957
@@ -2159,7 +2160,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2159 goto out_bdev; 2160 goto out_bdev;
2160 } 2161 }
2161 2162
2162 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); 2163 es = (struct ext3_super_block *) (bh->b_data + offset);
2163 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2164 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2164 !(le32_to_cpu(es->s_feature_incompat) & 2165 !(le32_to_cpu(es->s_feature_incompat) &
2165 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2166 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
@@ -2352,6 +2353,21 @@ static int ext3_commit_super(struct super_block *sb,
2352 2353
2353 if (!sbh) 2354 if (!sbh)
2354 return error; 2355 return error;
2356
2357 if (buffer_write_io_error(sbh)) {
2358 /*
2359 * Oh, dear. A previous attempt to write the
2360 * superblock failed. This could happen because the
2361 * USB device was yanked out. Or it could happen to
2362 * be a transient write error and maybe the block will
2363 * be remapped. Nothing we can do but to retry the
2364 * write and hope for the best.
2365 */
2366 ext3_msg(sb, KERN_ERR, "previous I/O error to "
2367 "superblock detected");
2368 clear_buffer_write_io_error(sbh);
2369 set_buffer_uptodate(sbh);
2370 }
2355 /* 2371 /*
2356 * If the file system is mounted read-only, don't update the 2372 * If the file system is mounted read-only, don't update the
2357 * superblock write time. This avoids updating the superblock 2373 * superblock write time. This avoids updating the superblock
@@ -2368,8 +2384,15 @@ static int ext3_commit_super(struct super_block *sb,
2368 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); 2384 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2369 BUFFER_TRACE(sbh, "marking dirty"); 2385 BUFFER_TRACE(sbh, "marking dirty");
2370 mark_buffer_dirty(sbh); 2386 mark_buffer_dirty(sbh);
2371 if (sync) 2387 if (sync) {
2372 error = sync_dirty_buffer(sbh); 2388 error = sync_dirty_buffer(sbh);
2389 if (buffer_write_io_error(sbh)) {
2390 ext3_msg(sb, KERN_ERR, "I/O error while writing "
2391 "superblock");
2392 clear_buffer_write_io_error(sbh);
2393 set_buffer_uptodate(sbh);
2394 }
2395 }
2373 return error; 2396 return error;
2374} 2397}
2375 2398
@@ -2997,16 +3020,16 @@ out:
2997 3020
2998#endif 3021#endif
2999 3022
3000static int ext3_get_sb(struct file_system_type *fs_type, 3023static struct dentry *ext3_mount(struct file_system_type *fs_type,
3001 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3024 int flags, const char *dev_name, void *data)
3002{ 3025{
3003 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); 3026 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3004} 3027}
3005 3028
3006static struct file_system_type ext3_fs_type = { 3029static struct file_system_type ext3_fs_type = {
3007 .owner = THIS_MODULE, 3030 .owner = THIS_MODULE,
3008 .name = "ext3", 3031 .name = "ext3",
3009 .get_sb = ext3_get_sb, 3032 .mount = ext3_mount,
3010 .kill_sb = kill_block_super, 3033 .kill_sb = kill_block_super,
3011 .fs_flags = FS_REQUIRES_DEV, 3034 .fs_flags = FS_REQUIRES_DEV,
3012}; 3035};
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..c947e36eda6c 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_EXT4_FS) += ext4.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
10 10
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43ed..14c3af26c671 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
171 * less than the blocksize * 8 ( which is the size 171 * less than the blocksize * 8 ( which is the size
172 * of bitmap ), set rest of the block bitmap to 1 172 * of bitmap ), set rest of the block bitmap to 1
173 */ 173 */
174 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 174 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
175 bh->b_data);
175 } 176 }
176 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 177 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
177} 178}
@@ -489,7 +490,7 @@ error_return:
489 * Check if filesystem has nblocks free & available for allocation. 490 * Check if filesystem has nblocks free & available for allocation.
490 * On success return 1, return 0 on failure. 491 * On success return 1, return 0 on failure.
491 */ 492 */
492int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) 493static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
493{ 494{
494 s64 free_blocks, dirty_blocks, root_blocks; 495 s64 free_blocks, dirty_blocks, root_blocks;
495 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 496 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
29 29
30static struct kmem_cache *ext4_system_zone_cachep; 30static struct kmem_cache *ext4_system_zone_cachep;
31 31
32int __init init_ext4_system_zone(void) 32int __init ext4_init_system_zone(void)
33{ 33{
34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 34 ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
35 SLAB_RECLAIM_ACCOUNT);
36 if (ext4_system_zone_cachep == NULL) 35 if (ext4_system_zone_cachep == NULL)
37 return -ENOMEM; 36 return -ENOMEM;
38 return 0; 37 return 0;
39} 38}
40 39
41void exit_ext4_system_zone(void) 40void ext4_exit_system_zone(void)
42{ 41{
43 kmem_cache_destroy(ext4_system_zone_cachep); 42 kmem_cache_destroy(ext4_system_zone_cachep);
44} 43}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72baa..ece76fb6a40c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
39 struct file *filp); 39 struct file *filp);
40 40
41const struct file_operations ext4_dir_operations = { 41const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = ext4_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl, 45 .unlocked_ioctl = ext4_ioctl,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6ad..8b5dd6369f82 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -168,7 +168,20 @@ struct mpage_da_data {
168 int pages_written; 168 int pages_written;
169 int retval; 169 int retval;
170}; 170};
171#define EXT4_IO_UNWRITTEN 0x1 171
172/*
173 * Flags for ext4_io_end->flags
174 */
175#define EXT4_IO_END_UNWRITTEN 0x0001
176#define EXT4_IO_END_ERROR 0x0002
177
178struct ext4_io_page {
179 struct page *p_page;
180 int p_count;
181};
182
183#define MAX_IO_PAGES 128
184
172typedef struct ext4_io_end { 185typedef struct ext4_io_end {
173 struct list_head list; /* per-file finished IO list */ 186 struct list_head list; /* per-file finished IO list */
174 struct inode *inode; /* file being written to */ 187 struct inode *inode; /* file being written to */
@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
179 struct work_struct work; /* data work queue */ 192 struct work_struct work; /* data work queue */
180 struct kiocb *iocb; /* iocb struct for AIO */ 193 struct kiocb *iocb; /* iocb struct for AIO */
181 int result; /* error value for AIO */ 194 int result; /* error value for AIO */
195 int num_io_pages;
196 struct ext4_io_page *pages[MAX_IO_PAGES];
182} ext4_io_end_t; 197} ext4_io_end_t;
183 198
199struct ext4_io_submit {
200 int io_op;
201 struct bio *io_bio;
202 ext4_io_end_t *io_end;
203 struct ext4_io_page *io_page;
204 sector_t io_next_block;
205};
206
184/* 207/*
185 * Special inodes numbers 208 * Special inodes numbers
186 */ 209 */
@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
205#define EXT4_MIN_BLOCK_SIZE 1024 228#define EXT4_MIN_BLOCK_SIZE 1024
206#define EXT4_MAX_BLOCK_SIZE 65536 229#define EXT4_MAX_BLOCK_SIZE 65536
207#define EXT4_MIN_BLOCK_LOG_SIZE 10 230#define EXT4_MIN_BLOCK_LOG_SIZE 10
231#define EXT4_MAX_BLOCK_LOG_SIZE 16
208#ifdef __KERNEL__ 232#ifdef __KERNEL__
209# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) 233# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
210#else 234#else
@@ -889,6 +913,7 @@ struct ext4_inode_info {
889#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 913#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
890#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 914#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
891#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 915#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
916#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
892 917
893#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 918#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
894#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 919#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1087,7 +1112,6 @@ struct ext4_sb_info {
1087 struct completion s_kobj_unregister; 1112 struct completion s_kobj_unregister;
1088 1113
1089 /* Journaling */ 1114 /* Journaling */
1090 struct inode *s_journal_inode;
1091 struct journal_s *s_journal; 1115 struct journal_s *s_journal;
1092 struct list_head s_orphan; 1116 struct list_head s_orphan;
1093 struct mutex s_orphan_lock; 1117 struct mutex s_orphan_lock;
@@ -1120,10 +1144,7 @@ struct ext4_sb_info {
1120 /* for buddy allocator */ 1144 /* for buddy allocator */
1121 struct ext4_group_info ***s_group_info; 1145 struct ext4_group_info ***s_group_info;
1122 struct inode *s_buddy_cache; 1146 struct inode *s_buddy_cache;
1123 long s_blocks_reserved;
1124 spinlock_t s_reserve_lock;
1125 spinlock_t s_md_lock; 1147 spinlock_t s_md_lock;
1126 tid_t s_last_transaction;
1127 unsigned short *s_mb_offsets; 1148 unsigned short *s_mb_offsets;
1128 unsigned int *s_mb_maxs; 1149 unsigned int *s_mb_maxs;
1129 1150
@@ -1141,7 +1162,6 @@ struct ext4_sb_info {
1141 unsigned long s_mb_last_start; 1162 unsigned long s_mb_last_start;
1142 1163
1143 /* stats for buddy allocator */ 1164 /* stats for buddy allocator */
1144 spinlock_t s_mb_pa_lock;
1145 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ 1165 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
1146 atomic_t s_bal_success; /* we found long enough chunks */ 1166 atomic_t s_bal_success; /* we found long enough chunks */
1147 atomic_t s_bal_allocated; /* in blocks */ 1167 atomic_t s_bal_allocated; /* in blocks */
@@ -1172,6 +1192,11 @@ struct ext4_sb_info {
1172 1192
1173 /* timer for periodic error stats printing */ 1193 /* timer for periodic error stats printing */
1174 struct timer_list s_err_report; 1194 struct timer_list s_err_report;
1195
1196 /* Lazy inode table initialization info */
1197 struct ext4_li_request *s_li_request;
1198 /* Wait multiplier for lazy initialization thread */
1199 unsigned int s_li_wait_mult;
1175}; 1200};
1176 1201
1177static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1202static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1533void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 1558void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
1534 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); 1559 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
1535 1560
1536extern struct proc_dir_entry *ext4_proc_root; 1561/*
1562 * Timeout and state flag for lazy initialization inode thread.
1563 */
1564#define EXT4_DEF_LI_WAIT_MULT 10
1565#define EXT4_DEF_LI_MAX_START_DELAY 5
1566#define EXT4_LAZYINIT_QUIT 0x0001
1567#define EXT4_LAZYINIT_RUNNING 0x0002
1568
1569/*
1570 * Lazy inode table initialization info
1571 */
1572struct ext4_lazy_init {
1573 unsigned long li_state;
1574
1575 wait_queue_head_t li_wait_daemon;
1576 wait_queue_head_t li_wait_task;
1577 struct timer_list li_timer;
1578 struct task_struct *li_task;
1579
1580 struct list_head li_request_list;
1581 struct mutex li_list_mtx;
1582};
1583
1584struct ext4_li_request {
1585 struct super_block *lr_super;
1586 struct ext4_sb_info *lr_sbi;
1587 ext4_group_t lr_next_group;
1588 struct list_head lr_request;
1589 unsigned long lr_next_sched;
1590 unsigned long lr_timeout;
1591};
1592
1593struct ext4_features {
1594 struct kobject f_kobj;
1595 struct completion f_kobj_unregister;
1596};
1537 1597
1538/* 1598/*
1539 * Function prototypes 1599 * Function prototypes
@@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
1561extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1621extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1562 ext4_fsblk_t goal, unsigned long *count, int *errp); 1622 ext4_fsblk_t goal, unsigned long *count, int *errp);
1563extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1623extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1564extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1565extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1624extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1566 ext4_fsblk_t block, unsigned long count); 1625 ext4_fsblk_t block, unsigned long count);
1567extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1626extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1605extern unsigned long ext4_count_free_inodes(struct super_block *); 1664extern unsigned long ext4_count_free_inodes(struct super_block *);
1606extern unsigned long ext4_count_dirs(struct super_block *); 1665extern unsigned long ext4_count_dirs(struct super_block *);
1607extern void ext4_check_inodes_bitmap(struct super_block *); 1666extern void ext4_check_inodes_bitmap(struct super_block *);
1608extern unsigned ext4_init_inode_bitmap(struct super_block *sb, 1667extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1609 struct buffer_head *bh, 1668extern int ext4_init_inode_table(struct super_block *sb,
1610 ext4_group_t group, 1669 ext4_group_t group, int barrier);
1611 struct ext4_group_desc *desc);
1612extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1613 1670
1614/* mballoc.c */ 1671/* mballoc.c */
1615extern long ext4_mb_stats; 1672extern long ext4_mb_stats;
@@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1620 struct ext4_allocation_request *, int *); 1677 struct ext4_allocation_request *, int *);
1621extern int ext4_mb_reserve_blocks(struct super_block *, int); 1678extern int ext4_mb_reserve_blocks(struct super_block *, int);
1622extern void ext4_discard_preallocations(struct inode *); 1679extern void ext4_discard_preallocations(struct inode *);
1623extern int __init init_ext4_mballoc(void); 1680extern int __init ext4_init_mballoc(void);
1624extern void exit_ext4_mballoc(void); 1681extern void ext4_exit_mballoc(void);
1625extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1682extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1626 struct buffer_head *bh, ext4_fsblk_t block, 1683 struct buffer_head *bh, ext4_fsblk_t block,
1627 unsigned long count, int flags); 1684 unsigned long count, int flags);
1628extern int ext4_mb_add_groupinfo(struct super_block *sb, 1685extern int ext4_mb_add_groupinfo(struct super_block *sb,
1629 ext4_group_t i, struct ext4_group_desc *desc); 1686 ext4_group_t i, struct ext4_group_desc *desc);
1630extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1687extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
1631extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1688
1632 ext4_group_t, int);
1633/* inode.c */ 1689/* inode.c */
1634struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1690struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1635 ext4_lblk_t, int, int *); 1691 ext4_lblk_t, int, int *);
@@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
1657extern int ext4_alloc_da_blocks(struct inode *inode); 1713extern int ext4_alloc_da_blocks(struct inode *inode);
1658extern void ext4_set_aops(struct inode *inode); 1714extern void ext4_set_aops(struct inode *inode);
1659extern int ext4_writepage_trans_blocks(struct inode *); 1715extern int ext4_writepage_trans_blocks(struct inode *);
1660extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1661extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1716extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1662extern int ext4_block_truncate_page(handle_t *handle, 1717extern int ext4_block_truncate_page(handle_t *handle,
1663 struct address_space *mapping, loff_t from); 1718 struct address_space *mapping, loff_t from);
1664extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1719extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1665extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1720extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1666extern int flush_completed_IO(struct inode *inode);
1667extern void ext4_da_update_reserve_space(struct inode *inode, 1721extern void ext4_da_update_reserve_space(struct inode *inode,
1668 int used, int quota_claim); 1722 int used, int quota_claim);
1669/* ioctl.c */ 1723/* ioctl.c */
@@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
1960/* file.c */ 2014/* file.c */
1961extern const struct inode_operations ext4_file_inode_operations; 2015extern const struct inode_operations ext4_file_inode_operations;
1962extern const struct file_operations ext4_file_operations; 2016extern const struct file_operations ext4_file_operations;
2017extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
1963 2018
1964/* namei.c */ 2019/* namei.c */
1965extern const struct inode_operations ext4_dir_inode_operations; 2020extern const struct inode_operations ext4_dir_inode_operations;
@@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1973/* block_validity */ 2028/* block_validity */
1974extern void ext4_release_system_zone(struct super_block *sb); 2029extern void ext4_release_system_zone(struct super_block *sb);
1975extern int ext4_setup_system_zone(struct super_block *sb); 2030extern int ext4_setup_system_zone(struct super_block *sb);
1976extern int __init init_ext4_system_zone(void); 2031extern int __init ext4_init_system_zone(void);
1977extern void exit_ext4_system_zone(void); 2032extern void ext4_exit_system_zone(void);
1978extern int ext4_data_block_valid(struct ext4_sb_info *sbi, 2033extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
1979 ext4_fsblk_t start_blk, 2034 ext4_fsblk_t start_blk,
1980 unsigned int count); 2035 unsigned int count);
@@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2002 __u64 start_orig, __u64 start_donor, 2057 __u64 start_orig, __u64 start_donor,
2003 __u64 len, __u64 *moved_len); 2058 __u64 len, __u64 *moved_len);
2004 2059
2060/* page-io.c */
2061extern int __init ext4_init_pageio(void);
2062extern void ext4_exit_pageio(void);
2063extern void ext4_free_io_end(ext4_io_end_t *io);
2064extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2065extern int ext4_end_io_nolock(ext4_io_end_t *io);
2066extern void ext4_io_submit(struct ext4_io_submit *io);
2067extern int ext4_bio_write_page(struct ext4_io_submit *io,
2068 struct page *page,
2069 int len,
2070 struct writeback_control *wbc);
2005 2071
2006/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ 2072/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
2007enum ext4_state_bits { 2073enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb4..28ce70fd9cd0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228/*
229 * ext4_ext_pblock:
230 * combine low and high parts of physical block number into ext4_fsblk_t
231 */
232static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
233{
234 ext4_fsblk_t block;
235
236 block = le32_to_cpu(ex->ee_start_lo);
237 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
238 return block;
239}
240
241/*
242 * ext4_idx_pblock:
243 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
244 */
245static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
246{
247 ext4_fsblk_t block;
248
249 block = le32_to_cpu(ix->ei_leaf_lo);
250 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
251 return block;
252}
253
254/*
255 * ext4_ext_store_pblock:
256 * stores a large physical block number into an extent struct,
257 * breaking it into parts
258 */
259static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
260 ext4_fsblk_t pb)
261{
262 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
263 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
264 0xffff);
265}
266
267/*
268 * ext4_idx_store_pblock:
269 * stores a large physical block number into an index struct,
270 * breaking it into parts
271 */
272static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
273 ext4_fsblk_t pb)
274{
275 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
276 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
277 0xffff);
278}
279
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, 280extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks); 281 sector_t lblocks);
230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
233extern int ext4_extent_tree_init(handle_t *, struct inode *); 282extern int ext4_extent_tree_init(handle_t *, struct inode *);
234extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 283extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
235 int num, 284 int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
237extern int ext4_can_extents_be_merged(struct inode *inode, 286extern int ext4_can_extents_be_merged(struct inode *inode,
238 struct ext4_extent *ex1, 287 struct ext4_extent *ex1,
239 struct ext4_extent *ex2); 288 struct ext4_extent *ex2);
240extern int ext4_ext_try_to_merge(struct inode *inode,
241 struct ext4_ext_path *path,
242 struct ext4_extent *);
243extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
244extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); 289extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
245extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
246 ext_prepare_callback, void *);
247extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 290extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
248 struct ext4_ext_path *); 291 struct ext4_ext_path *);
249extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
250 ext4_lblk_t *, ext4_fsblk_t *);
251extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
252 ext4_lblk_t *, ext4_fsblk_t *);
253extern void ext4_ext_drop_refs(struct ext4_ext_path *); 292extern void ext4_ext_drop_refs(struct ext4_ext_path *);
254extern int ext4_ext_check_inode(struct inode *inode); 293extern int ext4_ext_check_inode(struct inode *inode);
255#endif /* _EXT4_EXTENTS */ 294#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06328d3e5717..0554c48cb1fd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h" 45#include "ext4_extents.h"
46 46
47
48/*
49 * ext_pblock:
50 * combine low and high parts of physical block number into ext4_fsblk_t
51 */
52ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
53{
54 ext4_fsblk_t block;
55
56 block = le32_to_cpu(ex->ee_start_lo);
57 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
58 return block;
59}
60
61/*
62 * idx_pblock:
63 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
64 */
65ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
66{
67 ext4_fsblk_t block;
68
69 block = le32_to_cpu(ix->ei_leaf_lo);
70 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
71 return block;
72}
73
74/*
75 * ext4_ext_store_pblock:
76 * stores a large physical block number into an extent struct,
77 * breaking it into parts
78 */
79void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
80{
81 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
82 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
83}
84
85/*
86 * ext4_idx_store_pblock:
87 * stores a large physical block number into an index struct,
88 * breaking it into parts
89 */
90static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
91{
92 ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
93 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
94}
95
96static int ext4_ext_truncate_extend_restart(handle_t *handle, 47static int ext4_ext_truncate_extend_restart(handle_t *handle,
97 struct inode *inode, 48 struct inode *inode,
98 int needed) 49 int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
169 /* try to predict block placement */ 120 /* try to predict block placement */
170 ex = path[depth].p_ext; 121 ex = path[depth].p_ext;
171 if (ex) 122 if (ex)
172 return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); 123 return (ext4_ext_pblock(ex) +
124 (block - le32_to_cpu(ex->ee_block)));
173 125
174 /* it looks like index is empty; 126 /* it looks like index is empty;
175 * try to find starting block from index itself */ 127 * try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
354 306
355static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 307static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
356{ 308{
357 ext4_fsblk_t block = ext_pblock(ext); 309 ext4_fsblk_t block = ext4_ext_pblock(ext);
358 int len = ext4_ext_get_actual_len(ext); 310 int len = ext4_ext_get_actual_len(ext);
359 311
360 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 312 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
363static int ext4_valid_extent_idx(struct inode *inode, 315static int ext4_valid_extent_idx(struct inode *inode,
364 struct ext4_extent_idx *ext_idx) 316 struct ext4_extent_idx *ext_idx)
365{ 317{
366 ext4_fsblk_t block = idx_pblock(ext_idx); 318 ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
367 319
368 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); 320 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
369} 321}
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
463 for (k = 0; k <= l; k++, path++) { 415 for (k = 0; k <= l; k++, path++) {
464 if (path->p_idx) { 416 if (path->p_idx) {
465 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 417 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
466 idx_pblock(path->p_idx)); 418 ext4_idx_pblock(path->p_idx));
467 } else if (path->p_ext) { 419 } else if (path->p_ext) {
468 ext_debug(" %d:[%d]%d:%llu ", 420 ext_debug(" %d:[%d]%d:%llu ",
469 le32_to_cpu(path->p_ext->ee_block), 421 le32_to_cpu(path->p_ext->ee_block),
470 ext4_ext_is_uninitialized(path->p_ext), 422 ext4_ext_is_uninitialized(path->p_ext),
471 ext4_ext_get_actual_len(path->p_ext), 423 ext4_ext_get_actual_len(path->p_ext),
472 ext_pblock(path->p_ext)); 424 ext4_ext_pblock(path->p_ext));
473 } else 425 } else
474 ext_debug(" []"); 426 ext_debug(" []");
475 } 427 }
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
494 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 446 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
495 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 447 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
496 ext4_ext_is_uninitialized(ex), 448 ext4_ext_is_uninitialized(ex),
497 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 449 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
498 } 450 }
499 ext_debug("\n"); 451 ext_debug("\n");
500} 452}
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
545 497
546 path->p_idx = l - 1; 498 path->p_idx = l - 1;
547 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 499 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
548 idx_pblock(path->p_idx)); 500 ext4_idx_pblock(path->p_idx));
549 501
550#ifdef CHECK_BINSEARCH 502#ifdef CHECK_BINSEARCH
551 { 503 {
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
614 path->p_ext = l - 1; 566 path->p_ext = l - 1;
615 ext_debug(" -> %d:%llu:[%d]%d ", 567 ext_debug(" -> %d:%llu:[%d]%d ",
616 le32_to_cpu(path->p_ext->ee_block), 568 le32_to_cpu(path->p_ext->ee_block),
617 ext_pblock(path->p_ext), 569 ext4_ext_pblock(path->p_ext),
618 ext4_ext_is_uninitialized(path->p_ext), 570 ext4_ext_is_uninitialized(path->p_ext),
619 ext4_ext_get_actual_len(path->p_ext)); 571 ext4_ext_get_actual_len(path->p_ext));
620 572
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
682 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 634 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
683 635
684 ext4_ext_binsearch_idx(inode, path + ppos, block); 636 ext4_ext_binsearch_idx(inode, path + ppos, block);
685 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 637 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
686 path[ppos].p_depth = i; 638 path[ppos].p_depth = i;
687 path[ppos].p_ext = NULL; 639 path[ppos].p_ext = NULL;
688 640
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
721 ext4_ext_binsearch(inode, path + ppos, block); 673 ext4_ext_binsearch(inode, path + ppos, block);
722 /* if not an empty leaf */ 674 /* if not an empty leaf */
723 if (path[ppos].p_ext) 675 if (path[ppos].p_ext)
724 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 676 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
725 677
726 ext4_ext_show_path(inode, path); 678 ext4_ext_show_path(inode, path);
727 679
@@ -739,9 +691,9 @@ err:
739 * insert new index [@logical;@ptr] into the block at @curp; 691 * insert new index [@logical;@ptr] into the block at @curp;
740 * check where to insert: before @curp or after @curp 692 * check where to insert: before @curp or after @curp
741 */ 693 */
742int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 694static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
743 struct ext4_ext_path *curp, 695 struct ext4_ext_path *curp,
744 int logical, ext4_fsblk_t ptr) 696 int logical, ext4_fsblk_t ptr)
745{ 697{
746 struct ext4_extent_idx *ix; 698 struct ext4_extent_idx *ix;
747 int len, err; 699 int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
917 EXT_MAX_EXTENT(path[depth].p_hdr)) { 869 EXT_MAX_EXTENT(path[depth].p_hdr)) {
918 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 870 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
919 le32_to_cpu(path[depth].p_ext->ee_block), 871 le32_to_cpu(path[depth].p_ext->ee_block),
920 ext_pblock(path[depth].p_ext), 872 ext4_ext_pblock(path[depth].p_ext),
921 ext4_ext_is_uninitialized(path[depth].p_ext), 873 ext4_ext_is_uninitialized(path[depth].p_ext),
922 ext4_ext_get_actual_len(path[depth].p_ext), 874 ext4_ext_get_actual_len(path[depth].p_ext),
923 newblock); 875 newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1007 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 959 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
1008 ext_debug("%d: move %d:%llu in new index %llu\n", i, 960 ext_debug("%d: move %d:%llu in new index %llu\n", i,
1009 le32_to_cpu(path[i].p_idx->ei_block), 961 le32_to_cpu(path[i].p_idx->ei_block),
1010 idx_pblock(path[i].p_idx), 962 ext4_idx_pblock(path[i].p_idx),
1011 newblock); 963 newblock);
1012 /*memmove(++fidx, path[i].p_idx++, 964 /*memmove(++fidx, path[i].p_idx++,
1013 sizeof(struct ext4_extent_idx)); 965 sizeof(struct ext4_extent_idx));
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1146 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1098 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1147 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1099 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1148 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1100 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1149 idx_pblock(EXT_FIRST_INDEX(neh))); 1101 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1150 1102
1151 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1103 neh->eh_depth = cpu_to_le16(path->p_depth + 1);
1152 err = ext4_ext_dirty(handle, inode, curp); 1104 err = ext4_ext_dirty(handle, inode, curp);
@@ -1232,9 +1184,9 @@ out:
1232 * returns 0 at @phys 1184 * returns 0 at @phys
1233 * return value contains 0 (success) or error code 1185 * return value contains 0 (success) or error code
1234 */ 1186 */
1235int 1187static int ext4_ext_search_left(struct inode *inode,
1236ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, 1188 struct ext4_ext_path *path,
1237 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1189 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1238{ 1190{
1239 struct ext4_extent_idx *ix; 1191 struct ext4_extent_idx *ix;
1240 struct ext4_extent *ex; 1192 struct ext4_extent *ex;
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1286 } 1238 }
1287 1239
1288 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1240 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
1289 *phys = ext_pblock(ex) + ee_len - 1; 1241 *phys = ext4_ext_pblock(ex) + ee_len - 1;
1290 return 0; 1242 return 0;
1291} 1243}
1292 1244
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
1297 * returns 0 at @phys 1249 * returns 0 at @phys
1298 * return value contains 0 (success) or error code 1250 * return value contains 0 (success) or error code
1299 */ 1251 */
1300int 1252static int ext4_ext_search_right(struct inode *inode,
1301ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, 1253 struct ext4_ext_path *path,
1302 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1254 ext4_lblk_t *logical, ext4_fsblk_t *phys)
1303{ 1255{
1304 struct buffer_head *bh = NULL; 1256 struct buffer_head *bh = NULL;
1305 struct ext4_extent_header *eh; 1257 struct ext4_extent_header *eh;
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1342 } 1294 }
1343 } 1295 }
1344 *logical = le32_to_cpu(ex->ee_block); 1296 *logical = le32_to_cpu(ex->ee_block);
1345 *phys = ext_pblock(ex); 1297 *phys = ext4_ext_pblock(ex);
1346 return 0; 1298 return 0;
1347 } 1299 }
1348 1300
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1357 /* next allocated block in this leaf */ 1309 /* next allocated block in this leaf */
1358 ex++; 1310 ex++;
1359 *logical = le32_to_cpu(ex->ee_block); 1311 *logical = le32_to_cpu(ex->ee_block);
1360 *phys = ext_pblock(ex); 1312 *phys = ext4_ext_pblock(ex);
1361 return 0; 1313 return 0;
1362 } 1314 }
1363 1315
@@ -1376,7 +1328,7 @@ got_index:
1376 * follow it and find the closest allocated 1328 * follow it and find the closest allocated
1377 * block to the right */ 1329 * block to the right */
1378 ix++; 1330 ix++;
1379 block = idx_pblock(ix); 1331 block = ext4_idx_pblock(ix);
1380 while (++depth < path->p_depth) { 1332 while (++depth < path->p_depth) {
1381 bh = sb_bread(inode->i_sb, block); 1333 bh = sb_bread(inode->i_sb, block);
1382 if (bh == NULL) 1334 if (bh == NULL)
@@ -1388,7 +1340,7 @@ got_index:
1388 return -EIO; 1340 return -EIO;
1389 } 1341 }
1390 ix = EXT_FIRST_INDEX(eh); 1342 ix = EXT_FIRST_INDEX(eh);
1391 block = idx_pblock(ix); 1343 block = ext4_idx_pblock(ix);
1392 put_bh(bh); 1344 put_bh(bh);
1393 } 1345 }
1394 1346
@@ -1402,7 +1354,7 @@ got_index:
1402 } 1354 }
1403 ex = EXT_FIRST_EXTENT(eh); 1355 ex = EXT_FIRST_EXTENT(eh);
1404 *logical = le32_to_cpu(ex->ee_block); 1356 *logical = le32_to_cpu(ex->ee_block);
1405 *phys = ext_pblock(ex); 1357 *phys = ext4_ext_pblock(ex);
1406 put_bh(bh); 1358 put_bh(bh);
1407 return 0; 1359 return 0;
1408} 1360}
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1573 return 0; 1525 return 0;
1574#endif 1526#endif
1575 1527
1576 if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) 1528 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1577 return 1; 1529 return 1;
1578 return 0; 1530 return 0;
1579} 1531}
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1585 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1537 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1586 * 1 if they got merged. 1538 * 1 if they got merged.
1587 */ 1539 */
1588int ext4_ext_try_to_merge(struct inode *inode, 1540static int ext4_ext_try_to_merge(struct inode *inode,
1589 struct ext4_ext_path *path, 1541 struct ext4_ext_path *path,
1590 struct ext4_extent *ex) 1542 struct ext4_extent *ex)
1591{ 1543{
1592 struct ext4_extent_header *eh; 1544 struct ext4_extent_header *eh;
1593 unsigned int depth, len; 1545 unsigned int depth, len;
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
1632 * such that there will be no overlap, and then returns 1. 1584 * such that there will be no overlap, and then returns 1.
1633 * If there is no overlap found, it returns 0. 1585 * If there is no overlap found, it returns 0.
1634 */ 1586 */
1635unsigned int ext4_ext_check_overlap(struct inode *inode, 1587static unsigned int ext4_ext_check_overlap(struct inode *inode,
1636 struct ext4_extent *newext, 1588 struct ext4_extent *newext,
1637 struct ext4_ext_path *path) 1589 struct ext4_ext_path *path)
1638{ 1590{
1639 ext4_lblk_t b1, b2; 1591 ext4_lblk_t b1, b2;
1640 unsigned int depth, len1; 1592 unsigned int depth, len1;
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1706 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1658 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1707 && ext4_can_extents_be_merged(inode, ex, newext)) { 1659 && ext4_can_extents_be_merged(inode, ex, newext)) {
1708 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1660 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1709 ext4_ext_is_uninitialized(newext), 1661 ext4_ext_is_uninitialized(newext),
1710 ext4_ext_get_actual_len(newext), 1662 ext4_ext_get_actual_len(newext),
1711 le32_to_cpu(ex->ee_block), 1663 le32_to_cpu(ex->ee_block),
1712 ext4_ext_is_uninitialized(ex), 1664 ext4_ext_is_uninitialized(ex),
1713 ext4_ext_get_actual_len(ex), ext_pblock(ex)); 1665 ext4_ext_get_actual_len(ex),
1666 ext4_ext_pblock(ex));
1714 err = ext4_ext_get_access(handle, inode, path + depth); 1667 err = ext4_ext_get_access(handle, inode, path + depth);
1715 if (err) 1668 if (err)
1716 return err; 1669 return err;
@@ -1780,7 +1733,7 @@ has_space:
1780 /* there is no extent in this leaf, create first one */ 1733 /* there is no extent in this leaf, create first one */
1781 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1734 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
1782 le32_to_cpu(newext->ee_block), 1735 le32_to_cpu(newext->ee_block),
1783 ext_pblock(newext), 1736 ext4_ext_pblock(newext),
1784 ext4_ext_is_uninitialized(newext), 1737 ext4_ext_is_uninitialized(newext),
1785 ext4_ext_get_actual_len(newext)); 1738 ext4_ext_get_actual_len(newext));
1786 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1739 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1794,7 +1747,7 @@ has_space:
1794 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1747 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1795 "move %d from 0x%p to 0x%p\n", 1748 "move %d from 0x%p to 0x%p\n",
1796 le32_to_cpu(newext->ee_block), 1749 le32_to_cpu(newext->ee_block),
1797 ext_pblock(newext), 1750 ext4_ext_pblock(newext),
1798 ext4_ext_is_uninitialized(newext), 1751 ext4_ext_is_uninitialized(newext),
1799 ext4_ext_get_actual_len(newext), 1752 ext4_ext_get_actual_len(newext),
1800 nearex, len, nearex + 1, nearex + 2); 1753 nearex, len, nearex + 1, nearex + 2);
@@ -1808,7 +1761,7 @@ has_space:
1808 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " 1761 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1809 "move %d from 0x%p to 0x%p\n", 1762 "move %d from 0x%p to 0x%p\n",
1810 le32_to_cpu(newext->ee_block), 1763 le32_to_cpu(newext->ee_block),
1811 ext_pblock(newext), 1764 ext4_ext_pblock(newext),
1812 ext4_ext_is_uninitialized(newext), 1765 ext4_ext_is_uninitialized(newext),
1813 ext4_ext_get_actual_len(newext), 1766 ext4_ext_get_actual_len(newext),
1814 nearex, len, nearex + 1, nearex + 2); 1767 nearex, len, nearex + 1, nearex + 2);
@@ -1819,7 +1772,7 @@ has_space:
1819 le16_add_cpu(&eh->eh_entries, 1); 1772 le16_add_cpu(&eh->eh_entries, 1);
1820 nearex = path[depth].p_ext; 1773 nearex = path[depth].p_ext;
1821 nearex->ee_block = newext->ee_block; 1774 nearex->ee_block = newext->ee_block;
1822 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1775 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1823 nearex->ee_len = newext->ee_len; 1776 nearex->ee_len = newext->ee_len;
1824 1777
1825merge: 1778merge:
@@ -1845,9 +1798,9 @@ cleanup:
1845 return err; 1798 return err;
1846} 1799}
1847 1800
1848int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1801static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1849 ext4_lblk_t num, ext_prepare_callback func, 1802 ext4_lblk_t num, ext_prepare_callback func,
1850 void *cbdata) 1803 void *cbdata)
1851{ 1804{
1852 struct ext4_ext_path *path = NULL; 1805 struct ext4_ext_path *path = NULL;
1853 struct ext4_ext_cache cbex; 1806 struct ext4_ext_cache cbex;
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1923 } else { 1876 } else {
1924 cbex.ec_block = le32_to_cpu(ex->ee_block); 1877 cbex.ec_block = le32_to_cpu(ex->ee_block);
1925 cbex.ec_len = ext4_ext_get_actual_len(ex); 1878 cbex.ec_len = ext4_ext_get_actual_len(ex);
1926 cbex.ec_start = ext_pblock(ex); 1879 cbex.ec_start = ext4_ext_pblock(ex);
1927 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1880 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1928 } 1881 }
1929 1882
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2073 2026
2074 /* free index block */ 2027 /* free index block */
2075 path--; 2028 path--;
2076 leaf = idx_pblock(path->p_idx); 2029 leaf = ext4_idx_pblock(path->p_idx);
2077 if (unlikely(path->p_hdr->eh_entries == 0)) { 2030 if (unlikely(path->p_hdr->eh_entries == 0)) {
2078 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2031 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2079 return -EIO; 2032 return -EIO;
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2181 ext4_fsblk_t start; 2134 ext4_fsblk_t start;
2182 2135
2183 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2136 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2184 start = ext_pblock(ex) + ee_len - num; 2137 start = ext4_ext_pblock(ex) + ee_len - num;
2185 ext_debug("free last %u blocks starting %llu\n", num, start); 2138 ext_debug("free last %u blocks starting %llu\n", num, start);
2186 ext4_free_blocks(handle, inode, 0, start, num, flags); 2139 ext4_free_blocks(handle, inode, 0, start, num, flags);
2187 } else if (from == le32_to_cpu(ex->ee_block) 2140 } else if (from == le32_to_cpu(ex->ee_block)
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2310 goto out; 2263 goto out;
2311 2264
2312 ext_debug("new extent: %u:%u:%llu\n", block, num, 2265 ext_debug("new extent: %u:%u:%llu\n", block, num,
2313 ext_pblock(ex)); 2266 ext4_ext_pblock(ex));
2314 ex--; 2267 ex--;
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2268 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2269 ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2421,9 +2374,9 @@ again:
2421 struct buffer_head *bh; 2374 struct buffer_head *bh;
2422 /* go to the next level */ 2375 /* go to the next level */
2423 ext_debug("move to level %d (block %llu)\n", 2376 ext_debug("move to level %d (block %llu)\n",
2424 i + 1, idx_pblock(path[i].p_idx)); 2377 i + 1, ext4_idx_pblock(path[i].p_idx));
2425 memset(path + i + 1, 0, sizeof(*path)); 2378 memset(path + i + 1, 0, sizeof(*path));
2426 bh = sb_bread(sb, idx_pblock(path[i].p_idx)); 2379 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
2427 if (!bh) { 2380 if (!bh) {
2428 /* should we reset i_size? */ 2381 /* should we reset i_size? */
2429 err = -EIO; 2382 err = -EIO;
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
2535#endif 2488#endif
2536} 2489}
2537 2490
2538static void bi_complete(struct bio *bio, int error)
2539{
2540 complete((struct completion *)bio->bi_private);
2541}
2542
2543/* FIXME!! we need to try to merge to left or right after zero-out */ 2491/* FIXME!! we need to try to merge to left or right after zero-out */
2544static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2492static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2545{ 2493{
2494 ext4_fsblk_t ee_pblock;
2495 unsigned int ee_len;
2546 int ret; 2496 int ret;
2547 struct bio *bio;
2548 int blkbits, blocksize;
2549 sector_t ee_pblock;
2550 struct completion event;
2551 unsigned int ee_len, len, done, offset;
2552 2497
2553
2554 blkbits = inode->i_blkbits;
2555 blocksize = inode->i_sb->s_blocksize;
2556 ee_len = ext4_ext_get_actual_len(ex); 2498 ee_len = ext4_ext_get_actual_len(ex);
2557 ee_pblock = ext_pblock(ex); 2499 ee_pblock = ext4_ext_pblock(ex);
2558
2559 /* convert ee_pblock to 512 byte sectors */
2560 ee_pblock = ee_pblock << (blkbits - 9);
2561
2562 while (ee_len > 0) {
2563
2564 if (ee_len > BIO_MAX_PAGES)
2565 len = BIO_MAX_PAGES;
2566 else
2567 len = ee_len;
2568
2569 bio = bio_alloc(GFP_NOIO, len);
2570 if (!bio)
2571 return -ENOMEM;
2572
2573 bio->bi_sector = ee_pblock;
2574 bio->bi_bdev = inode->i_sb->s_bdev;
2575
2576 done = 0;
2577 offset = 0;
2578 while (done < len) {
2579 ret = bio_add_page(bio, ZERO_PAGE(0),
2580 blocksize, offset);
2581 if (ret != blocksize) {
2582 /*
2583 * We can't add any more pages because of
2584 * hardware limitations. Start a new bio.
2585 */
2586 break;
2587 }
2588 done++;
2589 offset += blocksize;
2590 if (offset >= PAGE_CACHE_SIZE)
2591 offset = 0;
2592 }
2593 2500
2594 init_completion(&event); 2501 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
2595 bio->bi_private = &event; 2502 if (ret > 0)
2596 bio->bi_end_io = bi_complete; 2503 ret = 0;
2597 submit_bio(WRITE, bio);
2598 wait_for_completion(&event);
2599 2504
2600 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2505 return ret;
2601 bio_put(bio);
2602 return -EIO;
2603 }
2604 bio_put(bio);
2605 ee_len -= done;
2606 ee_pblock += done << (blkbits - 9);
2607 }
2608 return 0;
2609} 2506}
2610 2507
2611#define EXT4_EXT_ZERO_LEN 7 2508#define EXT4_EXT_ZERO_LEN 7
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2651 ee_block = le32_to_cpu(ex->ee_block); 2548 ee_block = le32_to_cpu(ex->ee_block);
2652 ee_len = ext4_ext_get_actual_len(ex); 2549 ee_len = ext4_ext_get_actual_len(ex);
2653 allocated = ee_len - (map->m_lblk - ee_block); 2550 allocated = ee_len - (map->m_lblk - ee_block);
2654 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2551 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2655 2552
2656 ex2 = ex; 2553 ex2 = ex;
2657 orig_ex.ee_block = ex->ee_block; 2554 orig_ex.ee_block = ex->ee_block;
2658 orig_ex.ee_len = cpu_to_le16(ee_len); 2555 orig_ex.ee_len = cpu_to_le16(ee_len);
2659 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2556 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2660 2557
2661 /* 2558 /*
2662 * It is safe to convert extent to initialized via explicit 2559 * It is safe to convert extent to initialized via explicit
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2675 /* update the extent length and mark as initialized */ 2572 /* update the extent length and mark as initialized */
2676 ex->ee_block = orig_ex.ee_block; 2573 ex->ee_block = orig_ex.ee_block;
2677 ex->ee_len = orig_ex.ee_len; 2574 ex->ee_len = orig_ex.ee_len;
2678 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2575 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2679 ext4_ext_dirty(handle, inode, path + depth); 2576 ext4_ext_dirty(handle, inode, path + depth);
2680 /* zeroed the full extent */ 2577 /* zeroed the full extent */
2681 return allocated; 2578 return allocated;
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2710 ex->ee_block = orig_ex.ee_block; 2607 ex->ee_block = orig_ex.ee_block;
2711 ex->ee_len = cpu_to_le16(ee_len - allocated); 2608 ex->ee_len = cpu_to_le16(ee_len - allocated);
2712 ext4_ext_mark_uninitialized(ex); 2609 ext4_ext_mark_uninitialized(ex);
2713 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2610 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2714 ext4_ext_dirty(handle, inode, path + depth); 2611 ext4_ext_dirty(handle, inode, path + depth);
2715 2612
2716 ex3 = &newex; 2613 ex3 = &newex;
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2725 goto fix_extent_len; 2622 goto fix_extent_len;
2726 ex->ee_block = orig_ex.ee_block; 2623 ex->ee_block = orig_ex.ee_block;
2727 ex->ee_len = orig_ex.ee_len; 2624 ex->ee_len = orig_ex.ee_len;
2728 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2625 ext4_ext_store_pblock(ex,
2626 ext4_ext_pblock(&orig_ex));
2729 ext4_ext_dirty(handle, inode, path + depth); 2627 ext4_ext_dirty(handle, inode, path + depth);
2730 /* blocks available from map->m_lblk */ 2628 /* blocks available from map->m_lblk */
2731 return allocated; 2629 return allocated;
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2782 /* update the extent length and mark as initialized */ 2680 /* update the extent length and mark as initialized */
2783 ex->ee_block = orig_ex.ee_block; 2681 ex->ee_block = orig_ex.ee_block;
2784 ex->ee_len = orig_ex.ee_len; 2682 ex->ee_len = orig_ex.ee_len;
2785 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2683 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2786 ext4_ext_dirty(handle, inode, path + depth); 2684 ext4_ext_dirty(handle, inode, path + depth);
2787 /* zeroed the full extent */ 2685 /* zeroed the full extent */
2788 /* blocks available from map->m_lblk */ 2686 /* blocks available from map->m_lblk */
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2833 /* update the extent length and mark as initialized */ 2731 /* update the extent length and mark as initialized */
2834 ex->ee_block = orig_ex.ee_block; 2732 ex->ee_block = orig_ex.ee_block;
2835 ex->ee_len = orig_ex.ee_len; 2733 ex->ee_len = orig_ex.ee_len;
2836 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2734 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2837 ext4_ext_dirty(handle, inode, path + depth); 2735 ext4_ext_dirty(handle, inode, path + depth);
2838 /* zero out the first half */ 2736 /* zero out the first half */
2839 /* blocks available from map->m_lblk */ 2737 /* blocks available from map->m_lblk */
@@ -2902,7 +2800,7 @@ insert:
2902 /* update the extent length and mark as initialized */ 2800 /* update the extent length and mark as initialized */
2903 ex->ee_block = orig_ex.ee_block; 2801 ex->ee_block = orig_ex.ee_block;
2904 ex->ee_len = orig_ex.ee_len; 2802 ex->ee_len = orig_ex.ee_len;
2905 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2803 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2906 ext4_ext_dirty(handle, inode, path + depth); 2804 ext4_ext_dirty(handle, inode, path + depth);
2907 /* zero out the first half */ 2805 /* zero out the first half */
2908 return allocated; 2806 return allocated;
@@ -2915,7 +2813,7 @@ out:
2915fix_extent_len: 2813fix_extent_len:
2916 ex->ee_block = orig_ex.ee_block; 2814 ex->ee_block = orig_ex.ee_block;
2917 ex->ee_len = orig_ex.ee_len; 2815 ex->ee_len = orig_ex.ee_len;
2918 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2816 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2919 ext4_ext_mark_uninitialized(ex); 2817 ext4_ext_mark_uninitialized(ex);
2920 ext4_ext_dirty(handle, inode, path + depth); 2818 ext4_ext_dirty(handle, inode, path + depth);
2921 return err; 2819 return err;
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2973 ee_block = le32_to_cpu(ex->ee_block); 2871 ee_block = le32_to_cpu(ex->ee_block);
2974 ee_len = ext4_ext_get_actual_len(ex); 2872 ee_len = ext4_ext_get_actual_len(ex);
2975 allocated = ee_len - (map->m_lblk - ee_block); 2873 allocated = ee_len - (map->m_lblk - ee_block);
2976 newblock = map->m_lblk - ee_block + ext_pblock(ex); 2874 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2977 2875
2978 ex2 = ex; 2876 ex2 = ex;
2979 orig_ex.ee_block = ex->ee_block; 2877 orig_ex.ee_block = ex->ee_block;
2980 orig_ex.ee_len = cpu_to_le16(ee_len); 2878 orig_ex.ee_len = cpu_to_le16(ee_len);
2981 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2879 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2982 2880
2983 /* 2881 /*
2984 * It is safe to convert extent to initialized via explicit 2882 * It is safe to convert extent to initialized via explicit
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3027 /* update the extent length and mark as initialized */ 2925 /* update the extent length and mark as initialized */
3028 ex->ee_block = orig_ex.ee_block; 2926 ex->ee_block = orig_ex.ee_block;
3029 ex->ee_len = orig_ex.ee_len; 2927 ex->ee_len = orig_ex.ee_len;
3030 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2928 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3031 ext4_ext_dirty(handle, inode, path + depth); 2929 ext4_ext_dirty(handle, inode, path + depth);
3032 /* zeroed the full extent */ 2930 /* zeroed the full extent */
3033 /* blocks available from map->m_lblk */ 2931 /* blocks available from map->m_lblk */
@@ -3099,7 +2997,7 @@ insert:
3099 /* update the extent length and mark as initialized */ 2997 /* update the extent length and mark as initialized */
3100 ex->ee_block = orig_ex.ee_block; 2998 ex->ee_block = orig_ex.ee_block;
3101 ex->ee_len = orig_ex.ee_len; 2999 ex->ee_len = orig_ex.ee_len;
3102 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3000 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3103 ext4_ext_dirty(handle, inode, path + depth); 3001 ext4_ext_dirty(handle, inode, path + depth);
3104 /* zero out the first half */ 3002 /* zero out the first half */
3105 return allocated; 3003 return allocated;
@@ -3112,7 +3010,7 @@ out:
3112fix_extent_len: 3010fix_extent_len:
3113 ex->ee_block = orig_ex.ee_block; 3011 ex->ee_block = orig_ex.ee_block;
3114 ex->ee_len = orig_ex.ee_len; 3012 ex->ee_len = orig_ex.ee_len;
3115 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 3013 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3116 ext4_ext_mark_uninitialized(ex); 3014 ext4_ext_mark_uninitialized(ex);
3117 ext4_ext_dirty(handle, inode, path + depth); 3015 ext4_ext_dirty(handle, inode, path + depth);
3118 return err; 3016 return err;
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3180 unmap_underlying_metadata(bdev, block + i); 3078 unmap_underlying_metadata(bdev, block + i);
3181} 3079}
3182 3080
3081/*
3082 * Handle EOFBLOCKS_FL flag, clearing it if necessary
3083 */
3084static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3085 struct ext4_map_blocks *map,
3086 struct ext4_ext_path *path,
3087 unsigned int len)
3088{
3089 int i, depth;
3090 struct ext4_extent_header *eh;
3091 struct ext4_extent *ex, *last_ex;
3092
3093 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3094 return 0;
3095
3096 depth = ext_depth(inode);
3097 eh = path[depth].p_hdr;
3098 ex = path[depth].p_ext;
3099
3100 if (unlikely(!eh->eh_entries)) {
3101 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
3102 "EOFBLOCKS_FL set");
3103 return -EIO;
3104 }
3105 last_ex = EXT_LAST_EXTENT(eh);
3106 /*
3107 * We should clear the EOFBLOCKS_FL flag if we are writing the
3108 * last block in the last extent in the file. We test this by
3109 * first checking to see if the caller to
3110 * ext4_ext_get_blocks() was interested in the last block (or
3111 * a block beyond the last block) in the current extent. If
3112 * this turns out to be false, we can bail out from this
3113 * function immediately.
3114 */
3115 if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
3116 ext4_ext_get_actual_len(last_ex))
3117 return 0;
3118 /*
3119 * If the caller does appear to be planning to write at or
3120 * beyond the end of the current extent, we then test to see
3121 * if the current extent is the last extent in the file, by
3122 * checking to make sure it was reached via the rightmost node
3123 * at each level of the tree.
3124 */
3125 for (i = depth-1; i >= 0; i--)
3126 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3127 return 0;
3128 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3129 return ext4_mark_inode_dirty(handle, inode);
3130}
3131
3183static int 3132static int
3184ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3133ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3185 struct ext4_map_blocks *map, 3134 struct ext4_map_blocks *map,
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3206 * completed 3155 * completed
3207 */ 3156 */
3208 if (io) 3157 if (io)
3209 io->flag = EXT4_IO_UNWRITTEN; 3158 io->flag = EXT4_IO_END_UNWRITTEN;
3210 else 3159 else
3211 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3160 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3212 if (ext4_should_dioread_nolock(inode)) 3161 if (ext4_should_dioread_nolock(inode))
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3217 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3166 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3218 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3167 ret = ext4_convert_unwritten_extents_endio(handle, inode,
3219 path); 3168 path);
3220 if (ret >= 0) 3169 if (ret >= 0) {
3221 ext4_update_inode_fsync_trans(handle, inode, 1); 3170 ext4_update_inode_fsync_trans(handle, inode, 1);
3171 err = check_eofblocks_fl(handle, inode, map, path,
3172 map->m_len);
3173 } else
3174 err = ret;
3222 goto out2; 3175 goto out2;
3223 } 3176 }
3224 /* buffered IO case */ 3177 /* buffered IO case */
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3244 3197
3245 /* buffered write, writepage time, convert*/ 3198 /* buffered write, writepage time, convert*/
3246 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3199 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3247 if (ret >= 0) 3200 if (ret >= 0) {
3248 ext4_update_inode_fsync_trans(handle, inode, 1); 3201 ext4_update_inode_fsync_trans(handle, inode, 1);
3202 err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
3203 if (err < 0)
3204 goto out2;
3205 }
3206
3249out: 3207out:
3250 if (ret <= 0) { 3208 if (ret <= 0) {
3251 err = ret; 3209 err = ret;
@@ -3292,6 +3250,7 @@ out2:
3292 } 3250 }
3293 return err ? err : allocated; 3251 return err ? err : allocated;
3294} 3252}
3253
3295/* 3254/*
3296 * Block allocation/map/preallocation routine for extents based files 3255 * Block allocation/map/preallocation routine for extents based files
3297 * 3256 *
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3315{ 3274{
3316 struct ext4_ext_path *path = NULL; 3275 struct ext4_ext_path *path = NULL;
3317 struct ext4_extent_header *eh; 3276 struct ext4_extent_header *eh;
3318 struct ext4_extent newex, *ex, *last_ex; 3277 struct ext4_extent newex, *ex;
3319 ext4_fsblk_t newblock; 3278 ext4_fsblk_t newblock;
3320 int i, err = 0, depth, ret, cache_type; 3279 int err = 0, depth, ret, cache_type;
3321 unsigned int allocated = 0; 3280 unsigned int allocated = 0;
3322 struct ext4_allocation_request ar; 3281 struct ext4_allocation_request ar;
3323 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3282 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3341 /* block is already allocated */ 3300 /* block is already allocated */
3342 newblock = map->m_lblk 3301 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3302 - le32_to_cpu(newex.ee_block)
3344 + ext_pblock(&newex); 3303 + ext4_ext_pblock(&newex);
3345 /* number of remaining blocks in the extent */ 3304 /* number of remaining blocks in the extent */
3346 allocated = ext4_ext_get_actual_len(&newex) - 3305 allocated = ext4_ext_get_actual_len(&newex) -
3347 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3306 (map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3379 ex = path[depth].p_ext; 3338 ex = path[depth].p_ext;
3380 if (ex) { 3339 if (ex) {
3381 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3340 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3382 ext4_fsblk_t ee_start = ext_pblock(ex); 3341 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3383 unsigned short ee_len; 3342 unsigned short ee_len;
3384 3343
3385 /* 3344 /*
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3488 */ 3447 */
3489 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3448 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3490 if (io) 3449 if (io)
3491 io->flag = EXT4_IO_UNWRITTEN; 3450 io->flag = EXT4_IO_END_UNWRITTEN;
3492 else 3451 else
3493 ext4_set_inode_state(inode, 3452 ext4_set_inode_state(inode,
3494 EXT4_STATE_DIO_UNWRITTEN); 3453 EXT4_STATE_DIO_UNWRITTEN);
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3497 map->m_flags |= EXT4_MAP_UNINIT; 3456 map->m_flags |= EXT4_MAP_UNINIT;
3498 } 3457 }
3499 3458
3500 if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { 3459 err = check_eofblocks_fl(handle, inode, map, path, ar.len);
3501 if (unlikely(!eh->eh_entries)) { 3460 if (err)
3502 EXT4_ERROR_INODE(inode, 3461 goto out2;
3503 "eh->eh_entries == 0 and " 3462
3504 "EOFBLOCKS_FL set");
3505 err = -EIO;
3506 goto out2;
3507 }
3508 last_ex = EXT_LAST_EXTENT(eh);
3509 /*
3510 * If the current leaf block was reached by looking at
3511 * the last index block all the way down the tree, and
3512 * we are extending the inode beyond the last extent
3513 * in the current leaf block, then clear the
3514 * EOFBLOCKS_FL flag.
3515 */
3516 for (i = depth-1; i >= 0; i--) {
3517 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3518 break;
3519 }
3520 if ((i < 0) &&
3521 (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
3522 ext4_ext_get_actual_len(last_ex)))
3523 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3524 }
3525 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3463 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3526 if (err) { 3464 if (err) {
3527 /* free data blocks we just allocated */ 3465 /* free data blocks we just allocated */
3528 /* not a good idea to call discard here directly, 3466 /* not a good idea to call discard here directly,
3529 * but otherwise we'd need to call it every free() */ 3467 * but otherwise we'd need to call it every free() */
3530 ext4_discard_preallocations(inode); 3468 ext4_discard_preallocations(inode);
3531 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), 3469 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
3532 ext4_ext_get_actual_len(&newex), 0); 3470 ext4_ext_get_actual_len(&newex), 0);
3533 goto out2; 3471 goto out2;
3534 } 3472 }
3535 3473
3536 /* previous routine could use block we allocated */ 3474 /* previous routine could use block we allocated */
3537 newblock = ext_pblock(&newex); 3475 newblock = ext4_ext_pblock(&newex);
3538 allocated = ext4_ext_get_actual_len(&newex); 3476 allocated = ext4_ext_get_actual_len(&newex);
3539 if (allocated > map->m_len) 3477 if (allocated > map->m_len)
3540 allocated = map->m_len; 3478 allocated = map->m_len;
@@ -3729,7 +3667,7 @@ retry:
3729 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3667 printk(KERN_ERR "%s: ext4_ext_map_blocks "
3730 "returned error inode#%lu, block=%u, " 3668 "returned error inode#%lu, block=%u, "
3731 "max_blocks=%u", __func__, 3669 "max_blocks=%u", __func__,
3732 inode->i_ino, block, max_blocks); 3670 inode->i_ino, map.m_lblk, max_blocks);
3733#endif 3671#endif
3734 ext4_mark_inode_dirty(handle, inode); 3672 ext4_mark_inode_dirty(handle, inode);
3735 ret2 = ext4_journal_stop(handle); 3673 ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ee92b66d4558..5a5c55ddceef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
130 return dquot_file_open(inode, filp); 130 return dquot_file_open(inode, filp);
131} 131}
132 132
133/*
134 * ext4_llseek() copied from generic_file_llseek() to handle both
135 * block-mapped and extent-mapped maxbytes values. This should
136 * otherwise be identical with generic_file_llseek().
137 */
138loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
139{
140 struct inode *inode = file->f_mapping->host;
141 loff_t maxbytes;
142
143 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
144 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
145 else
146 maxbytes = inode->i_sb->s_maxbytes;
147 mutex_lock(&inode->i_mutex);
148 switch (origin) {
149 case SEEK_END:
150 offset += inode->i_size;
151 break;
152 case SEEK_CUR:
153 if (offset == 0) {
154 mutex_unlock(&inode->i_mutex);
155 return file->f_pos;
156 }
157 offset += file->f_pos;
158 break;
159 }
160
161 if (offset < 0 || offset > maxbytes) {
162 mutex_unlock(&inode->i_mutex);
163 return -EINVAL;
164 }
165
166 if (offset != file->f_pos) {
167 file->f_pos = offset;
168 file->f_version = 0;
169 }
170 mutex_unlock(&inode->i_mutex);
171
172 return offset;
173}
174
133const struct file_operations ext4_file_operations = { 175const struct file_operations ext4_file_operations = {
134 .llseek = generic_file_llseek, 176 .llseek = ext4_llseek,
135 .read = do_sync_read, 177 .read = do_sync_read,
136 .write = do_sync_write, 178 .write = do_sync_write,
137 .aio_read = generic_file_aio_read, 179 .aio_read = generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3f3ff5ee8f9d..c1a7bc923cf6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
34 34
35#include <trace/events/ext4.h> 35#include <trace/events/ext4.h>
36 36
37static void dump_completed_IO(struct inode * inode)
38{
39#ifdef EXT4_DEBUG
40 struct list_head *cur, *before, *after;
41 ext4_io_end_t *io, *io0, *io1;
42 unsigned long flags;
43
44 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
45 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
46 return;
47 }
48
49 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
50 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
51 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
52 cur = &io->list;
53 before = cur->prev;
54 io0 = container_of(before, ext4_io_end_t, list);
55 after = cur->next;
56 io1 = container_of(after, ext4_io_end_t, list);
57
58 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
59 io, inode->i_ino, io0, io1);
60 }
61 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
62#endif
63}
64
65/*
66 * This function is called from ext4_sync_file().
67 *
68 * When IO is completed, the work to convert unwritten extents to
69 * written is queued on workqueue but may not get immediately
70 * scheduled. When fsync is called, we need to ensure the
71 * conversion is complete before fsync returns.
72 * The inode keeps track of a list of pending/completed IO that
73 * might needs to do the conversion. This function walks through
74 * the list and convert the related unwritten extents for completed IO
75 * to written.
76 * The function return the number of pending IOs on success.
77 */
78static int flush_completed_IO(struct inode *inode)
79{
80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode);
82 unsigned long flags;
83 int ret = 0;
84 int ret2 = 0;
85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list);
94 /*
95 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written.
97 *
98 * When ext4_sync_file() is called, run_queue() may already
99 * about to flush the work corresponding to this io structure.
100 * It will be upset if it founds the io structure related
101 * to the work-to-be schedule is freed.
102 *
103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work
106 * queue work.
107 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0)
112 ret2 = ret;
113 else
114 list_del_init(&io->list);
115 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0;
118}
119
37/* 120/*
38 * If we're not journaling and this is a just-created file, we have to 121 * If we're not journaling and this is a just-created file, we have to
39 * sync our parent directory (if it was freshly created) since 122 * sync our parent directory (if it was freshly created) since
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
50 * need to use it within a single byte (to ensure we get endianness right). 50 * need to use it within a single byte (to ensure we get endianness right).
51 * We can use memset for the rest of the bitmap as there are no other users. 51 * We can use memset for the rest of the bitmap as there are no other users.
52 */ 52 */
53void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 53void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
54{ 54{
55 int i; 55 int i;
56 56
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
65} 65}
66 66
67/* Initializes an uninitialized inode bitmap */ 67/* Initializes an uninitialized inode bitmap */
68unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, 68static unsigned ext4_init_inode_bitmap(struct super_block *sb,
69 ext4_group_t block_group, 69 struct buffer_head *bh,
70 struct ext4_group_desc *gdp) 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp)
71{ 72{
72 struct ext4_sb_info *sbi = EXT4_SB(sb); 73 struct ext4_sb_info *sbi = EXT4_SB(sb);
73 74
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
85 } 86 }
86 87
87 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 88 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
88 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 89 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
89 bh->b_data); 90 bh->b_data);
90 91
91 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
107 desc = ext4_get_group_desc(sb, block_group, NULL); 108 desc = ext4_get_group_desc(sb, block_group, NULL);
108 if (!desc) 109 if (!desc)
109 return NULL; 110 return NULL;
111
110 bitmap_blk = ext4_inode_bitmap(sb, desc); 112 bitmap_blk = ext4_inode_bitmap(sb, desc);
111 bh = sb_getblk(sb, bitmap_blk); 113 bh = sb_getblk(sb, bitmap_blk);
112 if (unlikely(!bh)) { 114 if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 125 unlock_buffer(bh);
124 return bh; 126 return bh;
125 } 127 }
128
126 ext4_lock_group(sb, block_group); 129 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 130 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 131 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
133 return bh; 136 return bh;
134 } 137 }
135 ext4_unlock_group(sb, block_group); 138 ext4_unlock_group(sb, block_group);
139
136 if (buffer_uptodate(bh)) { 140 if (buffer_uptodate(bh)) {
137 /* 141 /*
138 * if not uninit if bh is uptodate, 142 * if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
411 * for a particular block group or flex_bg. If flex_size is 1, then g 415 * for a particular block group or flex_bg. If flex_size is 1, then g
412 * is a block group number; otherwise it is flex_bg number. 416 * is a block group number; otherwise it is flex_bg number.
413 */ 417 */
414void get_orlov_stats(struct super_block *sb, ext4_group_t g, 418static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
415 int flex_size, struct orlov_stats *stats) 419 int flex_size, struct orlov_stats *stats)
416{ 420{
417 struct ext4_group_desc *desc; 421 struct ext4_group_desc *desc;
418 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 422 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
712{ 716{
713 int free = 0, retval = 0, count; 717 int free = 0, retval = 0, count;
714 struct ext4_sb_info *sbi = EXT4_SB(sb); 718 struct ext4_sb_info *sbi = EXT4_SB(sb);
719 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
716 721
722 /*
723 * We have to be sure that new inode allocation does not race with
724 * inode table initialization, because otherwise we may end up
725 * allocating and writing new inode right before sb_issue_zeroout
726 * takes place and overwriting our new inode with zeroes. So we
727 * take alloc_sem to prevent it.
728 */
729 down_read(&grp->alloc_sem);
717 ext4_lock_group(sb, group); 730 ext4_lock_group(sb, group);
718 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 731 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
719 /* not a free inode */ 732 /* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
724 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 737 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
725 ino > EXT4_INODES_PER_GROUP(sb)) { 738 ino > EXT4_INODES_PER_GROUP(sb)) {
726 ext4_unlock_group(sb, group); 739 ext4_unlock_group(sb, group);
740 up_read(&grp->alloc_sem);
727 ext4_error(sb, "reserved inode or inode > inodes count - " 741 ext4_error(sb, "reserved inode or inode > inodes count - "
728 "block_group = %u, inode=%lu", group, 742 "block_group = %u, inode=%lu", group,
729 ino + group * EXT4_INODES_PER_GROUP(sb)); 743 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
772 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 786 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
773err_ret: 787err_ret:
774 ext4_unlock_group(sb, group); 788 ext4_unlock_group(sb, group);
789 up_read(&grp->alloc_sem);
775 return retval; 790 return retval;
776} 791}
777 792
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1205 } 1220 }
1206 return count; 1221 return count;
1207} 1222}
1223
1224/*
 1225 * Zeroes a not-yet-zeroed inode table - just writes zeroes through the whole
 1226 * inode table. Must be called without any spinlock held. The only place
1227 * where it is called from on active part of filesystem is ext4lazyinit
1228 * thread, so we do not need any special locks, however we have to prevent
1229 * inode allocation from the current group, so we take alloc_sem lock, to
1230 * block ext4_claim_inode until we are finished.
1231 */
1232extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1233 int barrier)
1234{
1235 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1236 struct ext4_sb_info *sbi = EXT4_SB(sb);
1237 struct ext4_group_desc *gdp = NULL;
1238 struct buffer_head *group_desc_bh;
1239 handle_t *handle;
1240 ext4_fsblk_t blk;
1241 int num, ret = 0, used_blks = 0;
1242
1243 /* This should not happen, but just to be sure check this */
1244 if (sb->s_flags & MS_RDONLY) {
1245 ret = 1;
1246 goto out;
1247 }
1248
1249 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
1250 if (!gdp)
1251 goto out;
1252
1253 /*
1254 * We do not need to lock this, because we are the only one
1255 * handling this flag.
1256 */
1257 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1258 goto out;
1259
1260 handle = ext4_journal_start_sb(sb, 1);
1261 if (IS_ERR(handle)) {
1262 ret = PTR_ERR(handle);
1263 goto out;
1264 }
1265
1266 down_write(&grp->alloc_sem);
1267 /*
1268 * If inode bitmap was already initialized there may be some
1269 * used inodes so we need to skip blocks with used inodes in
1270 * inode table.
1271 */
1272 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
1273 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
1274 ext4_itable_unused_count(sb, gdp)),
1275 sbi->s_inodes_per_block);
1276
1277 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1278 ext4_error(sb, "Something is wrong with group %u\n"
1279 "Used itable blocks: %d"
1280 "itable unused count: %u\n",
1281 group, used_blks,
1282 ext4_itable_unused_count(sb, gdp));
1283 ret = 1;
1284 goto out;
1285 }
1286
1287 blk = ext4_inode_table(sb, gdp) + used_blks;
1288 num = sbi->s_itb_per_group - used_blks;
1289
1290 BUFFER_TRACE(group_desc_bh, "get_write_access");
1291 ret = ext4_journal_get_write_access(handle,
1292 group_desc_bh);
1293 if (ret)
1294 goto err_out;
1295
1296 /*
1297 * Skip zeroout if the inode table is full. But we set the ZEROED
1298 * flag anyway, because obviously, when it is full it does not need
1299 * further zeroing.
1300 */
1301 if (unlikely(num == 0))
1302 goto skip_zeroout;
1303
1304 ext4_debug("going to zero out inode table in group %d\n",
1305 group);
1306 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1307 if (ret < 0)
1308 goto err_out;
1309 if (barrier)
1310 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1311
1312skip_zeroout:
1313 ext4_lock_group(sb, group);
1314 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
1315 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1316 ext4_unlock_group(sb, group);
1317
1318 BUFFER_TRACE(group_desc_bh,
1319 "call ext4_handle_dirty_metadata");
1320 ret = ext4_handle_dirty_metadata(handle, NULL,
1321 group_desc_bh);
1322
1323err_out:
1324 up_write(&grp->alloc_sem);
1325 ext4_journal_stop(handle);
1326out:
1327 return ret;
1328}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 49635ef236f8..191616470466 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
60} 60}
61 61
62static void ext4_invalidatepage(struct page *page, unsigned long offset); 62static void ext4_invalidatepage(struct page *page, unsigned long offset);
63static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
64 struct buffer_head *bh_result, int create);
65static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
66static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
67static int __ext4_journalled_writepage(struct page *page, unsigned int len);
68static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
63 69
64/* 70/*
65 * Test whether an inode is a fast symlink. 71 * Test whether an inode is a fast symlink.
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
755 * parent to disk. 761 * parent to disk.
756 */ 762 */
757 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 763 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
764 if (unlikely(!bh)) {
765 err = -EIO;
766 goto failed;
767 }
768
758 branch[n].bh = bh; 769 branch[n].bh = bh;
759 lock_buffer(bh); 770 lock_buffer(bh);
760 BUFFER_TRACE(bh, "call get_create_access"); 771 BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
1207 break; 1218 break;
1208 idx++; 1219 idx++;
1209 num++; 1220 num++;
1210 if (num >= max_pages) 1221 if (num >= max_pages) {
1222 done = 1;
1211 break; 1223 break;
1224 }
1212 } 1225 }
1213 pagevec_release(&pvec); 1226 pagevec_release(&pvec);
1214 } 1227 }
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page,
1995 * 2008 *
1996 * As pages are already locked by write_cache_pages(), we can't use it 2009 * As pages are already locked by write_cache_pages(), we can't use it
1997 */ 2010 */
1998static int mpage_da_submit_io(struct mpage_da_data *mpd) 2011static int mpage_da_submit_io(struct mpage_da_data *mpd,
2012 struct ext4_map_blocks *map)
1999{ 2013{
2000 long pages_skipped;
2001 struct pagevec pvec; 2014 struct pagevec pvec;
2002 unsigned long index, end; 2015 unsigned long index, end;
2003 int ret = 0, err, nr_pages, i; 2016 int ret = 0, err, nr_pages, i;
2004 struct inode *inode = mpd->inode; 2017 struct inode *inode = mpd->inode;
2005 struct address_space *mapping = inode->i_mapping; 2018 struct address_space *mapping = inode->i_mapping;
2019 loff_t size = i_size_read(inode);
2020 unsigned int len, block_start;
2021 struct buffer_head *bh, *page_bufs = NULL;
2022 int journal_data = ext4_should_journal_data(inode);
2023 sector_t pblock = 0, cur_logical = 0;
2024 struct ext4_io_submit io_submit;
2006 2025
2007 BUG_ON(mpd->next_page <= mpd->first_page); 2026 BUG_ON(mpd->next_page <= mpd->first_page);
2027 memset(&io_submit, 0, sizeof(io_submit));
2008 /* 2028 /*
2009 * We need to start from the first_page to the next_page - 1 2029 * We need to start from the first_page to the next_page - 1
2010 * to make sure we also write the mapped dirty buffer_heads. 2030 * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
2020 if (nr_pages == 0) 2040 if (nr_pages == 0)
2021 break; 2041 break;
2022 for (i = 0; i < nr_pages; i++) { 2042 for (i = 0; i < nr_pages; i++) {
2043 int commit_write = 0, redirty_page = 0;
2023 struct page *page = pvec.pages[i]; 2044 struct page *page = pvec.pages[i];
2024 2045
2025 index = page->index; 2046 index = page->index;
2026 if (index > end) 2047 if (index > end)
2027 break; 2048 break;
2049
2050 if (index == size >> PAGE_CACHE_SHIFT)
2051 len = size & ~PAGE_CACHE_MASK;
2052 else
2053 len = PAGE_CACHE_SIZE;
2054 if (map) {
2055 cur_logical = index << (PAGE_CACHE_SHIFT -
2056 inode->i_blkbits);
2057 pblock = map->m_pblk + (cur_logical -
2058 map->m_lblk);
2059 }
2028 index++; 2060 index++;
2029 2061
2030 BUG_ON(!PageLocked(page)); 2062 BUG_ON(!PageLocked(page));
2031 BUG_ON(PageWriteback(page)); 2063 BUG_ON(PageWriteback(page));
2032 2064
2033 pages_skipped = mpd->wbc->pages_skipped;
2034 err = mapping->a_ops->writepage(page, mpd->wbc);
2035 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
2036 /*
2037 * have successfully written the page
2038 * without skipping the same
2039 */
2040 mpd->pages_written++;
2041 /* 2065 /*
2042 * In error case, we have to continue because 2066 * If the page does not have buffers (for
2043 * remaining pages are still locked 2067 * whatever reason), try to create them using
2044 * XXX: unlock and re-dirty them? 2068 * __block_write_begin. If this fails,
2069 * redirty the page and move on.
2045 */ 2070 */
2046 if (ret == 0) 2071 if (!page_has_buffers(page)) {
2047 ret = err; 2072 if (__block_write_begin(page, 0, len,
2048 } 2073 noalloc_get_block_write)) {
2049 pagevec_release(&pvec); 2074 redirty_page:
2050 } 2075 redirty_page_for_writepage(mpd->wbc,
2051 return ret; 2076 page);
2052} 2077 unlock_page(page);
2053 2078 continue;
2054/* 2079 }
2055 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers 2080 commit_write = 1;
2056 * 2081 }
2057 * the function goes through all passed space and put actual disk
2058 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
2059 */
2060static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
2061 struct ext4_map_blocks *map)
2062{
2063 struct inode *inode = mpd->inode;
2064 struct address_space *mapping = inode->i_mapping;
2065 int blocks = map->m_len;
2066 sector_t pblock = map->m_pblk, cur_logical;
2067 struct buffer_head *head, *bh;
2068 pgoff_t index, end;
2069 struct pagevec pvec;
2070 int nr_pages, i;
2071
2072 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2073 end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
2074 cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2075
2076 pagevec_init(&pvec, 0);
2077
2078 while (index <= end) {
2079 /* XXX: optimize tail */
2080 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
2081 if (nr_pages == 0)
2082 break;
2083 for (i = 0; i < nr_pages; i++) {
2084 struct page *page = pvec.pages[i];
2085
2086 index = page->index;
2087 if (index > end)
2088 break;
2089 index++;
2090
2091 BUG_ON(!PageLocked(page));
2092 BUG_ON(PageWriteback(page));
2093 BUG_ON(!page_has_buffers(page));
2094
2095 bh = page_buffers(page);
2096 head = bh;
2097
2098 /* skip blocks out of the range */
2099 do {
2100 if (cur_logical >= map->m_lblk)
2101 break;
2102 cur_logical++;
2103 } while ((bh = bh->b_this_page) != head);
2104 2082
2083 bh = page_bufs = page_buffers(page);
2084 block_start = 0;
2105 do { 2085 do {
2106 if (cur_logical >= map->m_lblk + blocks) 2086 if (!bh)
2107 break; 2087 goto redirty_page;
2108 2088 if (map && (cur_logical >= map->m_lblk) &&
2109 if (buffer_delay(bh) || buffer_unwritten(bh)) { 2089 (cur_logical <= (map->m_lblk +
2110 2090 (map->m_len - 1)))) {
2111 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
2112
2113 if (buffer_delay(bh)) { 2091 if (buffer_delay(bh)) {
2114 clear_buffer_delay(bh); 2092 clear_buffer_delay(bh);
2115 bh->b_blocknr = pblock; 2093 bh->b_blocknr = pblock;
2116 } else {
2117 /*
2118 * unwritten already should have
2119 * blocknr assigned. Verify that
2120 */
2121 clear_buffer_unwritten(bh);
2122 BUG_ON(bh->b_blocknr != pblock);
2123 } 2094 }
2095 if (buffer_unwritten(bh) ||
2096 buffer_mapped(bh))
2097 BUG_ON(bh->b_blocknr != pblock);
2098 if (map->m_flags & EXT4_MAP_UNINIT)
2099 set_buffer_uninit(bh);
2100 clear_buffer_unwritten(bh);
2101 }
2124 2102
2125 } else if (buffer_mapped(bh)) 2103 /* redirty page if block allocation undone */
2126 BUG_ON(bh->b_blocknr != pblock); 2104 if (buffer_delay(bh) || buffer_unwritten(bh))
2127 2105 redirty_page = 1;
2128 if (map->m_flags & EXT4_MAP_UNINIT) 2106 bh = bh->b_this_page;
2129 set_buffer_uninit(bh); 2107 block_start += bh->b_size;
2130 cur_logical++; 2108 cur_logical++;
2131 pblock++; 2109 pblock++;
2132 } while ((bh = bh->b_this_page) != head); 2110 } while (bh != page_bufs);
2111
2112 if (redirty_page)
2113 goto redirty_page;
2114
2115 if (commit_write)
2116 /* mark the buffer_heads as dirty & uptodate */
2117 block_commit_write(page, 0, len);
2118
2119 /*
2120 * Delalloc doesn't support data journalling,
2121 * but eventually maybe we'll lift this
2122 * restriction.
2123 */
2124 if (unlikely(journal_data && PageChecked(page)))
2125 err = __ext4_journalled_writepage(page, len);
2126 else
2127 err = ext4_bio_write_page(&io_submit, page,
2128 len, mpd->wbc);
2129
2130 if (!err)
2131 mpd->pages_written++;
2132 /*
2133 * In error case, we have to continue because
2134 * remaining pages are still locked
2135 */
2136 if (ret == 0)
2137 ret = err;
2133 } 2138 }
2134 pagevec_release(&pvec); 2139 pagevec_release(&pvec);
2135 } 2140 }
2141 ext4_io_submit(&io_submit);
2142 return ret;
2136} 2143}
2137 2144
2138
2139static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2145static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2140 sector_t logical, long blk_cnt) 2146 sector_t logical, long blk_cnt)
2141{ 2147{
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode)
2187} 2193}
2188 2194
2189/* 2195/*
2190 * mpage_da_map_blocks - go through given space 2196 * mpage_da_map_and_submit - go through given space, map them
2197 * if necessary, and then submit them for I/O
2191 * 2198 *
2192 * @mpd - bh describing space 2199 * @mpd - bh describing space
2193 * 2200 *
2194 * The function skips space we know is already mapped to disk blocks. 2201 * The function skips space we know is already mapped to disk blocks.
2195 * 2202 *
2196 */ 2203 */
2197static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2204static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2198{ 2205{
2199 int err, blks, get_blocks_flags; 2206 int err, blks, get_blocks_flags;
2200 struct ext4_map_blocks map; 2207 struct ext4_map_blocks map, *mapp = NULL;
2201 sector_t next = mpd->b_blocknr; 2208 sector_t next = mpd->b_blocknr;
2202 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; 2209 unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
2203 loff_t disksize = EXT4_I(mpd->inode)->i_disksize; 2210 loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
2204 handle_t *handle = NULL; 2211 handle_t *handle = NULL;
2205 2212
2206 /* 2213 /*
2207 * We consider only non-mapped and non-allocated blocks 2214 * If the blocks are mapped already, or we couldn't accumulate
2208 */ 2215 * any blocks, then proceed immediately to the submission stage.
2209 if ((mpd->b_state & (1 << BH_Mapped)) &&
2210 !(mpd->b_state & (1 << BH_Delay)) &&
2211 !(mpd->b_state & (1 << BH_Unwritten)))
2212 return 0;
2213
2214 /*
2215 * If we didn't accumulate anything to write simply return
2216 */ 2216 */
2217 if (!mpd->b_size) 2217 if ((mpd->b_size == 0) ||
2218 return 0; 2218 ((mpd->b_state & (1 << BH_Mapped)) &&
2219 !(mpd->b_state & (1 << BH_Delay)) &&
2220 !(mpd->b_state & (1 << BH_Unwritten))))
2221 goto submit_io;
2219 2222
2220 handle = ext4_journal_current_handle(); 2223 handle = ext4_journal_current_handle();
2221 BUG_ON(!handle); 2224 BUG_ON(!handle);
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2252 2255
2253 err = blks; 2256 err = blks;
2254 /* 2257 /*
2255 * If get block returns with error we simply 2258 * If get block returns EAGAIN or ENOSPC and there
2256 * return. Later writepage will redirty the page and 2259 * appears to be free blocks we will call
2257 * writepages will find the dirty page again 2260 * ext4_writepage() for all of the pages which will
2261 * just redirty the pages.
2258 */ 2262 */
2259 if (err == -EAGAIN) 2263 if (err == -EAGAIN)
2260 return 0; 2264 goto submit_io;
2261 2265
2262 if (err == -ENOSPC && 2266 if (err == -ENOSPC &&
2263 ext4_count_free_blocks(sb)) { 2267 ext4_count_free_blocks(sb)) {
2264 mpd->retval = err; 2268 mpd->retval = err;
2265 return 0; 2269 goto submit_io;
2266 } 2270 }
2267 2271
2268 /* 2272 /*
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2287 /* invalidate all the pages */ 2291 /* invalidate all the pages */
2288 ext4_da_block_invalidatepages(mpd, next, 2292 ext4_da_block_invalidatepages(mpd, next,
2289 mpd->b_size >> mpd->inode->i_blkbits); 2293 mpd->b_size >> mpd->inode->i_blkbits);
2290 return err; 2294 return;
2291 } 2295 }
2292 BUG_ON(blks == 0); 2296 BUG_ON(blks == 0);
2293 2297
2298 mapp = &map;
2294 if (map.m_flags & EXT4_MAP_NEW) { 2299 if (map.m_flags & EXT4_MAP_NEW) {
2295 struct block_device *bdev = mpd->inode->i_sb->s_bdev; 2300 struct block_device *bdev = mpd->inode->i_sb->s_bdev;
2296 int i; 2301 int i;
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2299 unmap_underlying_metadata(bdev, map.m_pblk + i); 2304 unmap_underlying_metadata(bdev, map.m_pblk + i);
2300 } 2305 }
2301 2306
2302 /*
2303 * If blocks are delayed marked, we need to
2304 * put actual blocknr and drop delayed bit
2305 */
2306 if ((mpd->b_state & (1 << BH_Delay)) ||
2307 (mpd->b_state & (1 << BH_Unwritten)))
2308 mpage_put_bnr_to_bhs(mpd, &map);
2309
2310 if (ext4_should_order_data(mpd->inode)) { 2307 if (ext4_should_order_data(mpd->inode)) {
2311 err = ext4_jbd2_file_inode(handle, mpd->inode); 2308 err = ext4_jbd2_file_inode(handle, mpd->inode);
2312 if (err) 2309 if (err)
2313 return err; 2310 /* This only happens if the journal is aborted */
2311 return;
2314 } 2312 }
2315 2313
2316 /* 2314 /*
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2321 disksize = i_size_read(mpd->inode); 2319 disksize = i_size_read(mpd->inode);
2322 if (disksize > EXT4_I(mpd->inode)->i_disksize) { 2320 if (disksize > EXT4_I(mpd->inode)->i_disksize) {
2323 ext4_update_i_disksize(mpd->inode, disksize); 2321 ext4_update_i_disksize(mpd->inode, disksize);
2324 return ext4_mark_inode_dirty(handle, mpd->inode); 2322 err = ext4_mark_inode_dirty(handle, mpd->inode);
2323 if (err)
2324 ext4_error(mpd->inode->i_sb,
2325 "Failed to mark inode %lu dirty",
2326 mpd->inode->i_ino);
2325 } 2327 }
2326 2328
2327 return 0; 2329submit_io:
2330 mpage_da_submit_io(mpd, mapp);
2331 mpd->io_done = 1;
2328} 2332}
2329 2333
2330#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 2334#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2405,7 @@ flush_it:
2401 * We couldn't merge the block to our extent, so we 2405 * We couldn't merge the block to our extent, so we
2402 * need to flush current extent and start new one 2406 * need to flush current extent and start new one
2403 */ 2407 */
2404 if (mpage_da_map_blocks(mpd) == 0) 2408 mpage_da_map_and_submit(mpd);
2405 mpage_da_submit_io(mpd);
2406 mpd->io_done = 1;
2407 return; 2409 return;
2408} 2410}
2409 2411
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2422 * The function finds extents of pages and scan them for all blocks. 2424 * The function finds extents of pages and scan them for all blocks.
2423 */ 2425 */
2424static int __mpage_da_writepage(struct page *page, 2426static int __mpage_da_writepage(struct page *page,
2425 struct writeback_control *wbc, void *data) 2427 struct writeback_control *wbc,
2428 struct mpage_da_data *mpd)
2426{ 2429{
2427 struct mpage_da_data *mpd = data;
2428 struct inode *inode = mpd->inode; 2430 struct inode *inode = mpd->inode;
2429 struct buffer_head *bh, *head; 2431 struct buffer_head *bh, *head;
2430 sector_t logical; 2432 sector_t logical;
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page,
2435 if (mpd->next_page != page->index) { 2437 if (mpd->next_page != page->index) {
2436 /* 2438 /*
2437 * Nope, we can't. So, we map non-allocated blocks 2439 * Nope, we can't. So, we map non-allocated blocks
2438 * and start IO on them using writepage() 2440 * and start IO on them
2439 */ 2441 */
2440 if (mpd->next_page != mpd->first_page) { 2442 if (mpd->next_page != mpd->first_page) {
2441 if (mpage_da_map_blocks(mpd) == 0) 2443 mpage_da_map_and_submit(mpd);
2442 mpage_da_submit_io(mpd);
2443 /* 2444 /*
2444 * skip rest of the page in the page_vec 2445 * skip rest of the page in the page_vec
2445 */ 2446 */
2446 mpd->io_done = 1;
2447 redirty_page_for_writepage(wbc, page); 2447 redirty_page_for_writepage(wbc, page);
2448 unlock_page(page); 2448 unlock_page(page);
2449 return MPAGE_DA_EXTENT_TAIL; 2449 return MPAGE_DA_EXTENT_TAIL;
@@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page,
2622 int ret = 0; 2622 int ret = 0;
2623 int err; 2623 int err;
2624 2624
2625 ClearPageChecked(page);
2625 page_bufs = page_buffers(page); 2626 page_bufs = page_buffers(page);
2626 BUG_ON(!page_bufs); 2627 BUG_ON(!page_bufs);
2627 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); 2628 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2699static int ext4_writepage(struct page *page, 2700static int ext4_writepage(struct page *page,
2700 struct writeback_control *wbc) 2701 struct writeback_control *wbc)
2701{ 2702{
2702 int ret = 0; 2703 int ret = 0, commit_write = 0;
2703 loff_t size; 2704 loff_t size;
2704 unsigned int len; 2705 unsigned int len;
2705 struct buffer_head *page_bufs = NULL; 2706 struct buffer_head *page_bufs = NULL;
@@ -2712,71 +2713,44 @@ static int ext4_writepage(struct page *page,
2712 else 2713 else
2713 len = PAGE_CACHE_SIZE; 2714 len = PAGE_CACHE_SIZE;
2714 2715
2715 if (page_has_buffers(page)) { 2716 /*
2716 page_bufs = page_buffers(page); 2717 * If the page does not have buffers (for whatever reason),
2717 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2718 * try to create them using __block_write_begin. If this
2718 ext4_bh_delay_or_unwritten)) { 2719 * fails, redirty the page and move on.
2719 /* 2720 */
2720 * We don't want to do block allocation 2721 if (!page_has_buffers(page)) {
2721 * So redirty the page and return 2722 if (__block_write_begin(page, 0, len,
2722 * We may reach here when we do a journal commit 2723 noalloc_get_block_write)) {
2723 * via journal_submit_inode_data_buffers. 2724 redirty_page:
2724 * If we don't have mapping block we just ignore
2725 * them. We can also reach here via shrink_page_list
2726 */
2727 redirty_page_for_writepage(wbc, page); 2725 redirty_page_for_writepage(wbc, page);
2728 unlock_page(page); 2726 unlock_page(page);
2729 return 0; 2727 return 0;
2730 } 2728 }
2731 } else { 2729 commit_write = 1;
2730 }
2731 page_bufs = page_buffers(page);
2732 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2733 ext4_bh_delay_or_unwritten)) {
2732 /* 2734 /*
2733 * The test for page_has_buffers() is subtle: 2735 * We don't want to do block allocation, so redirty
2734 * We know the page is dirty but it lost buffers. That means 2736 * the page and return. We may reach here when we do
2735 * that at some moment in time after write_begin()/write_end() 2737 * a journal commit via journal_submit_inode_data_buffers.
2736 * has been called all buffers have been clean and thus they 2738 * We can also reach here via shrink_page_list
2737 * must have been written at least once. So they are all
2738 * mapped and we can happily proceed with mapping them
2739 * and writing the page.
2740 *
2741 * Try to initialize the buffer_heads and check whether
2742 * all are mapped and non delay. We don't want to
2743 * do block allocation here.
2744 */ 2739 */
2745 ret = __block_write_begin(page, 0, len, 2740 goto redirty_page;
2746 noalloc_get_block_write); 2741 }
2747 if (!ret) { 2742 if (commit_write)
2748 page_bufs = page_buffers(page);
2749 /* check whether all are mapped and non delay */
2750 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2751 ext4_bh_delay_or_unwritten)) {
2752 redirty_page_for_writepage(wbc, page);
2753 unlock_page(page);
2754 return 0;
2755 }
2756 } else {
2757 /*
2758 * We can't do block allocation here
2759 * so just redity the page and unlock
2760 * and return
2761 */
2762 redirty_page_for_writepage(wbc, page);
2763 unlock_page(page);
2764 return 0;
2765 }
2766 /* now mark the buffer_heads as dirty and uptodate */ 2743 /* now mark the buffer_heads as dirty and uptodate */
2767 block_commit_write(page, 0, len); 2744 block_commit_write(page, 0, len);
2768 }
2769 2745
2770 if (PageChecked(page) && ext4_should_journal_data(inode)) { 2746 if (PageChecked(page) && ext4_should_journal_data(inode))
2771 /* 2747 /*
2772 * It's mmapped pagecache. Add buffers and journal it. There 2748 * It's mmapped pagecache. Add buffers and journal it. There
2773 * doesn't seem much point in redirtying the page here. 2749 * doesn't seem much point in redirtying the page here.
2774 */ 2750 */
2775 ClearPageChecked(page);
2776 return __ext4_journalled_writepage(page, len); 2751 return __ext4_journalled_writepage(page, len);
2777 }
2778 2752
2779 if (page_bufs && buffer_uninit(page_bufs)) { 2753 if (buffer_uninit(page_bufs)) {
2780 ext4_set_bh_endio(page_bufs, inode); 2754 ext4_set_bh_endio(page_bufs, inode);
2781 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2755 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2782 wbc, ext4_end_io_buffer_write); 2756 wbc, ext4_end_io_buffer_write);
@@ -2823,25 +2797,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2823 */ 2797 */
2824static int write_cache_pages_da(struct address_space *mapping, 2798static int write_cache_pages_da(struct address_space *mapping,
2825 struct writeback_control *wbc, 2799 struct writeback_control *wbc,
2826 struct mpage_da_data *mpd) 2800 struct mpage_da_data *mpd,
2801 pgoff_t *done_index)
2827{ 2802{
2828 int ret = 0; 2803 int ret = 0;
2829 int done = 0; 2804 int done = 0;
2830 struct pagevec pvec; 2805 struct pagevec pvec;
2831 int nr_pages; 2806 unsigned nr_pages;
2832 pgoff_t index; 2807 pgoff_t index;
2833 pgoff_t end; /* Inclusive */ 2808 pgoff_t end; /* Inclusive */
2834 long nr_to_write = wbc->nr_to_write; 2809 long nr_to_write = wbc->nr_to_write;
2810 int tag;
2835 2811
2836 pagevec_init(&pvec, 0); 2812 pagevec_init(&pvec, 0);
2837 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2813 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2838 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2814 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2839 2815
2816 if (wbc->sync_mode == WB_SYNC_ALL)
2817 tag = PAGECACHE_TAG_TOWRITE;
2818 else
2819 tag = PAGECACHE_TAG_DIRTY;
2820
2821 *done_index = index;
2840 while (!done && (index <= end)) { 2822 while (!done && (index <= end)) {
2841 int i; 2823 int i;
2842 2824
2843 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2825 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2844 PAGECACHE_TAG_DIRTY,
2845 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2826 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2846 if (nr_pages == 0) 2827 if (nr_pages == 0)
2847 break; 2828 break;
@@ -2861,6 +2842,8 @@ static int write_cache_pages_da(struct address_space *mapping,
2861 break; 2842 break;
2862 } 2843 }
2863 2844
2845 *done_index = page->index + 1;
2846
2864 lock_page(page); 2847 lock_page(page);
2865 2848
2866 /* 2849 /*
@@ -2946,6 +2929,8 @@ static int ext4_da_writepages(struct address_space *mapping,
2946 long desired_nr_to_write, nr_to_writebump = 0; 2929 long desired_nr_to_write, nr_to_writebump = 0;
2947 loff_t range_start = wbc->range_start; 2930 loff_t range_start = wbc->range_start;
2948 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2931 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2932 pgoff_t done_index = 0;
2933 pgoff_t end;
2949 2934
2950 trace_ext4_da_writepages(inode, wbc); 2935 trace_ext4_da_writepages(inode, wbc);
2951 2936
@@ -2981,8 +2966,11 @@ static int ext4_da_writepages(struct address_space *mapping,
2981 wbc->range_start = index << PAGE_CACHE_SHIFT; 2966 wbc->range_start = index << PAGE_CACHE_SHIFT;
2982 wbc->range_end = LLONG_MAX; 2967 wbc->range_end = LLONG_MAX;
2983 wbc->range_cyclic = 0; 2968 wbc->range_cyclic = 0;
2984 } else 2969 end = -1;
2970 } else {
2985 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2971 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2972 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2973 }
2986 2974
2987 /* 2975 /*
2988 * This works around two forms of stupidity. The first is in 2976 * This works around two forms of stupidity. The first is in
@@ -3001,9 +2989,12 @@ static int ext4_da_writepages(struct address_space *mapping,
3001 * sbi->max_writeback_mb_bump whichever is smaller. 2989 * sbi->max_writeback_mb_bump whichever is smaller.
3002 */ 2990 */
3003 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); 2991 max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
3004 if (!range_cyclic && range_whole) 2992 if (!range_cyclic && range_whole) {
3005 desired_nr_to_write = wbc->nr_to_write * 8; 2993 if (wbc->nr_to_write == LONG_MAX)
3006 else 2994 desired_nr_to_write = wbc->nr_to_write;
2995 else
2996 desired_nr_to_write = wbc->nr_to_write * 8;
2997 } else
3007 desired_nr_to_write = ext4_num_dirty_pages(inode, index, 2998 desired_nr_to_write = ext4_num_dirty_pages(inode, index,
3008 max_pages); 2999 max_pages);
3009 if (desired_nr_to_write > max_pages) 3000 if (desired_nr_to_write > max_pages)
@@ -3020,6 +3011,9 @@ static int ext4_da_writepages(struct address_space *mapping,
3020 pages_skipped = wbc->pages_skipped; 3011 pages_skipped = wbc->pages_skipped;
3021 3012
3022retry: 3013retry:
3014 if (wbc->sync_mode == WB_SYNC_ALL)
3015 tag_pages_for_writeback(mapping, index, end);
3016
3023 while (!ret && wbc->nr_to_write > 0) { 3017 while (!ret && wbc->nr_to_write > 0) {
3024 3018
3025 /* 3019 /*
@@ -3058,16 +3052,14 @@ retry:
3058 mpd.io_done = 0; 3052 mpd.io_done = 0;
3059 mpd.pages_written = 0; 3053 mpd.pages_written = 0;
3060 mpd.retval = 0; 3054 mpd.retval = 0;
3061 ret = write_cache_pages_da(mapping, wbc, &mpd); 3055 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
3062 /* 3056 /*
3063 * If we have a contiguous extent of pages and we 3057 * If we have a contiguous extent of pages and we
3064 * haven't done the I/O yet, map the blocks and submit 3058 * haven't done the I/O yet, map the blocks and submit
3065 * them for I/O. 3059 * them for I/O.
3066 */ 3060 */
3067 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 3061 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
3068 if (mpage_da_map_blocks(&mpd) == 0) 3062 mpage_da_map_and_submit(&mpd);
3069 mpage_da_submit_io(&mpd);
3070 mpd.io_done = 1;
3071 ret = MPAGE_DA_EXTENT_TAIL; 3063 ret = MPAGE_DA_EXTENT_TAIL;
3072 } 3064 }
3073 trace_ext4_da_write_pages(inode, &mpd); 3065 trace_ext4_da_write_pages(inode, &mpd);
@@ -3114,14 +3106,13 @@ retry:
3114 __func__, wbc->nr_to_write, ret); 3106 __func__, wbc->nr_to_write, ret);
3115 3107
3116 /* Update index */ 3108 /* Update index */
3117 index += pages_written;
3118 wbc->range_cyclic = range_cyclic; 3109 wbc->range_cyclic = range_cyclic;
3119 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 3110 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3120 /* 3111 /*
3121 * set the writeback_index so that range_cyclic 3112 * set the writeback_index so that range_cyclic
3122 * mode will write it back later 3113 * mode will write it back later
3123 */ 3114 */
3124 mapping->writeback_index = index; 3115 mapping->writeback_index = done_index;
3125 3116
3126out_writepages: 3117out_writepages:
3127 wbc->nr_to_write -= nr_to_writebump; 3118 wbc->nr_to_write -= nr_to_writebump;
@@ -3456,15 +3447,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3456 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3447 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3457} 3448}
3458 3449
3459static void ext4_free_io_end(ext4_io_end_t *io)
3460{
3461 BUG_ON(!io);
3462 if (io->page)
3463 put_page(io->page);
3464 iput(io->inode);
3465 kfree(io);
3466}
3467
3468static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) 3450static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3469{ 3451{
3470 struct buffer_head *head, *bh; 3452 struct buffer_head *head, *bh;
@@ -3641,173 +3623,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3641 EXT4_GET_BLOCKS_IO_CREATE_EXT); 3623 EXT4_GET_BLOCKS_IO_CREATE_EXT);
3642} 3624}
3643 3625
3644static void dump_completed_IO(struct inode * inode)
3645{
3646#ifdef EXT4_DEBUG
3647 struct list_head *cur, *before, *after;
3648 ext4_io_end_t *io, *io0, *io1;
3649 unsigned long flags;
3650
3651 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3652 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3653 return;
3654 }
3655
3656 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3657 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3658 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3659 cur = &io->list;
3660 before = cur->prev;
3661 io0 = container_of(before, ext4_io_end_t, list);
3662 after = cur->next;
3663 io1 = container_of(after, ext4_io_end_t, list);
3664
3665 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3666 io, inode->i_ino, io0, io1);
3667 }
3668 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3669#endif
3670}
3671
3672/*
3673 * check a range of space and convert unwritten extents to written.
3674 */
3675static int ext4_end_io_nolock(ext4_io_end_t *io)
3676{
3677 struct inode *inode = io->inode;
3678 loff_t offset = io->offset;
3679 ssize_t size = io->size;
3680 int ret = 0;
3681
3682 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3683 "list->prev 0x%p\n",
3684 io, inode->i_ino, io->list.next, io->list.prev);
3685
3686 if (list_empty(&io->list))
3687 return ret;
3688
3689 if (io->flag != EXT4_IO_UNWRITTEN)
3690 return ret;
3691
3692 ret = ext4_convert_unwritten_extents(inode, offset, size);
3693 if (ret < 0) {
3694 printk(KERN_EMERG "%s: failed to convert unwritten"
3695 "extents to written extents, error is %d"
3696 " io is still on inode %lu aio dio list\n",
3697 __func__, ret, inode->i_ino);
3698 return ret;
3699 }
3700
3701 if (io->iocb)
3702 aio_complete(io->iocb, io->result, 0);
3703 /* clear the DIO AIO unwritten flag */
3704 io->flag = 0;
3705 return ret;
3706}
3707
3708/*
3709 * work on completed aio dio IO, to convert unwritten extents to extents
3710 */
3711static void ext4_end_io_work(struct work_struct *work)
3712{
3713 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3714 struct inode *inode = io->inode;
3715 struct ext4_inode_info *ei = EXT4_I(inode);
3716 unsigned long flags;
3717 int ret;
3718
3719 mutex_lock(&inode->i_mutex);
3720 ret = ext4_end_io_nolock(io);
3721 if (ret < 0) {
3722 mutex_unlock(&inode->i_mutex);
3723 return;
3724 }
3725
3726 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3727 if (!list_empty(&io->list))
3728 list_del_init(&io->list);
3729 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3730 mutex_unlock(&inode->i_mutex);
3731 ext4_free_io_end(io);
3732}
3733
3734/*
3735 * This function is called from ext4_sync_file().
3736 *
3737 * When IO is completed, the work to convert unwritten extents to
3738 * written is queued on workqueue but may not get immediately
3739 * scheduled. When fsync is called, we need to ensure the
3740 * conversion is complete before fsync returns.
3741 * The inode keeps track of a list of pending/completed IO that
3742 * might needs to do the conversion. This function walks through
3743 * the list and convert the related unwritten extents for completed IO
3744 * to written.
3745 * The function return the number of pending IOs on success.
3746 */
3747int flush_completed_IO(struct inode *inode)
3748{
3749 ext4_io_end_t *io;
3750 struct ext4_inode_info *ei = EXT4_I(inode);
3751 unsigned long flags;
3752 int ret = 0;
3753 int ret2 = 0;
3754
3755 if (list_empty(&ei->i_completed_io_list))
3756 return ret;
3757
3758 dump_completed_IO(inode);
3759 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3760 while (!list_empty(&ei->i_completed_io_list)){
3761 io = list_entry(ei->i_completed_io_list.next,
3762 ext4_io_end_t, list);
3763 /*
3764 * Calling ext4_end_io_nolock() to convert completed
3765 * IO to written.
3766 *
3767 * When ext4_sync_file() is called, run_queue() may already
3768 * about to flush the work corresponding to this io structure.
3769 * It will be upset if it founds the io structure related
3770 * to the work-to-be schedule is freed.
3771 *
3772 * Thus we need to keep the io structure still valid here after
3773 * convertion finished. The io structure has a flag to
3774 * avoid double converting from both fsync and background work
3775 * queue work.
3776 */
3777 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3778 ret = ext4_end_io_nolock(io);
3779 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3780 if (ret < 0)
3781 ret2 = ret;
3782 else
3783 list_del_init(&io->list);
3784 }
3785 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3786 return (ret2 < 0) ? ret2 : 0;
3787}
3788
3789static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3790{
3791 ext4_io_end_t *io = NULL;
3792
3793 io = kmalloc(sizeof(*io), flags);
3794
3795 if (io) {
3796 igrab(inode);
3797 io->inode = inode;
3798 io->flag = 0;
3799 io->offset = 0;
3800 io->size = 0;
3801 io->page = NULL;
3802 io->iocb = NULL;
3803 io->result = 0;
3804 INIT_WORK(&io->work, ext4_end_io_work);
3805 INIT_LIST_HEAD(&io->list);
3806 }
3807
3808 return io;
3809}
3810
3811static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3626static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3812 ssize_t size, void *private, int ret, 3627 ssize_t size, void *private, int ret,
3813 bool is_async) 3628 bool is_async)
@@ -3827,7 +3642,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3827 size); 3642 size);
3828 3643
3829 /* if not aio dio with unwritten extents, just free io and return */ 3644 /* if not aio dio with unwritten extents, just free io and return */
3830 if (io_end->flag != EXT4_IO_UNWRITTEN){ 3645 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3831 ext4_free_io_end(io_end); 3646 ext4_free_io_end(io_end);
3832 iocb->private = NULL; 3647 iocb->private = NULL;
3833out: 3648out:
@@ -3844,14 +3659,14 @@ out:
3844 } 3659 }
3845 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3660 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3846 3661
3847 /* queue the work to convert unwritten extents to written */
3848 queue_work(wq, &io_end->work);
3849
3850 /* Add the io_end to per-inode completed aio dio list*/ 3662 /* Add the io_end to per-inode completed aio dio list*/
3851 ei = EXT4_I(io_end->inode); 3663 ei = EXT4_I(io_end->inode);
3852 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 3664 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3853 list_add_tail(&io_end->list, &ei->i_completed_io_list); 3665 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3854 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 3666 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3667
3668 /* queue the work to convert unwritten extents to written */
3669 queue_work(wq, &io_end->work);
3855 iocb->private = NULL; 3670 iocb->private = NULL;
3856} 3671}
3857 3672
@@ -3872,7 +3687,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3872 goto out; 3687 goto out;
3873 } 3688 }
3874 3689
3875 io_end->flag = EXT4_IO_UNWRITTEN; 3690 io_end->flag = EXT4_IO_END_UNWRITTEN;
3876 inode = io_end->inode; 3691 inode = io_end->inode;
3877 3692
3878 /* Add the io_end to per-inode completed io list*/ 3693 /* Add the io_end to per-inode completed io list*/
@@ -5463,6 +5278,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5463{ 5278{
5464 struct inode *inode = dentry->d_inode; 5279 struct inode *inode = dentry->d_inode;
5465 int error, rc = 0; 5280 int error, rc = 0;
5281 int orphan = 0;
5466 const unsigned int ia_valid = attr->ia_valid; 5282 const unsigned int ia_valid = attr->ia_valid;
5467 5283
5468 error = inode_change_ok(inode, attr); 5284 error = inode_change_ok(inode, attr);
@@ -5518,8 +5334,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5518 error = PTR_ERR(handle); 5334 error = PTR_ERR(handle);
5519 goto err_out; 5335 goto err_out;
5520 } 5336 }
5521 5337 if (ext4_handle_valid(handle)) {
5522 error = ext4_orphan_add(handle, inode); 5338 error = ext4_orphan_add(handle, inode);
5339 orphan = 1;
5340 }
5523 EXT4_I(inode)->i_disksize = attr->ia_size; 5341 EXT4_I(inode)->i_disksize = attr->ia_size;
5524 rc = ext4_mark_inode_dirty(handle, inode); 5342 rc = ext4_mark_inode_dirty(handle, inode);
5525 if (!error) 5343 if (!error)
@@ -5537,6 +5355,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5537 goto err_out; 5355 goto err_out;
5538 } 5356 }
5539 ext4_orphan_del(handle, inode); 5357 ext4_orphan_del(handle, inode);
5358 orphan = 0;
5540 ext4_journal_stop(handle); 5359 ext4_journal_stop(handle);
5541 goto err_out; 5360 goto err_out;
5542 } 5361 }
@@ -5559,7 +5378,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5559 * If the call to ext4_truncate failed to get a transaction handle at 5378 * If the call to ext4_truncate failed to get a transaction handle at
5560 * all, we need to clean up the in-core orphan list manually. 5379 * all, we need to clean up the in-core orphan list manually.
5561 */ 5380 */
5562 if (inode->i_nlink) 5381 if (orphan && inode->i_nlink)
5563 ext4_orphan_del(NULL, inode); 5382 ext4_orphan_del(NULL, inode);
5564 5383
5565 if (!rc && (ia_valid & ATTR_MODE)) 5384 if (!rc && (ia_valid & ATTR_MODE))
@@ -5642,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5642 * 5461 *
5643 * Also account for superblock, inode, quota and xattr blocks 5462 * Also account for superblock, inode, quota and xattr blocks
5644 */ 5463 */
5645int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) 5464static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5646{ 5465{
5647 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 5466 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
5648 int gdpblocks; 5467 int gdpblocks;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 42f77b1dc72d..c58eba34724a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
338static struct kmem_cache *ext4_pspace_cachep; 338static struct kmem_cache *ext4_pspace_cachep;
339static struct kmem_cache *ext4_ac_cachep; 339static struct kmem_cache *ext4_ac_cachep;
340static struct kmem_cache *ext4_free_ext_cachep; 340static struct kmem_cache *ext4_free_ext_cachep;
341
342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
341static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
342 ext4_group_t group); 350 ext4_group_t group);
343static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
939} 947}
940 948
941/* 949/*
950 * lock the group_info alloc_sem of all the groups
951 * belonging to the same buddy cache page. This
952 * make sure other parallel operation on the buddy
953 * cache doesn't happen whild holding the buddy cache
954 * lock
955 */
956static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
957 ext4_group_t group)
958{
959 int i;
960 int block, pnum;
961 int blocks_per_page;
962 int groups_per_page;
963 ext4_group_t ngroups = ext4_get_groups_count(sb);
964 ext4_group_t first_group;
965 struct ext4_group_info *grp;
966
967 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
968 /*
969 * the buddy cache inode stores the block bitmap
970 * and buddy information in consecutive blocks.
971 * So for each group we need two blocks.
972 */
973 block = group * 2;
974 pnum = block / blocks_per_page;
975 first_group = pnum * blocks_per_page / 2;
976
977 groups_per_page = blocks_per_page >> 1;
978 if (groups_per_page == 0)
979 groups_per_page = 1;
980 /* read all groups the page covers into the cache */
981 for (i = 0; i < groups_per_page; i++) {
982
983 if ((first_group + i) >= ngroups)
984 break;
985 grp = ext4_get_group_info(sb, first_group + i);
986 /* take all groups write allocation
987 * semaphore. This make sure there is
988 * no block allocation going on in any
989 * of that groups
990 */
991 down_write_nested(&grp->alloc_sem, i);
992 }
993 return i;
994}
995
996static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
997 ext4_group_t group, int locked_group)
998{
999 int i;
1000 int block, pnum;
1001 int blocks_per_page;
1002 ext4_group_t first_group;
1003 struct ext4_group_info *grp;
1004
1005 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1006 /*
1007 * the buddy cache inode stores the block bitmap
1008 * and buddy information in consecutive blocks.
1009 * So for each group we need two blocks.
1010 */
1011 block = group * 2;
1012 pnum = block / blocks_per_page;
1013 first_group = pnum * blocks_per_page / 2;
1014 /* release locks on all the groups */
1015 for (i = 0; i < locked_group; i++) {
1016
1017 grp = ext4_get_group_info(sb, first_group + i);
1018 /* take all groups write allocation
1019 * semaphore. This make sure there is
1020 * no block allocation going on in any
1021 * of that groups
1022 */
1023 up_write(&grp->alloc_sem);
1024 }
1025
1026}
1027
1028/*
942 * Locking note: This routine calls ext4_mb_init_cache(), which takes the 1029 * Locking note: This routine calls ext4_mb_init_cache(), which takes the
943 * block group lock of all groups for this page; do not hold the BG lock when 1030 * block group lock of all groups for this page; do not hold the BG lock when
944 * calling this routine! 1031 * calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1915 return 0; 2002 return 0;
1916} 2003}
1917 2004
1918/*
1919 * lock the group_info alloc_sem of all the groups
1920 * belonging to the same buddy cache page. This
1921 * make sure other parallel operation on the buddy
1922 * cache doesn't happen whild holding the buddy cache
1923 * lock
1924 */
1925int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1926{
1927 int i;
1928 int block, pnum;
1929 int blocks_per_page;
1930 int groups_per_page;
1931 ext4_group_t ngroups = ext4_get_groups_count(sb);
1932 ext4_group_t first_group;
1933 struct ext4_group_info *grp;
1934
1935 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1936 /*
1937 * the buddy cache inode stores the block bitmap
1938 * and buddy information in consecutive blocks.
1939 * So for each group we need two blocks.
1940 */
1941 block = group * 2;
1942 pnum = block / blocks_per_page;
1943 first_group = pnum * blocks_per_page / 2;
1944
1945 groups_per_page = blocks_per_page >> 1;
1946 if (groups_per_page == 0)
1947 groups_per_page = 1;
1948 /* read all groups the page covers into the cache */
1949 for (i = 0; i < groups_per_page; i++) {
1950
1951 if ((first_group + i) >= ngroups)
1952 break;
1953 grp = ext4_get_group_info(sb, first_group + i);
1954 /* take all groups write allocation
1955 * semaphore. This make sure there is
1956 * no block allocation going on in any
1957 * of that groups
1958 */
1959 down_write_nested(&grp->alloc_sem, i);
1960 }
1961 return i;
1962}
1963
1964void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1965 ext4_group_t group, int locked_group)
1966{
1967 int i;
1968 int block, pnum;
1969 int blocks_per_page;
1970 ext4_group_t first_group;
1971 struct ext4_group_info *grp;
1972
1973 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1974 /*
1975 * the buddy cache inode stores the block bitmap
1976 * and buddy information in consecutive blocks.
1977 * So for each group we need two blocks.
1978 */
1979 block = group * 2;
1980 pnum = block / blocks_per_page;
1981 first_group = pnum * blocks_per_page / 2;
1982 /* release locks on all the groups */
1983 for (i = 0; i < locked_group; i++) {
1984
1985 grp = ext4_get_group_info(sb, first_group + i);
1986 /* take all groups write allocation
1987 * semaphore. This make sure there is
1988 * no block allocation going on in any
1989 * of that groups
1990 */
1991 up_write(&grp->alloc_sem);
1992 }
1993
1994}
1995
1996static noinline_for_stack int 2005static noinline_for_stack int
1997ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 2006ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1998{ 2007{
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
2233 .release = seq_release, 2242 .release = seq_release,
2234}; 2243};
2235 2244
2245static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2246{
2247 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2248 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2249
2250 BUG_ON(!cachep);
2251 return cachep;
2252}
2236 2253
2237/* Create and initialize ext4_group_info data for the given group. */ 2254/* Create and initialize ext4_group_info data for the given group. */
2238int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2255int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2239 struct ext4_group_desc *desc) 2256 struct ext4_group_desc *desc)
2240{ 2257{
2241 int i, len; 2258 int i;
2242 int metalen = 0; 2259 int metalen = 0;
2243 struct ext4_sb_info *sbi = EXT4_SB(sb); 2260 struct ext4_sb_info *sbi = EXT4_SB(sb);
2244 struct ext4_group_info **meta_group_info; 2261 struct ext4_group_info **meta_group_info;
2262 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2245 2263
2246 /* 2264 /*
2247 * First check if this group is the first of a reserved block. 2265 * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2261 meta_group_info; 2279 meta_group_info;
2262 } 2280 }
2263 2281
2264 /*
2265 * calculate needed size. if change bb_counters size,
2266 * don't forget about ext4_mb_generate_buddy()
2267 */
2268 len = offsetof(typeof(**meta_group_info),
2269 bb_counters[sb->s_blocksize_bits + 2]);
2270
2271 meta_group_info = 2282 meta_group_info =
2272 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2283 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2273 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2284 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2274 2285
2275 meta_group_info[i] = kzalloc(len, GFP_KERNEL); 2286 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2276 if (meta_group_info[i] == NULL) { 2287 if (meta_group_info[i] == NULL) {
2277 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2288 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2278 goto exit_group_info; 2289 goto exit_group_info;
2279 } 2290 }
2291 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2280 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2292 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2281 &(meta_group_info[i]->bb_state)); 2293 &(meta_group_info[i]->bb_state));
2282 2294
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2331 int num_meta_group_infos_max; 2343 int num_meta_group_infos_max;
2332 int array_size; 2344 int array_size;
2333 struct ext4_group_desc *desc; 2345 struct ext4_group_desc *desc;
2346 struct kmem_cache *cachep;
2334 2347
2335 /* This is the number of blocks used by GDT */ 2348 /* This is the number of blocks used by GDT */
2336 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2349 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
2389 return 0; 2402 return 0;
2390 2403
2391err_freebuddy: 2404err_freebuddy:
2405 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2392 while (i-- > 0) 2406 while (i-- > 0)
2393 kfree(ext4_get_group_info(sb, i)); 2407 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2394 i = num_meta_group_infos; 2408 i = num_meta_group_infos;
2395 while (i-- > 0) 2409 while (i-- > 0)
2396 kfree(sbi->s_group_info[i]); 2410 kfree(sbi->s_group_info[i]);
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2407 unsigned offset; 2421 unsigned offset;
2408 unsigned max; 2422 unsigned max;
2409 int ret; 2423 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2410 2427
2411 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2412 2429
2413 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2430 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2414 if (sbi->s_mb_offsets == NULL) { 2431 if (sbi->s_mb_offsets == NULL) {
2415 return -ENOMEM; 2432 ret = -ENOMEM;
2433 goto out;
2416 } 2434 }
2417 2435
2418 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); 2436 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2419 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2437 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2420 if (sbi->s_mb_maxs == NULL) { 2438 if (sbi->s_mb_maxs == NULL) {
2421 kfree(sbi->s_mb_offsets); 2439 ret = -ENOMEM;
2422 return -ENOMEM; 2440 goto out;
2441 }
2442
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2444 cachep = ext4_groupinfo_caches[cache_index];
2445 if (!cachep) {
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2423 } 2466 }
2424 2467
2425 /* order 0 is regular bitmap */ 2468 /* order 0 is regular bitmap */
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 /* init file for buddy data */ 2483 /* init file for buddy data */
2441 ret = ext4_mb_init_backend(sb); 2484 ret = ext4_mb_init_backend(sb);
2442 if (ret != 0) { 2485 if (ret != 0) {
2443 kfree(sbi->s_mb_offsets); 2486 goto out;
2444 kfree(sbi->s_mb_maxs);
2445 return ret;
2446 } 2487 }
2447 2488
2448 spin_lock_init(&sbi->s_md_lock); 2489 spin_lock_init(&sbi->s_md_lock);
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2457 2498
2458 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2499 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2459 if (sbi->s_locality_groups == NULL) { 2500 if (sbi->s_locality_groups == NULL) {
2460 kfree(sbi->s_mb_offsets); 2501 ret = -ENOMEM;
2461 kfree(sbi->s_mb_maxs); 2502 goto out;
2462 return -ENOMEM;
2463 } 2503 }
2464 for_each_possible_cpu(i) { 2504 for_each_possible_cpu(i) {
2465 struct ext4_locality_group *lg; 2505 struct ext4_locality_group *lg;
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2476 2516
2477 if (sbi->s_journal) 2517 if (sbi->s_journal)
2478 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2518 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2479 return 0; 2519out:
2520 if (ret) {
2521 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 }
2525 return ret;
2480} 2526}
2481 2527
2482/* need to called with the ext4 group lock held */ 2528/* need to called with the ext4 group lock held */
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
2504 int num_meta_group_infos; 2550 int num_meta_group_infos;
2505 struct ext4_group_info *grinfo; 2551 struct ext4_group_info *grinfo;
2506 struct ext4_sb_info *sbi = EXT4_SB(sb); 2552 struct ext4_sb_info *sbi = EXT4_SB(sb);
2553 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2507 2554
2508 if (sbi->s_group_info) { 2555 if (sbi->s_group_info) {
2509 for (i = 0; i < ngroups; i++) { 2556 for (i = 0; i < ngroups; i++) {
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
2514 ext4_lock_group(sb, i); 2561 ext4_lock_group(sb, i);
2515 ext4_mb_cleanup_pa(grinfo); 2562 ext4_mb_cleanup_pa(grinfo);
2516 ext4_unlock_group(sb, i); 2563 ext4_unlock_group(sb, i);
2517 kfree(grinfo); 2564 kmem_cache_free(cachep, grinfo);
2518 } 2565 }
2519 num_meta_group_infos = (ngroups + 2566 num_meta_group_infos = (ngroups +
2520 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2567 EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
2558 return 0; 2605 return 0;
2559} 2606}
2560 2607
2561static inline void ext4_issue_discard(struct super_block *sb, 2608static inline int ext4_issue_discard(struct super_block *sb,
2562 ext4_group_t block_group, ext4_grpblk_t block, int count) 2609 ext4_group_t block_group, ext4_grpblk_t block, int count)
2563{ 2610{
2564 int ret; 2611 int ret;
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
2568 trace_ext4_discard_blocks(sb, 2615 trace_ext4_discard_blocks(sb,
2569 (unsigned long long) discard_block, count); 2616 (unsigned long long) discard_block, count);
2570 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2617 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2571 if (ret == EOPNOTSUPP) { 2618 if (ret == -EOPNOTSUPP) {
2572 ext4_warning(sb, "discard not supported, disabling"); 2619 ext4_warning(sb, "discard not supported, disabling");
2573 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); 2620 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2574 } 2621 }
2622 return ret;
2575} 2623}
2576 2624
2577/* 2625/*
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
2659 2707
2660#endif 2708#endif
2661 2709
2662int __init init_ext4_mballoc(void) 2710int __init ext4_init_mballoc(void)
2663{ 2711{
2664 ext4_pspace_cachep = 2712 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2665 kmem_cache_create("ext4_prealloc_space", 2713 SLAB_RECLAIM_ACCOUNT);
2666 sizeof(struct ext4_prealloc_space),
2667 0, SLAB_RECLAIM_ACCOUNT, NULL);
2668 if (ext4_pspace_cachep == NULL) 2714 if (ext4_pspace_cachep == NULL)
2669 return -ENOMEM; 2715 return -ENOMEM;
2670 2716
2671 ext4_ac_cachep = 2717 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2672 kmem_cache_create("ext4_alloc_context", 2718 SLAB_RECLAIM_ACCOUNT);
2673 sizeof(struct ext4_allocation_context),
2674 0, SLAB_RECLAIM_ACCOUNT, NULL);
2675 if (ext4_ac_cachep == NULL) { 2719 if (ext4_ac_cachep == NULL) {
2676 kmem_cache_destroy(ext4_pspace_cachep); 2720 kmem_cache_destroy(ext4_pspace_cachep);
2677 return -ENOMEM; 2721 return -ENOMEM;
2678 } 2722 }
2679 2723
2680 ext4_free_ext_cachep = 2724 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2681 kmem_cache_create("ext4_free_block_extents", 2725 SLAB_RECLAIM_ACCOUNT);
2682 sizeof(struct ext4_free_data),
2683 0, SLAB_RECLAIM_ACCOUNT, NULL);
2684 if (ext4_free_ext_cachep == NULL) { 2726 if (ext4_free_ext_cachep == NULL) {
2685 kmem_cache_destroy(ext4_pspace_cachep); 2727 kmem_cache_destroy(ext4_pspace_cachep);
2686 kmem_cache_destroy(ext4_ac_cachep); 2728 kmem_cache_destroy(ext4_ac_cachep);
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void)
2690 return 0; 2732 return 0;
2691} 2733}
2692 2734
2693void exit_ext4_mballoc(void) 2735void ext4_exit_mballoc(void)
2694{ 2736{
2737 int i;
2695 /* 2738 /*
2696 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2697 * before destroying the slab cache. 2740 * before destroying the slab cache.
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void)
2700 kmem_cache_destroy(ext4_pspace_cachep); 2743 kmem_cache_destroy(ext4_pspace_cachep);
2701 kmem_cache_destroy(ext4_ac_cachep); 2744 kmem_cache_destroy(ext4_ac_cachep);
2702 kmem_cache_destroy(ext4_free_ext_cachep); 2745 kmem_cache_destroy(ext4_free_ext_cachep);
2746
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2703 ext4_remove_debugfs_entry(); 2755 ext4_remove_debugfs_entry();
2704} 2756}
2705 2757
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3536 */ 3588 */
3537static noinline_for_stack int 3589static noinline_for_stack int
3538ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, 3590ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3539 struct ext4_prealloc_space *pa, 3591 struct ext4_prealloc_space *pa)
3540 struct ext4_allocation_context *ac)
3541{ 3592{
3542 struct super_block *sb = e4b->bd_sb; 3593 struct super_block *sb = e4b->bd_sb;
3543 struct ext4_sb_info *sbi = EXT4_SB(sb); 3594 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3555 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3606 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3556 end = bit + pa->pa_len; 3607 end = bit + pa->pa_len;
3557 3608
3558 if (ac) {
3559 ac->ac_sb = sb;
3560 ac->ac_inode = pa->pa_inode;
3561 }
3562
3563 while (bit < end) { 3609 while (bit < end) {
3564 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3610 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3565 if (bit >= end) 3611 if (bit >= end)
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3570 (unsigned) next - bit, (unsigned) group); 3616 (unsigned) next - bit, (unsigned) group);
3571 free += next - bit; 3617 free += next - bit;
3572 3618
3573 if (ac) { 3619 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3574 ac->ac_b_ex.fe_group = group; 3620 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
3575 ac->ac_b_ex.fe_start = bit; 3621 grp_blk_start + bit, next - bit);
3576 ac->ac_b_ex.fe_len = next - bit;
3577 ac->ac_b_ex.fe_logical = 0;
3578 trace_ext4_mballoc_discard(ac);
3579 }
3580
3581 trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
3582 next - bit);
3583 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3622 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3584 bit = next + 1; 3623 bit = next + 1;
3585 } 3624 }
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3602 3641
3603static noinline_for_stack int 3642static noinline_for_stack int
3604ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3643ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3605 struct ext4_prealloc_space *pa, 3644 struct ext4_prealloc_space *pa)
3606 struct ext4_allocation_context *ac)
3607{ 3645{
3608 struct super_block *sb = e4b->bd_sb; 3646 struct super_block *sb = e4b->bd_sb;
3609 ext4_group_t group; 3647 ext4_group_t group;
3610 ext4_grpblk_t bit; 3648 ext4_grpblk_t bit;
3611 3649
3612 trace_ext4_mb_release_group_pa(sb, ac, pa); 3650 trace_ext4_mb_release_group_pa(sb, pa);
3613 BUG_ON(pa->pa_deleted == 0); 3651 BUG_ON(pa->pa_deleted == 0);
3614 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3652 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3615 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3653 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3616 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3654 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3617 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3655 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3618 3656 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3619 if (ac) {
3620 ac->ac_sb = sb;
3621 ac->ac_inode = NULL;
3622 ac->ac_b_ex.fe_group = group;
3623 ac->ac_b_ex.fe_start = bit;
3624 ac->ac_b_ex.fe_len = pa->pa_len;
3625 ac->ac_b_ex.fe_logical = 0;
3626 trace_ext4_mballoc_discard(ac);
3627 }
3628 3657
3629 return 0; 3658 return 0;
3630} 3659}
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3645 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3674 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3646 struct buffer_head *bitmap_bh = NULL; 3675 struct buffer_head *bitmap_bh = NULL;
3647 struct ext4_prealloc_space *pa, *tmp; 3676 struct ext4_prealloc_space *pa, *tmp;
3648 struct ext4_allocation_context *ac;
3649 struct list_head list; 3677 struct list_head list;
3650 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
3651 int err; 3679 int err;
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3674 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3702 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3675 3703
3676 INIT_LIST_HEAD(&list); 3704 INIT_LIST_HEAD(&list);
3677 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3678 if (ac)
3679 ac->ac_sb = sb;
3680repeat: 3705repeat:
3681 ext4_lock_group(sb, group); 3706 ext4_lock_group(sb, group);
3682 list_for_each_entry_safe(pa, tmp, 3707 list_for_each_entry_safe(pa, tmp,
@@ -3731,9 +3756,9 @@ repeat:
3731 spin_unlock(pa->pa_obj_lock); 3756 spin_unlock(pa->pa_obj_lock);
3732 3757
3733 if (pa->pa_type == MB_GROUP_PA) 3758 if (pa->pa_type == MB_GROUP_PA)
3734 ext4_mb_release_group_pa(&e4b, pa, ac); 3759 ext4_mb_release_group_pa(&e4b, pa);
3735 else 3760 else
3736 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3761 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3737 3762
3738 list_del(&pa->u.pa_tmp_list); 3763 list_del(&pa->u.pa_tmp_list);
3739 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3764 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3741,8 +3766,6 @@ repeat:
3741 3766
3742out: 3767out:
3743 ext4_unlock_group(sb, group); 3768 ext4_unlock_group(sb, group);
3744 if (ac)
3745 kmem_cache_free(ext4_ac_cachep, ac);
3746 ext4_mb_unload_buddy(&e4b); 3769 ext4_mb_unload_buddy(&e4b);
3747 put_bh(bitmap_bh); 3770 put_bh(bitmap_bh);
3748 return free; 3771 return free;
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
3763 struct super_block *sb = inode->i_sb; 3786 struct super_block *sb = inode->i_sb;
3764 struct buffer_head *bitmap_bh = NULL; 3787 struct buffer_head *bitmap_bh = NULL;
3765 struct ext4_prealloc_space *pa, *tmp; 3788 struct ext4_prealloc_space *pa, *tmp;
3766 struct ext4_allocation_context *ac;
3767 ext4_group_t group = 0; 3789 ext4_group_t group = 0;
3768 struct list_head list; 3790 struct list_head list;
3769 struct ext4_buddy e4b; 3791 struct ext4_buddy e4b;
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
3779 3801
3780 INIT_LIST_HEAD(&list); 3802 INIT_LIST_HEAD(&list);
3781 3803
3782 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3783 if (ac) {
3784 ac->ac_sb = sb;
3785 ac->ac_inode = inode;
3786 }
3787repeat: 3804repeat:
3788 /* first, collect all pa's in the inode */ 3805 /* first, collect all pa's in the inode */
3789 spin_lock(&ei->i_prealloc_lock); 3806 spin_lock(&ei->i_prealloc_lock);
@@ -3853,7 +3870,7 @@ repeat:
3853 3870
3854 ext4_lock_group(sb, group); 3871 ext4_lock_group(sb, group);
3855 list_del(&pa->pa_group_list); 3872 list_del(&pa->pa_group_list);
3856 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3873 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3857 ext4_unlock_group(sb, group); 3874 ext4_unlock_group(sb, group);
3858 3875
3859 ext4_mb_unload_buddy(&e4b); 3876 ext4_mb_unload_buddy(&e4b);
@@ -3862,8 +3879,6 @@ repeat:
3862 list_del(&pa->u.pa_tmp_list); 3879 list_del(&pa->u.pa_tmp_list);
3863 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3880 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3864 } 3881 }
3865 if (ac)
3866 kmem_cache_free(ext4_ac_cachep, ac);
3867} 3882}
3868 3883
3869/* 3884/*
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4061 struct ext4_buddy e4b; 4076 struct ext4_buddy e4b;
4062 struct list_head discard_list; 4077 struct list_head discard_list;
4063 struct ext4_prealloc_space *pa, *tmp; 4078 struct ext4_prealloc_space *pa, *tmp;
4064 struct ext4_allocation_context *ac;
4065 4079
4066 mb_debug(1, "discard locality group preallocation\n"); 4080 mb_debug(1, "discard locality group preallocation\n");
4067 4081
4068 INIT_LIST_HEAD(&discard_list); 4082 INIT_LIST_HEAD(&discard_list);
4069 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4070 if (ac)
4071 ac->ac_sb = sb;
4072 4083
4073 spin_lock(&lg->lg_prealloc_lock); 4084 spin_lock(&lg->lg_prealloc_lock);
4074 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4085 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4120 } 4131 }
4121 ext4_lock_group(sb, group); 4132 ext4_lock_group(sb, group);
4122 list_del(&pa->pa_group_list); 4133 list_del(&pa->pa_group_list);
4123 ext4_mb_release_group_pa(&e4b, pa, ac); 4134 ext4_mb_release_group_pa(&e4b, pa);
4124 ext4_unlock_group(sb, group); 4135 ext4_unlock_group(sb, group);
4125 4136
4126 ext4_mb_unload_buddy(&e4b); 4137 ext4_mb_unload_buddy(&e4b);
4127 list_del(&pa->u.pa_tmp_list); 4138 list_del(&pa->u.pa_tmp_list);
4128 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 4139 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4129 } 4140 }
4130 if (ac)
4131 kmem_cache_free(ext4_ac_cachep, ac);
4132} 4141}
4133 4142
4134/* 4143/*
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4492{ 4501{
4493 struct buffer_head *bitmap_bh = NULL; 4502 struct buffer_head *bitmap_bh = NULL;
4494 struct super_block *sb = inode->i_sb; 4503 struct super_block *sb = inode->i_sb;
4495 struct ext4_allocation_context *ac = NULL;
4496 struct ext4_group_desc *gdp; 4504 struct ext4_group_desc *gdp;
4497 unsigned long freed = 0; 4505 unsigned long freed = 0;
4498 unsigned int overflow; 4506 unsigned int overflow;
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4532 if (!bh) 4540 if (!bh)
4533 tbh = sb_find_get_block(inode->i_sb, 4541 tbh = sb_find_get_block(inode->i_sb,
4534 block + i); 4542 block + i);
4543 if (unlikely(!tbh))
4544 continue;
4535 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4545 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4536 inode, tbh, block + i); 4546 inode, tbh, block + i);
4537 } 4547 }
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4547 if (!ext4_should_writeback_data(inode)) 4557 if (!ext4_should_writeback_data(inode))
4548 flags |= EXT4_FREE_BLOCKS_METADATA; 4558 flags |= EXT4_FREE_BLOCKS_METADATA;
4549 4559
4550 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4551 if (ac) {
4552 ac->ac_inode = inode;
4553 ac->ac_sb = sb;
4554 }
4555
4556do_more: 4560do_more:
4557 overflow = 0; 4561 overflow = 0;
4558 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4562 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4610,12 +4614,7 @@ do_more:
4610 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4614 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4611 } 4615 }
4612#endif 4616#endif
4613 if (ac) { 4617 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4614 ac->ac_b_ex.fe_group = block_group;
4615 ac->ac_b_ex.fe_start = bit;
4616 ac->ac_b_ex.fe_len = count;
4617 trace_ext4_mballoc_free(ac);
4618 }
4619 4618
4620 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4619 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4621 if (err) 4620 if (err)
@@ -4641,12 +4640,12 @@ do_more:
4641 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4642 * them with group lock_held 4641 * them with group lock_held
4643 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4644 ext4_lock_group(sb, block_group); 4645 ext4_lock_group(sb, block_group);
4645 mb_clear_bits(bitmap_bh->b_data, bit, count); 4646 mb_clear_bits(bitmap_bh->b_data, bit, count);
4646 mb_free_blocks(inode, &e4b, bit, count); 4647 mb_free_blocks(inode, &e4b, bit, count);
4647 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4648 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4648 if (test_opt(sb, DISCARD))
4649 ext4_issue_discard(sb, block_group, bit, count);
4650 } 4649 }
4651 4650
4652 ret = ext4_free_blks_count(sb, gdp) + count; 4651 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4686,7 +4685,190 @@ error_return:
4686 dquot_free_block(inode, freed); 4685 dquot_free_block(inode, freed);
4687 brelse(bitmap_bh); 4686 brelse(bitmap_bh);
4688 ext4_std_error(sb, err); 4687 ext4_std_error(sb, err);
4689 if (ac)
4690 kmem_cache_free(ext4_ac_cachep, ac);
4691 return; 4688 return;
4692} 4689}
4690
4691/**
4692 * ext4_trim_extent -- function to TRIM one single free extent in the group
4693 * @sb: super block for the file system
4694 * @start: starting block of the free extent in the alloc. group
4695 * @count: number of blocks to TRIM
4696 * @group: alloc. group we are working with
4697 * @e4b: ext4 buddy for the group
4698 *
4699 * Trim "count" blocks starting at "start" in the "group". To assure that no
4700 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4701 * be called with under the group lock.
4702 */
4703static int ext4_trim_extent(struct super_block *sb, int start, int count,
4704 ext4_group_t group, struct ext4_buddy *e4b)
4705{
4706 struct ext4_free_extent ex;
4707 int ret = 0;
4708
4709 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4710
4711 ex.fe_start = start;
4712 ex.fe_group = group;
4713 ex.fe_len = count;
4714
4715 /*
4716 * Mark blocks used, so no one can reuse them while
4717 * being trimmed.
4718 */
4719 mb_mark_used(e4b, &ex);
4720 ext4_unlock_group(sb, group);
4721
4722 ret = ext4_issue_discard(sb, group, start, count);
4723 if (ret)
4724 ext4_std_error(sb, ret);
4725
4726 ext4_lock_group(sb, group);
4727 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4728 return ret;
4729}
4730
4731/**
4732 * ext4_trim_all_free -- function to trim all free space in alloc. group
4733 * @sb: super block for file system
4734 * @e4b: ext4 buddy
4735 * @start: first group block to examine
4736 * @max: last group block to examine
4737 * @minblocks: minimum extent block count
4738 *
4739 * ext4_trim_all_free walks through group's buddy bitmap searching for free
4740 * extents. When the free block is found, ext4_trim_extent is called to TRIM
4741 * the extent.
4742 *
4743 *
4744 * ext4_trim_all_free walks through group's block bitmap searching for free
4745 * extents. When the free extent is found, mark it as used in group buddy
4746 * bitmap. Then issue a TRIM command on this extent and free the extent in
4747 * the group buddy bitmap. This is done until whole group is scanned.
4748 */
4749ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4750 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4751{
4752 void *bitmap;
4753 ext4_grpblk_t next, count = 0;
4754 ext4_group_t group;
4755 int ret = 0;
4756
4757 BUG_ON(e4b == NULL);
4758
4759 bitmap = e4b->bd_bitmap;
4760 group = e4b->bd_group;
4761 start = (e4b->bd_info->bb_first_free > start) ?
4762 e4b->bd_info->bb_first_free : start;
4763 ext4_lock_group(sb, group);
4764
4765 while (start < max) {
4766 start = mb_find_next_zero_bit(bitmap, max, start);
4767 if (start >= max)
4768 break;
4769 next = mb_find_next_bit(bitmap, max, start);
4770
4771 if ((next - start) >= minblocks) {
4772 ret = ext4_trim_extent(sb, start,
4773 next - start, group, e4b);
4774 if (ret < 0)
4775 break;
4776 count += next - start;
4777 }
4778 start = next + 1;
4779
4780 if (fatal_signal_pending(current)) {
4781 count = -ERESTARTSYS;
4782 break;
4783 }
4784
4785 if (need_resched()) {
4786 ext4_unlock_group(sb, group);
4787 cond_resched();
4788 ext4_lock_group(sb, group);
4789 }
4790
4791 if ((e4b->bd_info->bb_free - count) < minblocks)
4792 break;
4793 }
4794 ext4_unlock_group(sb, group);
4795
4796 ext4_debug("trimmed %d blocks in the group %d\n",
4797 count, group);
4798
4799 if (ret < 0)
4800 count = ret;
4801
4802 return count;
4803}
4804
4805/**
4806 * ext4_trim_fs() -- trim ioctl handle function
4807 * @sb: superblock for filesystem
4808 * @range: fstrim_range structure
4809 *
4810 * start: First Byte to trim
4811 * len: number of Bytes to trim from start
4812 * minlen: minimum extent length in Bytes
4813 * ext4_trim_fs goes through all allocation groups containing Bytes from
4814 * start to start+len. For each such a group ext4_trim_all_free function
4815 * is invoked to trim all free space.
4816 */
4817int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4818{
4819 struct ext4_buddy e4b;
4820 ext4_group_t first_group, last_group;
4821 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4822 ext4_grpblk_t cnt = 0, first_block, last_block;
4823 uint64_t start, len, minlen, trimmed;
4824 int ret = 0;
4825
4826 start = range->start >> sb->s_blocksize_bits;
4827 len = range->len >> sb->s_blocksize_bits;
4828 minlen = range->minlen >> sb->s_blocksize_bits;
4829 trimmed = 0;
4830
4831 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4832 return -EINVAL;
4833
4834 /* Determine first and last group to examine based on start and len */
4835 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4836 &first_group, &first_block);
4837 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4838 &last_group, &last_block);
4839 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4840 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4841
4842 if (first_group > last_group)
4843 return -EINVAL;
4844
4845 for (group = first_group; group <= last_group; group++) {
4846 ret = ext4_mb_load_buddy(sb, group, &e4b);
4847 if (ret) {
4848 ext4_error(sb, "Error in loading buddy "
4849 "information for %u", group);
4850 break;
4851 }
4852
4853 if (len >= EXT4_BLOCKS_PER_GROUP(sb))
4854 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
4855 else
4856 last_block = len;
4857
4858 if (e4b.bd_info->bb_free >= minlen) {
4859 cnt = ext4_trim_all_free(sb, &e4b, first_block,
4860 last_block, minlen);
4861 if (cnt < 0) {
4862 ret = cnt;
4863 ext4_mb_unload_buddy(&e4b);
4864 break;
4865 }
4866 }
4867 ext4_mb_unload_buddy(&e4b);
4868 trimmed += cnt;
4869 first_block = 0;
4870 }
4871 range->len = trimmed * sb->s_blocksize;
4872
4873 return ret;
4874}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 1765c2c50a9b..25f3a974b725 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
412 struct buffer_head *bh; 412 struct buffer_head *bh;
413 struct ext4_extent_header *eh; 413 struct ext4_extent_header *eh;
414 414
415 block = idx_pblock(ix); 415 block = ext4_idx_pblock(ix);
416 bh = sb_bread(inode->i_sb, block); 416 bh = sb_bread(inode->i_sb, block);
417 if (!bh) 417 if (!bh)
418 return -EIO; 418 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 5f1ed9fc913c..b9f3e7862f13 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
86 /* leaf block */ 86 /* leaf block */
87 *extent = ++path[ppos].p_ext; 87 *extent = ++path[ppos].p_ext;
88 path[ppos].p_block = ext_pblock(path[ppos].p_ext); 88 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
89 return 0; 89 return 0;
90 } 90 }
91 91
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
96 96
97 /* index block */ 97 /* index block */
98 path[ppos].p_idx++; 98 path[ppos].p_idx++;
99 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 99 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
100 if (path[ppos+1].p_bh) 100 if (path[ppos+1].p_bh)
101 brelse(path[ppos+1].p_bh); 101 brelse(path[ppos+1].p_bh);
102 path[ppos+1].p_bh = 102 path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
111 path[cur_ppos].p_idx = 111 path[cur_ppos].p_idx =
112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr); 112 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
113 path[cur_ppos].p_block = 113 path[cur_ppos].p_block =
114 idx_pblock(path[cur_ppos].p_idx); 114 ext4_idx_pblock(path[cur_ppos].p_idx);
115 if (path[cur_ppos+1].p_bh) 115 if (path[cur_ppos+1].p_bh)
116 brelse(path[cur_ppos+1].p_bh); 116 brelse(path[cur_ppos+1].p_bh);
117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, 117 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
133 path[leaf_ppos].p_ext = *extent = 133 path[leaf_ppos].p_ext = *extent =
134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
135 path[leaf_ppos].p_block = 135 path[leaf_ppos].p_block =
136 ext_pblock(path[leaf_ppos].p_ext); 136 ext4_ext_pblock(path[leaf_ppos].p_ext);
137 return 0; 137 return 0;
138 } 138 }
139 } 139 }
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
249 */ 249 */
250 o_end->ee_block = end_ext->ee_block; 250 o_end->ee_block = end_ext->ee_block;
251 o_end->ee_len = end_ext->ee_len; 251 o_end->ee_len = end_ext->ee_len;
252 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 252 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
253 } 253 }
254 254
255 o_start->ee_len = start_ext->ee_len; 255 o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
276 */ 276 */
277 o_end->ee_block = end_ext->ee_block; 277 o_end->ee_block = end_ext->ee_block;
278 o_end->ee_len = end_ext->ee_len; 278 o_end->ee_len = end_ext->ee_len;
279 ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); 279 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
280 280
281 /* 281 /*
282 * Set 0 to the extent block if new_ext was 282 * Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
361 /* Insert new entry */ 361 /* Insert new entry */
362 if (new_ext->ee_len) { 362 if (new_ext->ee_len) {
363 o_start[i] = *new_ext; 363 o_start[i] = *new_ext;
364 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); 364 ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
365 } 365 }
366 366
367 /* Insert end entry */ 367 /* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
488 start_ext.ee_len = end_ext.ee_len = 0; 488 start_ext.ee_len = end_ext.ee_len = 0;
489 489
490 new_ext.ee_block = cpu_to_le32(*from); 490 new_ext.ee_block = cpu_to_le32(*from);
491 ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); 491 ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
492 new_ext.ee_len = dext->ee_len; 492 new_ext.ee_len = dext->ee_len;
493 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 493 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 494 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
553 copy_extent_status(oext, &end_ext); 553 copy_extent_status(oext, &end_ext);
554 end_ext_alen = ext4_ext_get_actual_len(&end_ext); 554 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
555 ext4_ext_store_pblock(&end_ext, 555 ext4_ext_store_pblock(&end_ext,
556 (ext_pblock(o_end) + oext_alen - end_ext_alen)); 556 (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
557 end_ext.ee_block = 557 end_ext.ee_block =
558 cpu_to_le32(le32_to_cpu(o_end->ee_block) + 558 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
559 oext_alen - end_ext_alen); 559 oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 /* When tmp_dext is too large, pick up the target range. */ 604 /* When tmp_dext is too large, pick up the target range. */
605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 605 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
606 606
607 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); 607 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
608 tmp_dext->ee_block = 608 tmp_dext->ee_block =
609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 609 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); 610 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
613 tmp_dext->ee_len = cpu_to_le16(max_count); 613 tmp_dext->ee_len = cpu_to_le16(max_count);
614 614
615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); 615 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
616 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); 616 ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
617 617
618 /* Adjust extent length if donor extent is larger than orig */ 618 /* Adjust extent length if donor extent is larger than orig */
619 if (ext4_ext_get_actual_len(tmp_dext) > 619 if (ext4_ext_get_actual_len(tmp_dext) >
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index bd39885b5998..92203b8a099f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
856 struct buffer_head *bh_use[NAMEI_RA_SIZE]; 856 struct buffer_head *bh_use[NAMEI_RA_SIZE];
857 struct buffer_head *bh, *ret = NULL; 857 struct buffer_head *bh, *ret = NULL;
858 ext4_lblk_t start, block, b; 858 ext4_lblk_t start, block, b;
859 const u8 *name = d_name->name;
859 int ra_max = 0; /* Number of bh's in the readahead 860 int ra_max = 0; /* Number of bh's in the readahead
860 buffer, bh_use[] */ 861 buffer, bh_use[] */
861 int ra_ptr = 0; /* Current index into readahead 862 int ra_ptr = 0; /* Current index into readahead
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
870 namelen = d_name->len; 871 namelen = d_name->len;
871 if (namelen > EXT4_NAME_LEN) 872 if (namelen > EXT4_NAME_LEN)
872 return NULL; 873 return NULL;
874 if ((namelen <= 2) && (name[0] == '.') &&
875 (name[1] == '.' || name[1] == '0')) {
876 /*
877 * "." or ".." will only be in the first block
878 * NFS may look up ".."; "." should be handled by the VFS
879 */
880 block = start = 0;
881 nblocks = 1;
882 goto restart;
883 }
873 if (is_dx(dir)) { 884 if (is_dx(dir)) {
874 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); 885 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
875 /* 886 /*
@@ -960,55 +971,35 @@ cleanup_and_exit:
960static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, 971static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
961 struct ext4_dir_entry_2 **res_dir, int *err) 972 struct ext4_dir_entry_2 **res_dir, int *err)
962{ 973{
963 struct super_block * sb; 974 struct super_block * sb = dir->i_sb;
964 struct dx_hash_info hinfo; 975 struct dx_hash_info hinfo;
965 u32 hash;
966 struct dx_frame frames[2], *frame; 976 struct dx_frame frames[2], *frame;
967 struct ext4_dir_entry_2 *de, *top;
968 struct buffer_head *bh; 977 struct buffer_head *bh;
969 ext4_lblk_t block; 978 ext4_lblk_t block;
970 int retval; 979 int retval;
971 int namelen = d_name->len;
972 const u8 *name = d_name->name;
973 980
974 sb = dir->i_sb; 981 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
975 /* NFS may look up ".." - look at dx_root directory block */ 982 return NULL;
976 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
977 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
978 return NULL;
979 } else {
980 frame = frames;
981 frame->bh = NULL; /* for dx_release() */
982 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
983 dx_set_block(frame->at, 0); /* dx_root block is 0 */
984 }
985 hash = hinfo.hash;
986 do { 983 do {
987 block = dx_get_block(frame->at); 984 block = dx_get_block(frame->at);
988 if (!(bh = ext4_bread (NULL,dir, block, 0, err))) 985 if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
989 goto errout; 986 goto errout;
990 de = (struct ext4_dir_entry_2 *) bh->b_data;
991 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
992 EXT4_DIR_REC_LEN(0));
993 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
994 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
995 + ((char *) de - bh->b_data);
996
997 if (!ext4_check_dir_entry(dir, de, bh, off)) {
998 brelse(bh);
999 *err = ERR_BAD_DX_DIR;
1000 goto errout;
1001 }
1002 987
1003 if (ext4_match(namelen, name, de)) { 988 retval = search_dirblock(bh, dir, d_name,
1004 *res_dir = de; 989 block << EXT4_BLOCK_SIZE_BITS(sb),
1005 dx_release(frames); 990 res_dir);
1006 return bh; 991 if (retval == 1) { /* Success! */
1007 } 992 dx_release(frames);
993 return bh;
1008 } 994 }
1009 brelse(bh); 995 brelse(bh);
996 if (retval == -1) {
997 *err = ERR_BAD_DX_DIR;
998 goto errout;
999 }
1000
1010 /* Check to see if we should continue to search */ 1001 /* Check to see if we should continue to search */
1011 retval = ext4_htree_next_block(dir, hash, frame, 1002 retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1012 frames, NULL); 1003 frames, NULL);
1013 if (retval < 0) { 1004 if (retval < 0) {
1014 ext4_warning(sb, 1005 ext4_warning(sb,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 000000000000..46a7d6a9d976
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,430 @@
1/*
2 * linux/fs/ext4/page-io.c
3 *
4 * This contains the new page_io functions for ext4
5 *
6 * Written by Theodore Ts'o, 2010.
7 */
8
9#include <linux/module.h>
10#include <linux/fs.h>
11#include <linux/time.h>
12#include <linux/jbd2.h>
13#include <linux/highuid.h>
14#include <linux/pagemap.h>
15#include <linux/quotaops.h>
16#include <linux/string.h>
17#include <linux/buffer_head.h>
18#include <linux/writeback.h>
19#include <linux/pagevec.h>
20#include <linux/mpage.h>
21#include <linux/namei.h>
22#include <linux/uio.h>
23#include <linux/bio.h>
24#include <linux/workqueue.h>
25#include <linux/kernel.h>
26#include <linux/slab.h>
27
28#include "ext4_jbd2.h"
29#include "xattr.h"
30#include "acl.h"
31#include "ext4_extents.h"
32
33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34
35int __init ext4_init_pageio(void)
36{
37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
38 if (io_page_cachep == NULL)
39 return -ENOMEM;
40 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
41 if (io_page_cachep == NULL) {
42 kmem_cache_destroy(io_page_cachep);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48
/* Tear down the slab caches created by ext4_init_pageio(). */
void ext4_exit_pageio(void)
{
	kmem_cache_destroy(io_end_cachep);
	kmem_cache_destroy(io_page_cachep);
}
54
/*
 * Release an io_end descriptor: drop the reference held on io->page
 * (if any), drop this io_end's reference on every tracked io_page —
 * the last reference out ends page writeback and frees the tracker —
 * then release the inode reference taken by ext4_init_io_end() and
 * free the descriptor itself.
 */
void ext4_free_io_end(ext4_io_end_t *io)
{
	int i;

	BUG_ON(!io);
	if (io->page)
		put_page(io->page);
	for (i = 0; i < io->num_io_pages; i++) {
		/* p_count counts io_ends still referencing this io_page;
		 * whoever drops it to zero completes the writeback. */
		if (--io->pages[i]->p_count == 0) {
			struct page *page = io->pages[i]->p_page;

			end_page_writeback(page);
			put_page(page);
			kmem_cache_free(io_page_cachep, io->pages[i]);
		}
	}
	io->num_io_pages = 0;
	iput(io->inode);
	kmem_cache_free(io_end_cachep, io);
}
75
/*
 * check a range of space and convert unwritten extents to written.
 *
 * No-op (returns 0) unless this io_end is still linked on a list and
 * carries EXT4_IO_END_UNWRITTEN.  On successful conversion, completes
 * any pending AIO iocb and clears the unwritten flag.  On conversion
 * failure returns the negative error and leaves the io_end queued so
 * a later flush can retry.  Caller is expected to hold i_mutex (see
 * ext4_end_io_work).
 */
int ext4_end_io_nolock(ext4_io_end_t *io)
{
	struct inode *inode = io->inode;
	loff_t offset = io->offset;
	ssize_t size = io->size;
	int ret = 0;

	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
		   "list->prev 0x%p\n",
		   io, inode->i_ino, io->list.next, io->list.prev);

	if (list_empty(&io->list))
		return ret;

	if (!(io->flag & EXT4_IO_END_UNWRITTEN))
		return ret;

	ret = ext4_convert_unwritten_extents(inode, offset, size);
	if (ret < 0) {
		printk(KERN_EMERG "%s: failed to convert unwritten "
			"extents to written extents, error is %d "
			"io is still on inode %lu aio dio list\n",
		       __func__, ret, inode->i_ino);
		return ret;
	}

	if (io->iocb)
		aio_complete(io->iocb, io->result, 0);
	/* clear the DIO AIO unwritten flag */
	io->flag &= ~EXT4_IO_END_UNWRITTEN;
	return ret;
}
111
/*
 * work on completed aio dio IO, to convert unwritten extents to extents
 *
 * Runs from the per-sb dio_unwritten workqueue.  On conversion failure
 * the io_end is deliberately left on the inode's completed-io list so
 * it can be retried later; on success it is unlinked (under
 * i_completed_io_lock) and freed.
 */
static void ext4_end_io_work(struct work_struct *work)
{
	ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
	struct inode *inode = io->inode;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned long flags;
	int ret;

	mutex_lock(&inode->i_mutex);
	ret = ext4_end_io_nolock(io);
	if (ret < 0) {
		mutex_unlock(&inode->i_mutex);
		return;
	}

	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	if (!list_empty(&io->list))
		list_del_init(&io->list);
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
	mutex_unlock(&inode->i_mutex);
	ext4_free_io_end(io);
}
137
138ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
139{
140 ext4_io_end_t *io = NULL;
141
142 io = kmem_cache_alloc(io_end_cachep, flags);
143 if (io) {
144 memset(io, 0, sizeof(*io));
145 io->inode = igrab(inode);
146 BUG_ON(!io->inode);
147 INIT_WORK(&io->work, ext4_end_io_work);
148 INIT_LIST_HEAD(&io->list);
149 }
150 return io;
151}
152
153/*
154 * Print an buffer I/O error compatible with the fs/buffer.c. This
155 * provides compatibility with dmesg scrapers that look for a specific
156 * buffer I/O error message. We really need a unified error reporting
157 * structure to userspace ala Digital Unix's uerf system, but it's
158 * probably not going to happen in my lifetime, due to LKML politics...
159 */
160static void buffer_io_error(struct buffer_head *bh)
161{
162 char b[BDEVNAME_SIZE];
163 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
164 bdevname(bh->b_bdev, b),
165 (unsigned long long)bh->b_blocknr);
166}
167
/*
 * bio completion callback for ext4 buffered writeback.  Clears dirty
 * state on the buffers covered by this I/O, drops per-page references
 * (ending page writeback when the last io_end referencing a page is
 * done), then queues the io_end on the inode's completed-io list for
 * unwritten-extent conversion by the dio_unwritten workqueue.
 * Runs in bio-completion (possibly interrupt) context.
 */
static void ext4_end_bio(struct bio *bio, int error)
{
	ext4_io_end_t *io_end = bio->bi_private;
	struct workqueue_struct *wq;
	struct inode *inode;
	unsigned long flags;
	ext4_fsblk_t err_block;
	int i;

	BUG_ON(!io_end);
	inode = io_end->inode;
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = 0;
	/* remember the failing block before the bio is recycled */
	err_block = bio->bi_sector >> (inode->i_blkbits - 9);
	bio_put(bio);

	if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
		/* superblock already unmounted; nothing left to convert */
		pr_err("sb umounted, discard end_io request for inode %lu\n",
			io_end->inode->i_ino);
		ext4_free_io_end(io_end);
		return;
	}

	if (error) {
		io_end->flag |= EXT4_IO_END_ERROR;
		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
			     "(offset %llu size %ld starting block %llu)",
			     inode->i_ino,
			     (unsigned long long) io_end->offset,
			     (long) io_end->size,
			     (unsigned long long) err_block);
	}

	for (i = 0; i < io_end->num_io_pages; i++) {
		struct page *page = io_end->pages[i]->p_page;
		struct buffer_head *bh, *head;
		int partial_write = 0;

		head = page_buffers(page);
		if (error)
			SetPageError(page);
		BUG_ON(!head);
		/* single buffer covering the whole page: trivial case */
		if (head->b_size == PAGE_CACHE_SIZE)
			clear_buffer_dirty(head);
		else {
			loff_t offset;
			loff_t io_end_offset = io_end->offset + io_end->size;

			/* walk sub-page buffers; only those fully inside
			 * [io_end->offset, io_end_offset) belong to this I/O */
			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
			bh = head;
			do {
				if ((offset >= io_end->offset) &&
				    (offset+bh->b_size <= io_end_offset)) {
					if (error)
						buffer_io_error(bh);

					clear_buffer_dirty(bh);
				}
				if (buffer_delay(bh))
					partial_write = 1;
				else if (!buffer_mapped(bh))
					clear_buffer_dirty(bh);
				else if (buffer_dirty(bh))
					partial_write = 1;
				offset += bh->b_size;
				bh = bh->b_this_page;
			} while (bh != head);
		}

		if (--io_end->pages[i]->p_count == 0) {
			struct page *page = io_end->pages[i]->p_page;

			end_page_writeback(page);
			put_page(page);
			kmem_cache_free(io_page_cachep, io_end->pages[i]);
		}

		/*
		 * If this is a partial write which happened to make
		 * all buffers uptodate then we can optimize away a
		 * bogus readpage() for the next read(). Here we
		 * 'discover' whether the page went uptodate as a
		 * result of this (potentially partial) write.
		 *
		 * NOTE(review): this touches the outer 'page' after the
		 * block above may have dropped the io_page's reference
		 * (put_page) — presumably a page-cache reference keeps
		 * the page alive here; confirm against callers.
		 */
		if (!partial_write)
			SetPageUptodate(page);
	}

	io_end->num_io_pages = 0;

	/* Add the io_end to per-inode completed io list*/
	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);

	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
	/* queue the work to convert unwritten extents to written */
	queue_work(wq, &io_end->work);
}
269
270void ext4_io_submit(struct ext4_io_submit *io)
271{
272 struct bio *bio = io->io_bio;
273
274 if (bio) {
275 bio_get(io->io_bio);
276 submit_bio(io->io_op, io->io_bio);
277 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
278 bio_put(io->io_bio);
279 }
280 io->io_bio = 0;
281 io->io_op = 0;
282 io->io_end = 0;
283}
284
/*
 * Start a new bio for @bh: allocate a fresh io_end, allocate a bio
 * sized for the underlying device, aim it at the buffer's block, and
 * record the write op (sync vs. async per wbc->sync_mode) in @io.
 *
 * Returns 0 on success, -ENOMEM if the io_end cannot be allocated.
 */
static int io_submit_init(struct ext4_io_submit *io,
			  struct inode *inode,
			  struct writeback_control *wbc,
			  struct buffer_head *bh)
{
	ext4_io_end_t *io_end;
	struct page *page = bh->b_page;
	int nvecs = bio_get_nr_vecs(bh->b_bdev);
	struct bio *bio;

	io_end = ext4_init_io_end(inode, GFP_NOFS);
	if (!io_end)
		return -ENOMEM;
	/*
	 * Retry with progressively smaller vector counts until bio_alloc
	 * succeeds.  NOTE(review): this relies on bio_alloc(GFP_NOIO, ...)
	 * eventually succeeding (mempool-backed) — the loop would spin
	 * forever otherwise; confirm this assumption holds.
	 */
	do {
		bio = bio_alloc(GFP_NOIO, nvecs);
		nvecs >>= 1;
	} while (bio == NULL);

	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_private = io->io_end = io_end;
	bio->bi_end_io = ext4_end_bio;

	io_end->inode = inode;
	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);

	io->io_bio = bio;
	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
		     WRITE_SYNC_PLUG : WRITE);
	io->io_next_block = bh->b_blocknr;
	return 0;
}
317
/*
 * Add @bh to the bio being assembled in @io, submitting the current
 * bio and starting a new one whenever this buffer cannot be appended
 * (no bio yet, discontiguous block, io_end page list full, or
 * bio_add_page refuses).  Unmapped/delayed buffers are skipped after
 * flushing any pending bio.  Tracks @io_page in the io_end's page list
 * (bumping p_count) the first time a buffer of its page is added.
 *
 * Returns 0 on success or a negative errno from io_submit_init().
 */
static int io_submit_add_bh(struct ext4_io_submit *io,
			    struct ext4_io_page *io_page,
			    struct inode *inode,
			    struct writeback_control *wbc,
			    struct buffer_head *bh)
{
	ext4_io_end_t *io_end;
	int ret;

	if (buffer_new(bh)) {
		clear_buffer_new(bh);
		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
	}

	if (!buffer_mapped(bh) || buffer_delay(bh)) {
		if (!buffer_mapped(bh))
			clear_buffer_dirty(bh);
		if (io->io_bio)
			ext4_io_submit(io);
		return 0;
	}

	/*
	 * Note: the label inside this if-body is also the target of two
	 * gotos further down; jumping to it submits the current bio and
	 * then falls through to start a new one for this buffer.
	 */
	if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
		ext4_io_submit(io);
	}
	if (io->io_bio == NULL) {
		ret = io_submit_init(io, inode, wbc, bh);
		if (ret)
			return ret;
	}
	io_end = io->io_end;
	/* page list full and this page isn't the most recent entry */
	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
	    (io_end->pages[io_end->num_io_pages-1] != io_page))
		goto submit_and_retry;
	if (buffer_uninit(bh))
		io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
	io->io_end->size += bh->b_size;
	io->io_next_block++;
	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
	if (ret != bh->b_size)
		goto submit_and_retry;
	if ((io_end->num_io_pages == 0) ||
	    (io_end->pages[io_end->num_io_pages-1] != io_page)) {
		io_end->pages[io_end->num_io_pages++] = io_page;
		io_page->p_count++;
	}
	return 0;
}
367
/*
 * Write out up to @len bytes of @page via the bio-based submit path.
 * Marks the page under writeback, allocates an io_page tracker, and
 * feeds each dirty buffer head to io_submit_add_bh(); buffers beyond
 * @len (past EOF) are cleaned and marked uptodate instead.
 *
 * @page must be locked on entry; it is unlocked before returning.
 * Returns 0 on success or -ENOMEM (in which case the page is redirtied
 * for a later retry).
 */
int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	ClearPageError(page);

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		/* couldn't track the page; redirty so writeback retries */
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	io_page->p_count = 0;
	get_page(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_start >= len) {
			/* buffer wholly past @len: nothing to write */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	if (io_page->p_count == 0) {
		put_page(page);
		end_page_writeback(page);
		kmem_cache_free(io_page_cachep, io_page);
	}
	return ret;
}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca5c8aa00a2f..dc963929de65 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
226 } 226 }
227 227
228 /* Zero out all of the reserved backup group descriptor table blocks */ 228 /* Zero out all of the reserved backup group descriptor table blocks */
229 for (i = 0, bit = gdblocks + 1, block = start + bit; 229 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
230 i < reserved_gdb; i++, block++, bit++) { 230 block, sbi->s_itb_per_group);
231 struct buffer_head *gdb; 231 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
232 232 GFP_NOFS);
233 ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); 233 if (err)
234 234 goto exit_bh;
235 if ((err = extend_or_restart_transaction(handle, 1, bh)))
236 goto exit_bh;
237 235
238 if (IS_ERR(gdb = bclean(handle, sb, block))) {
239 err = PTR_ERR(gdb);
240 goto exit_bh;
241 }
242 ext4_handle_dirty_metadata(handle, NULL, gdb);
243 ext4_set_bit(bit, bh->b_data);
244 brelse(gdb);
245 }
246 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
247 input->block_bitmap - start); 237 input->block_bitmap - start);
248 ext4_set_bit(input->block_bitmap - start, bh->b_data); 238 ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
251 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 241 ext4_set_bit(input->inode_bitmap - start, bh->b_data);
252 242
253 /* Zero out all of the inode table blocks */ 243 /* Zero out all of the inode table blocks */
254 for (i = 0, block = input->inode_table, bit = block - start; 244 block = input->inode_table;
255 i < sbi->s_itb_per_group; i++, bit++, block++) { 245 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
256 struct buffer_head *it; 246 block, sbi->s_itb_per_group);
257 247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
258 ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); 248 if (err)
259 249 goto exit_bh;
260 if ((err = extend_or_restart_transaction(handle, 1, bh)))
261 goto exit_bh;
262
263 if (IS_ERR(it = bclean(handle, sb, block))) {
264 err = PTR_ERR(it);
265 goto exit_bh;
266 }
267 ext4_handle_dirty_metadata(handle, NULL, it);
268 brelse(it);
269 ext4_set_bit(bit, bh->b_data);
270 }
271 250
272 if ((err = extend_or_restart_transaction(handle, 2, bh))) 251 if ((err = extend_or_restart_transaction(handle, 2, bh)))
273 goto exit_bh; 252 goto exit_bh;
274 253
275 mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); 254 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
255 bh->b_data);
276 ext4_handle_dirty_metadata(handle, NULL, bh); 256 ext4_handle_dirty_metadata(handle, NULL, bh);
277 brelse(bh); 257 brelse(bh);
278 /* Mark unused entries in inode bitmap used */ 258 /* Mark unused entries in inode bitmap used */
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
283 goto exit_journal; 263 goto exit_journal;
284 } 264 }
285 265
286 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 266 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
287 bh->b_data); 267 bh->b_data);
288 ext4_handle_dirty_metadata(handle, NULL, bh); 268 ext4_handle_dirty_metadata(handle, NULL, bh);
289exit_bh: 269exit_bh:
290 brelse(bh); 270 brelse(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ecc1e590303..40131b777af6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -40,6 +40,9 @@
40#include <linux/crc16.h> 40#include <linux/crc16.h>
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42 42
43#include <linux/kthread.h>
44#include <linux/freezer.h>
45
43#include "ext4.h" 46#include "ext4.h"
44#include "ext4_jbd2.h" 47#include "ext4_jbd2.h"
45#include "xattr.h" 48#include "xattr.h"
@@ -49,8 +52,11 @@
49#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h> 53#include <trace/events/ext4.h>
51 54
52struct proc_dir_entry *ext4_proc_root; 55static struct proc_dir_entry *ext4_proc_root;
53static struct kset *ext4_kset; 56static struct kset *ext4_kset;
57struct ext4_lazy_init *ext4_li_info;
58struct mutex ext4_li_mtx;
59struct ext4_features *ext4_feat;
54 60
55static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 61static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
56 unsigned long journal_devnum); 62 unsigned long journal_devnum);
@@ -67,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
67static int ext4_unfreeze(struct super_block *sb); 73static int ext4_unfreeze(struct super_block *sb);
68static void ext4_write_super(struct super_block *sb); 74static void ext4_write_super(struct super_block *sb);
69static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
70static int ext4_get_sb(struct file_system_type *fs_type, int flags, 76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
71 const char *dev_name, void *data, struct vfsmount *mnt); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb);
72 80
73#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
74static struct file_system_type ext3_fs_type = { 82static struct file_system_type ext3_fs_type = {
75 .owner = THIS_MODULE, 83 .owner = THIS_MODULE,
76 .name = "ext3", 84 .name = "ext3",
77 .get_sb = ext4_get_sb, 85 .mount = ext4_mount,
78 .kill_sb = kill_block_super, 86 .kill_sb = kill_block_super,
79 .fs_flags = FS_REQUIRES_DEV, 87 .fs_flags = FS_REQUIRES_DEV,
80}; 88};
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
701 struct ext4_super_block *es = sbi->s_es; 709 struct ext4_super_block *es = sbi->s_es;
702 int i, err; 710 int i, err;
703 711
712 ext4_unregister_li_request(sb);
704 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 713 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
705 714
706 flush_workqueue(sbi->dio_unwritten_wq); 715 flush_workqueue(sbi->dio_unwritten_wq);
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
717 ext4_abort(sb, "Couldn't clean up the journal"); 726 ext4_abort(sb, "Couldn't clean up the journal");
718 } 727 }
719 728
729 del_timer(&sbi->s_err_report);
720 ext4_release_system_zone(sb); 730 ext4_release_system_zone(sb);
721 ext4_mb_release(sb); 731 ext4_mb_release(sb);
722 ext4_ext_release(sb); 732 ext4_ext_release(sb);
@@ -1042,6 +1052,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1042 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1052 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
1043 seq_puts(seq, ",block_validity"); 1053 seq_puts(seq, ",block_validity");
1044 1054
1055 if (!test_opt(sb, INIT_INODE_TABLE))
1056 seq_puts(seq, ",noinit_inode_table");
1057 else if (sbi->s_li_wait_mult)
1058 seq_printf(seq, ",init_inode_table=%u",
1059 (unsigned) sbi->s_li_wait_mult);
1060
1045 ext4_show_quota_options(seq, sb); 1061 ext4_show_quota_options(seq, sb);
1046 1062
1047 return 0; 1063 return 0;
@@ -1170,6 +1186,7 @@ static const struct super_operations ext4_sops = {
1170 .quota_write = ext4_quota_write, 1186 .quota_write = ext4_quota_write,
1171#endif 1187#endif
1172 .bdev_try_to_free_page = bdev_try_to_free_page, 1188 .bdev_try_to_free_page = bdev_try_to_free_page,
1189 .trim_fs = ext4_trim_fs
1173}; 1190};
1174 1191
1175static const struct super_operations ext4_nojournal_sops = { 1192static const struct super_operations ext4_nojournal_sops = {
@@ -1216,6 +1233,7 @@ enum {
1216 Opt_inode_readahead_blks, Opt_journal_ioprio, 1233 Opt_inode_readahead_blks, Opt_journal_ioprio,
1217 Opt_dioread_nolock, Opt_dioread_lock, 1234 Opt_dioread_nolock, Opt_dioread_lock,
1218 Opt_discard, Opt_nodiscard, 1235 Opt_discard, Opt_nodiscard,
1236 Opt_init_inode_table, Opt_noinit_inode_table,
1219}; 1237};
1220 1238
1221static const match_table_t tokens = { 1239static const match_table_t tokens = {
@@ -1286,6 +1304,9 @@ static const match_table_t tokens = {
1286 {Opt_dioread_lock, "dioread_lock"}, 1304 {Opt_dioread_lock, "dioread_lock"},
1287 {Opt_discard, "discard"}, 1305 {Opt_discard, "discard"},
1288 {Opt_nodiscard, "nodiscard"}, 1306 {Opt_nodiscard, "nodiscard"},
1307 {Opt_init_inode_table, "init_itable=%u"},
1308 {Opt_init_inode_table, "init_itable"},
1309 {Opt_noinit_inode_table, "noinit_itable"},
1289 {Opt_err, NULL}, 1310 {Opt_err, NULL},
1290}; 1311};
1291 1312
@@ -1756,6 +1777,20 @@ set_qf_format:
1756 case Opt_dioread_lock: 1777 case Opt_dioread_lock:
1757 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1778 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
1758 break; 1779 break;
1780 case Opt_init_inode_table:
1781 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1782 if (args[0].from) {
1783 if (match_int(&args[0], &option))
1784 return 0;
1785 } else
1786 option = EXT4_DEF_LI_WAIT_MULT;
1787 if (option < 0)
1788 return 0;
1789 sbi->s_li_wait_mult = option;
1790 break;
1791 case Opt_noinit_inode_table:
1792 clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
1793 break;
1759 default: 1794 default:
1760 ext4_msg(sb, KERN_ERR, 1795 ext4_msg(sb, KERN_ERR,
1761 "Unrecognized mount option \"%s\" " 1796 "Unrecognized mount option \"%s\" "
@@ -1939,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1939} 1974}
1940 1975
1941/* Called at mount-time, super-block is locked */ 1976/* Called at mount-time, super-block is locked */
1942static int ext4_check_descriptors(struct super_block *sb) 1977static int ext4_check_descriptors(struct super_block *sb,
1978 ext4_group_t *first_not_zeroed)
1943{ 1979{
1944 struct ext4_sb_info *sbi = EXT4_SB(sb); 1980 struct ext4_sb_info *sbi = EXT4_SB(sb);
1945 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1981 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1948,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1948 ext4_fsblk_t inode_bitmap; 1984 ext4_fsblk_t inode_bitmap;
1949 ext4_fsblk_t inode_table; 1985 ext4_fsblk_t inode_table;
1950 int flexbg_flag = 0; 1986 int flexbg_flag = 0;
1951 ext4_group_t i; 1987 ext4_group_t i, grp = sbi->s_groups_count;
1952 1988
1953 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1989 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1954 flexbg_flag = 1; 1990 flexbg_flag = 1;
@@ -1964,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb)
1964 last_block = first_block + 2000 last_block = first_block +
1965 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2001 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1966 2002
2003 if ((grp == sbi->s_groups_count) &&
2004 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2005 grp = i;
2006
1967 block_bitmap = ext4_block_bitmap(sb, gdp); 2007 block_bitmap = ext4_block_bitmap(sb, gdp);
1968 if (block_bitmap < first_block || block_bitmap > last_block) { 2008 if (block_bitmap < first_block || block_bitmap > last_block) {
1969 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2009 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2001,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb)
2001 if (!flexbg_flag) 2041 if (!flexbg_flag)
2002 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2042 first_block += EXT4_BLOCKS_PER_GROUP(sb);
2003 } 2043 }
2044 if (NULL != first_not_zeroed)
2045 *first_not_zeroed = grp;
2004 2046
2005 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2047 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
2006 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2048 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2373,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \
2373#define EXT4_ATTR(name, mode, show, store) \ 2415#define EXT4_ATTR(name, mode, show, store) \
2374static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2416static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2375 2417
2418#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2376#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2419#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2377#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2420#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2378#define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2421#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@@ -2409,6 +2452,16 @@ static struct attribute *ext4_attrs[] = {
2409 NULL, 2452 NULL,
2410}; 2453};
2411 2454
2455/* Features this copy of ext4 supports */
2456EXT4_INFO_ATTR(lazy_itable_init);
2457EXT4_INFO_ATTR(batched_discard);
2458
2459static struct attribute *ext4_feat_attrs[] = {
2460 ATTR_LIST(lazy_itable_init),
2461 ATTR_LIST(batched_discard),
2462 NULL,
2463};
2464
2412static ssize_t ext4_attr_show(struct kobject *kobj, 2465static ssize_t ext4_attr_show(struct kobject *kobj,
2413 struct attribute *attr, char *buf) 2466 struct attribute *attr, char *buf)
2414{ 2467{
@@ -2437,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj)
2437 complete(&sbi->s_kobj_unregister); 2490 complete(&sbi->s_kobj_unregister);
2438} 2491}
2439 2492
2440
2441static const struct sysfs_ops ext4_attr_ops = { 2493static const struct sysfs_ops ext4_attr_ops = {
2442 .show = ext4_attr_show, 2494 .show = ext4_attr_show,
2443 .store = ext4_attr_store, 2495 .store = ext4_attr_store,
@@ -2449,6 +2501,17 @@ static struct kobj_type ext4_ktype = {
2449 .release = ext4_sb_release, 2501 .release = ext4_sb_release,
2450}; 2502};
2451 2503
2504static void ext4_feat_release(struct kobject *kobj)
2505{
2506 complete(&ext4_feat->f_kobj_unregister);
2507}
2508
2509static struct kobj_type ext4_feat_ktype = {
2510 .default_attrs = ext4_feat_attrs,
2511 .sysfs_ops = &ext4_attr_ops,
2512 .release = ext4_feat_release,
2513};
2514
2452/* 2515/*
2453 * Check whether this filesystem can be mounted based on 2516 * Check whether this filesystem can be mounted based on
2454 * the features present and the RDONLY/RDWR mount requested. 2517 * the features present and the RDONLY/RDWR mount requested.
@@ -2539,6 +2602,372 @@ static void print_daily_error_info(unsigned long arg)
2539 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2602 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2540} 2603}
2541 2604
/* Timer callback: wake the lazyinit thread whose task_struct is @data. */
static void ext4_lazyinode_timeout(unsigned long data)
{
	struct task_struct *tsk = (struct task_struct *)data;

	wake_up_process(tsk);
}
2610
/* Find next suitable group and run ext4_init_inode_table */
/*
 * Scan forward from elr->lr_next_group for the first group whose
 * inode table has not been zeroed (no EXT4_BG_INODE_ZEROED flag) and
 * zero it.  On the request's first run, the measured elapsed time,
 * multiplied by s_li_wait_mult (or 20 when unset), becomes the delay
 * between all subsequent runs (lr_timeout / lr_next_sched).
 *
 * Returns nonzero when there is nothing left to do for this
 * filesystem (request can be removed), 0 when more groups remain.
 */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t group, ngroups;
	struct super_block *sb;
	unsigned long timeout = 0;
	int ret = 0;

	sb = elr->lr_super;
	ngroups = EXT4_SB(sb)->s_groups_count;

	for (group = elr->lr_next_group; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	if (group == ngroups)
		ret = 1;

	if (!ret) {
		timeout = jiffies;
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		if (elr->lr_timeout == 0) {
			/* first run: derive the recurring delay from how
			 * long the zeroing itself took */
			timeout = jiffies - timeout;
			if (elr->lr_sbi->s_li_wait_mult)
				timeout *= elr->lr_sbi->s_li_wait_mult;
			else
				timeout *= 20;
			elr->lr_timeout = timeout;
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}

	return ret;
}
2655
/*
 * Remove lr_request from the list_request and free the
 * request structure.  Also clears the owning superblock's
 * s_li_request back-pointer.  Should be called with li_list_mtx held.
 */
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
	struct ext4_sb_info *sbi;

	if (!elr)
		return;

	sbi = elr->lr_sbi;

	list_del(&elr->lr_request);
	sbi->s_li_request = NULL;
	kfree(elr);
}
2673
/*
 * Detach @sb's lazyinit request (if any) from the global request list,
 * e.g. at unmount.  No-op when the lazyinit machinery was never set up
 * (ext4_li_info == NULL); ext4_remove_li_request itself tolerates a
 * NULL request.
 */
static void ext4_unregister_li_request(struct super_block *sb)
{
	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;

	if (!ext4_li_info)
		return;

	mutex_lock(&ext4_li_info->li_list_mtx);
	ext4_remove_li_request(elr);
	mutex_unlock(&ext4_li_info->li_list_mtx);
}
2685
2686/*
2687 * This is the function where ext4lazyinit thread lives. It walks
2688 * through the request list searching for next scheduled filesystem.
2689 * When such a fs is found, run the lazy initialization request
2690 * (ext4_rn_li_request) and keep track of the time spend in this
2691 * function. Based on that time we compute next schedule time of
2692 * the request. When walking through the list is complete, compute
2693 * next waking time and put itself into sleep.
2694 */
2695static int ext4_lazyinit_thread(void *arg)
2696{
2697 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2698 struct list_head *pos, *n;
2699 struct ext4_li_request *elr;
2700 unsigned long next_wakeup;
2701 DEFINE_WAIT(wait);
2702 int ret;
2703
2704 BUG_ON(NULL == eli);
2705
2706 eli->li_timer.data = (unsigned long)current;
2707 eli->li_timer.function = ext4_lazyinode_timeout;
2708
2709 eli->li_task = current;
2710 wake_up(&eli->li_wait_task);
2711
2712cont_thread:
2713 while (true) {
2714 next_wakeup = MAX_JIFFY_OFFSET;
2715
2716 mutex_lock(&eli->li_list_mtx);
2717 if (list_empty(&eli->li_request_list)) {
2718 mutex_unlock(&eli->li_list_mtx);
2719 goto exit_thread;
2720 }
2721
2722 list_for_each_safe(pos, n, &eli->li_request_list) {
2723 elr = list_entry(pos, struct ext4_li_request,
2724 lr_request);
2725
2726 if (time_after_eq(jiffies, elr->lr_next_sched))
2727 ret = ext4_run_li_request(elr);
2728
2729 if (ret) {
2730 ret = 0;
2731 ext4_remove_li_request(elr);
2732 continue;
2733 }
2734
2735 if (time_before(elr->lr_next_sched, next_wakeup))
2736 next_wakeup = elr->lr_next_sched;
2737 }
2738 mutex_unlock(&eli->li_list_mtx);
2739
2740 if (freezing(current))
2741 refrigerator();
2742
2743 if (time_after_eq(jiffies, next_wakeup)) {
2744 cond_resched();
2745 continue;
2746 }
2747
2748 eli->li_timer.expires = next_wakeup;
2749 add_timer(&eli->li_timer);
2750 prepare_to_wait(&eli->li_wait_daemon, &wait,
2751 TASK_INTERRUPTIBLE);
2752 if (time_before(jiffies, next_wakeup))
2753 schedule();
2754 finish_wait(&eli->li_wait_daemon, &wait);
2755 }
2756
2757exit_thread:
2758 /*
2759 * It looks like the request list is empty, but we need
2760 * to check it under the li_list_mtx lock, to prevent any
2761 * additions into it, and of course we should lock ext4_li_mtx
2762 * to atomically free the list and ext4_li_info, because at
2763 * this point another ext4 filesystem could be registering
2764 * new one.
2765 */
2766 mutex_lock(&ext4_li_mtx);
2767 mutex_lock(&eli->li_list_mtx);
2768 if (!list_empty(&eli->li_request_list)) {
2769 mutex_unlock(&eli->li_list_mtx);
2770 mutex_unlock(&ext4_li_mtx);
2771 goto cont_thread;
2772 }
2773 mutex_unlock(&eli->li_list_mtx);
2774 del_timer_sync(&ext4_li_info->li_timer);
2775 eli->li_task = NULL;
2776 wake_up(&eli->li_wait_task);
2777
2778 kfree(ext4_li_info);
2779 ext4_li_info = NULL;
2780 mutex_unlock(&ext4_li_mtx);
2781
2782 return 0;
2783}
2784
2785static void ext4_clear_request_list(void)
2786{
2787 struct list_head *pos, *n;
2788 struct ext4_li_request *elr;
2789
2790 mutex_lock(&ext4_li_info->li_list_mtx);
2791 if (list_empty(&ext4_li_info->li_request_list))
2792 return;
2793
2794 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2795 elr = list_entry(pos, struct ext4_li_request,
2796 lr_request);
2797 ext4_remove_li_request(elr);
2798 }
2799 mutex_unlock(&ext4_li_info->li_list_mtx);
2800}
2801
/*
 * Start the ext4lazyinit kernel thread.  On failure, tears down the
 * pending request list, timer, and ext4_li_info.  On success, marks
 * the machinery running and waits until the thread has announced
 * itself (li_task set) before returning 0.
 */
static int ext4_run_lazyinit_thread(void)
{
	struct task_struct *t;

	t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
	if (IS_ERR(t)) {
		int err = PTR_ERR(t);
		ext4_clear_request_list();
		del_timer_sync(&ext4_li_info->li_timer);
		kfree(ext4_li_info);
		ext4_li_info = NULL;
		printk(KERN_CRIT "EXT4: error %d creating inode table "
				 "initialization thread\n",
				 err);
		return err;
	}
	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;

	/* wait for the thread to publish its task_struct */
	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
	return 0;
}
2823
2824/*
2825 * Check whether it make sense to run itable init. thread or not.
2826 * If there is at least one uninitialized inode table, return
2827 * corresponding group number, else the loop goes through all
2828 * groups and return total number of groups.
2829 */
2830static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2831{
2832 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2833 struct ext4_group_desc *gdp = NULL;
2834
2835 for (group = 0; group < ngroups; group++) {
2836 gdp = ext4_get_group_desc(sb, group, NULL);
2837 if (!gdp)
2838 continue;
2839
2840 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2841 break;
2842 }
2843
2844 return group;
2845}
2846
2847static int ext4_li_info_new(void)
2848{
2849 struct ext4_lazy_init *eli = NULL;
2850
2851 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2852 if (!eli)
2853 return -ENOMEM;
2854
2855 eli->li_task = NULL;
2856 INIT_LIST_HEAD(&eli->li_request_list);
2857 mutex_init(&eli->li_list_mtx);
2858
2859 init_waitqueue_head(&eli->li_wait_daemon);
2860 init_waitqueue_head(&eli->li_wait_task);
2861 init_timer(&eli->li_timer);
2862 eli->li_state |= EXT4_LAZYINIT_QUIT;
2863
2864 ext4_li_info = eli;
2865
2866 return 0;
2867}
2868
2869static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
2870 ext4_group_t start)
2871{
2872 struct ext4_sb_info *sbi = EXT4_SB(sb);
2873 struct ext4_li_request *elr;
2874 unsigned long rnd;
2875
2876 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
2877 if (!elr)
2878 return NULL;
2879
2880 elr->lr_super = sb;
2881 elr->lr_sbi = sbi;
2882 elr->lr_next_group = start;
2883
2884 /*
2885 * Randomize first schedule time of the request to
2886 * spread the inode table initialization requests
2887 * better.
2888 */
2889 get_random_bytes(&rnd, sizeof(rnd));
2890 elr->lr_next_sched = jiffies + (unsigned long)rnd %
2891 (EXT4_DEF_LI_MAX_START_DELAY * HZ);
2892
2893 return elr;
2894}
2895
2896static int ext4_register_li_request(struct super_block *sb,
2897 ext4_group_t first_not_zeroed)
2898{
2899 struct ext4_sb_info *sbi = EXT4_SB(sb);
2900 struct ext4_li_request *elr;
2901 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2902 int ret;
2903
2904 if (sbi->s_li_request != NULL)
2905 return 0;
2906
2907 if (first_not_zeroed == ngroups ||
2908 (sb->s_flags & MS_RDONLY) ||
2909 !test_opt(sb, INIT_INODE_TABLE)) {
2910 sbi->s_li_request = NULL;
2911 return 0;
2912 }
2913
2914 if (first_not_zeroed == ngroups) {
2915 sbi->s_li_request = NULL;
2916 return 0;
2917 }
2918
2919 elr = ext4_li_request_new(sb, first_not_zeroed);
2920 if (!elr)
2921 return -ENOMEM;
2922
2923 mutex_lock(&ext4_li_mtx);
2924
2925 if (NULL == ext4_li_info) {
2926 ret = ext4_li_info_new();
2927 if (ret)
2928 goto out;
2929 }
2930
2931 mutex_lock(&ext4_li_info->li_list_mtx);
2932 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
2933 mutex_unlock(&ext4_li_info->li_list_mtx);
2934
2935 sbi->s_li_request = elr;
2936
2937 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2938 ret = ext4_run_lazyinit_thread();
2939 if (ret)
2940 goto out;
2941 }
2942out:
2943 mutex_unlock(&ext4_li_mtx);
2944 if (ret)
2945 kfree(elr);
2946 return ret;
2947}
2948
2949/*
2950 * We do not need to lock anything since this is called on
2951 * module unload.
2952 */
/*
 * Shut down the lazy-init machinery at module unload time.
 *
 * Drains the request list, then repeatedly wakes the daemon and waits
 * on li_wait_task until the thread has cleared li_task, i.e. fully
 * exited.  The thread itself frees ext4_li_info on exit, which is why
 * nothing is freed here.
 */
static void ext4_destroy_lazyinit_thread(void)
{
	/*
	 * If thread exited earlier
	 * there's nothing to be done.
	 */
	if (!ext4_li_info)
		return;

	ext4_clear_request_list();

	/* Loop guards against spurious wakeups of wait_event(). */
	while (ext4_li_info->li_task) {
		wake_up(&ext4_li_info->li_wait_daemon);
		wait_event(ext4_li_info->li_wait_task,
				ext4_li_info->li_task == NULL);
	}
}
2970
2542static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2971static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2543 __releases(kernel_lock) 2972 __releases(kernel_lock)
2544 __acquires(kernel_lock) 2973 __acquires(kernel_lock)
@@ -2564,6 +2993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2564 __u64 blocks_count; 2993 __u64 blocks_count;
2565 int err; 2994 int err;
2566 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2995 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2996 ext4_group_t first_not_zeroed;
2567 2997
2568 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2998 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2569 if (!sbi) 2999 if (!sbi)
@@ -2624,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2624 3054
2625 /* Set defaults before we parse the mount options */ 3055 /* Set defaults before we parse the mount options */
2626 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3056 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3057 set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
2627 if (def_mount_opts & EXT4_DEFM_DEBUG) 3058 if (def_mount_opts & EXT4_DEFM_DEBUG)
2628 set_opt(sbi->s_mount_opt, DEBUG); 3059 set_opt(sbi->s_mount_opt, DEBUG);
2629 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3060 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2901,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2901 goto failed_mount2; 3332 goto failed_mount2;
2902 } 3333 }
2903 } 3334 }
2904 if (!ext4_check_descriptors(sb)) { 3335 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
2905 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3336 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2906 goto failed_mount2; 3337 goto failed_mount2;
2907 } 3338 }
@@ -3122,6 +3553,10 @@ no_journal:
3122 goto failed_mount4; 3553 goto failed_mount4;
3123 } 3554 }
3124 3555
3556 err = ext4_register_li_request(sb, first_not_zeroed);
3557 if (err)
3558 goto failed_mount4;
3559
3125 sbi->s_kobj.kset = ext4_kset; 3560 sbi->s_kobj.kset = ext4_kset;
3126 init_completion(&sbi->s_kobj_unregister); 3561 init_completion(&sbi->s_kobj_unregister);
3127 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3562 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3461,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb,
3461 EXT4_SB(sb)->s_journal = journal; 3896 EXT4_SB(sb)->s_journal = journal;
3462 ext4_clear_journal_err(sb, es); 3897 ext4_clear_journal_err(sb, es);
3463 3898
3464 if (journal_devnum && 3899 if (!really_read_only && journal_devnum &&
3465 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3900 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3466 es->s_journal_dev = cpu_to_le32(journal_devnum); 3901 es->s_journal_dev = cpu_to_le32(journal_devnum);
3467 3902
@@ -3514,9 +3949,12 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3514 else 3949 else
3515 es->s_kbytes_written = 3950 es->s_kbytes_written =
3516 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3951 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3517 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3952 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
3953 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3518 &EXT4_SB(sb)->s_freeblocks_counter)); 3954 &EXT4_SB(sb)->s_freeblocks_counter));
3519 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3955 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
3956 es->s_free_inodes_count =
3957 cpu_to_le32(percpu_counter_sum_positive(
3520 &EXT4_SB(sb)->s_freeinodes_counter)); 3958 &EXT4_SB(sb)->s_freeinodes_counter));
3521 sb->s_dirt = 0; 3959 sb->s_dirt = 0;
3522 BUFFER_TRACE(sbh, "marking dirty"); 3960 BUFFER_TRACE(sbh, "marking dirty");
@@ -3835,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3835 enable_quota = 1; 4273 enable_quota = 1;
3836 } 4274 }
3837 } 4275 }
4276
4277 /*
4278 * Reinitialize lazy itable initialization thread based on
4279 * current settings
4280 */
4281 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4282 ext4_unregister_li_request(sb);
4283 else {
4284 ext4_group_t first_not_zeroed;
4285 first_not_zeroed = ext4_has_uninit_itable(sb);
4286 ext4_register_li_request(sb, first_not_zeroed);
4287 }
4288
3838 ext4_setup_system_zone(sb); 4289 ext4_setup_system_zone(sb);
3839 if (sbi->s_journal == NULL) 4290 if (sbi->s_journal == NULL)
3840 ext4_commit_super(sb, 1); 4291 ext4_commit_super(sb, 1);
@@ -4216,17 +4667,17 @@ out:
4216 4667
4217#endif 4668#endif
4218 4669
4219static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4670static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
4220 const char *dev_name, void *data, struct vfsmount *mnt) 4671 const char *dev_name, void *data)
4221{ 4672{
4222 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4673 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
4223} 4674}
4224 4675
4225#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4676#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4226static struct file_system_type ext2_fs_type = { 4677static struct file_system_type ext2_fs_type = {
4227 .owner = THIS_MODULE, 4678 .owner = THIS_MODULE,
4228 .name = "ext2", 4679 .name = "ext2",
4229 .get_sb = ext4_get_sb, 4680 .mount = ext4_mount,
4230 .kill_sb = kill_block_super, 4681 .kill_sb = kill_block_super,
4231 .fs_flags = FS_REQUIRES_DEV, 4682 .fs_flags = FS_REQUIRES_DEV,
4232}; 4683};
@@ -4271,28 +4722,58 @@ static inline void unregister_as_ext3(void) { }
4271static struct file_system_type ext4_fs_type = { 4722static struct file_system_type ext4_fs_type = {
4272 .owner = THIS_MODULE, 4723 .owner = THIS_MODULE,
4273 .name = "ext4", 4724 .name = "ext4",
4274 .get_sb = ext4_get_sb, 4725 .mount = ext4_mount,
4275 .kill_sb = kill_block_super, 4726 .kill_sb = kill_block_super,
4276 .fs_flags = FS_REQUIRES_DEV, 4727 .fs_flags = FS_REQUIRES_DEV,
4277}; 4728};
4278 4729
4279static int __init init_ext4_fs(void) 4730int __init ext4_init_feat_adverts(void)
4731{
4732 struct ext4_features *ef;
4733 int ret = -ENOMEM;
4734
4735 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
4736 if (!ef)
4737 goto out;
4738
4739 ef->f_kobj.kset = ext4_kset;
4740 init_completion(&ef->f_kobj_unregister);
4741 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
4742 "features");
4743 if (ret) {
4744 kfree(ef);
4745 goto out;
4746 }
4747
4748 ext4_feat = ef;
4749 ret = 0;
4750out:
4751 return ret;
4752}
4753
4754static int __init ext4_init_fs(void)
4280{ 4755{
4281 int err; 4756 int err;
4282 4757
4283 ext4_check_flag_values(); 4758 ext4_check_flag_values();
4284 err = init_ext4_system_zone(); 4759 err = ext4_init_pageio();
4285 if (err) 4760 if (err)
4286 return err; 4761 return err;
4762 err = ext4_init_system_zone();
4763 if (err)
4764 goto out5;
4287 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4765 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4288 if (!ext4_kset) 4766 if (!ext4_kset)
4289 goto out4; 4767 goto out4;
4290 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4768 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4291 err = init_ext4_mballoc(); 4769
4770 err = ext4_init_feat_adverts();
4771
4772 err = ext4_init_mballoc();
4292 if (err) 4773 if (err)
4293 goto out3; 4774 goto out3;
4294 4775
4295 err = init_ext4_xattr(); 4776 err = ext4_init_xattr();
4296 if (err) 4777 if (err)
4297 goto out2; 4778 goto out2;
4298 err = init_inodecache(); 4779 err = init_inodecache();
@@ -4303,38 +4784,46 @@ static int __init init_ext4_fs(void)
4303 err = register_filesystem(&ext4_fs_type); 4784 err = register_filesystem(&ext4_fs_type);
4304 if (err) 4785 if (err)
4305 goto out; 4786 goto out;
4787
4788 ext4_li_info = NULL;
4789 mutex_init(&ext4_li_mtx);
4306 return 0; 4790 return 0;
4307out: 4791out:
4308 unregister_as_ext2(); 4792 unregister_as_ext2();
4309 unregister_as_ext3(); 4793 unregister_as_ext3();
4310 destroy_inodecache(); 4794 destroy_inodecache();
4311out1: 4795out1:
4312 exit_ext4_xattr(); 4796 ext4_exit_xattr();
4313out2: 4797out2:
4314 exit_ext4_mballoc(); 4798 ext4_exit_mballoc();
4315out3: 4799out3:
4800 kfree(ext4_feat);
4316 remove_proc_entry("fs/ext4", NULL); 4801 remove_proc_entry("fs/ext4", NULL);
4317 kset_unregister(ext4_kset); 4802 kset_unregister(ext4_kset);
4318out4: 4803out4:
4319 exit_ext4_system_zone(); 4804 ext4_exit_system_zone();
4805out5:
4806 ext4_exit_pageio();
4320 return err; 4807 return err;
4321} 4808}
4322 4809
4323static void __exit exit_ext4_fs(void) 4810static void __exit ext4_exit_fs(void)
4324{ 4811{
4812 ext4_destroy_lazyinit_thread();
4325 unregister_as_ext2(); 4813 unregister_as_ext2();
4326 unregister_as_ext3(); 4814 unregister_as_ext3();
4327 unregister_filesystem(&ext4_fs_type); 4815 unregister_filesystem(&ext4_fs_type);
4328 destroy_inodecache(); 4816 destroy_inodecache();
4329 exit_ext4_xattr(); 4817 ext4_exit_xattr();
4330 exit_ext4_mballoc(); 4818 ext4_exit_mballoc();
4331 remove_proc_entry("fs/ext4", NULL); 4819 remove_proc_entry("fs/ext4", NULL);
4332 kset_unregister(ext4_kset); 4820 kset_unregister(ext4_kset);
4333 exit_ext4_system_zone(); 4821 ext4_exit_system_zone();
4822 ext4_exit_pageio();
4334} 4823}
4335 4824
4336MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4825MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
4337MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4826MODULE_DESCRIPTION("Fourth Extended Filesystem");
4338MODULE_LICENSE("GPL"); 4827MODULE_LICENSE("GPL");
4339module_init(init_ext4_fs) 4828module_init(ext4_init_fs)
4340module_exit(exit_ext4_fs) 4829module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a8cd8dff1ad..fa4b899da4b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1588#undef BLOCK_HASH_SHIFT 1588#undef BLOCK_HASH_SHIFT
1589 1589
1590int __init 1590int __init
1591init_ext4_xattr(void) 1591ext4_init_xattr(void)
1592{ 1592{
1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); 1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1594 if (!ext4_xattr_cache) 1594 if (!ext4_xattr_cache)
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
1597} 1597}
1598 1598
1599void 1599void
1600exit_ext4_xattr(void) 1600ext4_exit_xattr(void)
1601{ 1601{
1602 if (ext4_xattr_cache) 1602 if (ext4_xattr_cache)
1603 mb_cache_destroy(ext4_xattr_cache); 1603 mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e43905..1ef16520b950 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 83extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
84 struct ext4_inode *raw_inode, handle_t *handle); 84 struct ext4_inode *raw_inode, handle_t *handle);
85 85
86extern int init_ext4_xattr(void); 86extern int __init ext4_init_xattr(void);
87extern void exit_ext4_xattr(void); 87extern void ext4_exit_xattr(void);
88 88
89extern const struct xattr_handler *ext4_xattr_handlers[]; 89extern const struct xattr_handler *ext4_xattr_handlers[];
90 90
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
121{ 121{
122} 122}
123 123
124static inline int 124static __init inline int
125init_ext4_xattr(void) 125ext4_init_xattr(void)
126{ 126{
127 return 0; 127 return 0;
128} 128}
129 129
130static inline void 130static inline void
131exit_ext4_xattr(void) 131ext4_exit_xattr(void)
132{ 132{
133} 133}
134 134
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae7..3345aabd1dd7 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
675 return 0; 675 return 0;
676} 676}
677 677
678static int msdos_get_sb(struct file_system_type *fs_type, 678static struct dentry *msdos_mount(struct file_system_type *fs_type,
679 int flags, const char *dev_name, 679 int flags, const char *dev_name,
680 void *data, struct vfsmount *mnt) 680 void *data)
681{ 681{
682 return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, 682 return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
683 mnt);
684} 683}
685 684
686static struct file_system_type msdos_fs_type = { 685static struct file_system_type msdos_fs_type = {
687 .owner = THIS_MODULE, 686 .owner = THIS_MODULE,
688 .name = "msdos", 687 .name = "msdos",
689 .get_sb = msdos_get_sb, 688 .mount = msdos_mount,
690 .kill_sb = kill_block_super, 689 .kill_sb = kill_block_super,
691 .fs_flags = FS_REQUIRES_DEV, 690 .fs_flags = FS_REQUIRES_DEV,
692}; 691};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a0152..b936703b8924 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1071 return 0; 1071 return 0;
1072} 1072}
1073 1073
1074static int vfat_get_sb(struct file_system_type *fs_type, 1074static struct dentry *vfat_mount(struct file_system_type *fs_type,
1075 int flags, const char *dev_name, 1075 int flags, const char *dev_name,
1076 void *data, struct vfsmount *mnt) 1076 void *data)
1077{ 1077{
1078 return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, 1078 return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
1079 mnt);
1080} 1079}
1081 1080
1082static struct file_system_type vfat_fs_type = { 1081static struct file_system_type vfat_fs_type = {
1083 .owner = THIS_MODULE, 1082 .owner = THIS_MODULE,
1084 .name = "vfat", 1083 .name = "vfat",
1085 .get_sb = vfat_get_sb, 1084 .mount = vfat_mount,
1086 .kill_sb = kill_block_super, 1085 .kill_sb = kill_block_super,
1087 .fs_flags = FS_REQUIRES_DEV, 1086 .fs_flags = FS_REQUIRES_DEV,
1088}; 1087};
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b8784..9d1c99558389 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
246/* 246/*
247 * The usual module blurb. 247 * The usual module blurb.
248 */ 248 */
249static int vxfs_get_sb(struct file_system_type *fs_type, 249static struct dentry *vxfs_mount(struct file_system_type *fs_type,
250 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 250 int flags, const char *dev_name, void *data)
251{ 251{
252 return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, 252 return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
253 mnt);
254} 253}
255 254
256static struct file_system_type vxfs_fs_type = { 255static struct file_system_type vxfs_fs_type = {
257 .owner = THIS_MODULE, 256 .owner = THIS_MODULE,
258 .name = "vxfs", 257 .name = "vxfs",
259 .get_sb = vxfs_get_sb, 258 .mount = vxfs_mount,
260 .kill_sb = kill_block_super, 259 .kill_sb = kill_block_super,
261 .fs_flags = FS_REQUIRES_DEV, 260 .fs_flags = FS_REQUIRES_DEV,
262}; 261};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aed881a76b22..3d06ccc953aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -707,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
707 return work; 707 return work;
708} 708}
709 709
710/*
711 * Add in the number of potentially dirty inodes, because each inode
712 * write can dirty pagecache in the underlying blockdev.
713 */
714static unsigned long get_nr_dirty_pages(void)
715{
716 return global_page_state(NR_FILE_DIRTY) +
717 global_page_state(NR_UNSTABLE_NFS) +
718 get_nr_dirty_inodes();
719}
720
710static long wb_check_old_data_flush(struct bdi_writeback *wb) 721static long wb_check_old_data_flush(struct bdi_writeback *wb)
711{ 722{
712 unsigned long expired; 723 unsigned long expired;
@@ -724,13 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
724 return 0; 735 return 0;
725 736
726 wb->last_old_flush = jiffies; 737 wb->last_old_flush = jiffies;
727 /* 738 nr_pages = get_nr_dirty_pages();
728 * Add in the number of potentially dirty inodes, because each inode
729 * write can dirty pagecache in the underlying blockdev.
730 */
731 nr_pages = global_page_state(NR_FILE_DIRTY) +
732 global_page_state(NR_UNSTABLE_NFS) +
733 get_nr_dirty_inodes();
734 739
735 if (nr_pages) { 740 if (nr_pages) {
736 struct wb_writeback_work work = { 741 struct wb_writeback_work work = {
@@ -1076,32 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
1076} 1081}
1077 1082
1078/** 1083/**
1079 * writeback_inodes_sb - writeback dirty inodes from given super_block 1084 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1080 * @sb: the superblock 1085 * @sb: the superblock
1086 * @nr: the number of pages to write
1081 * 1087 *
1082 * Start writeback on some inodes on this super_block. No guarantees are made 1088 * Start writeback on some inodes on this super_block. No guarantees are made
1083 * on how many (if any) will be written, and this function does not wait 1089 * on how many (if any) will be written, and this function does not wait
1084 * for IO completion of submitted IO. The number of pages submitted is 1090 * for IO completion of submitted IO.
1085 * returned.
1086 */ 1091 */
1087void writeback_inodes_sb(struct super_block *sb) 1092void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
1088{ 1093{
1089 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1090 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1091 DECLARE_COMPLETION_ONSTACK(done); 1094 DECLARE_COMPLETION_ONSTACK(done);
1092 struct wb_writeback_work work = { 1095 struct wb_writeback_work work = {
1093 .sb = sb, 1096 .sb = sb,
1094 .sync_mode = WB_SYNC_NONE, 1097 .sync_mode = WB_SYNC_NONE,
1095 .done = &done, 1098 .done = &done,
1099 .nr_pages = nr,
1096 }; 1100 };
1097 1101
1098 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1102 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1099
1100 work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
1101
1102 bdi_queue_work(sb->s_bdi, &work); 1103 bdi_queue_work(sb->s_bdi, &work);
1103 wait_for_completion(&done); 1104 wait_for_completion(&done);
1104} 1105}
1106EXPORT_SYMBOL(writeback_inodes_sb_nr);
1107
1108/**
1109 * writeback_inodes_sb - writeback dirty inodes from given super_block
1110 * @sb: the superblock
1111 *
1112 * Start writeback on some inodes on this super_block. No guarantees are made
1113 * on how many (if any) will be written, and this function does not wait
1114 * for IO completion of submitted IO.
1115 */
1116void writeback_inodes_sb(struct super_block *sb)
1117{
1118 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
1119}
1105EXPORT_SYMBOL(writeback_inodes_sb); 1120EXPORT_SYMBOL(writeback_inodes_sb);
1106 1121
1107/** 1122/**
@@ -1124,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
1124EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1139EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1125 1140
1126/** 1141/**
1142 * writeback_inodes_sb_if_idle - start writeback if none underway
1143 * @sb: the superblock
1144 * @nr: the number of pages to write
1145 *
1146 * Invoke writeback_inodes_sb if no writeback is currently underway.
1147 * Returns 1 if writeback was started, 0 if not.
1148 */
1149int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
1150 unsigned long nr)
1151{
1152 if (!writeback_in_progress(sb->s_bdi)) {
1153 down_read(&sb->s_umount);
1154 writeback_inodes_sb_nr(sb, nr);
1155 up_read(&sb->s_umount);
1156 return 1;
1157 } else
1158 return 0;
1159}
1160EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
1161
1162/**
1127 * sync_inodes_sb - sync sb inode pages 1163 * sync_inodes_sb - sync sb inode pages
1128 * @sb: the superblock 1164 * @sb: the superblock
1129 * 1165 *
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4eba07661e5c..85542a7daf40 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
322 return 0; 322 return 0;
323} 323}
324 324
325static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, 325static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
326 const char *dev_name, void *raw_data, 326 int flags, const char *dev_name, void *raw_data)
327 struct vfsmount *mnt)
328{ 327{
329 return get_sb_single(fs_type, flags, raw_data, 328 return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
330 fuse_ctl_fill_super, mnt);
331} 329}
332 330
333static void fuse_ctl_kill_sb(struct super_block *sb) 331static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
346static struct file_system_type fuse_ctl_fs_type = { 344static struct file_system_type fuse_ctl_fs_type = {
347 .owner = THIS_MODULE, 345 .owner = THIS_MODULE,
348 .name = "fusectl", 346 .name = "fusectl",
349 .get_sb = fuse_ctl_get_sb, 347 .mount = fuse_ctl_mount,
350 .kill_sb = fuse_ctl_kill_sb, 348 .kill_sb = fuse_ctl_kill_sb,
351}; 349};
352 350
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374c..cfce3ad86a92 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1041 return err; 1041 return err;
1042} 1042}
1043 1043
1044static int fuse_get_sb(struct file_system_type *fs_type, 1044static struct dentry *fuse_mount(struct file_system_type *fs_type,
1045 int flags, const char *dev_name, 1045 int flags, const char *dev_name,
1046 void *raw_data, struct vfsmount *mnt) 1046 void *raw_data)
1047{ 1047{
1048 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); 1048 return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
1049} 1049}
1050 1050
1051static void fuse_kill_sb_anon(struct super_block *sb) 1051static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
1065 .owner = THIS_MODULE, 1065 .owner = THIS_MODULE,
1066 .name = "fuse", 1066 .name = "fuse",
1067 .fs_flags = FS_HAS_SUBTYPE, 1067 .fs_flags = FS_HAS_SUBTYPE,
1068 .get_sb = fuse_get_sb, 1068 .mount = fuse_mount,
1069 .kill_sb = fuse_kill_sb_anon, 1069 .kill_sb = fuse_kill_sb_anon,
1070}; 1070};
1071 1071
1072#ifdef CONFIG_BLOCK 1072#ifdef CONFIG_BLOCK
1073static int fuse_get_sb_blk(struct file_system_type *fs_type, 1073static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
1074 int flags, const char *dev_name, 1074 int flags, const char *dev_name,
1075 void *raw_data, struct vfsmount *mnt) 1075 void *raw_data)
1076{ 1076{
1077 return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, 1077 return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
1078 mnt);
1079} 1078}
1080 1079
1081static void fuse_kill_sb_blk(struct super_block *sb) 1080static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
1094static struct file_system_type fuseblk_fs_type = { 1093static struct file_system_type fuseblk_fs_type = {
1095 .owner = THIS_MODULE, 1094 .owner = THIS_MODULE,
1096 .name = "fuseblk", 1095 .name = "fuseblk",
1097 .get_sb = fuse_get_sb_blk, 1096 .mount = fuse_mount_blk,
1098 .kill_sb = fuse_kill_sb_blk, 1097 .kill_sb = fuse_kill_sb_blk,
1099 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1098 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1100}; 1099};
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cade1acbcea9..3eb1393f7b81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1250,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1250} 1250}
1251 1251
1252/** 1252/**
1253 * gfs2_get_sb - Get the GFS2 superblock 1253 * gfs2_mount - Get the GFS2 superblock
1254 * @fs_type: The GFS2 filesystem type 1254 * @fs_type: The GFS2 filesystem type
1255 * @flags: Mount flags 1255 * @flags: Mount flags
1256 * @dev_name: The name of the device 1256 * @dev_name: The name of the device
1257 * @data: The mount arguments 1257 * @data: The mount arguments
1258 * @mnt: The vfsmnt for this mount
1259 * 1258 *
1260 * Q. Why not use get_sb_bdev() ? 1259 * Q. Why not use get_sb_bdev() ?
1261 * A. We need to select one of two root directories to mount, independent 1260 * A. We need to select one of two root directories to mount, independent
@@ -1264,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
1264 * Returns: 0 or -ve on error 1263 * Returns: 0 or -ve on error
1265 */ 1264 */
1266 1265
1267static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1266static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1268 const char *dev_name, void *data, struct vfsmount *mnt) 1267 const char *dev_name, void *data)
1269{ 1268{
1270 struct block_device *bdev; 1269 struct block_device *bdev;
1271 struct super_block *s; 1270 struct super_block *s;
@@ -1279,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1279 1278
1280 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1279 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1281 if (IS_ERR(bdev)) 1280 if (IS_ERR(bdev))
1282 return PTR_ERR(bdev); 1281 return ERR_CAST(bdev);
1283 1282
1284 /* 1283 /*
1285 * once the super is inserted into the list by sget, s_umount 1284 * once the super is inserted into the list by sget, s_umount
@@ -1298,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1298 if (IS_ERR(s)) 1297 if (IS_ERR(s))
1299 goto error_bdev; 1298 goto error_bdev;
1300 1299
1300 if (s->s_root)
1301 close_bdev_exclusive(bdev, mode);
1302
1301 memset(&args, 0, sizeof(args)); 1303 memset(&args, 0, sizeof(args));
1302 args.ar_quota = GFS2_QUOTA_DEFAULT; 1304 args.ar_quota = GFS2_QUOTA_DEFAULT;
1303 args.ar_data = GFS2_DATA_DEFAULT; 1305 args.ar_data = GFS2_DATA_DEFAULT;
@@ -1309,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1309 error = gfs2_mount_args(&args, data); 1311 error = gfs2_mount_args(&args, data);
1310 if (error) { 1312 if (error) {
1311 printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); 1313 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1312 if (s->s_root) 1314 goto error_super;
1313 goto error_super;
1314 deactivate_locked_super(s);
1315 return error;
1316 } 1315 }
1317 1316
1318 if (s->s_root) { 1317 if (s->s_root) {
1319 error = -EBUSY; 1318 error = -EBUSY;
1320 if ((flags ^ s->s_flags) & MS_RDONLY) 1319 if ((flags ^ s->s_flags) & MS_RDONLY)
1321 goto error_super; 1320 goto error_super;
1322 close_bdev_exclusive(bdev, mode);
1323 } else { 1321 } else {
1324 char b[BDEVNAME_SIZE]; 1322 char b[BDEVNAME_SIZE];
1325 1323
@@ -1328,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1328 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1326 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1329 sb_set_blocksize(s, block_size(bdev)); 1327 sb_set_blocksize(s, block_size(bdev));
1330 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); 1328 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
1331 if (error) { 1329 if (error)
1332 deactivate_locked_super(s); 1330 goto error_super;
1333 return error;
1334 }
1335 s->s_flags |= MS_ACTIVE; 1331 s->s_flags |= MS_ACTIVE;
1336 bdev->bd_super = s; 1332 bdev->bd_super = s;
1337 } 1333 }
1338 1334
1339 sdp = s->s_fs_info; 1335 sdp = s->s_fs_info;
1340 mnt->mnt_sb = s;
1341 if (args.ar_meta) 1336 if (args.ar_meta)
1342 mnt->mnt_root = dget(sdp->sd_master_dir); 1337 return dget(sdp->sd_master_dir);
1343 else 1338 else
1344 mnt->mnt_root = dget(sdp->sd_root_dir); 1339 return dget(sdp->sd_root_dir);
1345 return 0;
1346 1340
1347error_super: 1341error_super:
1348 deactivate_locked_super(s); 1342 deactivate_locked_super(s);
1343 return ERR_PTR(error);
1349error_bdev: 1344error_bdev:
1350 close_bdev_exclusive(bdev, mode); 1345 close_bdev_exclusive(bdev, mode);
1351 return error; 1346 return ERR_PTR(error);
1352} 1347}
1353 1348
1354static int set_meta_super(struct super_block *s, void *ptr) 1349static int set_meta_super(struct super_block *s, void *ptr)
@@ -1356,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
1356 return -EINVAL; 1351 return -EINVAL;
1357} 1352}
1358 1353
1359static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, 1354static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
1360 const char *dev_name, void *data, struct vfsmount *mnt) 1355 int flags, const char *dev_name, void *data)
1361{ 1356{
1362 struct super_block *s; 1357 struct super_block *s;
1363 struct gfs2_sbd *sdp; 1358 struct gfs2_sbd *sdp;
@@ -1368,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
1368 if (error) { 1363 if (error) {
1369 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", 1364 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
1370 dev_name, error); 1365 dev_name, error);
1371 return error; 1366 return ERR_PTR(error);
1372 } 1367 }
1373 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, 1368 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
1374 path.dentry->d_inode->i_sb->s_bdev); 1369 path.dentry->d_inode->i_sb->s_bdev);
1375 path_put(&path); 1370 path_put(&path);
1376 if (IS_ERR(s)) { 1371 if (IS_ERR(s)) {
1377 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1372 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
1378 return PTR_ERR(s); 1373 return ERR_CAST(s);
1379 } 1374 }
1380 if ((flags ^ s->s_flags) & MS_RDONLY) { 1375 if ((flags ^ s->s_flags) & MS_RDONLY) {
1381 deactivate_locked_super(s); 1376 deactivate_locked_super(s);
1382 return -EBUSY; 1377 return ERR_PTR(-EBUSY);
1383 } 1378 }
1384 sdp = s->s_fs_info; 1379 sdp = s->s_fs_info;
1385 mnt->mnt_sb = s; 1380 return dget(sdp->sd_master_dir);
1386 mnt->mnt_root = dget(sdp->sd_master_dir);
1387 return 0;
1388} 1381}
1389 1382
1390static void gfs2_kill_sb(struct super_block *sb) 1383static void gfs2_kill_sb(struct super_block *sb)
@@ -1410,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1410struct file_system_type gfs2_fs_type = { 1403struct file_system_type gfs2_fs_type = {
1411 .name = "gfs2", 1404 .name = "gfs2",
1412 .fs_flags = FS_REQUIRES_DEV, 1405 .fs_flags = FS_REQUIRES_DEV,
1413 .get_sb = gfs2_get_sb, 1406 .mount = gfs2_mount,
1414 .kill_sb = gfs2_kill_sb, 1407 .kill_sb = gfs2_kill_sb,
1415 .owner = THIS_MODULE, 1408 .owner = THIS_MODULE,
1416}; 1409};
@@ -1418,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
1418struct file_system_type gfs2meta_fs_type = { 1411struct file_system_type gfs2meta_fs_type = {
1419 .name = "gfs2meta", 1412 .name = "gfs2meta",
1420 .fs_flags = FS_REQUIRES_DEV, 1413 .fs_flags = FS_REQUIRES_DEV,
1421 .get_sb = gfs2_get_sb_meta, 1414 .mount = gfs2_mount_meta,
1422 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1423}; 1416};
1424 1417
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6ee1586f2334..4824c27cebb8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -441,17 +441,16 @@ bail:
441 return res; 441 return res;
442} 442}
443 443
444static int hfs_get_sb(struct file_system_type *fs_type, 444static struct dentry *hfs_mount(struct file_system_type *fs_type,
445 int flags, const char *dev_name, void *data, 445 int flags, const char *dev_name, void *data)
446 struct vfsmount *mnt)
447{ 446{
448 return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); 447 return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
449} 448}
450 449
451static struct file_system_type hfs_fs_type = { 450static struct file_system_type hfs_fs_type = {
452 .owner = THIS_MODULE, 451 .owner = THIS_MODULE,
453 .name = "hfs", 452 .name = "hfs",
454 .get_sb = hfs_get_sb, 453 .mount = hfs_mount,
455 .kill_sb = kill_block_super, 454 .kill_sb = kill_block_super,
456 .fs_flags = FS_REQUIRES_DEV, 455 .fs_flags = FS_REQUIRES_DEV,
457}; 456};
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e318bbc0daf6..9d59c0571f59 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -317,8 +317,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
317 res = hfsplus_rename_cat(inode->i_ino, 317 res = hfsplus_rename_cat(inode->i_ino,
318 dir, &dentry->d_name, 318 dir, &dentry->d_name,
319 sbi->hidden_dir, &str); 319 sbi->hidden_dir, &str);
320 if (!res) 320 if (!res) {
321 inode->i_flags |= S_DEAD; 321 inode->i_flags |= S_DEAD;
322 drop_nlink(inode);
323 }
322 goto out; 324 goto out;
323 } 325 }
324 res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); 326 res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5b4667e08ef7..40a85a3ded6e 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -92,7 +92,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
92 mark_inode_dirty(inode); 92 mark_inode_dirty(inode);
93 93
94out_unlock_inode: 94out_unlock_inode:
95 mutex_lock(&inode->i_mutex); 95 mutex_unlock(&inode->i_mutex);
96out_drop_write: 96out_drop_write:
97 mnt_drop_write(file->f_path.mnt); 97 mnt_drop_write(file->f_path.mnt);
98out: 98out:
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d7536103..52cc746d3ba3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
495 495
496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
497 497
498static int hfsplus_get_sb(struct file_system_type *fs_type, 498static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
499 int flags, const char *dev_name, void *data, 499 int flags, const char *dev_name, void *data)
500 struct vfsmount *mnt)
501{ 500{
502 return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, 501 return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
503 mnt);
504} 502}
505 503
506static struct file_system_type hfsplus_fs_type = { 504static struct file_system_type hfsplus_fs_type = {
507 .owner = THIS_MODULE, 505 .owner = THIS_MODULE,
508 .name = "hfsplus", 506 .name = "hfsplus",
509 .get_sb = hfsplus_get_sb, 507 .mount = hfsplus_mount,
510 .kill_sb = kill_block_super, 508 .kill_sb = kill_block_super,
511 .fs_flags = FS_REQUIRES_DEV, 509 .fs_flags = FS_REQUIRES_DEV,
512}; 510};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cd7c93917cc7..2c0f148a49e6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -962,11 +962,11 @@ out:
962 return err; 962 return err;
963} 963}
964 964
965static int hostfs_read_sb(struct file_system_type *type, 965static struct dentry *hostfs_read_sb(struct file_system_type *type,
966 int flags, const char *dev_name, 966 int flags, const char *dev_name,
967 void *data, struct vfsmount *mnt) 967 void *data)
968{ 968{
969 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); 969 return mount_nodev(type, flags, data, hostfs_fill_sb_common);
970} 970}
971 971
972static void hostfs_kill_sb(struct super_block *s) 972static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
978static struct file_system_type hostfs_type = { 978static struct file_system_type hostfs_type = {
979 .owner = THIS_MODULE, 979 .owner = THIS_MODULE,
980 .name = "hostfs", 980 .name = "hostfs",
981 .get_sb = hostfs_read_sb, 981 .mount = hostfs_read_sb,
982 .kill_sb = hostfs_kill_sb, 982 .kill_sb = hostfs_kill_sb,
983 .fs_flags = 0, 983 .fs_flags = 0,
984}; 984};
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163a..bb69389972eb 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -686,17 +686,16 @@ bail0:
686 return -EINVAL; 686 return -EINVAL;
687} 687}
688 688
689static int hpfs_get_sb(struct file_system_type *fs_type, 689static struct dentry *hpfs_mount(struct file_system_type *fs_type,
690 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 690 int flags, const char *dev_name, void *data)
691{ 691{
692 return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, 692 return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
693 mnt);
694} 693}
695 694
696static struct file_system_type hpfs_fs_type = { 695static struct file_system_type hpfs_fs_type = {
697 .owner = THIS_MODULE, 696 .owner = THIS_MODULE,
698 .name = "hpfs", 697 .name = "hpfs",
699 .get_sb = hpfs_get_sb, 698 .mount = hpfs_mount,
700 .kill_sb = kill_block_super, 699 .kill_sb = kill_block_super,
701 .fs_flags = FS_REQUIRES_DEV, 700 .fs_flags = FS_REQUIRES_DEV,
702}; 701};
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea6140..f702b5f713fc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
748 return(err); 748 return(err);
749} 749}
750 750
751static int hppfs_read_super(struct file_system_type *type, 751static struct dentry *hppfs_read_super(struct file_system_type *type,
752 int flags, const char *dev_name, 752 int flags, const char *dev_name,
753 void *data, struct vfsmount *mnt) 753 void *data)
754{ 754{
755 return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); 755 return mount_nodev(type, flags, data, hppfs_fill_super);
756} 756}
757 757
758static struct file_system_type hppfs_type = { 758static struct file_system_type hppfs_type = {
759 .owner = THIS_MODULE, 759 .owner = THIS_MODULE,
760 .name = "hppfs", 760 .name = "hppfs",
761 .get_sb = hppfs_read_super, 761 .mount = hppfs_read_super,
762 .kill_sb = kill_anon_super, 762 .kill_sb = kill_anon_super,
763 .fs_flags = 0, 763 .fs_flags = 0,
764}; 764};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b14be3f781c7..d6cfac1f0a40 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
896 } 896 }
897} 897}
898 898
899static int hugetlbfs_get_sb(struct file_system_type *fs_type, 899static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
900 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 900 int flags, const char *dev_name, void *data)
901{ 901{
902 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 902 return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
903} 903}
904 904
905static struct file_system_type hugetlbfs_fs_type = { 905static struct file_system_type hugetlbfs_fs_type = {
906 .name = "hugetlbfs", 906 .name = "hugetlbfs",
907 .get_sb = hugetlbfs_get_sb, 907 .mount = hugetlbfs_mount,
908 .kill_sb = kill_litter_super, 908 .kill_sb = kill_litter_super,
909}; 909};
910 910
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b90752d..e43b9a4dbf4e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,5 +106,5 @@ extern void release_open_intent(struct nameidata *);
106 * inode.c 106 * inode.c
107 */ 107 */
108extern int get_nr_dirty_inodes(void); 108extern int get_nr_dirty_inodes(void);
109extern int evict_inodes(struct super_block *); 109extern void evict_inodes(struct super_block *);
110extern int invalidate_inodes(struct super_block *); 110extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f855ea4fc888..e92fdbb3bc3a 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
530 return thaw_super(sb); 530 return thaw_super(sb);
531} 531}
532 532
533static int ioctl_fstrim(struct file *filp, void __user *argp)
534{
535 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
536 struct fstrim_range range;
537 int ret = 0;
538
539 if (!capable(CAP_SYS_ADMIN))
540 return -EPERM;
541
542 /* If filesystem doesn't support trim feature, return. */
543 if (sb->s_op->trim_fs == NULL)
544 return -EOPNOTSUPP;
545
546 /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
547 if (sb->s_bdev == NULL)
548 return -EINVAL;
549
550 if (argp == NULL) {
551 range.start = 0;
552 range.len = ULLONG_MAX;
553 range.minlen = 0;
554 } else if (copy_from_user(&range, argp, sizeof(range)))
555 return -EFAULT;
556
557 ret = sb->s_op->trim_fs(sb, &range);
558 if (ret < 0)
559 return ret;
560
561 if ((argp != NULL) &&
562 (copy_to_user(argp, &range, sizeof(range))))
563 return -EFAULT;
564
565 return 0;
566}
567
533/* 568/*
534 * When you add any new common ioctls to the switches above and below 569 * When you add any new common ioctls to the switches above and below
535 * please update compat_sys_ioctl() too. 570 * please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
580 error = ioctl_fsthaw(filp); 615 error = ioctl_fsthaw(filp);
581 break; 616 break;
582 617
618 case FITRIM:
619 error = ioctl_fstrim(filp, argp);
620 break;
621
583 case FS_IOC_FIEMAP: 622 case FS_IOC_FIEMAP:
584 return ioctl_fiemap(filp, arg); 623 return ioctl_fiemap(filp, arg);
585 624
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 79cf7f616bbe..bfdeb82a53be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1507,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
1507 return inode; 1507 return inode;
1508} 1508}
1509 1509
1510static int isofs_get_sb(struct file_system_type *fs_type, 1510static struct dentry *isofs_mount(struct file_system_type *fs_type,
1511 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1511 int flags, const char *dev_name, void *data)
1512{ 1512{
1513 return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, 1513 return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
1514 mnt);
1515} 1514}
1516 1515
1517static struct file_system_type iso9660_fs_type = { 1516static struct file_system_type iso9660_fs_type = {
1518 .owner = THIS_MODULE, 1517 .owner = THIS_MODULE,
1519 .name = "iso9660", 1518 .name = "iso9660",
1520 .get_sb = isofs_get_sb, 1519 .mount = isofs_mount,
1521 .kill_sb = kill_block_super, 1520 .kill_sb = kill_block_super,
1522 .fs_flags = FS_REQUIRES_DEV, 1521 .fs_flags = FS_REQUIRES_DEV,
1523}; 1522};
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05a38b9c4c0e..e4b87bc1fa56 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
221 goto restart; 221 goto restart;
222 } 222 }
223 if (buffer_locked(bh)) { 223 if (buffer_locked(bh)) {
224 atomic_inc(&bh->b_count); 224 get_bh(bh);
225 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
226 jbd_unlock_bh_state(bh); 226 jbd_unlock_bh_state(bh);
227 wait_on_buffer(bh); 227 wait_on_buffer(bh);
@@ -283,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
283 int ret = 0; 283 int ret = 0;
284 284
285 if (buffer_locked(bh)) { 285 if (buffer_locked(bh)) {
286 atomic_inc(&bh->b_count); 286 get_bh(bh);
287 spin_unlock(&journal->j_list_lock); 287 spin_unlock(&journal->j_list_lock);
288 jbd_unlock_bh_state(bh); 288 jbd_unlock_bh_state(bh);
289 wait_on_buffer(bh); 289 wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 85a6883c0aca..34a4861c14b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -587,13 +587,13 @@ void journal_commit_transaction(journal_t *journal)
587 /* Bump b_count to prevent truncate from stumbling over 587 /* Bump b_count to prevent truncate from stumbling over
588 the shadowed buffer! @@@ This can go if we ever get 588 the shadowed buffer! @@@ This can go if we ever get
589 rid of the BJ_IO/BJ_Shadow pairing of buffers. */ 589 rid of the BJ_IO/BJ_Shadow pairing of buffers. */
590 atomic_inc(&jh2bh(jh)->b_count); 590 get_bh(jh2bh(jh));
591 591
592 /* Make a temporary IO buffer with which to write it out 592 /* Make a temporary IO buffer with which to write it out
593 (this will requeue both the metadata buffer and the 593 (this will requeue both the metadata buffer and the
594 temporary IO buffer). new_bh goes on BJ_IO*/ 594 temporary IO buffer). new_bh goes on BJ_IO*/
595 595
596 set_bit(BH_JWrite, &jh2bh(jh)->b_state); 596 set_buffer_jwrite(jh2bh(jh));
597 /* 597 /*
598 * akpm: journal_write_metadata_buffer() sets 598 * akpm: journal_write_metadata_buffer() sets
599 * new_bh->b_transaction to commit_transaction. 599 * new_bh->b_transaction to commit_transaction.
@@ -603,7 +603,7 @@ void journal_commit_transaction(journal_t *journal)
603 JBUFFER_TRACE(jh, "ph3: write metadata"); 603 JBUFFER_TRACE(jh, "ph3: write metadata");
604 flags = journal_write_metadata_buffer(commit_transaction, 604 flags = journal_write_metadata_buffer(commit_transaction,
605 jh, &new_jh, blocknr); 605 jh, &new_jh, blocknr);
606 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 606 set_buffer_jwrite(jh2bh(new_jh));
607 wbuf[bufs++] = jh2bh(new_jh); 607 wbuf[bufs++] = jh2bh(new_jh);
608 608
609 /* Record the new block's tag in the current descriptor 609 /* Record the new block's tag in the current descriptor
@@ -713,7 +713,7 @@ wait_for_iobuf:
713 shadowed buffer */ 713 shadowed buffer */
714 jh = commit_transaction->t_shadow_list->b_tprev; 714 jh = commit_transaction->t_shadow_list->b_tprev;
715 bh = jh2bh(jh); 715 bh = jh2bh(jh);
716 clear_bit(BH_JWrite, &bh->b_state); 716 clear_buffer_jwrite(bh);
717 J_ASSERT_BH(bh, buffer_jbddirty(bh)); 717 J_ASSERT_BH(bh, buffer_jbddirty(bh));
718 718
719 /* The metadata is now released for reuse, but we need 719 /* The metadata is now released for reuse, but we need
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2c4b1f109da9..da1b5e4ffce1 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -36,6 +36,7 @@
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/ratelimit.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/page.h> 42#include <asm/page.h>
@@ -84,6 +85,7 @@ EXPORT_SYMBOL(journal_force_commit);
84 85
85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 86static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
86static void __journal_abort_soft (journal_t *journal, int errno); 87static void __journal_abort_soft (journal_t *journal, int errno);
88static const char *journal_dev_name(journal_t *journal, char *buffer);
87 89
88/* 90/*
89 * Helper function used to manage commit timeouts 91 * Helper function used to manage commit timeouts
@@ -439,7 +441,7 @@ int __log_start_commit(journal_t *journal, tid_t target)
439 */ 441 */
440 if (!tid_geq(journal->j_commit_request, target)) { 442 if (!tid_geq(journal->j_commit_request, target)) {
441 /* 443 /*
442 * We want a new commit: OK, mark the request and wakup the 444 * We want a new commit: OK, mark the request and wakeup the
443 * commit thread. We do _not_ do the commit ourselves. 445 * commit thread. We do _not_ do the commit ourselves.
444 */ 446 */
445 447
@@ -950,6 +952,8 @@ int journal_create(journal_t *journal)
950 if (err) 952 if (err)
951 return err; 953 return err;
952 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 954 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
955 if (unlikely(!bh))
956 return -ENOMEM;
953 lock_buffer(bh); 957 lock_buffer(bh);
954 memset (bh->b_data, 0, journal->j_blocksize); 958 memset (bh->b_data, 0, journal->j_blocksize);
955 BUFFER_TRACE(bh, "marking dirty"); 959 BUFFER_TRACE(bh, "marking dirty");
@@ -1010,6 +1014,23 @@ void journal_update_superblock(journal_t *journal, int wait)
1010 goto out; 1014 goto out;
1011 } 1015 }
1012 1016
1017 if (buffer_write_io_error(bh)) {
1018 char b[BDEVNAME_SIZE];
1019 /*
1020 * Oh, dear. A previous attempt to write the journal
1021 * superblock failed. This could happen because the
1022 * USB device was yanked out. Or it could happen to
1023 * be a transient write error and maybe the block will
1024 * be remapped. Nothing we can do but to retry the
1025 * write and hope for the best.
1026 */
1027 printk(KERN_ERR "JBD: previous I/O error detected "
1028 "for journal superblock update for %s.\n",
1029 journal_dev_name(journal, b));
1030 clear_buffer_write_io_error(bh);
1031 set_buffer_uptodate(bh);
1032 }
1033
1013 spin_lock(&journal->j_state_lock); 1034 spin_lock(&journal->j_state_lock);
1014 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", 1035 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
1015 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1036 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1021,9 +1042,17 @@ void journal_update_superblock(journal_t *journal, int wait)
1021 1042
1022 BUFFER_TRACE(bh, "marking dirty"); 1043 BUFFER_TRACE(bh, "marking dirty");
1023 mark_buffer_dirty(bh); 1044 mark_buffer_dirty(bh);
1024 if (wait) 1045 if (wait) {
1025 sync_dirty_buffer(bh); 1046 sync_dirty_buffer(bh);
1026 else 1047 if (buffer_write_io_error(bh)) {
1048 char b[BDEVNAME_SIZE];
1049 printk(KERN_ERR "JBD: I/O error detected "
1050 "when updating journal superblock for %s.\n",
1051 journal_dev_name(journal, b));
1052 clear_buffer_write_io_error(bh);
1053 set_buffer_uptodate(bh);
1054 }
1055 } else
1027 write_dirty_buffer(bh, WRITE); 1056 write_dirty_buffer(bh, WRITE);
1028 1057
1029out: 1058out:
@@ -1719,7 +1748,6 @@ static void journal_destroy_journal_head_cache(void)
1719static struct journal_head *journal_alloc_journal_head(void) 1748static struct journal_head *journal_alloc_journal_head(void)
1720{ 1749{
1721 struct journal_head *ret; 1750 struct journal_head *ret;
1722 static unsigned long last_warning;
1723 1751
1724#ifdef CONFIG_JBD_DEBUG 1752#ifdef CONFIG_JBD_DEBUG
1725 atomic_inc(&nr_journal_heads); 1753 atomic_inc(&nr_journal_heads);
@@ -1727,11 +1755,9 @@ static struct journal_head *journal_alloc_journal_head(void)
1727 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1755 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
1728 if (ret == NULL) { 1756 if (ret == NULL) {
1729 jbd_debug(1, "out of memory for journal_head\n"); 1757 jbd_debug(1, "out of memory for journal_head\n");
1730 if (time_after(jiffies, last_warning + 5*HZ)) { 1758 printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1731 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1759 __func__);
1732 __func__); 1760
1733 last_warning = jiffies;
1734 }
1735 while (ret == NULL) { 1761 while (ret == NULL) {
1736 yield(); 1762 yield();
1737 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); 1763 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 81051dafebf5..5b43e96788e6 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -296,10 +296,10 @@ int journal_skip_recovery(journal_t *journal)
296#ifdef CONFIG_JBD_DEBUG 296#ifdef CONFIG_JBD_DEBUG
297 int dropped = info.end_transaction - 297 int dropped = info.end_transaction -
298 be32_to_cpu(journal->j_superblock->s_sequence); 298 be32_to_cpu(journal->j_superblock->s_sequence);
299#endif
300 jbd_debug(1, 299 jbd_debug(1,
301 "JBD: ignoring %d transaction%s from the journal.\n", 300 "JBD: ignoring %d transaction%s from the journal.\n",
302 dropped, (dropped == 1) ? "" : "s"); 301 dropped, (dropped == 1) ? "" : "s");
302#endif
303 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
304 } 304 }
305 305
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5ae71e75a491..846a3f314111 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -293,9 +293,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
293 jbd_free_handle(handle); 293 jbd_free_handle(handle);
294 current->journal_info = NULL; 294 current->journal_info = NULL;
295 handle = ERR_PTR(err); 295 handle = ERR_PTR(err);
296 goto out;
297 } 296 }
298out:
299 return handle; 297 return handle;
300} 298}
301 299
@@ -528,7 +526,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
528 transaction = handle->h_transaction; 526 transaction = handle->h_transaction;
529 journal = transaction->t_journal; 527 journal = transaction->t_journal;
530 528
531 jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); 529 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
532 530
533 JBUFFER_TRACE(jh, "entry"); 531 JBUFFER_TRACE(jh, "entry");
534repeat: 532repeat:
@@ -713,7 +711,7 @@ done:
713 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), 711 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
714 "Possible IO failure.\n"); 712 "Possible IO failure.\n");
715 page = jh2bh(jh)->b_page; 713 page = jh2bh(jh)->b_page;
716 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 714 offset = offset_in_page(jh2bh(jh)->b_data);
717 source = kmap_atomic(page, KM_USER0); 715 source = kmap_atomic(page, KM_USER0);
718 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 716 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
719 kunmap_atomic(source, KM_USER0); 717 kunmap_atomic(source, KM_USER0);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6571a056e55d..6a79fd0a1a32 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
299 transaction->t_chp_stats.cs_forced_to_close++; 299 transaction->t_chp_stats.cs_forced_to_close++;
300 spin_unlock(&journal->j_list_lock); 300 spin_unlock(&journal->j_list_lock);
301 jbd_unlock_bh_state(bh); 301 jbd_unlock_bh_state(bh);
302 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
303 /*
304 * The journal thread is dead; so starting and
305 * waiting for a commit to finish will cause
306 * us to wait for a _very_ long time.
307 */
308 printk(KERN_ERR "JBD2: %s: "
309 "Waiting for Godot: block %llu\n",
310 journal->j_devname,
311 (unsigned long long) bh->b_blocknr);
302 jbd2_log_start_commit(journal, tid); 312 jbd2_log_start_commit(journal, tid);
303 jbd2_log_wait_commit(journal, tid); 313 jbd2_log_wait_commit(journal, tid);
304 ret = 1; 314 ret = 1;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index bc6be8bda1cc..f3ad1598b201 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
26#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
27#include <linux/bio.h> 27#include <linux/bio.h>
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/bitops.h>
29#include <trace/events/jbd2.h> 30#include <trace/events/jbd2.h>
31#include <asm/system.h>
30 32
31/* 33/*
32 * Default IO end handler for temporary BJ_IO buffer_heads. 34 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
201 spin_lock(&journal->j_list_lock); 203 spin_lock(&journal->j_list_lock);
202 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 204 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
203 mapping = jinode->i_vfs_inode->i_mapping; 205 mapping = jinode->i_vfs_inode->i_mapping;
204 jinode->i_flags |= JI_COMMIT_RUNNING; 206 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
205 spin_unlock(&journal->j_list_lock); 207 spin_unlock(&journal->j_list_lock);
206 /* 208 /*
207 * submit the inode data buffers. We use writepage 209 * submit the inode data buffers. We use writepage
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
216 spin_lock(&journal->j_list_lock); 218 spin_lock(&journal->j_list_lock);
217 J_ASSERT(jinode->i_transaction == commit_transaction); 219 J_ASSERT(jinode->i_transaction == commit_transaction);
218 commit_transaction->t_flushed_data_blocks = 1; 220 commit_transaction->t_flushed_data_blocks = 1;
219 jinode->i_flags &= ~JI_COMMIT_RUNNING; 221 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
222 smp_mb__after_clear_bit();
220 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 223 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
221 } 224 }
222 spin_unlock(&journal->j_list_lock); 225 spin_unlock(&journal->j_list_lock);
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
237 /* For locking, see the comment in journal_submit_data_buffers() */ 240 /* For locking, see the comment in journal_submit_data_buffers() */
238 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
239 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 242 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
240 jinode->i_flags |= JI_COMMIT_RUNNING; 243 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
241 spin_unlock(&journal->j_list_lock); 244 spin_unlock(&journal->j_list_lock);
242 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 245 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
243 if (err) { 246 if (err) {
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
253 ret = err; 256 ret = err;
254 } 257 }
255 spin_lock(&journal->j_list_lock); 258 spin_lock(&journal->j_list_lock);
256 jinode->i_flags &= ~JI_COMMIT_RUNNING; 259 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
260 smp_mb__after_clear_bit();
257 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 261 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
258 } 262 }
259 263
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 262419f83d80..538417c1fdbb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
42#include <linux/log2.h> 42#include <linux/log2.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h>
45 46
46#define CREATE_TRACE_POINTS 47#define CREATE_TRACE_POINTS
47#include <trace/events/jbd2.h> 48#include <trace/events/jbd2.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/system.h>
51 53
52EXPORT_SYMBOL(jbd2_journal_extend); 54EXPORT_SYMBOL(jbd2_journal_extend);
53EXPORT_SYMBOL(jbd2_journal_stop); 55EXPORT_SYMBOL(jbd2_journal_stop);
@@ -478,7 +480,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
478 */ 480 */
479 if (!tid_geq(journal->j_commit_request, target)) { 481 if (!tid_geq(journal->j_commit_request, target)) {
480 /* 482 /*
481 * We want a new commit: OK, mark the request and wakup the 483 * We want a new commit: OK, mark the request and wakeup the
482 * commit thread. We do _not_ do the commit ourselves. 484 * commit thread. We do _not_ do the commit ourselves.
483 */ 485 */
484 486
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
2210restart: 2212restart:
2211 spin_lock(&journal->j_list_lock); 2213 spin_lock(&journal->j_list_lock);
2212 /* Is commit writing out inode - we have to wait */ 2214 /* Is commit writing out inode - we have to wait */
2213 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2215 if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
2214 wait_queue_head_t *wq; 2216 wait_queue_head_t *wq;
2215 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2217 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2216 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2218 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3479d6e0a83..6bf0a242613e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -156,6 +156,7 @@ alloc_transaction:
156 */ 156 */
157repeat: 157repeat:
158 read_lock(&journal->j_state_lock); 158 read_lock(&journal->j_state_lock);
159 BUG_ON(journal->j_flags & JBD2_UNMOUNT);
159 if (is_journal_aborted(journal) || 160 if (is_journal_aborted(journal) ||
160 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 161 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
161 read_unlock(&journal->j_state_lock); 162 read_unlock(&journal->j_state_lock);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f538d11c..85c6be2db02f 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
23static inline struct jffs2_inode_cache * 23static inline struct jffs2_inode_cache *
24first_inode_chain(int *i, struct jffs2_sb_info *c) 24first_inode_chain(int *i, struct jffs2_sb_info *c)
25{ 25{
26 for (; *i < INOCACHE_HASHSIZE; (*i)++) { 26 for (; *i < c->inocache_hashsize; (*i)++) {
27 if (c->inocache_list[*i]) 27 if (c->inocache_list[*i])
28 return c->inocache_list[*i]; 28 return c->inocache_list[*i];
29 } 29 }
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5694c1..de4247021d25 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
103 spin_unlock(&jffs2_compressor_list_lock); 103 spin_unlock(&jffs2_compressor_list_lock);
104 *datalen = orig_slen; 104 *datalen = orig_slen;
105 *cdatalen = orig_dlen; 105 *cdatalen = orig_dlen;
106 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL); 106 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
107 spin_lock(&jffs2_compressor_list_lock); 107 spin_lock(&jffs2_compressor_list_lock);
108 this->usecount--; 108 this->usecount--;
109 if (!compr_ret) { 109 if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
152 spin_unlock(&jffs2_compressor_list_lock); 152 spin_unlock(&jffs2_compressor_list_lock);
153 *datalen = orig_slen; 153 *datalen = orig_slen;
154 *cdatalen = orig_dlen; 154 *cdatalen = orig_dlen;
155 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL); 155 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
156 spin_lock(&jffs2_compressor_list_lock); 156 spin_lock(&jffs2_compressor_list_lock);
157 this->usecount--; 157 this->usecount--;
158 if (!compr_ret) { 158 if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
220 if (comprtype == this->compr) { 220 if (comprtype == this->compr) {
221 this->usecount++; 221 this->usecount++;
222 spin_unlock(&jffs2_compressor_list_lock); 222 spin_unlock(&jffs2_compressor_list_lock);
223 ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL); 223 ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
224 spin_lock(&jffs2_compressor_list_lock); 224 spin_lock(&jffs2_compressor_list_lock);
225 if (ret) { 225 if (ret) {
226 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret); 226 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a9106fd9..13bb7597ab39 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
49 char *name; 49 char *name;
50 char compr; /* JFFS2_COMPR_XXX */ 50 char compr; /* JFFS2_COMPR_XXX */
51 int (*compress)(unsigned char *data_in, unsigned char *cpage_out, 51 int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
52 uint32_t *srclen, uint32_t *destlen, void *model); 52 uint32_t *srclen, uint32_t *destlen);
53 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out, 53 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
54 uint32_t cdatalen, uint32_t datalen, void *model); 54 uint32_t cdatalen, uint32_t datalen);
55 int usecount; 55 int usecount;
56 int disabled; /* if set the compressor won't compress */ 56 int disabled; /* if set the compressor won't compress */
57 unsigned char *compr_buf; /* used by size compr. mode */ 57 unsigned char *compr_buf; /* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c98eb..af186ee674d8 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
42} 42}
43 43
44static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out, 44static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
45 uint32_t *sourcelen, uint32_t *dstlen, void *model) 45 uint32_t *sourcelen, uint32_t *dstlen)
46{ 46{
47 size_t compress_size; 47 size_t compress_size;
48 int ret; 48 int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
67} 67}
68 68
69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, 69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
70 uint32_t srclen, uint32_t destlen, void *model) 70 uint32_t srclen, uint32_t destlen)
71{ 71{
72 size_t dl = destlen; 72 size_t dl = destlen;
73 int ret; 73 int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9ef5f7..16a5047903a6 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
31/* _compress returns the compressed size, -1 if bigger */ 31/* _compress returns the compressed size, -1 if bigger */
32static int jffs2_rtime_compress(unsigned char *data_in, 32static int jffs2_rtime_compress(unsigned char *data_in,
33 unsigned char *cpage_out, 33 unsigned char *cpage_out,
34 uint32_t *sourcelen, uint32_t *dstlen, 34 uint32_t *sourcelen, uint32_t *dstlen)
35 void *model)
36{ 35{
37 short positions[256]; 36 short positions[256];
38 int outpos = 0; 37 int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
73 72
74static int jffs2_rtime_decompress(unsigned char *data_in, 73static int jffs2_rtime_decompress(unsigned char *data_in,
75 unsigned char *cpage_out, 74 unsigned char *cpage_out,
76 uint32_t srclen, uint32_t destlen, 75 uint32_t srclen, uint32_t destlen)
77 void *model)
78{ 76{
79 short positions[256]; 77 short positions[256];
80 int outpos = 0; 78 int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f763373..9e7cec808c4c 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
298#if 0 298#if 0
299/* _compress returns the compressed size, -1 if bigger */ 299/* _compress returns the compressed size, -1 if bigger */
300int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out, 300int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
301 uint32_t *sourcelen, uint32_t *dstlen, void *model) 301 uint32_t *sourcelen, uint32_t *dstlen)
302{ 302{
303 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in, 303 return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
304 cpage_out, sourcelen, dstlen); 304 cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
306#endif 306#endif
307static int jffs2_dynrubin_compress(unsigned char *data_in, 307static int jffs2_dynrubin_compress(unsigned char *data_in,
308 unsigned char *cpage_out, 308 unsigned char *cpage_out,
309 uint32_t *sourcelen, uint32_t *dstlen, 309 uint32_t *sourcelen, uint32_t *dstlen)
310 void *model)
311{ 310{
312 int bits[8]; 311 int bits[8];
313 unsigned char histo[256]; 312 unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
387 386
388static int jffs2_rubinmips_decompress(unsigned char *data_in, 387static int jffs2_rubinmips_decompress(unsigned char *data_in,
389 unsigned char *cpage_out, 388 unsigned char *cpage_out,
390 uint32_t sourcelen, uint32_t dstlen, 389 uint32_t sourcelen, uint32_t dstlen)
391 void *model)
392{ 390{
393 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, 391 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
394 cpage_out, sourcelen, dstlen); 392 cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
397 395
398static int jffs2_dynrubin_decompress(unsigned char *data_in, 396static int jffs2_dynrubin_decompress(unsigned char *data_in,
399 unsigned char *cpage_out, 397 unsigned char *cpage_out,
400 uint32_t sourcelen, uint32_t dstlen, 398 uint32_t sourcelen, uint32_t dstlen)
401 void *model)
402{ 399{
403 int bits[8]; 400 int bits[8];
404 int c; 401 int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45de6f81..fd05a0b9431d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
68 68
69static int jffs2_zlib_compress(unsigned char *data_in, 69static int jffs2_zlib_compress(unsigned char *data_in,
70 unsigned char *cpage_out, 70 unsigned char *cpage_out,
71 uint32_t *sourcelen, uint32_t *dstlen, 71 uint32_t *sourcelen, uint32_t *dstlen)
72 void *model)
73{ 72{
74 int ret; 73 int ret;
75 74
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
136 135
137static int jffs2_zlib_decompress(unsigned char *data_in, 136static int jffs2_zlib_decompress(unsigned char *data_in,
138 unsigned char *cpage_out, 137 unsigned char *cpage_out,
139 uint32_t srclen, uint32_t destlen, 138 uint32_t srclen, uint32_t destlen)
140 void *model)
141{ 139{
142 int ret; 140 int ret;
143 int wbits = MAX_WBITS; 141 int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 79121aa5858b..92978658ed18 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
367 } 367 }
368 368
369 /* We use f->target field to store the target path. */ 369 /* We use f->target field to store the target path. */
370 f->target = kmalloc(targetlen + 1, GFP_KERNEL); 370 f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
371 if (!f->target) { 371 if (!f->target) {
372 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1); 372 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
373 mutex_unlock(&f->sem); 373 mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
376 goto fail; 376 goto fail;
377 } 377 }
378 378
379 memcpy(f->target, target, targetlen + 1);
380 D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target)); 379 D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
381 380
382 /* No data here. Only a metadata node, which will be 381 /* No data here. Only a metadata node, which will be
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961f617b..e513f1913c15 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
151 } 151 }
152 152
153 /* Be nice */ 153 /* Be nice */
154 yield(); 154 cond_resched();
155 mutex_lock(&c->erase_free_sem); 155 mutex_lock(&c->erase_free_sem);
156 spin_lock(&c->erase_completion_lock); 156 spin_lock(&c->erase_completion_lock);
157 } 157 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06e6fca..e896e67767eb 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
474 return inode; 474 return inode;
475} 475}
476 476
477static int calculate_inocache_hashsize(uint32_t flash_size)
478{
479 /*
480 * Pick a inocache hash size based on the size of the medium.
481 * Count how many megabytes we're dealing with, apply a hashsize twice
482 * that size, but rounding down to the usual big powers of 2. And keep
483 * to sensible bounds.
484 */
485
486 int size_mb = flash_size / 1024 / 1024;
487 int hashsize = (size_mb * 2) & ~0x3f;
488
489 if (hashsize < INOCACHE_HASHSIZE_MIN)
490 return INOCACHE_HASHSIZE_MIN;
491 if (hashsize > INOCACHE_HASHSIZE_MAX)
492 return INOCACHE_HASHSIZE_MAX;
493
494 return hashsize;
495}
477 496
478int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) 497int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
479{ 498{
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
520 if (ret) 539 if (ret)
521 return ret; 540 return ret;
522 541
523 c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL); 542 c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
543 c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
524 if (!c->inocache_list) { 544 if (!c->inocache_list) {
525 ret = -ENOMEM; 545 ret = -ENOMEM;
526 goto out_wbuf; 546 goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a79452497..31dce611337c 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
219 if (!list_empty(&c->erase_complete_list) || 219 if (!list_empty(&c->erase_complete_list) ||
220 !list_empty(&c->erase_pending_list)) { 220 !list_empty(&c->erase_pending_list)) {
221 spin_unlock(&c->erase_completion_lock); 221 spin_unlock(&c->erase_completion_lock);
222 mutex_unlock(&c->alloc_sem);
222 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n")); 223 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
223 if (jffs2_erase_pending_blocks(c, 1)) { 224 if (jffs2_erase_pending_blocks(c, 1))
224 mutex_unlock(&c->alloc_sem);
225 return 0; 225 return 0;
226 } 226
227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n")); 227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228 spin_lock(&c->erase_completion_lock); 228 spin_lock(&c->erase_completion_lock);
229 mutex_lock(&c->alloc_sem);
229 } 230 }
230 231
231 /* First, work out which block we're garbage-collecting */ 232 /* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc89add1..f864005de64c 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
100 wait_queue_head_t erase_wait; /* For waiting for erases to complete */ 100 wait_queue_head_t erase_wait; /* For waiting for erases to complete */
101 101
102 wait_queue_head_t inocache_wq; 102 wait_queue_head_t inocache_wq;
103 int inocache_hashsize;
103 struct jffs2_inode_cache **inocache_list; 104 struct jffs2_inode_cache **inocache_list;
104 spinlock_t inocache_lock; 105 spinlock_t inocache_lock;
105 106
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd138469..5e03233c2363 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
420{ 420{
421 struct jffs2_inode_cache *ret; 421 struct jffs2_inode_cache *ret;
422 422
423 ret = c->inocache_list[ino % INOCACHE_HASHSIZE]; 423 ret = c->inocache_list[ino % c->inocache_hashsize];
424 while (ret && ret->ino < ino) { 424 while (ret && ret->ino < ino) {
425 ret = ret->next; 425 ret = ret->next;
426 } 426 }
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
441 441
442 dbg_inocache("add %p (ino #%u)\n", new, new->ino); 442 dbg_inocache("add %p (ino #%u)\n", new, new->ino);
443 443
444 prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE]; 444 prev = &c->inocache_list[new->ino % c->inocache_hashsize];
445 445
446 while ((*prev) && (*prev)->ino < new->ino) { 446 while ((*prev) && (*prev)->ino < new->ino) {
447 prev = &(*prev)->next; 447 prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
462 dbg_inocache("del %p (ino #%u)\n", old, old->ino); 462 dbg_inocache("del %p (ino #%u)\n", old, old->ino);
463 spin_lock(&c->inocache_lock); 463 spin_lock(&c->inocache_lock);
464 464
465 prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE]; 465 prev = &c->inocache_list[old->ino % c->inocache_hashsize];
466 466
467 while ((*prev) && (*prev)->ino < old->ino) { 467 while ((*prev) && (*prev)->ino < old->ino) {
468 prev = &(*prev)->next; 468 prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
487 int i; 487 int i;
488 struct jffs2_inode_cache *this, *next; 488 struct jffs2_inode_cache *this, *next;
489 489
490 for (i=0; i<INOCACHE_HASHSIZE; i++) { 490 for (i=0; i < c->inocache_hashsize; i++) {
491 this = c->inocache_list[i]; 491 this = c->inocache_list[i];
492 while (this) { 492 while (this) {
493 next = this->next; 493 next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a91691052..5a53d9bdb2b5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
199#define RAWNODE_CLASS_XATTR_DATUM 1 199#define RAWNODE_CLASS_XATTR_DATUM 1
200#define RAWNODE_CLASS_XATTR_REF 2 200#define RAWNODE_CLASS_XATTR_REF 2
201 201
202#define INOCACHE_HASHSIZE 128 202#define INOCACHE_HASHSIZE_MIN 128
203#define INOCACHE_HASHSIZE_MAX 1024
203 204
204#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size) 205#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
205 206
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d1cc36..b632dddcb482 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
20#include "summary.h" 20#include "summary.h"
21#include "debug.h" 21#include "debug.h"
22 22
23#define DEFAULT_EMPTY_SCAN_SIZE 1024 23#define DEFAULT_EMPTY_SCAN_SIZE 256
24 24
25#define noisy_printk(noise, args...) do { \ 25#define noisy_printk(noise, args...) do { \
26 if (*(noise)) { \ 26 if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
435 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) { 435 unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
436 struct jffs2_unknown_node *node; 436 struct jffs2_unknown_node *node;
437 struct jffs2_unknown_node crcnode; 437 struct jffs2_unknown_node crcnode;
438 uint32_t ofs, prevofs; 438 uint32_t ofs, prevofs, max_ofs;
439 uint32_t hdr_crc, buf_ofs, buf_len; 439 uint32_t hdr_crc, buf_ofs, buf_len;
440 int err; 440 int err;
441 int noise = 0; 441 int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
550 550
551 /* We temporarily use 'ofs' as a pointer into the buffer/jeb */ 551 /* We temporarily use 'ofs' as a pointer into the buffer/jeb */
552 ofs = 0; 552 ofs = 0;
553 553 max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
554 /* Scan only 4KiB of 0xFF before declaring it's empty */ 554 /* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
555 while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF) 555 while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
556 ofs += 4; 556 ofs += 4;
557 557
558 if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) { 558 if (ofs == max_ofs) {
559#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 559#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
560 if (jffs2_cleanmarker_oob(c)) { 560 if (jffs2_cleanmarker_oob(c)) {
561 /* scan oob, take care of cleanmarker */ 561 /* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b9..c86041b866a4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
179 return ret; 179 return ret;
180} 180}
181 181
182static int jffs2_get_sb(struct file_system_type *fs_type, 182static struct dentry *jffs2_mount(struct file_system_type *fs_type,
183 int flags, const char *dev_name, 183 int flags, const char *dev_name,
184 void *data, struct vfsmount *mnt) 184 void *data)
185{ 185{
186 return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, 186 return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
187 mnt);
188} 187}
189 188
190static void jffs2_put_super (struct super_block *sb) 189static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
229static struct file_system_type jffs2_fs_type = { 228static struct file_system_type jffs2_fs_type = {
230 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
231 .name = "jffs2", 230 .name = "jffs2",
232 .get_sb = jffs2_get_sb, 231 .mount = jffs2_mount,
233 .kill_sb = jffs2_kill_sb, 232 .kill_sb = jffs2_kill_sb,
234}; 233};
235 234
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629e..0669fc1cc3bf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
583 return 0; 583 return 0;
584} 584}
585 585
586static int jfs_get_sb(struct file_system_type *fs_type, 586static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
587 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 587 int flags, const char *dev_name, void *data)
588{ 588{
589 return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, 589 return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
590 mnt);
591} 590}
592 591
593static int jfs_sync_fs(struct super_block *sb, int wait) 592static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
770static struct file_system_type jfs_fs_type = { 769static struct file_system_type jfs_fs_type = {
771 .owner = THIS_MODULE, 770 .owner = THIS_MODULE,
772 .name = "jfs", 771 .name = "jfs",
773 .get_sb = jfs_get_sb, 772 .mount = jfs_do_mount,
774 .kill_sb = kill_block_super, 773 .kill_sb = kill_block_super,
775 .fs_flags = FS_REQUIRES_DEV, 774 .fs_flags = FS_REQUIRES_DEV,
776}; 775};
diff --git a/fs/libfs.c b/fs/libfs.c
index 304a5132ca27..a3accdf528ad 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 201 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
202 * will never be mountable) 202 * will never be mountable)
203 */ 203 */
204int get_sb_pseudo(struct file_system_type *fs_type, char *name, 204struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
205 const struct super_operations *ops, unsigned long magic, 205 const struct super_operations *ops, unsigned long magic)
206 struct vfsmount *mnt)
207{ 206{
208 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 207 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
209 struct dentry *dentry; 208 struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
211 struct qstr d_name = {.name = name, .len = strlen(name)}; 210 struct qstr d_name = {.name = name, .len = strlen(name)};
212 211
213 if (IS_ERR(s)) 212 if (IS_ERR(s))
214 return PTR_ERR(s); 213 return ERR_CAST(s);
215 214
216 s->s_flags = MS_NOUSER; 215 s->s_flags = MS_NOUSER;
217 s->s_maxbytes = MAX_LFS_FILESIZE; 216 s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
241 d_instantiate(dentry, root); 240 d_instantiate(dentry, root);
242 s->s_root = dentry; 241 s->s_root = dentry;
243 s->s_flags |= MS_ACTIVE; 242 s->s_flags |= MS_ACTIVE;
244 simple_set_mnt(mnt, s); 243 return dget(s->s_root);
245 return 0;
246 244
247Enomem: 245Enomem:
248 deactivate_locked_super(s); 246 deactivate_locked_super(s);
249 return -ENOMEM; 247 return ERR_PTR(-ENOMEM);
250} 248}
251 249
252int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 250int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
951EXPORT_SYMBOL(dcache_dir_open); 949EXPORT_SYMBOL(dcache_dir_open);
952EXPORT_SYMBOL(dcache_readdir); 950EXPORT_SYMBOL(dcache_readdir);
953EXPORT_SYMBOL(generic_read_dir); 951EXPORT_SYMBOL(generic_read_dir);
954EXPORT_SYMBOL(get_sb_pseudo); 952EXPORT_SYMBOL(mount_pseudo);
955EXPORT_SYMBOL(simple_write_begin); 953EXPORT_SYMBOL(simple_write_begin);
956EXPORT_SYMBOL(simple_write_end); 954EXPORT_SYMBOL(simple_write_end);
957EXPORT_SYMBOL(simple_dir_inode_operations); 955EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a3040..92ca6fbe09bd 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
298 return sync_request(page, bdev, WRITE); 298 return sync_request(page, bdev, WRITE);
299} 299}
300 300
301static void bdev_put_device(struct super_block *sb) 301static void bdev_put_device(struct logfs_super *s)
302{ 302{
303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); 303 close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs) 306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
320 .put_device = bdev_put_device, 320 .put_device = bdev_put_device,
321}; 321};
322 322
323int logfs_get_sb_bdev(struct file_system_type *type, int flags, 323int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
324 const char *devname, struct vfsmount *mnt) 324 const char *devname)
325{ 325{
326 struct block_device *bdev; 326 struct block_device *bdev;
327 327
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { 332 if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
333 int mtdnr = MINOR(bdev->bd_dev); 333 int mtdnr = MINOR(bdev->bd_dev);
334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); 334 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
335 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 335 return logfs_get_sb_mtd(p, mtdnr);
336 } 336 }
337 337
338 return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt); 338 p->s_bdev = bdev;
339 p->s_mtd = NULL;
340 p->s_devops = &bd_devops;
341 return 0;
339} 342}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4b..7466e9dcc8c5 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 230 __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
231} 231}
232 232
233static void mtd_put_device(struct super_block *sb) 233static void mtd_put_device(struct logfs_super *s)
234{ 234{
235 put_mtd_device(logfs_super(sb)->s_mtd); 235 put_mtd_device(s->s_mtd);
236} 236}
237 237
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs) 238static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
265 .put_device = mtd_put_device, 265 .put_device = mtd_put_device,
266}; 266};
267 267
268int logfs_get_sb_mtd(struct file_system_type *type, int flags, 268int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
269 int mtdnr, struct vfsmount *mnt)
270{ 269{
271 struct mtd_info *mtd; 270 struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
272 const struct logfs_device_ops *devops = &mtd_devops;
273
274 mtd = get_mtd_device(NULL, mtdnr);
275 if (IS_ERR(mtd)) 271 if (IS_ERR(mtd))
276 return PTR_ERR(mtd); 272 return PTR_ERR(mtd);
277 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); 273
274 s->s_bdev = NULL;
275 s->s_mtd = mtd;
276 s->s_devops = &mtd_devops;
277 return 0;
278} 278}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d243..cd51a36b37f0 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
136 int (*erase_segment)(struct logfs_area *area); 136 int (*erase_segment)(struct logfs_area *area);
137}; 137};
138 138
139struct logfs_super; /* forward */
139/** 140/**
140 * struct logfs_device_ops - device access operations 141 * struct logfs_device_ops - device access operations
141 * 142 *
@@ -156,7 +157,7 @@ struct logfs_device_ops {
156 int ensure_write); 157 int ensure_write);
157 int (*can_write_buf)(struct super_block *sb, u64 ofs); 158 int (*can_write_buf)(struct super_block *sb, u64 ofs);
158 void (*sync)(struct super_block *sb); 159 void (*sync)(struct super_block *sb);
159 void (*put_device)(struct super_block *sb); 160 void (*put_device)(struct logfs_super *s);
160}; 161};
161 162
162/** 163/**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
471 472
472/* dev_bdev.c */ 473/* dev_bdev.c */
473#ifdef CONFIG_BLOCK 474#ifdef CONFIG_BLOCK
474int logfs_get_sb_bdev(struct file_system_type *type, int flags, 475int logfs_get_sb_bdev(struct logfs_super *s,
475 const char *devname, struct vfsmount *mnt); 476 struct file_system_type *type,
477 const char *devname);
476#else 478#else
477static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags, 479static inline int logfs_get_sb_bdev(struct logfs_super *s,
478 const char *devname, struct vfsmount *mnt) 480 struct file_system_type *type,
481 const char *devname)
479{ 482{
480 return -ENODEV; 483 return -ENODEV;
481} 484}
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
483 486
484/* dev_mtd.c */ 487/* dev_mtd.c */
485#ifdef CONFIG_MTD 488#ifdef CONFIG_MTD
486int logfs_get_sb_mtd(struct file_system_type *type, int flags, 489int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
487 int mtdnr, struct vfsmount *mnt);
488#else 490#else
489static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags, 491static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
490 int mtdnr, struct vfsmount *mnt)
491{ 492{
492 return -ENODEV; 493 return -ENODEV;
493} 494}
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
619void logfs_crash_dump(struct super_block *sb); 620void logfs_crash_dump(struct super_block *sb);
620void *memchr_inv(const void *s, int c, size_t n); 621void *memchr_inv(const void *s, int c, size_t n);
621int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); 622int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
622int logfs_get_sb_device(struct file_system_type *type, int flags,
623 struct mtd_info *mtd, struct block_device *bdev,
624 const struct logfs_device_ops *devops, struct vfsmount *mnt);
625int logfs_check_ds(struct logfs_disk_super *ds); 623int logfs_check_ds(struct logfs_disk_super *ds);
626int logfs_write_sb(struct super_block *sb); 624int logfs_write_sb(struct super_block *sb);
627 625
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d81..33435e4b14d2 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
325 return 0; 325 return 0;
326} 326}
327 327
328static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) 328static int logfs_get_sb_final(struct super_block *sb)
329{ 329{
330 struct logfs_super *super = logfs_super(sb); 330 struct logfs_super *super = logfs_super(sb);
331 struct inode *rootdir; 331 struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
356 } 356 }
357 357
358 log_super("LogFS: Finished mounting\n"); 358 log_super("LogFS: Finished mounting\n");
359 simple_set_mnt(mnt, sb);
360 return 0; 359 return 0;
361 360
362fail: 361fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
529 logfs_cleanup_rw(sb); 528 logfs_cleanup_rw(sb);
530 if (super->s_erase_page) 529 if (super->s_erase_page)
531 __free_page(super->s_erase_page); 530 __free_page(super->s_erase_page);
532 super->s_devops->put_device(sb); 531 super->s_devops->put_device(super);
533 logfs_mempool_destroy(super->s_btree_pool); 532 logfs_mempool_destroy(super->s_btree_pool);
534 logfs_mempool_destroy(super->s_alias_pool); 533 logfs_mempool_destroy(super->s_alias_pool);
535 kfree(super); 534 kfree(super);
536 log_super("LogFS: Finished unmounting\n"); 535 log_super("LogFS: Finished unmounting\n");
537} 536}
538 537
539int logfs_get_sb_device(struct file_system_type *type, int flags, 538static struct dentry *logfs_get_sb_device(struct logfs_super *super,
540 struct mtd_info *mtd, struct block_device *bdev, 539 struct file_system_type *type, int flags)
541 const struct logfs_device_ops *devops, struct vfsmount *mnt)
542{ 540{
543 struct logfs_super *super;
544 struct super_block *sb; 541 struct super_block *sb;
545 int err = -ENOMEM; 542 int err = -ENOMEM;
546 static int mount_count; 543 static int mount_count;
547 544
548 log_super("LogFS: Start mount %x\n", mount_count++); 545 log_super("LogFS: Start mount %x\n", mount_count++);
549 super = kzalloc(sizeof(*super), GFP_KERNEL);
550 if (!super)
551 goto err0;
552 546
553 super->s_mtd = mtd;
554 super->s_bdev = bdev;
555 err = -EINVAL; 547 err = -EINVAL;
556 sb = sget(type, logfs_sb_test, logfs_sb_set, super); 548 sb = sget(type, logfs_sb_test, logfs_sb_set, super);
557 if (IS_ERR(sb)) 549 if (IS_ERR(sb)) {
558 goto err0; 550 super->s_devops->put_device(super);
551 kfree(super);
552 return ERR_CAST(sb);
553 }
559 554
560 if (sb->s_root) { 555 if (sb->s_root) {
561 /* Device is already in use */ 556 /* Device is already in use */
562 err = 0; 557 super->s_devops->put_device(super);
563 simple_set_mnt(mnt, sb); 558 kfree(super);
564 goto err0; 559 return dget(sb->s_root);
565 } 560 }
566 561
567 super->s_devops = devops;
568
569 /* 562 /*
570 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache 563 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
571 * only covers 16TB and the upper 8TB are used for indirect blocks. 564 * only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
581 goto err1; 574 goto err1;
582 575
583 sb->s_flags |= MS_ACTIVE; 576 sb->s_flags |= MS_ACTIVE;
584 err = logfs_get_sb_final(sb, mnt); 577 err = logfs_get_sb_final(sb);
585 if (err) 578 if (err) {
586 deactivate_locked_super(sb); 579 deactivate_locked_super(sb);
587 return err; 580 return ERR_PTR(err);
581 }
582 return dget(sb->s_root);
588 583
589err1: 584err1:
590 /* no ->s_root, no ->put_super() */ 585 /* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
592 iput(super->s_segfile_inode); 587 iput(super->s_segfile_inode);
593 iput(super->s_mapping_inode); 588 iput(super->s_mapping_inode);
594 deactivate_locked_super(sb); 589 deactivate_locked_super(sb);
595 return err; 590 return ERR_PTR(err);
596err0:
597 kfree(super);
598 //devops->put_device(sb);
599 return err;
600} 591}
601 592
602static int logfs_get_sb(struct file_system_type *type, int flags, 593static struct dentry *logfs_mount(struct file_system_type *type, int flags,
603 const char *devname, void *data, struct vfsmount *mnt) 594 const char *devname, void *data)
604{ 595{
605 ulong mtdnr; 596 ulong mtdnr;
597 struct logfs_super *super;
598 int err;
606 599
607 if (!devname) 600 super = kzalloc(sizeof(*super), GFP_KERNEL);
608 return logfs_get_sb_bdev(type, flags, devname, mnt); 601 if (!super)
609 if (strncmp(devname, "mtd", 3)) 602 return ERR_PTR(-ENOMEM);
610 return logfs_get_sb_bdev(type, flags, devname, mnt);
611 603
612 { 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3))
607 err = logfs_get_sb_bdev(super, type, devname);
608 else {
613 char *garbage; 609 char *garbage;
614 mtdnr = simple_strtoul(devname+3, &garbage, 0); 610 mtdnr = simple_strtoul(devname+3, &garbage, 0);
615 if (*garbage) 611 if (*garbage)
616 return -EINVAL; 612 err = -EINVAL;
613 else
614 err = logfs_get_sb_mtd(super, mtdnr);
615 }
616
617 if (err) {
618 kfree(super);
619 return ERR_PTR(err);
617 } 620 }
618 621
619 return logfs_get_sb_mtd(type, flags, mtdnr, mnt); 622 return logfs_get_sb_device(super, type, flags);
620} 623}
621 624
622static struct file_system_type logfs_fs_type = { 625static struct file_system_type logfs_fs_type = {
623 .owner = THIS_MODULE, 626 .owner = THIS_MODULE,
624 .name = "logfs", 627 .name = "logfs",
625 .get_sb = logfs_get_sb, 628 .mount = logfs_mount,
626 .kill_sb = logfs_kill_sb, 629 .kill_sb = logfs_kill_sb,
627 .fs_flags = FS_REQUIRES_DEV, 630 .fs_flags = FS_REQUIRES_DEV,
628 631
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8fb..fb2020858a34 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
614 V2_minix_truncate(inode); 614 V2_minix_truncate(inode);
615} 615}
616 616
617static int minix_get_sb(struct file_system_type *fs_type, 617static struct dentry *minix_mount(struct file_system_type *fs_type,
618 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 618 int flags, const char *dev_name, void *data)
619{ 619{
620 return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super, 620 return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
621 mnt);
622} 621}
623 622
624static struct file_system_type minix_fs_type = { 623static struct file_system_type minix_fs_type = {
625 .owner = THIS_MODULE, 624 .owner = THIS_MODULE,
626 .name = "minix", 625 .name = "minix",
627 .get_sb = minix_get_sb, 626 .mount = minix_mount,
628 .kill_sb = kill_block_super, 627 .kill_sb = kill_block_super,
629 .fs_flags = FS_REQUIRES_DEV, 628 .fs_flags = FS_REQUIRES_DEV,
630}; 629};
diff --git a/fs/namei.c b/fs/namei.c
index f7dbc06857ab..5362af9b7372 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1574,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
1574 */ 1574 */
1575 if (will_truncate) 1575 if (will_truncate)
1576 mnt_drop_write(nd->path.mnt); 1576 mnt_drop_write(nd->path.mnt);
1577 path_put(&nd->path);
1577 return filp; 1578 return filp;
1578 1579
1579exit: 1580exit:
@@ -1675,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1675 } 1676 }
1676 filp = nameidata_to_filp(nd); 1677 filp = nameidata_to_filp(nd);
1677 mnt_drop_write(nd->path.mnt); 1678 mnt_drop_write(nd->path.mnt);
1679 path_put(&nd->path);
1678 if (!IS_ERR(filp)) { 1680 if (!IS_ERR(filp)) {
1679 error = ima_file_check(filp, acc_mode); 1681 error = ima_file_check(filp, acc_mode);
1680 if (error) { 1682 if (error) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26aca..d290545aa0c4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1020,16 +1020,16 @@ out:
1020 return result; 1020 return result;
1021} 1021}
1022 1022
1023static int ncp_get_sb(struct file_system_type *fs_type, 1023static struct dentry *ncp_mount(struct file_system_type *fs_type,
1024 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1024 int flags, const char *dev_name, void *data)
1025{ 1025{
1026 return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); 1026 return mount_nodev(fs_type, flags, data, ncp_fill_super);
1027} 1027}
1028 1028
1029static struct file_system_type ncp_fs_type = { 1029static struct file_system_type ncp_fs_type = {
1030 .owner = THIS_MODULE, 1030 .owner = THIS_MODULE,
1031 .name = "ncpfs", 1031 .name = "ncpfs",
1032 .get_sb = ncp_get_sb, 1032 .mount = ncp_mount,
1033 .kill_sb = kill_anon_super, 1033 .kill_sb = kill_anon_super,
1034 .fs_flags = FS_BINARY_MOUNTDATA, 1034 .fs_flags = FS_BINARY_MOUNTDATA,
1035}; 1035};
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a80961677..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -873,7 +873,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
873 dreq->inode = inode; 873 dreq->inode = inode;
874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 874 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx); 875 dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
876 if (dreq->l_ctx != NULL) 876 if (dreq->l_ctx == NULL)
877 goto out_release; 877 goto out_release;
878 if (!is_sync_kiocb(iocb)) 878 if (!is_sync_kiocb(iocb))
879 dreq->iocb = iocb; 879 dreq->iocb = iocb;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed8b6b9..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
123 size_t desclen = typelen + namelen + 2; 123 size_t desclen = typelen + namelen + 2;
124 124
125 *desc = kmalloc(desclen, GFP_KERNEL); 125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!desc) 126 if (!*desc)
127 return -ENOMEM; 127 return -ENOMEM;
128 128
129 cp = *desc; 129 cp = *desc;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758c99fd..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -429,7 +429,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
429 * returned NFS4ERR_DELAY as per Section 2.10.6.2 429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
430 * of RFC5661. 430 * of RFC5661.
431 */ 431 */
432 dprintk("%s: slot=%ld seq=%d: Operation in progress\n", 432 dprintk("%s: slot=%td seq=%d: Operation in progress\n",
433 __func__, 433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots, 434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr); 435 res->sr_slot->seq_nr);
@@ -573,7 +573,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
573 goto out; 573 goto out;
574 } 574 }
575 575
576 dprintk("--> %s clp %p session %p sr_slot %ld\n", 576 dprintk("--> %s clp %p session %p sr_slot %td\n",
577 __func__, session->clp, session, res->sr_slot ? 577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1); 578 res->sr_slot - session->fc_slot_table.slots : -1);
579 579
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e17..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
65 if (req == NULL) 65 if (req == NULL)
66 return ERR_PTR(-ENOMEM); 66 return ERR_PTR(-ENOMEM);
67 67
68 /* get lock context early so we can deal with alloc failures */
69 req->wb_lock_context = nfs_get_lock_context(ctx);
70 if (req->wb_lock_context == NULL) {
71 nfs_page_free(req);
72 return ERR_PTR(-ENOMEM);
73 }
74
68 /* Initialize the request struct. Initially, we assume a 75 /* Initialize the request struct. Initially, we assume a
69 * long write-back delay. This will be adjusted in 76 * long write-back delay. This will be adjusted in
70 * update_nfs_request below if the region is not locked. */ 77 * update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
79 req->wb_pgbase = offset; 86 req->wb_pgbase = offset;
80 req->wb_bytes = count; 87 req->wb_bytes = count;
81 req->wb_context = get_nfs_open_context(ctx); 88 req->wb_context = get_nfs_open_context(ctx);
82 req->wb_lock_context = nfs_get_lock_context(ctx);
83 kref_init(&req->wb_kref); 89 kref_init(&req->wb_kref);
84 return req; 90 return req;
85} 91}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec700d58..0a42e8f4adcb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -260,8 +260,8 @@ static int nfs_statfs(struct dentry *, struct kstatfs *);
260static int nfs_show_options(struct seq_file *, struct vfsmount *); 260static int nfs_show_options(struct seq_file *, struct vfsmount *);
261static int nfs_show_stats(struct seq_file *, struct vfsmount *); 261static int nfs_show_stats(struct seq_file *, struct vfsmount *);
262static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 262static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
263static int nfs_xdev_get_sb(struct file_system_type *fs_type, 263static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
264 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 264 int flags, const char *dev_name, void *raw_data);
265static void nfs_put_super(struct super_block *); 265static void nfs_put_super(struct super_block *);
266static void nfs_kill_super(struct super_block *); 266static void nfs_kill_super(struct super_block *);
267static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 267static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +277,7 @@ static struct file_system_type nfs_fs_type = {
277struct file_system_type nfs_xdev_fs_type = { 277struct file_system_type nfs_xdev_fs_type = {
278 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
279 .name = "nfs", 279 .name = "nfs",
280 .get_sb = nfs_xdev_get_sb, 280 .mount = nfs_xdev_mount,
281 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
283}; 283};
@@ -302,14 +302,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
302 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 302 struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
303static int nfs4_get_sb(struct file_system_type *fs_type, 303static int nfs4_get_sb(struct file_system_type *fs_type,
304 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 304 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
305static int nfs4_remote_get_sb(struct file_system_type *fs_type, 305static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
306 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 306 int flags, const char *dev_name, void *raw_data);
307static int nfs4_xdev_get_sb(struct file_system_type *fs_type, 307static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
308 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 308 int flags, const char *dev_name, void *raw_data);
309static int nfs4_referral_get_sb(struct file_system_type *fs_type, 309static int nfs4_referral_get_sb(struct file_system_type *fs_type,
310 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 310 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
311static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 311static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
312 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 312 int flags, const char *dev_name, void *raw_data);
313static void nfs4_kill_super(struct super_block *sb); 313static void nfs4_kill_super(struct super_block *sb);
314 314
315static struct file_system_type nfs4_fs_type = { 315static struct file_system_type nfs4_fs_type = {
@@ -323,7 +323,7 @@ static struct file_system_type nfs4_fs_type = {
323static struct file_system_type nfs4_remote_fs_type = { 323static struct file_system_type nfs4_remote_fs_type = {
324 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
325 .name = "nfs4", 325 .name = "nfs4",
326 .get_sb = nfs4_remote_get_sb, 326 .mount = nfs4_remote_mount,
327 .kill_sb = nfs4_kill_super, 327 .kill_sb = nfs4_kill_super,
328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
329}; 329};
@@ -331,7 +331,7 @@ static struct file_system_type nfs4_remote_fs_type = {
331struct file_system_type nfs4_xdev_fs_type = { 331struct file_system_type nfs4_xdev_fs_type = {
332 .owner = THIS_MODULE, 332 .owner = THIS_MODULE,
333 .name = "nfs4", 333 .name = "nfs4",
334 .get_sb = nfs4_xdev_get_sb, 334 .mount = nfs4_xdev_mount,
335 .kill_sb = nfs4_kill_super, 335 .kill_sb = nfs4_kill_super,
336 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 336 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
337}; 337};
@@ -339,7 +339,7 @@ struct file_system_type nfs4_xdev_fs_type = {
339static struct file_system_type nfs4_remote_referral_fs_type = { 339static struct file_system_type nfs4_remote_referral_fs_type = {
340 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
341 .name = "nfs4", 341 .name = "nfs4",
342 .get_sb = nfs4_remote_referral_get_sb, 342 .mount = nfs4_remote_referral_mount,
343 .kill_sb = nfs4_kill_super, 343 .kill_sb = nfs4_kill_super,
344 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 344 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
345}; 345};
@@ -2397,9 +2397,9 @@ static void nfs_kill_super(struct super_block *s)
2397/* 2397/*
2398 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2398 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2399 */ 2399 */
2400static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, 2400static struct dentry *
2401 const char *dev_name, void *raw_data, 2401nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2402 struct vfsmount *mnt) 2402 const char *dev_name, void *raw_data)
2403{ 2403{
2404 struct nfs_clone_mount *data = raw_data; 2404 struct nfs_clone_mount *data = raw_data;
2405 struct super_block *s; 2405 struct super_block *s;
@@ -2411,7 +2411,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2411 }; 2411 };
2412 int error; 2412 int error;
2413 2413
2414 dprintk("--> nfs_xdev_get_sb()\n"); 2414 dprintk("--> nfs_xdev_mount()\n");
2415 2415
2416 /* create a new volume representation */ 2416 /* create a new volume representation */
2417 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2417 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2458,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
2458 } 2458 }
2459 2459
2460 s->s_flags |= MS_ACTIVE; 2460 s->s_flags |= MS_ACTIVE;
2461 mnt->mnt_sb = s;
2462 mnt->mnt_root = mntroot;
2463 2461
2464 /* clone any lsm security options from the parent to the new sb */ 2462 /* clone any lsm security options from the parent to the new sb */
2465 security_sb_clone_mnt_opts(data->sb, s); 2463 security_sb_clone_mnt_opts(data->sb, s);
2466 2464
2467 dprintk("<-- nfs_xdev_get_sb() = 0\n"); 2465 dprintk("<-- nfs_xdev_mount() = 0\n");
2468 return 0; 2466 return mntroot;
2469 2467
2470out_err_nosb: 2468out_err_nosb:
2471 nfs_free_server(server); 2469 nfs_free_server(server);
2472out_err_noserver: 2470out_err_noserver:
2473 dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); 2471 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
2474 return error; 2472 return ERR_PTR(error);
2475 2473
2476error_splat_super: 2474error_splat_super:
2477 if (server && !s->s_root) 2475 if (server && !s->s_root)
2478 bdi_unregister(&server->backing_dev_info); 2476 bdi_unregister(&server->backing_dev_info);
2479error_splat_bdi: 2477error_splat_bdi:
2480 deactivate_locked_super(s); 2478 deactivate_locked_super(s);
2481 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); 2479 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
2482 return error; 2480 return ERR_PTR(error);
2483} 2481}
2484 2482
2485#ifdef CONFIG_NFS_V4 2483#ifdef CONFIG_NFS_V4
@@ -2649,8 +2647,9 @@ out_no_address:
2649/* 2647/*
2650 * Get the superblock for the NFS4 root partition 2648 * Get the superblock for the NFS4 root partition
2651 */ 2649 */
2652static int nfs4_remote_get_sb(struct file_system_type *fs_type, 2650static struct dentry *
2653 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2651nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2652 const char *dev_name, void *raw_data)
2654{ 2653{
2655 struct nfs_parsed_mount_data *data = raw_data; 2654 struct nfs_parsed_mount_data *data = raw_data;
2656 struct super_block *s; 2655 struct super_block *s;
@@ -2714,15 +2713,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2714 goto error_splat_root; 2713 goto error_splat_root;
2715 2714
2716 s->s_flags |= MS_ACTIVE; 2715 s->s_flags |= MS_ACTIVE;
2717 mnt->mnt_sb = s; 2716
2718 mnt->mnt_root = mntroot; 2717 security_free_mnt_opts(&data->lsm_opts);
2719 error = 0; 2718 nfs_free_fhandle(mntfh);
2719 return mntroot;
2720 2720
2721out: 2721out:
2722 security_free_mnt_opts(&data->lsm_opts); 2722 security_free_mnt_opts(&data->lsm_opts);
2723out_free_fh: 2723out_free_fh:
2724 nfs_free_fhandle(mntfh); 2724 nfs_free_fhandle(mntfh);
2725 return error; 2725 return ERR_PTR(error);
2726 2726
2727out_free: 2727out_free:
2728 nfs_free_server(server); 2728 nfs_free_server(server);
@@ -2968,9 +2968,9 @@ static void nfs4_kill_super(struct super_block *sb)
2968/* 2968/*
2969 * Clone an NFS4 server record on xdev traversal (FSID-change) 2969 * Clone an NFS4 server record on xdev traversal (FSID-change)
2970 */ 2970 */
2971static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, 2971static struct dentry *
2972 const char *dev_name, void *raw_data, 2972nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2973 struct vfsmount *mnt) 2973 const char *dev_name, void *raw_data)
2974{ 2974{
2975 struct nfs_clone_mount *data = raw_data; 2975 struct nfs_clone_mount *data = raw_data;
2976 struct super_block *s; 2976 struct super_block *s;
@@ -2982,7 +2982,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2982 }; 2982 };
2983 int error; 2983 int error;
2984 2984
2985 dprintk("--> nfs4_xdev_get_sb()\n"); 2985 dprintk("--> nfs4_xdev_mount()\n");
2986 2986
2987 /* create a new volume representation */ 2987 /* create a new volume representation */
2988 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); 2988 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3029,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
3029 } 3029 }
3030 3030
3031 s->s_flags |= MS_ACTIVE; 3031 s->s_flags |= MS_ACTIVE;
3032 mnt->mnt_sb = s;
3033 mnt->mnt_root = mntroot;
3034 3032
3035 security_sb_clone_mnt_opts(data->sb, s); 3033 security_sb_clone_mnt_opts(data->sb, s);
3036 3034
3037 dprintk("<-- nfs4_xdev_get_sb() = 0\n"); 3035 dprintk("<-- nfs4_xdev_mount() = 0\n");
3038 return 0; 3036 return mntroot;
3039 3037
3040out_err_nosb: 3038out_err_nosb:
3041 nfs_free_server(server); 3039 nfs_free_server(server);
3042out_err_noserver: 3040out_err_noserver:
3043 dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); 3041 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3044 return error; 3042 return ERR_PTR(error);
3045 3043
3046error_splat_super: 3044error_splat_super:
3047 if (server && !s->s_root) 3045 if (server && !s->s_root)
3048 bdi_unregister(&server->backing_dev_info); 3046 bdi_unregister(&server->backing_dev_info);
3049error_splat_bdi: 3047error_splat_bdi:
3050 deactivate_locked_super(s); 3048 deactivate_locked_super(s);
3051 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); 3049 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3052 return error; 3050 return ERR_PTR(error);
3053} 3051}
3054 3052
3055static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, 3053static struct dentry *
3056 int flags, const char *dev_name, void *raw_data, 3054nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3057 struct vfsmount *mnt) 3055 const char *dev_name, void *raw_data)
3058{ 3056{
3059 struct nfs_clone_mount *data = raw_data; 3057 struct nfs_clone_mount *data = raw_data;
3060 struct super_block *s; 3058 struct super_block *s;
@@ -3118,14 +3116,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
3118 } 3116 }
3119 3117
3120 s->s_flags |= MS_ACTIVE; 3118 s->s_flags |= MS_ACTIVE;
3121 mnt->mnt_sb = s;
3122 mnt->mnt_root = mntroot;
3123 3119
3124 security_sb_clone_mnt_opts(data->sb, s); 3120 security_sb_clone_mnt_opts(data->sb, s);
3125 3121
3126 nfs_free_fhandle(mntfh); 3122 nfs_free_fhandle(mntfh);
3127 dprintk("<-- nfs4_referral_get_sb() = 0\n"); 3123 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3128 return 0; 3124 return mntroot;
3129 3125
3130out_err_nosb: 3126out_err_nosb:
3131 nfs_free_server(server); 3127 nfs_free_server(server);
@@ -3133,7 +3129,7 @@ out_err_noserver:
3133 nfs_free_fhandle(mntfh); 3129 nfs_free_fhandle(mntfh);
3134out_err_nofh: 3130out_err_nofh:
3135 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); 3131 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3136 return error; 3132 return ERR_PTR(error);
3137 3133
3138error_splat_super: 3134error_splat_super:
3139 if (server && !s->s_root) 3135 if (server && !s->s_root)
@@ -3142,7 +3138,7 @@ error_splat_bdi:
3142 deactivate_locked_super(s); 3138 deactivate_locked_super(s);
3143 nfs_free_fhandle(mntfh); 3139 nfs_free_fhandle(mntfh);
3144 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 3140 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3145 return error; 3141 return ERR_PTR(error);
3146} 3142}
3147 3143
3148/* 3144/*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad5d2ea..7bdec8531400 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
444 444
445 /* set up nfs_renamedata */ 445 /* set up nfs_renamedata */
446 data->old_dir = old_dir; 446 data->old_dir = old_dir;
447 atomic_inc(&old_dir->i_count); 447 ihold(old_dir);
448 data->new_dir = new_dir; 448 data->new_dir = new_dir;
449 atomic_inc(&new_dir->i_count); 449 ihold(new_dir);
450 data->old_dentry = dget(old_dentry); 450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry); 451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr); 452 nfs_fattr_init(&data->old_fattr);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f61f8ba..4514ebbee4d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1405 return simple_fill_super(sb, 0x6e667364, nfsd_files); 1405 return simple_fill_super(sb, 0x6e667364, nfsd_files);
1406} 1406}
1407 1407
1408static int nfsd_get_sb(struct file_system_type *fs_type, 1408static struct dentry *nfsd_mount(struct file_system_type *fs_type,
1409 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1409 int flags, const char *dev_name, void *data)
1410{ 1410{
1411 return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); 1411 return mount_single(fs_type, flags, data, nfsd_fill_super);
1412} 1412}
1413 1413
1414static struct file_system_type nfsd_fs_type = { 1414static struct file_system_type nfsd_fs_type = {
1415 .owner = THIS_MODULE, 1415 .owner = THIS_MODULE,
1416 .name = "nfsd", 1416 .name = "nfsd",
1417 .get_sb = nfsd_get_sb, 1417 .mount = nfsd_mount,
1418 .kill_sb = kill_litter_super, 1418 .kill_sb = kill_litter_super,
1419}; 1419};
1420 1420
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db86..f804d41ec9d3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
1141 return (void *)s->s_bdev == data; 1141 return (void *)s->s_bdev == data;
1142} 1142}
1143 1143
1144static int 1144static struct dentry *
1145nilfs_get_sb(struct file_system_type *fs_type, int flags, 1145nilfs_mount(struct file_system_type *fs_type, int flags,
1146 const char *dev_name, void *data, struct vfsmount *mnt) 1146 const char *dev_name, void *data)
1147{ 1147{
1148 struct nilfs_super_data sd; 1148 struct nilfs_super_data sd;
1149 struct super_block *s; 1149 struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1156 1156
1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); 1157 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1158 if (IS_ERR(sd.bdev)) 1158 if (IS_ERR(sd.bdev))
1159 return PTR_ERR(sd.bdev); 1159 return ERR_CAST(sd.bdev);
1160 1160
1161 sd.cno = 0; 1161 sd.cno = 0;
1162 sd.flags = flags; 1162 sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1235 if (!s_new) 1235 if (!s_new)
1236 close_bdev_exclusive(sd.bdev, mode); 1236 close_bdev_exclusive(sd.bdev, mode);
1237 1237
1238 mnt->mnt_sb = s; 1238 return root_dentry;
1239 mnt->mnt_root = root_dentry;
1240 return 0;
1241 1239
1242 failed_super: 1240 failed_super:
1243 deactivate_locked_super(s); 1241 deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1245 failed: 1243 failed:
1246 if (!s_new) 1244 if (!s_new)
1247 close_bdev_exclusive(sd.bdev, mode); 1245 close_bdev_exclusive(sd.bdev, mode);
1248 return err; 1246 return ERR_PTR(err);
1249} 1247}
1250 1248
1251struct file_system_type nilfs_fs_type = { 1249struct file_system_type nilfs_fs_type = {
1252 .owner = THIS_MODULE, 1250 .owner = THIS_MODULE,
1253 .name = "nilfs2", 1251 .name = "nilfs2",
1254 .get_sb = nilfs_get_sb, 1252 .mount = nilfs_mount,
1255 .kill_sb = kill_block_super, 1253 .kill_sb = kill_block_super,
1256 .fs_flags = FS_REQUIRES_DEV, 1254 .fs_flags = FS_REQUIRES_DEV,
1257}; 1255};
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d3fbe5730bfc..a30ecacc01f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
3059/* Driver wide mutex. */ 3059/* Driver wide mutex. */
3060DEFINE_MUTEX(ntfs_lock); 3060DEFINE_MUTEX(ntfs_lock);
3061 3061
3062static int ntfs_get_sb(struct file_system_type *fs_type, 3062static struct dentry *ntfs_mount(struct file_system_type *fs_type,
3063 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3063 int flags, const char *dev_name, void *data)
3064{ 3064{
3065 return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, 3065 return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
3066 mnt);
3067} 3066}
3068 3067
3069static struct file_system_type ntfs_fs_type = { 3068static struct file_system_type ntfs_fs_type = {
3070 .owner = THIS_MODULE, 3069 .owner = THIS_MODULE,
3071 .name = "ntfs", 3070 .name = "ntfs",
3072 .get_sb = ntfs_get_sb, 3071 .mount = ntfs_mount,
3073 .kill_sb = kill_block_super, 3072 .kill_sb = kill_block_super,
3074 .fs_flags = FS_REQUIRES_DEV, 3073 .fs_flags = FS_REQUIRES_DEV,
3075}; 3074};
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 75e115f1bd73..b2df490a19ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
643 .setattr = dlmfs_file_setattr, 643 .setattr = dlmfs_file_setattr,
644}; 644};
645 645
646static int dlmfs_get_sb(struct file_system_type *fs_type, 646static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
647 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 647 int flags, const char *dev_name, void *data)
648{ 648{
649 return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); 649 return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
650} 650}
651 651
652static struct file_system_type dlmfs_fs_type = { 652static struct file_system_type dlmfs_fs_type = {
653 .owner = THIS_MODULE, 653 .owner = THIS_MODULE,
654 .name = "ocfs2_dlmfs", 654 .name = "ocfs2_dlmfs",
655 .get_sb = dlmfs_get_sb, 655 .mount = dlmfs_mount,
656 .kill_sb = kill_litter_super, 656 .kill_sb = kill_litter_super,
657}; 657};
658 658
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb395820..f02c0ef31578 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1236,14 +1236,12 @@ read_super_error:
1236 return status; 1236 return status;
1237} 1237}
1238 1238
1239static int ocfs2_get_sb(struct file_system_type *fs_type, 1239static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
1240 int flags, 1240 int flags,
1241 const char *dev_name, 1241 const char *dev_name,
1242 void *data, 1242 void *data)
1243 struct vfsmount *mnt)
1244{ 1243{
1245 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, 1244 return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
1246 mnt);
1247} 1245}
1248 1246
1249static void ocfs2_kill_sb(struct super_block *sb) 1247static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1265,7 @@ out:
1267static struct file_system_type ocfs2_fs_type = { 1265static struct file_system_type ocfs2_fs_type = {
1268 .owner = THIS_MODULE, 1266 .owner = THIS_MODULE,
1269 .name = "ocfs2", 1267 .name = "ocfs2",
1270 .get_sb = ocfs2_get_sb, /* is this called when we mount 1268 .mount = ocfs2_mount,
1271 * the fs? */
1272 .kill_sb = ocfs2_kill_sb, 1269 .kill_sb = ocfs2_kill_sb,
1273 1270
1274 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1271 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291a..e043c4cb9a97 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
557 return ret; 557 return ret;
558} 558}
559 559
560static int omfs_get_sb(struct file_system_type *fs_type, 560static struct dentry *omfs_mount(struct file_system_type *fs_type,
561 int flags, const char *dev_name, 561 int flags, const char *dev_name, void *data)
562 void *data, struct vfsmount *m)
563{ 562{
564 return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); 563 return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
565} 564}
566 565
567static struct file_system_type omfs_fs_type = { 566static struct file_system_type omfs_fs_type = {
568 .owner = THIS_MODULE, 567 .owner = THIS_MODULE,
569 .name = "omfs", 568 .name = "omfs",
570 .get_sb = omfs_get_sb, 569 .mount = omfs_mount,
571 .kill_sb = kill_block_super, 570 .kill_sb = kill_block_super,
572 .fs_flags = FS_REQUIRES_DEV, 571 .fs_flags = FS_REQUIRES_DEV,
573}; 572};
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8dc..4197b9ed023d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
786 /* Pick up the filp from the open intent */ 786 /* Pick up the filp from the open intent */
787 filp = nd->intent.open.file; 787 filp = nd->intent.open.file;
788 /* Has the filesystem initialised the file for us? */ 788 /* Has the filesystem initialised the file for us? */
789 if (filp->f_path.dentry == NULL) 789 if (filp->f_path.dentry == NULL) {
790 path_get(&nd->path);
790 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, 791 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
791 NULL, cred); 792 NULL, cred);
792 else 793 }
793 path_put(&nd->path);
794 return filp; 794 return filp;
795} 795}
796 796
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012c..ddb1f41376e5 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
415 return ret; 415 return ret;
416} 416}
417 417
418static int openprom_get_sb(struct file_system_type *fs_type, 418static struct dentry *openprom_mount(struct file_system_type *fs_type,
419 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 419 int flags, const char *dev_name, void *data)
420{ 420{
421 return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); 421 return mount_single(fs_type, flags, data, openprom_fill_super)
422} 422}
423 423
424static struct file_system_type openprom_fs_type = { 424static struct file_system_type openprom_fs_type = {
425 .owner = THIS_MODULE, 425 .owner = THIS_MODULE,
426 .name = "openpromfs", 426 .name = "openpromfs",
427 .get_sb = openprom_get_sb, 427 .mount = openprom_mount,
428 .kill_sb = kill_anon_super, 428 .kill_sb = kill_anon_super,
429}; 429};
430 430
diff --git a/fs/pipe.c b/fs/pipe.c
index d2d7566ce68e..a8012a955720 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1247,16 +1247,15 @@ out:
1247 * any operations on the root directory. However, we need a non-trivial 1247 * any operations on the root directory. However, we need a non-trivial
1248 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1248 * d_name - pipe: will go nicely and kill the special-casing in procfs.
1249 */ 1249 */
1250static int pipefs_get_sb(struct file_system_type *fs_type, 1250static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1251 int flags, const char *dev_name, void *data, 1251 int flags, const char *dev_name, void *data)
1252 struct vfsmount *mnt)
1253{ 1252{
1254 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1253 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
1255} 1254}
1256 1255
1257static struct file_system_type pipe_fs_type = { 1256static struct file_system_type pipe_fs_type = {
1258 .name = "pipefs", 1257 .name = "pipefs",
1259 .get_sb = pipefs_get_sb, 1258 .mount = pipefs_mount,
1260 .kill_sb = kill_anon_super, 1259 .kill_sb = kill_anon_super,
1261}; 1260};
1262 1261
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b316325..ef9fa8e24ad6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
35 return set_anon_super(sb, NULL); 35 return set_anon_super(sb, NULL);
36} 36}
37 37
38static int proc_get_sb(struct file_system_type *fs_type, 38static struct dentry *proc_mount(struct file_system_type *fs_type,
39 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 39 int flags, const char *dev_name, void *data)
40{ 40{
41 int err; 41 int err;
42 struct super_block *sb; 42 struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
61 61
62 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 62 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
63 if (IS_ERR(sb)) 63 if (IS_ERR(sb))
64 return PTR_ERR(sb); 64 return ERR_CAST(sb);
65 65
66 if (!sb->s_root) { 66 if (!sb->s_root) {
67 sb->s_flags = flags; 67 sb->s_flags = flags;
68 err = proc_fill_super(sb); 68 err = proc_fill_super(sb);
69 if (err) { 69 if (err) {
70 deactivate_locked_super(sb); 70 deactivate_locked_super(sb);
71 return err; 71 return ERR_PTR(err);
72 } 72 }
73 73
74 ei = PROC_I(sb->s_root->d_inode); 74 ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
79 } 79 }
80 80
81 sb->s_flags |= MS_ACTIVE; 81 sb->s_flags |= MS_ACTIVE;
82 ns->proc_mnt = mnt;
83 } 82 }
84 83
85 simple_set_mnt(mnt, sb); 84 return dget(sb->s_root);
86 return 0;
87} 85}
88 86
89static void proc_kill_sb(struct super_block *sb) 87static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
97 95
98static struct file_system_type proc_fs_type = { 96static struct file_system_type proc_fs_type = {
99 .name = "proc", 97 .name = "proc",
100 .get_sb = proc_get_sb, 98 .mount = proc_mount,
101 .kill_sb = proc_kill_sb, 99 .kill_sb = proc_kill_sb,
102}; 100};
103 101
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
115 return; 113 return;
116 } 114 }
117 115
116 init_pid_ns.proc_mnt = proc_mnt;
118 proc_symlink("mounts", NULL, "self/mounts"); 117 proc_symlink("mounts", NULL, "self/mounts");
119 118
120 proc_net_init(); 119 proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
213 if (IS_ERR(mnt)) 212 if (IS_ERR(mnt))
214 return PTR_ERR(mnt); 213 return PTR_ERR(mnt);
215 214
215 ns->proc_mnt = mnt;
216 return 0; 216 return 0;
217} 217}
218 218
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026fc..fcada42f1aa3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
454 kmem_cache_destroy(qnx4_inode_cachep); 454 kmem_cache_destroy(qnx4_inode_cachep);
455} 455}
456 456
457static int qnx4_get_sb(struct file_system_type *fs_type, 457static struct dentry *qnx4_mount(struct file_system_type *fs_type,
458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 458 int flags, const char *dev_name, void *data)
459{ 459{
460 return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, 460 return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
461 mnt);
462} 461}
463 462
464static struct file_system_type qnx4_fs_type = { 463static struct file_system_type qnx4_fs_type = {
465 .owner = THIS_MODULE, 464 .owner = THIS_MODULE,
466 .name = "qnx4", 465 .name = "qnx4",
467 .get_sb = qnx4_get_sb, 466 .mount = qnx4_mount,
468 .kill_sb = kill_block_super, 467 .kill_sb = kill_block_super,
469 .fs_flags = FS_REQUIRES_DEV, 468 .fs_flags = FS_REQUIRES_DEV,
470}; 469};
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 3e21b1e2ad3a..880fd9884366 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -4,6 +4,7 @@
4 4
5config QUOTA 5config QUOTA
6 bool "Quota support" 6 bool "Quota support"
7 select QUOTACTL
7 help 8 help
8 If you say Y here, you will be able to set per user limits for disk 9 If you say Y here, you will be able to set per user limits for disk
9 usage (also called disk quotas). Currently, it works for the 10 usage (also called disk quotas). Currently, it works for the
@@ -65,8 +66,7 @@ config QFMT_V2
65 66
66config QUOTACTL 67config QUOTACTL
67 bool 68 bool
68 depends on XFS_QUOTA || QUOTA 69 default n
69 default y
70 70
71config QUOTACTL_COMPAT 71config QUOTACTL_COMPAT
72 bool 72 bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index aad1316a977f..0fed41e6efcd 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1386,6 +1386,9 @@ static void __dquot_initialize(struct inode *inode, int type)
1386 /* Avoid races with quotaoff() */ 1386 /* Avoid races with quotaoff() */
1387 if (!sb_has_quota_active(sb, cnt)) 1387 if (!sb_has_quota_active(sb, cnt))
1388 continue; 1388 continue;
1389 /* We could race with quotaon or dqget() could have failed */
1390 if (!got[cnt])
1391 continue;
1389 if (!inode->i_dquot[cnt]) { 1392 if (!inode->i_dquot[cnt]) {
1390 inode->i_dquot[cnt] = got[cnt]; 1393 inode->i_dquot[cnt] = got[cnt];
1391 got[cnt] = NULL; 1394 got[cnt] = NULL;
@@ -1736,6 +1739,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1736 qsize_t rsv_space = 0; 1739 qsize_t rsv_space = 0;
1737 struct dquot *transfer_from[MAXQUOTAS] = {}; 1740 struct dquot *transfer_from[MAXQUOTAS] = {};
1738 int cnt, ret = 0; 1741 int cnt, ret = 0;
1742 char is_valid[MAXQUOTAS] = {};
1739 char warntype_to[MAXQUOTAS]; 1743 char warntype_to[MAXQUOTAS];
1740 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; 1744 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1741 1745
@@ -1757,8 +1761,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1757 space = cur_space + rsv_space; 1761 space = cur_space + rsv_space;
1758 /* Build the transfer_from list and check the limits */ 1762 /* Build the transfer_from list and check the limits */
1759 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1763 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1764 /*
1765 * Skip changes for same uid or gid or for turned off quota-type.
1766 */
1760 if (!transfer_to[cnt]) 1767 if (!transfer_to[cnt])
1761 continue; 1768 continue;
1769 /* Avoid races with quotaoff() */
1770 if (!sb_has_quota_active(inode->i_sb, cnt))
1771 continue;
1772 is_valid[cnt] = 1;
1762 transfer_from[cnt] = inode->i_dquot[cnt]; 1773 transfer_from[cnt] = inode->i_dquot[cnt];
1763 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); 1774 ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
1764 if (ret) 1775 if (ret)
@@ -1772,12 +1783,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1772 * Finally perform the needed transfer from transfer_from to transfer_to 1783 * Finally perform the needed transfer from transfer_from to transfer_to
1773 */ 1784 */
1774 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1785 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1775 /* 1786 if (!is_valid[cnt])
1776 * Skip changes for same uid or gid or for turned off quota-type.
1777 */
1778 if (!transfer_to[cnt])
1779 continue; 1787 continue;
1780
1781 /* Due to IO error we might not have transfer_from[] structure */ 1788 /* Due to IO error we might not have transfer_from[] structure */
1782 if (transfer_from[cnt]) { 1789 if (transfer_from[cnt]) {
1783 warntype_from_inodes[cnt] = 1790 warntype_from_inodes[cnt] =
@@ -1801,18 +1808,19 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1801 1808
1802 mark_all_dquot_dirty(transfer_from); 1809 mark_all_dquot_dirty(transfer_from);
1803 mark_all_dquot_dirty(transfer_to); 1810 mark_all_dquot_dirty(transfer_to);
1804 /* Pass back references to put */
1805 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1806 transfer_to[cnt] = transfer_from[cnt];
1807warn:
1808 flush_warnings(transfer_to, warntype_to); 1811 flush_warnings(transfer_to, warntype_to);
1809 flush_warnings(transfer_from, warntype_from_inodes); 1812 flush_warnings(transfer_from, warntype_from_inodes);
1810 flush_warnings(transfer_from, warntype_from_space); 1813 flush_warnings(transfer_from, warntype_from_space);
1811 return ret; 1814 /* Pass back references to put */
1815 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1816 if (is_valid[cnt])
1817 transfer_to[cnt] = transfer_from[cnt];
1818 return 0;
1812over_quota: 1819over_quota:
1813 spin_unlock(&dq_data_lock); 1820 spin_unlock(&dq_data_lock);
1814 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1821 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1815 goto warn; 1822 flush_warnings(transfer_to, warntype_to);
1823 return ret;
1816} 1824}
1817EXPORT_SYMBOL(__dquot_transfer); 1825EXPORT_SYMBOL(__dquot_transfer);
1818 1826
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 67fadb1ad2c1..eacb166fb259 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -255,17 +255,16 @@ fail:
255 return err; 255 return err;
256} 256}
257 257
258int ramfs_get_sb(struct file_system_type *fs_type, 258struct dentry *ramfs_mount(struct file_system_type *fs_type,
259 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 259 int flags, const char *dev_name, void *data)
260{ 260{
261 return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); 261 return mount_nodev(fs_type, flags, data, ramfs_fill_super);
262} 262}
263 263
264static int rootfs_get_sb(struct file_system_type *fs_type, 264static struct dentry *rootfs_mount(struct file_system_type *fs_type,
265 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 265 int flags, const char *dev_name, void *data)
266{ 266{
267 return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, 267 return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
268 mnt);
269} 268}
270 269
271static void ramfs_kill_sb(struct super_block *sb) 270static void ramfs_kill_sb(struct super_block *sb)
@@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
276 275
277static struct file_system_type ramfs_fs_type = { 276static struct file_system_type ramfs_fs_type = {
278 .name = "ramfs", 277 .name = "ramfs",
279 .get_sb = ramfs_get_sb, 278 .mount = ramfs_mount,
280 .kill_sb = ramfs_kill_sb, 279 .kill_sb = ramfs_kill_sb,
281}; 280};
282static struct file_system_type rootfs_fs_type = { 281static struct file_system_type rootfs_fs_type = {
283 .name = "rootfs", 282 .name = "rootfs",
284 .get_sb = rootfs_get_sb, 283 .mount = rootfs_mount,
285 .kill_sb = kill_litter_super, 284 .kill_sb = kill_litter_super,
286}; 285};
287 286
diff --git a/fs/read_write.c b/fs/read_write.c
index 9cd9d148105d..431a0ed610c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -243,8 +243,6 @@ bad:
243 * them to something that fits in "int" so that others 243 * them to something that fits in "int" so that others
244 * won't have to do range checks all the time. 244 * won't have to do range checks all the time.
245 */ 245 */
246#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
247
248int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 246int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
249{ 247{
250 struct inode *inode; 248 struct inode *inode;
@@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
584 unsigned long nr_segs, unsigned long fast_segs, 582 unsigned long nr_segs, unsigned long fast_segs,
585 struct iovec *fast_pointer, 583 struct iovec *fast_pointer,
586 struct iovec **ret_pointer) 584 struct iovec **ret_pointer)
587 { 585{
588 unsigned long seg; 586 unsigned long seg;
589 ssize_t ret; 587 ssize_t ret;
590 struct iovec *iov = fast_pointer; 588 struct iovec *iov = fast_pointer;
591 589
592 /* 590 /*
593 * SuS says "The readv() function *may* fail if the iovcnt argument 591 * SuS says "The readv() function *may* fail if the iovcnt argument
594 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 592 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
595 * traditionally returned zero for zero segments, so... 593 * traditionally returned zero for zero segments, so...
596 */ 594 */
597 if (nr_segs == 0) { 595 if (nr_segs == 0) {
598 ret = 0; 596 ret = 0;
599 goto out; 597 goto out;
600 } 598 }
601 599
602 /* 600 /*
603 * First get the "struct iovec" from user memory and 601 * First get the "struct iovec" from user memory and
604 * verify all the pointers 602 * verify all the pointers
605 */ 603 */
606 if (nr_segs > UIO_MAXIOV) { 604 if (nr_segs > UIO_MAXIOV) {
607 ret = -EINVAL; 605 ret = -EINVAL;
608 goto out; 606 goto out;
609 } 607 }
610 if (nr_segs > fast_segs) { 608 if (nr_segs > fast_segs) {
611 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 609 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
612 if (iov == NULL) { 610 if (iov == NULL) {
613 ret = -ENOMEM; 611 ret = -ENOMEM;
614 goto out; 612 goto out;
615 } 613 }
616 } 614 }
617 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 615 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
618 ret = -EFAULT; 616 ret = -EFAULT;
619 goto out; 617 goto out;
620 } 618 }
621 619
622 /* 620 /*
623 * According to the Single Unix Specification we should return EINVAL 621 * According to the Single Unix Specification we should return EINVAL
624 * if an element length is < 0 when cast to ssize_t or if the 622 * if an element length is < 0 when cast to ssize_t or if the
625 * total length would overflow the ssize_t return value of the 623 * total length would overflow the ssize_t return value of the
626 * system call. 624 * system call.
627 */ 625 *
626 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
627 * overflow case.
628 */
628 ret = 0; 629 ret = 0;
629 for (seg = 0; seg < nr_segs; seg++) { 630 for (seg = 0; seg < nr_segs; seg++) {
630 void __user *buf = iov[seg].iov_base; 631 void __user *buf = iov[seg].iov_base;
631 ssize_t len = (ssize_t)iov[seg].iov_len; 632 ssize_t len = (ssize_t)iov[seg].iov_len;
632 633
633 /* see if we we're about to use an invalid len or if 634 /* see if we we're about to use an invalid len or if
634 * it's about to overflow ssize_t */ 635 * it's about to overflow ssize_t */
635 if (len < 0 || (ret + len < ret)) { 636 if (len < 0) {
636 ret = -EINVAL; 637 ret = -EINVAL;
637 goto out; 638 goto out;
638 } 639 }
639 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 640 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
640 ret = -EFAULT; 641 ret = -EFAULT;
641 goto out; 642 goto out;
643 }
644 if (len > MAX_RW_COUNT - ret) {
645 len = MAX_RW_COUNT - ret;
646 iov[seg].iov_len = len;
642 } 647 }
643
644 ret += len; 648 ret += len;
645 } 649 }
646out: 650out:
647 *ret_pointer = iov; 651 *ret_pointer = iov;
648 return ret; 652 return ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002d..3bf7a6457f4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2213,12 +2213,11 @@ out:
2213 2213
2214#endif 2214#endif
2215 2215
2216static int get_super_block(struct file_system_type *fs_type, 2216static struct dentry *get_super_block(struct file_system_type *fs_type,
2217 int flags, const char *dev_name, 2217 int flags, const char *dev_name,
2218 void *data, struct vfsmount *mnt) 2218 void *data)
2219{ 2219{
2220 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, 2220 return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
2221 mnt);
2222} 2221}
2223 2222
2224static int __init init_reiserfs_fs(void) 2223static int __init init_reiserfs_fs(void)
@@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void)
2253struct file_system_type reiserfs_fs_type = { 2252struct file_system_type reiserfs_fs_type = {
2254 .owner = THIS_MODULE, 2253 .owner = THIS_MODULE,
2255 .name = "reiserfs", 2254 .name = "reiserfs",
2256 .get_sb = get_super_block, 2255 .mount = get_super_block,
2257 .kill_sb = reiserfs_kill_sb, 2256 .kill_sb = reiserfs_kill_sb,
2258 .fs_flags = FS_REQUIRES_DEV, 2257 .fs_flags = FS_REQUIRES_DEV,
2259}; 2258};
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c92..6647f90e55cd 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
552/* 552/*
553 * get a superblock for mounting 553 * get a superblock for mounting
554 */ 554 */
555static int romfs_get_sb(struct file_system_type *fs_type, 555static struct dentry *romfs_mount(struct file_system_type *fs_type,
556 int flags, const char *dev_name, 556 int flags, const char *dev_name,
557 void *data, struct vfsmount *mnt) 557 void *data)
558{ 558{
559 int ret = -EINVAL; 559 struct dentry *ret = ERR_PTR(-EINVAL);
560 560
561#ifdef CONFIG_ROMFS_ON_MTD 561#ifdef CONFIG_ROMFS_ON_MTD
562 ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, 562 ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
563 mnt);
564#endif 563#endif
565#ifdef CONFIG_ROMFS_ON_BLOCK 564#ifdef CONFIG_ROMFS_ON_BLOCK
566 if (ret == -EINVAL) 565 if (ret == ERR_PTR(-EINVAL))
567 ret = get_sb_bdev(fs_type, flags, dev_name, data, 566 ret = mount_bdev(fs_type, flags, dev_name, data,
568 romfs_fill_super, mnt); 567 romfs_fill_super);
569#endif 568#endif
570 return ret; 569 return ret;
571} 570}
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
592static struct file_system_type romfs_fs_type = { 591static struct file_system_type romfs_fs_type = {
593 .owner = THIS_MODULE, 592 .owner = THIS_MODULE,
594 .name = "romfs", 593 .name = "romfs",
595 .get_sb = romfs_get_sb, 594 .mount = romfs_mount,
596 .kill_sb = romfs_kill_sb, 595 .kill_sb = romfs_kill_sb,
597 .fs_flags = FS_REQUIRES_DEV, 596 .fs_flags = FS_REQUIRES_DEV,
598}; 597};
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig
deleted file mode 100644
index 2bc24a8c4039..000000000000
--- a/fs/smbfs/Kconfig
+++ /dev/null
@@ -1,56 +0,0 @@
1config SMB_FS
2 tristate "SMB file system support (OBSOLETE, please use CIFS)"
3 depends on BKL # probably unfixable
4 depends on INET
5 select NLS
6 help
7 SMB (Server Message Block) is the protocol Windows for Workgroups
8 (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
9 files and printers over local networks. Saying Y here allows you to
10 mount their file systems (often called "shares" in this context) and
11 access them just like any other Unix directory. Currently, this
12 works only if the Windows machines use TCP/IP as the underlying
13 transport protocol, and not NetBEUI. For details, read
14 <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
15 available from <http://www.tldp.org/docs.html#howto>.
16
17 Note: if you just want your box to act as an SMB *server* and make
18 files and printing services available to Windows clients (which need
19 to have a TCP/IP stack), you don't need to say Y here; you can use
20 the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
21 for that.
22
23 General information about how to connect Linux, Windows machines and
24 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
25
26 To compile the SMB support as a module, choose M here:
27 the module will be called smbfs. Most people say N, however.
28
29config SMB_NLS_DEFAULT
30 bool "Use a default NLS"
31 depends on SMB_FS
32 help
33 Enabling this will make smbfs use nls translations by default. You
34 need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
35 settings and you need to give the default nls for the SMB server as
36 CONFIG_SMB_NLS_REMOTE.
37
38 The nls settings can be changed at mount time, if your smbmount
39 supports that, using the codepage and iocharset parameters.
40
41 smbmount from samba 2.2.0 or later supports this.
42
43config SMB_NLS_REMOTE
44 string "Default Remote NLS Option"
45 depends on SMB_NLS_DEFAULT
46 default "cp437"
47 help
48 This setting allows you to specify a default value for which
49 codepage the server uses. If this field is left blank no
50 translations will be done by default. The local codepage/charset
51 default to CONFIG_NLS_DEFAULT.
52
53 The nls settings can be changed at mount time, if your smbmount
54 supports that, using the codepage and iocharset parameters.
55
56 smbmount from samba 2.2.0 or later supports this.
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
deleted file mode 100644
index 4faf8c4722c3..000000000000
--- a/fs/smbfs/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
1#
2# Makefile for the linux smb-filesystem routines.
3#
4
5obj-$(CONFIG_SMB_FS) += smbfs.o
6
7smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
8 symlink.o smbiod.o request.o
9
10# If you want debugging output, you may add these flags to the EXTRA_CFLAGS
11# SMBFS_PARANOIA should normally be enabled.
12
13EXTRA_CFLAGS += -DSMBFS_PARANOIA
14#EXTRA_CFLAGS += -DSMBFS_DEBUG
15#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
16#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
17#EXTRA_CFLAGS += -Werror
18
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
deleted file mode 100644
index 8c177eb7e344..000000000000
--- a/fs/smbfs/cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
1/*
2 * cache.c
3 *
4 * Copyright (C) 1997 by Bill Hawes
5 *
6 * Routines to support directory cacheing using the page cache.
7 * This cache code is almost directly taken from ncpfs.
8 *
9 * Please add a note about your changes to smbfs in the ChangeLog file.
10 */
11
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/kernel.h>
15#include <linux/mm.h>
16#include <linux/smb_fs.h>
17#include <linux/pagemap.h>
18#include <linux/net.h>
19
20#include <asm/page.h>
21
22#include "smb_debug.h"
23#include "proto.h"
24
25/*
26 * Force the next attempt to use the cache to be a timeout.
27 * If we can't find the page that's fine, it will cause a refresh.
28 */
29void
30smb_invalid_dir_cache(struct inode * dir)
31{
32 struct smb_sb_info *server = server_from_inode(dir);
33 union smb_dir_cache *cache = NULL;
34 struct page *page = NULL;
35
36 page = grab_cache_page(&dir->i_data, 0);
37 if (!page)
38 goto out;
39
40 if (!PageUptodate(page))
41 goto out_unlock;
42
43 cache = kmap(page);
44 cache->head.time = jiffies - SMB_MAX_AGE(server);
45
46 kunmap(page);
47 SetPageUptodate(page);
48out_unlock:
49 unlock_page(page);
50 page_cache_release(page);
51out:
52 return;
53}
54
55/*
56 * Mark all dentries for 'parent' as invalid, forcing them to be re-read
57 */
58void
59smb_invalidate_dircache_entries(struct dentry *parent)
60{
61 struct smb_sb_info *server = server_from_dentry(parent);
62 struct list_head *next;
63 struct dentry *dentry;
64
65 spin_lock(&dcache_lock);
66 next = parent->d_subdirs.next;
67 while (next != &parent->d_subdirs) {
68 dentry = list_entry(next, struct dentry, d_u.d_child);
69 dentry->d_fsdata = NULL;
70 smb_age_dentry(server, dentry);
71 next = next->next;
72 }
73 spin_unlock(&dcache_lock);
74}
75
76/*
77 * dget, but require that fpos and parent matches what the dentry contains.
78 * dentry is not known to be a valid pointer at entry.
79 */
80struct dentry *
81smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
82{
83 struct dentry *dent = dentry;
84 struct list_head *next;
85
86 if (d_validate(dent, parent)) {
87 if (dent->d_name.len <= SMB_MAXNAMELEN &&
88 (unsigned long)dent->d_fsdata == fpos) {
89 if (!dent->d_inode) {
90 dput(dent);
91 dent = NULL;
92 }
93 return dent;
94 }
95 dput(dent);
96 }
97
98 /* If a pointer is invalid, we search the dentry. */
99 spin_lock(&dcache_lock);
100 next = parent->d_subdirs.next;
101 while (next != &parent->d_subdirs) {
102 dent = list_entry(next, struct dentry, d_u.d_child);
103 if ((unsigned long)dent->d_fsdata == fpos) {
104 if (dent->d_inode)
105 dget_locked(dent);
106 else
107 dent = NULL;
108 goto out_unlock;
109 }
110 next = next->next;
111 }
112 dent = NULL;
113out_unlock:
114 spin_unlock(&dcache_lock);
115 return dent;
116}
117
118
119/*
120 * Create dentry/inode for this file and add it to the dircache.
121 */
122int
123smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
124 struct smb_cache_control *ctrl, struct qstr *qname,
125 struct smb_fattr *entry)
126{
127 struct dentry *newdent, *dentry = filp->f_path.dentry;
128 struct inode *newino, *inode = dentry->d_inode;
129 struct smb_cache_control ctl = *ctrl;
130 int valid = 0;
131 int hashed = 0;
132 ino_t ino = 0;
133
134 qname->hash = full_name_hash(qname->name, qname->len);
135
136 if (dentry->d_op && dentry->d_op->d_hash)
137 if (dentry->d_op->d_hash(dentry, qname) != 0)
138 goto end_advance;
139
140 newdent = d_lookup(dentry, qname);
141
142 if (!newdent) {
143 newdent = d_alloc(dentry, qname);
144 if (!newdent)
145 goto end_advance;
146 } else {
147 hashed = 1;
148 memcpy((char *) newdent->d_name.name, qname->name,
149 newdent->d_name.len);
150 }
151
152 if (!newdent->d_inode) {
153 smb_renew_times(newdent);
154 entry->f_ino = iunique(inode->i_sb, 2);
155 newino = smb_iget(inode->i_sb, entry);
156 if (newino) {
157 smb_new_dentry(newdent);
158 d_instantiate(newdent, newino);
159 if (!hashed)
160 d_rehash(newdent);
161 }
162 } else
163 smb_set_inode_attr(newdent->d_inode, entry);
164
165 if (newdent->d_inode) {
166 ino = newdent->d_inode->i_ino;
167 newdent->d_fsdata = (void *) ctl.fpos;
168 smb_new_dentry(newdent);
169 }
170
171 if (ctl.idx >= SMB_DIRCACHE_SIZE) {
172 if (ctl.page) {
173 kunmap(ctl.page);
174 SetPageUptodate(ctl.page);
175 unlock_page(ctl.page);
176 page_cache_release(ctl.page);
177 }
178 ctl.cache = NULL;
179 ctl.idx -= SMB_DIRCACHE_SIZE;
180 ctl.ofs += 1;
181 ctl.page = grab_cache_page(&inode->i_data, ctl.ofs);
182 if (ctl.page)
183 ctl.cache = kmap(ctl.page);
184 }
185 if (ctl.cache) {
186 ctl.cache->dentry[ctl.idx] = newdent;
187 valid = 1;
188 }
189 dput(newdent);
190
191end_advance:
192 if (!valid)
193 ctl.valid = 0;
194 if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
195 if (!ino)
196 ino = find_inode_number(dentry, qname);
197 if (!ino)
198 ino = iunique(inode->i_sb, 2);
199 ctl.filled = filldir(dirent, qname->name, qname->len,
200 filp->f_pos, ino, DT_UNKNOWN);
201 if (!ctl.filled)
202 filp->f_pos += 1;
203 }
204 ctl.fpos += 1;
205 ctl.idx += 1;
206 *ctrl = ctl;
207 return (ctl.valid || !ctl.filled);
208}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
deleted file mode 100644
index f678d421e541..000000000000
--- a/fs/smbfs/dir.c
+++ /dev/null
@@ -1,696 +0,0 @@
1/*
2 * dir.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/kernel.h>
13#include <linux/smp_lock.h>
14#include <linux/ctype.h>
15#include <linux/net.h>
16#include <linux/sched.h>
17
18#include <linux/smb_fs.h>
19#include <linux/smb_mount.h>
20#include <linux/smbno.h>
21
22#include "smb_debug.h"
23#include "proto.h"
24
25static int smb_readdir(struct file *, void *, filldir_t);
26static int smb_dir_open(struct inode *, struct file *);
27
28static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *);
29static int smb_create(struct inode *, struct dentry *, int, struct nameidata *);
30static int smb_mkdir(struct inode *, struct dentry *, int);
31static int smb_rmdir(struct inode *, struct dentry *);
32static int smb_unlink(struct inode *, struct dentry *);
33static int smb_rename(struct inode *, struct dentry *,
34 struct inode *, struct dentry *);
35static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
36static int smb_link(struct dentry *, struct inode *, struct dentry *);
37
38const struct file_operations smb_dir_operations =
39{
40 .llseek = generic_file_llseek,
41 .read = generic_read_dir,
42 .readdir = smb_readdir,
43 .unlocked_ioctl = smb_ioctl,
44 .open = smb_dir_open,
45};
46
47const struct inode_operations smb_dir_inode_operations =
48{
49 .create = smb_create,
50 .lookup = smb_lookup,
51 .unlink = smb_unlink,
52 .mkdir = smb_mkdir,
53 .rmdir = smb_rmdir,
54 .rename = smb_rename,
55 .getattr = smb_getattr,
56 .setattr = smb_notify_change,
57};
58
59const struct inode_operations smb_dir_inode_operations_unix =
60{
61 .create = smb_create,
62 .lookup = smb_lookup,
63 .unlink = smb_unlink,
64 .mkdir = smb_mkdir,
65 .rmdir = smb_rmdir,
66 .rename = smb_rename,
67 .getattr = smb_getattr,
68 .setattr = smb_notify_change,
69 .symlink = smb_symlink,
70 .mknod = smb_make_node,
71 .link = smb_link,
72};
73
74/*
75 * Read a directory, using filldir to fill the dirent memory.
76 * smb_proc_readdir does the actual reading from the smb server.
77 *
78 * The cache code is almost directly taken from ncpfs
79 */
80static int
81smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
82{
83 struct dentry *dentry = filp->f_path.dentry;
84 struct inode *dir = dentry->d_inode;
85 struct smb_sb_info *server = server_from_dentry(dentry);
86 union smb_dir_cache *cache = NULL;
87 struct smb_cache_control ctl;
88 struct page *page = NULL;
89 int result;
90
91 ctl.page = NULL;
92 ctl.cache = NULL;
93
94 VERBOSE("reading %s/%s, f_pos=%d\n",
95 DENTRY_PATH(dentry), (int) filp->f_pos);
96
97 result = 0;
98
99 lock_kernel();
100
101 switch ((unsigned int) filp->f_pos) {
102 case 0:
103 if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
104 goto out;
105 filp->f_pos = 1;
106 /* fallthrough */
107 case 1:
108 if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
109 goto out;
110 filp->f_pos = 2;
111 }
112
113 /*
114 * Make sure our inode is up-to-date.
115 */
116 result = smb_revalidate_inode(dentry);
117 if (result)
118 goto out;
119
120
121 page = grab_cache_page(&dir->i_data, 0);
122 if (!page)
123 goto read_really;
124
125 ctl.cache = cache = kmap(page);
126 ctl.head = cache->head;
127
128 if (!PageUptodate(page) || !ctl.head.eof) {
129 VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
130 DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
131 goto init_cache;
132 }
133
134 if (filp->f_pos == 2) {
135 if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
136 goto init_cache;
137
138 /*
139 * N.B. ncpfs checks mtime of dentry too here, we don't.
140 * 1. common smb servers do not update mtime on dir changes
141 * 2. it requires an extra smb request
142 * (revalidate has the same timeout as ctl.head.time)
143 *
144 * Instead smbfs invalidates its own cache on local changes
145 * and remote changes are not seen until timeout.
146 */
147 }
148
149 if (filp->f_pos > ctl.head.end)
150 goto finished;
151
152 ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
153 ctl.ofs = ctl.fpos / SMB_DIRCACHE_SIZE;
154 ctl.idx = ctl.fpos % SMB_DIRCACHE_SIZE;
155
156 for (;;) {
157 if (ctl.ofs != 0) {
158 ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
159 if (!ctl.page)
160 goto invalid_cache;
161 ctl.cache = kmap(ctl.page);
162 if (!PageUptodate(ctl.page))
163 goto invalid_cache;
164 }
165 while (ctl.idx < SMB_DIRCACHE_SIZE) {
166 struct dentry *dent;
167 int res;
168
169 dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
170 dentry, filp->f_pos);
171 if (!dent)
172 goto invalid_cache;
173
174 res = filldir(dirent, dent->d_name.name,
175 dent->d_name.len, filp->f_pos,
176 dent->d_inode->i_ino, DT_UNKNOWN);
177 dput(dent);
178 if (res)
179 goto finished;
180 filp->f_pos += 1;
181 ctl.idx += 1;
182 if (filp->f_pos > ctl.head.end)
183 goto finished;
184 }
185 if (ctl.page) {
186 kunmap(ctl.page);
187 SetPageUptodate(ctl.page);
188 unlock_page(ctl.page);
189 page_cache_release(ctl.page);
190 ctl.page = NULL;
191 }
192 ctl.idx = 0;
193 ctl.ofs += 1;
194 }
195invalid_cache:
196 if (ctl.page) {
197 kunmap(ctl.page);
198 unlock_page(ctl.page);
199 page_cache_release(ctl.page);
200 ctl.page = NULL;
201 }
202 ctl.cache = cache;
203init_cache:
204 smb_invalidate_dircache_entries(dentry);
205 ctl.head.time = jiffies;
206 ctl.head.eof = 0;
207 ctl.fpos = 2;
208 ctl.ofs = 0;
209 ctl.idx = SMB_DIRCACHE_START;
210 ctl.filled = 0;
211 ctl.valid = 1;
212read_really:
213 result = server->ops->readdir(filp, dirent, filldir, &ctl);
214 if (result == -ERESTARTSYS && page)
215 ClearPageUptodate(page);
216 if (ctl.idx == -1)
217 goto invalid_cache; /* retry */
218 ctl.head.end = ctl.fpos - 1;
219 ctl.head.eof = ctl.valid;
220finished:
221 if (page) {
222 cache->head = ctl.head;
223 kunmap(page);
224 if (result != -ERESTARTSYS)
225 SetPageUptodate(page);
226 unlock_page(page);
227 page_cache_release(page);
228 }
229 if (ctl.page) {
230 kunmap(ctl.page);
231 SetPageUptodate(ctl.page);
232 unlock_page(ctl.page);
233 page_cache_release(ctl.page);
234 }
235out:
236 unlock_kernel();
237 return result;
238}
239
240static int
241smb_dir_open(struct inode *dir, struct file *file)
242{
243 struct dentry *dentry = file->f_path.dentry;
244 struct smb_sb_info *server;
245 int error = 0;
246
247 VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
248 file->f_path.dentry->d_name.name);
249
250 /*
251 * Directory timestamps in the core protocol aren't updated
252 * when a file is added, so we give them a very short TTL.
253 */
254 lock_kernel();
255 server = server_from_dentry(dentry);
256 if (server->opt.protocol < SMB_PROTOCOL_LANMAN2) {
257 unsigned long age = jiffies - SMB_I(dir)->oldmtime;
258 if (age > 2*HZ)
259 smb_invalid_dir_cache(dir);
260 }
261
262 /*
263 * Note: in order to allow the smbmount process to open the
264 * mount point, we only revalidate if the connection is valid or
265 * if the process is trying to access something other than the root.
266 */
267 if (server->state == CONN_VALID || !IS_ROOT(dentry))
268 error = smb_revalidate_inode(dentry);
269 unlock_kernel();
270 return error;
271}
272
273/*
274 * Dentry operations routines
275 */
276static int smb_lookup_validate(struct dentry *, struct nameidata *);
277static int smb_hash_dentry(struct dentry *, struct qstr *);
278static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
279static int smb_delete_dentry(struct dentry *);
280
281static const struct dentry_operations smbfs_dentry_operations =
282{
283 .d_revalidate = smb_lookup_validate,
284 .d_hash = smb_hash_dentry,
285 .d_compare = smb_compare_dentry,
286 .d_delete = smb_delete_dentry,
287};
288
289static const struct dentry_operations smbfs_dentry_operations_case =
290{
291 .d_revalidate = smb_lookup_validate,
292 .d_delete = smb_delete_dentry,
293};
294
295
296/*
297 * This is the callback when the dcache has a lookup hit.
298 */
299static int
300smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
301{
302 struct smb_sb_info *server = server_from_dentry(dentry);
303 struct inode * inode = dentry->d_inode;
304 unsigned long age = jiffies - dentry->d_time;
305 int valid;
306
307 /*
308 * The default validation is based on dentry age:
309 * we believe in dentries for a few seconds. (But each
310 * successful server lookup renews the timestamp.)
311 */
312 valid = (age <= SMB_MAX_AGE(server));
313#ifdef SMBFS_DEBUG_VERBOSE
314 if (!valid)
315 VERBOSE("%s/%s not valid, age=%lu\n",
316 DENTRY_PATH(dentry), age);
317#endif
318
319 if (inode) {
320 lock_kernel();
321 if (is_bad_inode(inode)) {
322 PARANOIA("%s/%s has dud inode\n", DENTRY_PATH(dentry));
323 valid = 0;
324 } else if (!valid)
325 valid = (smb_revalidate_inode(dentry) == 0);
326 unlock_kernel();
327 } else {
328 /*
329 * What should we do for negative dentries?
330 */
331 }
332 return valid;
333}
334
335static int
336smb_hash_dentry(struct dentry *dir, struct qstr *this)
337{
338 unsigned long hash;
339 int i;
340
341 hash = init_name_hash();
342 for (i=0; i < this->len ; i++)
343 hash = partial_name_hash(tolower(this->name[i]), hash);
344 this->hash = end_name_hash(hash);
345
346 return 0;
347}
348
349static int
350smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
351{
352 int i, result = 1;
353
354 if (a->len != b->len)
355 goto out;
356 for (i=0; i < a->len; i++) {
357 if (tolower(a->name[i]) != tolower(b->name[i]))
358 goto out;
359 }
360 result = 0;
361out:
362 return result;
363}
364
365/*
366 * This is the callback from dput() when d_count is going to 0.
367 * We use this to unhash dentries with bad inodes.
368 */
369static int
370smb_delete_dentry(struct dentry * dentry)
371{
372 if (dentry->d_inode) {
373 if (is_bad_inode(dentry->d_inode)) {
374 PARANOIA("bad inode, unhashing %s/%s\n",
375 DENTRY_PATH(dentry));
376 return 1;
377 }
378 } else {
379 /* N.B. Unhash negative dentries? */
380 }
381 return 0;
382}
383
384/*
385 * Initialize a new dentry
386 */
387void
388smb_new_dentry(struct dentry *dentry)
389{
390 struct smb_sb_info *server = server_from_dentry(dentry);
391
392 if (server->mnt->flags & SMB_MOUNT_CASE)
393 dentry->d_op = &smbfs_dentry_operations_case;
394 else
395 dentry->d_op = &smbfs_dentry_operations;
396 dentry->d_time = jiffies;
397}
398
399
400/*
401 * Whenever a lookup succeeds, we know the parent directories
402 * are all valid, so we want to update the dentry timestamps.
403 * N.B. Move this to dcache?
404 */
405void
406smb_renew_times(struct dentry * dentry)
407{
408 dget(dentry);
409 dentry->d_time = jiffies;
410
411 while (!IS_ROOT(dentry)) {
412 struct dentry *parent = dget_parent(dentry);
413 dput(dentry);
414 dentry = parent;
415
416 dentry->d_time = jiffies;
417 }
418 dput(dentry);
419}
420
421static struct dentry *
422smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
423{
424 struct smb_fattr finfo;
425 struct inode *inode;
426 int error;
427 struct smb_sb_info *server;
428
429 error = -ENAMETOOLONG;
430 if (dentry->d_name.len > SMB_MAXNAMELEN)
431 goto out;
432
433 /* Do not allow lookup of names with backslashes in */
434 error = -EINVAL;
435 if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
436 goto out;
437
438 lock_kernel();
439 error = smb_proc_getattr(dentry, &finfo);
440#ifdef SMBFS_PARANOIA
441 if (error && error != -ENOENT)
442 PARANOIA("find %s/%s failed, error=%d\n",
443 DENTRY_PATH(dentry), error);
444#endif
445
446 inode = NULL;
447 if (error == -ENOENT)
448 goto add_entry;
449 if (!error) {
450 error = -EACCES;
451 finfo.f_ino = iunique(dentry->d_sb, 2);
452 inode = smb_iget(dir->i_sb, &finfo);
453 if (inode) {
454 add_entry:
455 server = server_from_dentry(dentry);
456 if (server->mnt->flags & SMB_MOUNT_CASE)
457 dentry->d_op = &smbfs_dentry_operations_case;
458 else
459 dentry->d_op = &smbfs_dentry_operations;
460
461 d_add(dentry, inode);
462 smb_renew_times(dentry);
463 error = 0;
464 }
465 }
466 unlock_kernel();
467out:
468 return ERR_PTR(error);
469}
470
471/*
472 * This code is common to all routines creating a new inode.
473 */
474static int
475smb_instantiate(struct dentry *dentry, __u16 fileid, int have_id)
476{
477 struct smb_sb_info *server = server_from_dentry(dentry);
478 struct inode *inode;
479 int error;
480 struct smb_fattr fattr;
481
482 VERBOSE("file %s/%s, fileid=%u\n", DENTRY_PATH(dentry), fileid);
483
484 error = smb_proc_getattr(dentry, &fattr);
485 if (error)
486 goto out_close;
487
488 smb_renew_times(dentry);
489 fattr.f_ino = iunique(dentry->d_sb, 2);
490 inode = smb_iget(dentry->d_sb, &fattr);
491 if (!inode)
492 goto out_no_inode;
493
494 if (have_id) {
495 struct smb_inode_info *ei = SMB_I(inode);
496 ei->fileid = fileid;
497 ei->access = SMB_O_RDWR;
498 ei->open = server->generation;
499 }
500 d_instantiate(dentry, inode);
501out:
502 return error;
503
504out_no_inode:
505 error = -EACCES;
506out_close:
507 if (have_id) {
508 PARANOIA("%s/%s failed, error=%d, closing %u\n",
509 DENTRY_PATH(dentry), error, fileid);
510 smb_close_fileid(dentry, fileid);
511 }
512 goto out;
513}
514
515/* N.B. How should the mode argument be used? */
516static int
517smb_create(struct inode *dir, struct dentry *dentry, int mode,
518 struct nameidata *nd)
519{
520 struct smb_sb_info *server = server_from_dentry(dentry);
521 __u16 fileid;
522 int error;
523 struct iattr attr;
524
525 VERBOSE("creating %s/%s, mode=%d\n", DENTRY_PATH(dentry), mode);
526
527 lock_kernel();
528 smb_invalid_dir_cache(dir);
529 error = smb_proc_create(dentry, 0, get_seconds(), &fileid);
530 if (!error) {
531 if (server->opt.capabilities & SMB_CAP_UNIX) {
532 /* Set attributes for new file */
533 attr.ia_valid = ATTR_MODE;
534 attr.ia_mode = mode;
535 error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
536 }
537 error = smb_instantiate(dentry, fileid, 1);
538 } else {
539 PARANOIA("%s/%s failed, error=%d\n",
540 DENTRY_PATH(dentry), error);
541 }
542 unlock_kernel();
543 return error;
544}
545
546/* N.B. How should the mode argument be used? */
547static int
548smb_mkdir(struct inode *dir, struct dentry *dentry, int mode)
549{
550 struct smb_sb_info *server = server_from_dentry(dentry);
551 int error;
552 struct iattr attr;
553
554 lock_kernel();
555 smb_invalid_dir_cache(dir);
556 error = smb_proc_mkdir(dentry);
557 if (!error) {
558 if (server->opt.capabilities & SMB_CAP_UNIX) {
559 /* Set attributes for new directory */
560 attr.ia_valid = ATTR_MODE;
561 attr.ia_mode = mode;
562 error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
563 }
564 error = smb_instantiate(dentry, 0, 0);
565 }
566 unlock_kernel();
567 return error;
568}
569
570static int
571smb_rmdir(struct inode *dir, struct dentry *dentry)
572{
573 struct inode *inode = dentry->d_inode;
574 int error;
575
576 /*
577 * Close the directory if it's open.
578 */
579 lock_kernel();
580 smb_close(inode);
581
582 /*
583 * Check that nobody else is using the directory..
584 */
585 error = -EBUSY;
586 if (!d_unhashed(dentry))
587 goto out;
588
589 smb_invalid_dir_cache(dir);
590 error = smb_proc_rmdir(dentry);
591
592out:
593 unlock_kernel();
594 return error;
595}
596
597static int
598smb_unlink(struct inode *dir, struct dentry *dentry)
599{
600 int error;
601
602 /*
603 * Close the file if it's open.
604 */
605 lock_kernel();
606 smb_close(dentry->d_inode);
607
608 smb_invalid_dir_cache(dir);
609 error = smb_proc_unlink(dentry);
610 if (!error)
611 smb_renew_times(dentry);
612 unlock_kernel();
613 return error;
614}
615
616static int
617smb_rename(struct inode *old_dir, struct dentry *old_dentry,
618 struct inode *new_dir, struct dentry *new_dentry)
619{
620 int error;
621
622 /*
623 * Close any open files, and check whether to delete the
624 * target before attempting the rename.
625 */
626 lock_kernel();
627 if (old_dentry->d_inode)
628 smb_close(old_dentry->d_inode);
629 if (new_dentry->d_inode) {
630 smb_close(new_dentry->d_inode);
631 error = smb_proc_unlink(new_dentry);
632 if (error) {
633 VERBOSE("unlink %s/%s, error=%d\n",
634 DENTRY_PATH(new_dentry), error);
635 goto out;
636 }
637 /* FIXME */
638 d_delete(new_dentry);
639 }
640
641 smb_invalid_dir_cache(old_dir);
642 smb_invalid_dir_cache(new_dir);
643 error = smb_proc_mv(old_dentry, new_dentry);
644 if (!error) {
645 smb_renew_times(old_dentry);
646 smb_renew_times(new_dentry);
647 }
648out:
649 unlock_kernel();
650 return error;
651}
652
653/*
654 * FIXME: samba servers won't let you create device nodes unless uid/gid
655 * matches the connection credentials (and we don't know which those are ...)
656 */
657static int
658smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
659{
660 int error;
661 struct iattr attr;
662
663 attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
664 attr.ia_mode = mode;
665 current_euid_egid(&attr.ia_uid, &attr.ia_gid);
666
667 if (!new_valid_dev(dev))
668 return -EINVAL;
669
670 smb_invalid_dir_cache(dir);
671 error = smb_proc_setattr_unix(dentry, &attr, MAJOR(dev), MINOR(dev));
672 if (!error) {
673 error = smb_instantiate(dentry, 0, 0);
674 }
675 return error;
676}
677
678/*
679 * dentry = existing file
680 * new_dentry = new file
681 */
682static int
683smb_link(struct dentry *dentry, struct inode *dir, struct dentry *new_dentry)
684{
685 int error;
686
687 DEBUG1("smb_link old=%s/%s new=%s/%s\n",
688 DENTRY_PATH(dentry), DENTRY_PATH(new_dentry));
689 smb_invalid_dir_cache(dir);
690 error = smb_proc_link(server_from_dentry(dentry), dentry, new_dentry);
691 if (!error) {
692 smb_renew_times(dentry);
693 error = smb_instantiate(new_dentry, 0, 0);
694 }
695 return error;
696}
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
deleted file mode 100644
index 8e187a0f94bb..000000000000
--- a/fs/smbfs/file.c
+++ /dev/null
@@ -1,454 +0,0 @@
1/*
2 * file.c
3 *
4 * Copyright (C) 1995, 1996, 1997 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/time.h>
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/fcntl.h>
14#include <linux/stat.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/smp_lock.h>
18#include <linux/net.h>
19#include <linux/aio.h>
20
21#include <asm/uaccess.h>
22#include <asm/system.h>
23
24#include <linux/smbno.h>
25#include <linux/smb_fs.h>
26
27#include "smb_debug.h"
28#include "proto.h"
29
30static int
31smb_fsync(struct file *file, int datasync)
32{
33 struct dentry *dentry = file->f_path.dentry;
34 struct smb_sb_info *server = server_from_dentry(dentry);
35 int result;
36
37 VERBOSE("sync file %s/%s\n", DENTRY_PATH(dentry));
38
39 /*
40 * The VFS will writepage() all dirty pages for us, but we
41 * should send a SMBflush to the server, letting it know that
42 * we want things synchronized with actual storage.
43 *
44 * Note: this function requires all pages to have been written already
45 * (should be ok with writepage_sync)
46 */
47 result = smb_proc_flush(server, SMB_I(dentry->d_inode)->fileid);
48 return result;
49}
50
51/*
52 * Read a page synchronously.
53 */
54static int
55smb_readpage_sync(struct dentry *dentry, struct page *page)
56{
57 char *buffer = kmap(page);
58 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
59 struct smb_sb_info *server = server_from_dentry(dentry);
60 unsigned int rsize = smb_get_rsize(server);
61 int count = PAGE_SIZE;
62 int result;
63
64 VERBOSE("file %s/%s, count=%d@%Ld, rsize=%d\n",
65 DENTRY_PATH(dentry), count, offset, rsize);
66
67 result = smb_open(dentry, SMB_O_RDONLY);
68 if (result < 0)
69 goto io_error;
70
71 do {
72 if (count < rsize)
73 rsize = count;
74
75 result = server->ops->read(dentry->d_inode,offset,rsize,buffer);
76 if (result < 0)
77 goto io_error;
78
79 count -= result;
80 offset += result;
81 buffer += result;
82 dentry->d_inode->i_atime =
83 current_fs_time(dentry->d_inode->i_sb);
84 if (result < rsize)
85 break;
86 } while (count);
87
88 memset(buffer, 0, count);
89 flush_dcache_page(page);
90 SetPageUptodate(page);
91 result = 0;
92
93io_error:
94 kunmap(page);
95 unlock_page(page);
96 return result;
97}
98
99/*
100 * We are called with the page locked and we unlock it when done.
101 */
102static int
103smb_readpage(struct file *file, struct page *page)
104{
105 int error;
106 struct dentry *dentry = file->f_path.dentry;
107
108 page_cache_get(page);
109 error = smb_readpage_sync(dentry, page);
110 page_cache_release(page);
111 return error;
112}
113
114/*
115 * Write a page synchronously.
116 * Offset is the data offset within the page.
117 */
118static int
119smb_writepage_sync(struct inode *inode, struct page *page,
120 unsigned long pageoffset, unsigned int count)
121{
122 loff_t offset;
123 char *buffer = kmap(page) + pageoffset;
124 struct smb_sb_info *server = server_from_inode(inode);
125 unsigned int wsize = smb_get_wsize(server);
126 int ret = 0;
127
128 offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + pageoffset;
129 VERBOSE("file ino=%ld, fileid=%d, count=%d@%Ld, wsize=%d\n",
130 inode->i_ino, SMB_I(inode)->fileid, count, offset, wsize);
131
132 do {
133 int write_ret;
134
135 if (count < wsize)
136 wsize = count;
137
138 write_ret = server->ops->write(inode, offset, wsize, buffer);
139 if (write_ret < 0) {
140 PARANOIA("failed write, wsize=%d, write_ret=%d\n",
141 wsize, write_ret);
142 ret = write_ret;
143 break;
144 }
145 /* N.B. what if result < wsize?? */
146#ifdef SMBFS_PARANOIA
147 if (write_ret < wsize)
148 PARANOIA("short write, wsize=%d, write_ret=%d\n",
149 wsize, write_ret);
150#endif
151 buffer += wsize;
152 offset += wsize;
153 count -= wsize;
154 /*
155 * Update the inode now rather than waiting for a refresh.
156 */
157 inode->i_mtime = inode->i_atime = current_fs_time(inode->i_sb);
158 SMB_I(inode)->flags |= SMB_F_LOCALWRITE;
159 if (offset > inode->i_size)
160 inode->i_size = offset;
161 } while (count);
162
163 kunmap(page);
164 return ret;
165}
166
167/*
168 * Write a page to the server. This will be used for NFS swapping only
169 * (for now), and we currently do this synchronously only.
170 *
171 * We are called with the page locked and we unlock it when done.
172 */
173static int
174smb_writepage(struct page *page, struct writeback_control *wbc)
175{
176 struct address_space *mapping = page->mapping;
177 struct inode *inode;
178 unsigned long end_index;
179 unsigned offset = PAGE_CACHE_SIZE;
180 int err;
181
182 BUG_ON(!mapping);
183 inode = mapping->host;
184 BUG_ON(!inode);
185
186 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
187
188 /* easy case */
189 if (page->index < end_index)
190 goto do_it;
191 /* things got complicated... */
192 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
193 /* OK, are we completely out? */
194 if (page->index >= end_index+1 || !offset)
195 return 0; /* truncated - don't care */
196do_it:
197 page_cache_get(page);
198 err = smb_writepage_sync(inode, page, 0, offset);
199 SetPageUptodate(page);
200 unlock_page(page);
201 page_cache_release(page);
202 return err;
203}
204
205static int
206smb_updatepage(struct file *file, struct page *page, unsigned long offset,
207 unsigned int count)
208{
209 struct dentry *dentry = file->f_path.dentry;
210
211 DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
212 ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
213
214 return smb_writepage_sync(dentry->d_inode, page, offset, count);
215}
216
217static ssize_t
218smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
219 unsigned long nr_segs, loff_t pos)
220{
221 struct file * file = iocb->ki_filp;
222 struct dentry * dentry = file->f_path.dentry;
223 ssize_t status;
224
225 VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
226 (unsigned long) iocb->ki_left, (unsigned long) pos);
227
228 status = smb_revalidate_inode(dentry);
229 if (status) {
230 PARANOIA("%s/%s validation failed, error=%Zd\n",
231 DENTRY_PATH(dentry), status);
232 goto out;
233 }
234
235 VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n",
236 (long)dentry->d_inode->i_size,
237 dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec);
238
239 status = generic_file_aio_read(iocb, iov, nr_segs, pos);
240out:
241 return status;
242}
243
244static int
245smb_file_mmap(struct file * file, struct vm_area_struct * vma)
246{
247 struct dentry * dentry = file->f_path.dentry;
248 int status;
249
250 VERBOSE("file %s/%s, address %lu - %lu\n",
251 DENTRY_PATH(dentry), vma->vm_start, vma->vm_end);
252
253 status = smb_revalidate_inode(dentry);
254 if (status) {
255 PARANOIA("%s/%s validation failed, error=%d\n",
256 DENTRY_PATH(dentry), status);
257 goto out;
258 }
259 status = generic_file_mmap(file, vma);
260out:
261 return status;
262}
263
264static ssize_t
265smb_file_splice_read(struct file *file, loff_t *ppos,
266 struct pipe_inode_info *pipe, size_t count,
267 unsigned int flags)
268{
269 struct dentry *dentry = file->f_path.dentry;
270 ssize_t status;
271
272 VERBOSE("file %s/%s, pos=%Ld, count=%lu\n",
273 DENTRY_PATH(dentry), *ppos, count);
274
275 status = smb_revalidate_inode(dentry);
276 if (status) {
277 PARANOIA("%s/%s validation failed, error=%Zd\n",
278 DENTRY_PATH(dentry), status);
279 goto out;
280 }
281 status = generic_file_splice_read(file, ppos, pipe, count, flags);
282out:
283 return status;
284}
285
286/*
287 * This does the "real" work of the write. The generic routine has
288 * allocated the page, locked it, done all the page alignment stuff
289 * calculations etc. Now we should just copy the data from user
290 * space and write it back to the real medium..
291 *
292 * If the writer ends up delaying the write, the writer needs to
293 * increment the page use counts until he is done with the page.
294 */
295static int smb_write_begin(struct file *file, struct address_space *mapping,
296 loff_t pos, unsigned len, unsigned flags,
297 struct page **pagep, void **fsdata)
298{
299 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
300 *pagep = grab_cache_page_write_begin(mapping, index, flags);
301 if (!*pagep)
302 return -ENOMEM;
303 return 0;
304}
305
306static int smb_write_end(struct file *file, struct address_space *mapping,
307 loff_t pos, unsigned len, unsigned copied,
308 struct page *page, void *fsdata)
309{
310 int status;
311 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
312
313 lock_kernel();
314 status = smb_updatepage(file, page, offset, copied);
315 unlock_kernel();
316
317 if (!status) {
318 if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
319 SetPageUptodate(page);
320 status = copied;
321 }
322
323 unlock_page(page);
324 page_cache_release(page);
325
326 return status;
327}
328
329const struct address_space_operations smb_file_aops = {
330 .readpage = smb_readpage,
331 .writepage = smb_writepage,
332 .write_begin = smb_write_begin,
333 .write_end = smb_write_end,
334};
335
336/*
337 * Write to a file (through the page cache).
338 */
339static ssize_t
340smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
341 unsigned long nr_segs, loff_t pos)
342{
343 struct file * file = iocb->ki_filp;
344 struct dentry * dentry = file->f_path.dentry;
345 ssize_t result;
346
347 VERBOSE("file %s/%s, count=%lu@%lu\n",
348 DENTRY_PATH(dentry),
349 (unsigned long) iocb->ki_left, (unsigned long) pos);
350
351 result = smb_revalidate_inode(dentry);
352 if (result) {
353 PARANOIA("%s/%s validation failed, error=%Zd\n",
354 DENTRY_PATH(dentry), result);
355 goto out;
356 }
357
358 result = smb_open(dentry, SMB_O_WRONLY);
359 if (result)
360 goto out;
361
362 if (iocb->ki_left > 0) {
363 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
364 VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
365 (long) file->f_pos, (long) dentry->d_inode->i_size,
366 dentry->d_inode->i_mtime.tv_sec,
367 dentry->d_inode->i_atime.tv_sec);
368 }
369out:
370 return result;
371}
372
373static int
374smb_file_open(struct inode *inode, struct file * file)
375{
376 int result;
377 struct dentry *dentry = file->f_path.dentry;
378 int smb_mode = (file->f_mode & O_ACCMODE) - 1;
379
380 lock_kernel();
381 result = smb_open(dentry, smb_mode);
382 if (result)
383 goto out;
384 SMB_I(inode)->openers++;
385out:
386 unlock_kernel();
387 return result;
388}
389
390static int
391smb_file_release(struct inode *inode, struct file * file)
392{
393 lock_kernel();
394 if (!--SMB_I(inode)->openers) {
395 /* We must flush any dirty pages now as we won't be able to
396 write anything after close. mmap can trigger this.
397 "openers" should perhaps include mmap'ers ... */
398 filemap_write_and_wait(inode->i_mapping);
399 smb_close(inode);
400 }
401 unlock_kernel();
402 return 0;
403}
404
405/*
406 * Check whether the required access is compatible with
407 * an inode's permission. SMB doesn't recognize superuser
408 * privileges, so we need our own check for this.
409 */
410static int
411smb_file_permission(struct inode *inode, int mask)
412{
413 int mode = inode->i_mode;
414 int error = 0;
415
416 VERBOSE("mode=%x, mask=%x\n", mode, mask);
417
418 /* Look at user permissions */
419 mode >>= 6;
420 if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
421 error = -EACCES;
422 return error;
423}
424
425static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
426{
427 loff_t ret;
428 lock_kernel();
429 ret = generic_file_llseek_unlocked(file, offset, origin);
430 unlock_kernel();
431 return ret;
432}
433
434const struct file_operations smb_file_operations =
435{
436 .llseek = smb_remote_llseek,
437 .read = do_sync_read,
438 .aio_read = smb_file_aio_read,
439 .write = do_sync_write,
440 .aio_write = smb_file_aio_write,
441 .unlocked_ioctl = smb_ioctl,
442 .mmap = smb_file_mmap,
443 .open = smb_file_open,
444 .release = smb_file_release,
445 .fsync = smb_fsync,
446 .splice_read = smb_file_splice_read,
447};
448
449const struct inode_operations smb_file_inode_operations =
450{
451 .permission = smb_file_permission,
452 .getattr = smb_getattr,
453 .setattr = smb_notify_change,
454};
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
deleted file mode 100644
index 7ae0f5273ab1..000000000000
--- a/fs/smbfs/getopt.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * getopt.c
3 */
4
5#include <linux/kernel.h>
6#include <linux/string.h>
7#include <linux/net.h>
8
9#include "getopt.h"
10
11/**
12 * smb_getopt - option parser
13 * @caller: name of the caller, for error messages
14 * @options: the options string
15 * @opts: an array of &struct option entries controlling parser operations
16 * @optopt: output; will contain the current option
17 * @optarg: output; will contain the value (if one exists)
18 * @flag: output; may be NULL; should point to a long for or'ing flags
19 * @value: output; may be NULL; will be overwritten with the integer value
20 * of the current argument.
21 *
22 * Helper to parse options on the format used by mount ("a=b,c=d,e,f").
23 * Returns opts->val if a matching entry in the 'opts' array is found,
24 * 0 when no more tokens are found, -1 if an error is encountered.
25 */
26int smb_getopt(char *caller, char **options, struct option *opts,
27 char **optopt, char **optarg, unsigned long *flag,
28 unsigned long *value)
29{
30 char *token;
31 char *val;
32 int i;
33
34 do {
35 if ((token = strsep(options, ",")) == NULL)
36 return 0;
37 } while (*token == '\0');
38 *optopt = token;
39
40 *optarg = NULL;
41 if ((val = strchr (token, '=')) != NULL) {
42 *val++ = 0;
43 if (value)
44 *value = simple_strtoul(val, NULL, 0);
45 *optarg = val;
46 }
47
48 for (i = 0; opts[i].name != NULL; i++) {
49 if (!strcmp(opts[i].name, token)) {
50 if (!opts[i].flag && (!val || !*val)) {
51 printk("%s: the %s option requires an argument\n",
52 caller, token);
53 return -1;
54 }
55
56 if (flag && opts[i].flag)
57 *flag |= opts[i].flag;
58
59 return opts[i].val;
60 }
61 }
62 printk("%s: Unrecognized mount option %s\n", caller, token);
63 return -1;
64}
diff --git a/fs/smbfs/getopt.h b/fs/smbfs/getopt.h
deleted file mode 100644
index 146219ac7c46..000000000000
--- a/fs/smbfs/getopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
1#ifndef _LINUX_GETOPT_H
2#define _LINUX_GETOPT_H
3
4struct option {
5 const char *name;
6 unsigned long flag;
7 int val;
8};
9
10extern int smb_getopt(char *caller, char **options, struct option *opts,
11 char **optopt, char **optarg, unsigned long *flag,
12 unsigned long *value);
13
14#endif /* _LINUX_GETOPT_H */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
deleted file mode 100644
index f6e9ee59757e..000000000000
--- a/fs/smbfs/inode.c
+++ /dev/null
@@ -1,843 +0,0 @@
1/*
2 * inode.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/module.h>
11#include <linux/time.h>
12#include <linux/kernel.h>
13#include <linux/mm.h>
14#include <linux/string.h>
15#include <linux/stat.h>
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/init.h>
19#include <linux/file.h>
20#include <linux/dcache.h>
21#include <linux/smp_lock.h>
22#include <linux/nls.h>
23#include <linux/seq_file.h>
24#include <linux/mount.h>
25#include <linux/net.h>
26#include <linux/vfs.h>
27#include <linux/highuid.h>
28#include <linux/sched.h>
29#include <linux/smb_fs.h>
30#include <linux/smbno.h>
31#include <linux/smb_mount.h>
32
33#include <asm/system.h>
34#include <asm/uaccess.h>
35
36#include "smb_debug.h"
37#include "getopt.h"
38#include "proto.h"
39
40/* Always pick a default string */
41#ifdef CONFIG_SMB_NLS_REMOTE
42#define SMB_NLS_REMOTE CONFIG_SMB_NLS_REMOTE
43#else
44#define SMB_NLS_REMOTE ""
45#endif
46
47#define SMB_TTL_DEFAULT 1000
48
49static void smb_evict_inode(struct inode *);
50static void smb_put_super(struct super_block *);
51static int smb_statfs(struct dentry *, struct kstatfs *);
52static int smb_show_options(struct seq_file *, struct vfsmount *);
53
54static struct kmem_cache *smb_inode_cachep;
55
56static struct inode *smb_alloc_inode(struct super_block *sb)
57{
58 struct smb_inode_info *ei;
59 ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
60 if (!ei)
61 return NULL;
62 return &ei->vfs_inode;
63}
64
65static void smb_destroy_inode(struct inode *inode)
66{
67 kmem_cache_free(smb_inode_cachep, SMB_I(inode));
68}
69
70static void init_once(void *foo)
71{
72 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
73
74 inode_init_once(&ei->vfs_inode);
75}
76
77static int init_inodecache(void)
78{
79 smb_inode_cachep = kmem_cache_create("smb_inode_cache",
80 sizeof(struct smb_inode_info),
81 0, (SLAB_RECLAIM_ACCOUNT|
82 SLAB_MEM_SPREAD),
83 init_once);
84 if (smb_inode_cachep == NULL)
85 return -ENOMEM;
86 return 0;
87}
88
89static void destroy_inodecache(void)
90{
91 kmem_cache_destroy(smb_inode_cachep);
92}
93
94static int smb_remount(struct super_block *sb, int *flags, char *data)
95{
96 *flags |= MS_NODIRATIME;
97 return 0;
98}
99
100static const struct super_operations smb_sops =
101{
102 .alloc_inode = smb_alloc_inode,
103 .destroy_inode = smb_destroy_inode,
104 .drop_inode = generic_delete_inode,
105 .evict_inode = smb_evict_inode,
106 .put_super = smb_put_super,
107 .statfs = smb_statfs,
108 .show_options = smb_show_options,
109 .remount_fs = smb_remount,
110};
111
112
113/* We are always generating a new inode here */
114struct inode *
115smb_iget(struct super_block *sb, struct smb_fattr *fattr)
116{
117 struct smb_sb_info *server = SMB_SB(sb);
118 struct inode *result;
119
120 DEBUG1("smb_iget: %p\n", fattr);
121
122 result = new_inode(sb);
123 if (!result)
124 return result;
125 result->i_ino = fattr->f_ino;
126 SMB_I(result)->open = 0;
127 SMB_I(result)->fileid = 0;
128 SMB_I(result)->access = 0;
129 SMB_I(result)->flags = 0;
130 SMB_I(result)->closed = 0;
131 SMB_I(result)->openers = 0;
132 smb_set_inode_attr(result, fattr);
133 if (S_ISREG(result->i_mode)) {
134 result->i_op = &smb_file_inode_operations;
135 result->i_fop = &smb_file_operations;
136 result->i_data.a_ops = &smb_file_aops;
137 } else if (S_ISDIR(result->i_mode)) {
138 if (server->opt.capabilities & SMB_CAP_UNIX)
139 result->i_op = &smb_dir_inode_operations_unix;
140 else
141 result->i_op = &smb_dir_inode_operations;
142 result->i_fop = &smb_dir_operations;
143 } else if (S_ISLNK(result->i_mode)) {
144 result->i_op = &smb_link_inode_operations;
145 } else {
146 init_special_inode(result, result->i_mode, fattr->f_rdev);
147 }
148 insert_inode_hash(result);
149 return result;
150}
151
152/*
153 * Copy the inode data to a smb_fattr structure.
154 */
155void
156smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
157{
158 memset(fattr, 0, sizeof(struct smb_fattr));
159 fattr->f_mode = inode->i_mode;
160 fattr->f_nlink = inode->i_nlink;
161 fattr->f_ino = inode->i_ino;
162 fattr->f_uid = inode->i_uid;
163 fattr->f_gid = inode->i_gid;
164 fattr->f_size = inode->i_size;
165 fattr->f_mtime = inode->i_mtime;
166 fattr->f_ctime = inode->i_ctime;
167 fattr->f_atime = inode->i_atime;
168 fattr->f_blocks = inode->i_blocks;
169
170 fattr->attr = SMB_I(inode)->attr;
171 /*
172 * Keep the attributes in sync with the inode permissions.
173 */
174 if (fattr->f_mode & S_IWUSR)
175 fattr->attr &= ~aRONLY;
176 else
177 fattr->attr |= aRONLY;
178}
179
180/*
181 * Update the inode, possibly causing it to invalidate its pages if mtime/size
182 * is different from last time.
183 */
184void
185smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
186{
187 struct smb_inode_info *ei = SMB_I(inode);
188
189 /*
190 * A size change should have a different mtime, or same mtime
191 * but different size.
192 */
193 time_t last_time = inode->i_mtime.tv_sec;
194 loff_t last_sz = inode->i_size;
195
196 inode->i_mode = fattr->f_mode;
197 inode->i_nlink = fattr->f_nlink;
198 inode->i_uid = fattr->f_uid;
199 inode->i_gid = fattr->f_gid;
200 inode->i_ctime = fattr->f_ctime;
201 inode->i_blocks = fattr->f_blocks;
202 inode->i_size = fattr->f_size;
203 inode->i_mtime = fattr->f_mtime;
204 inode->i_atime = fattr->f_atime;
205 ei->attr = fattr->attr;
206
207 /*
208 * Update the "last time refreshed" field for revalidation.
209 */
210 ei->oldmtime = jiffies;
211
212 if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
213 VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
214 inode->i_ino,
215 (long) last_time, (long) inode->i_mtime.tv_sec,
216 (long) last_sz, (long) inode->i_size);
217
218 if (!S_ISDIR(inode->i_mode))
219 invalidate_remote_inode(inode);
220 }
221}
222
223/*
224 * This is called if the connection has gone bad ...
225 * try to kill off all the current inodes.
226 */
227void
228smb_invalidate_inodes(struct smb_sb_info *server)
229{
230 VERBOSE("\n");
231 shrink_dcache_sb(SB_of(server));
232}
233
234/*
235 * This is called to update the inode attributes after
236 * we've made changes to a file or directory.
237 */
238static int
239smb_refresh_inode(struct dentry *dentry)
240{
241 struct inode *inode = dentry->d_inode;
242 int error;
243 struct smb_fattr fattr;
244
245 error = smb_proc_getattr(dentry, &fattr);
246 if (!error) {
247 smb_renew_times(dentry);
248 /*
249 * Check whether the type part of the mode changed,
250 * and don't update the attributes if it did.
251 *
252 * And don't dick with the root inode
253 */
254 if (inode->i_ino == 2)
255 return error;
256 if (S_ISLNK(inode->i_mode))
257 return error; /* VFS will deal with it */
258
259 if ((inode->i_mode & S_IFMT) == (fattr.f_mode & S_IFMT)) {
260 smb_set_inode_attr(inode, &fattr);
261 } else {
262 /*
263 * Big trouble! The inode has become a new object,
264 * so any operations attempted on it are invalid.
265 *
266 * To limit damage, mark the inode as bad so that
267 * subsequent lookup validations will fail.
268 */
269 PARANOIA("%s/%s changed mode, %07o to %07o\n",
270 DENTRY_PATH(dentry),
271 inode->i_mode, fattr.f_mode);
272
273 fattr.f_mode = inode->i_mode; /* save mode */
274 make_bad_inode(inode);
275 inode->i_mode = fattr.f_mode; /* restore mode */
276 /*
277 * No need to worry about unhashing the dentry: the
278 * lookup validation will see that the inode is bad.
279 * But we do want to invalidate the caches ...
280 */
281 if (!S_ISDIR(inode->i_mode))
282 invalidate_remote_inode(inode);
283 else
284 smb_invalid_dir_cache(inode);
285 error = -EIO;
286 }
287 }
288 return error;
289}
290
291/*
292 * This is called when we want to check whether the inode
293 * has changed on the server. If it has changed, we must
294 * invalidate our local caches.
295 */
296int
297smb_revalidate_inode(struct dentry *dentry)
298{
299 struct smb_sb_info *s = server_from_dentry(dentry);
300 struct inode *inode = dentry->d_inode;
301 int error = 0;
302
303 DEBUG1("smb_revalidate_inode\n");
304 lock_kernel();
305
306 /*
307 * Check whether we've recently refreshed the inode.
308 */
309 if (time_before(jiffies, SMB_I(inode)->oldmtime + SMB_MAX_AGE(s))) {
310 VERBOSE("up-to-date, ino=%ld, jiffies=%lu, oldtime=%lu\n",
311 inode->i_ino, jiffies, SMB_I(inode)->oldmtime);
312 goto out;
313 }
314
315 error = smb_refresh_inode(dentry);
316out:
317 unlock_kernel();
318 return error;
319}
320
321/*
322 * This routine is called when i_nlink == 0 and i_count goes to 0.
323 * All blocking cleanup operations need to go here to avoid races.
324 */
325static void
326smb_evict_inode(struct inode *ino)
327{
328 DEBUG1("ino=%ld\n", ino->i_ino);
329 truncate_inode_pages(&ino->i_data, 0);
330 end_writeback(ino);
331 lock_kernel();
332 if (smb_close(ino))
333 PARANOIA("could not close inode %ld\n", ino->i_ino);
334 unlock_kernel();
335}
336
337static struct option opts[] = {
338 { "version", 0, 'v' },
339 { "win95", SMB_MOUNT_WIN95, 1 },
340 { "oldattr", SMB_MOUNT_OLDATTR, 1 },
341 { "dirattr", SMB_MOUNT_DIRATTR, 1 },
342 { "case", SMB_MOUNT_CASE, 1 },
343 { "uid", 0, 'u' },
344 { "gid", 0, 'g' },
345 { "file_mode", 0, 'f' },
346 { "dir_mode", 0, 'd' },
347 { "iocharset", 0, 'i' },
348 { "codepage", 0, 'c' },
349 { "ttl", 0, 't' },
350 { NULL, 0, 0}
351};
352
353static int
354parse_options(struct smb_mount_data_kernel *mnt, char *options)
355{
356 int c;
357 unsigned long flags;
358 unsigned long value;
359 char *optarg;
360 char *optopt;
361
362 flags = 0;
363 while ( (c = smb_getopt("smbfs", &options, opts,
364 &optopt, &optarg, &flags, &value)) > 0) {
365
366 VERBOSE("'%s' -> '%s'\n", optopt, optarg ? optarg : "<none>");
367 switch (c) {
368 case 1:
369 /* got a "flag" option */
370 break;
371 case 'v':
372 if (value != SMB_MOUNT_VERSION) {
373 printk ("smbfs: Bad mount version %ld, expected %d\n",
374 value, SMB_MOUNT_VERSION);
375 return 0;
376 }
377 mnt->version = value;
378 break;
379 case 'u':
380 mnt->uid = value;
381 flags |= SMB_MOUNT_UID;
382 break;
383 case 'g':
384 mnt->gid = value;
385 flags |= SMB_MOUNT_GID;
386 break;
387 case 'f':
388 mnt->file_mode = (value & S_IRWXUGO) | S_IFREG;
389 flags |= SMB_MOUNT_FMODE;
390 break;
391 case 'd':
392 mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR;
393 flags |= SMB_MOUNT_DMODE;
394 break;
395 case 'i':
396 strlcpy(mnt->codepage.local_name, optarg,
397 SMB_NLS_MAXNAMELEN);
398 break;
399 case 'c':
400 strlcpy(mnt->codepage.remote_name, optarg,
401 SMB_NLS_MAXNAMELEN);
402 break;
403 case 't':
404 mnt->ttl = value;
405 break;
406 default:
407 printk ("smbfs: Unrecognized mount option %s\n",
408 optopt);
409 return -1;
410 }
411 }
412 mnt->flags = flags;
413 return c;
414}
415
416/*
417 * smb_show_options() is for displaying mount options in /proc/mounts.
418 * It tries to avoid showing settings that were not changed from their
419 * defaults.
420 */
421static int
422smb_show_options(struct seq_file *s, struct vfsmount *m)
423{
424 struct smb_mount_data_kernel *mnt = SMB_SB(m->mnt_sb)->mnt;
425 int i;
426
427 for (i = 0; opts[i].name != NULL; i++)
428 if (mnt->flags & opts[i].flag)
429 seq_printf(s, ",%s", opts[i].name);
430
431 if (mnt->flags & SMB_MOUNT_UID)
432 seq_printf(s, ",uid=%d", mnt->uid);
433 if (mnt->flags & SMB_MOUNT_GID)
434 seq_printf(s, ",gid=%d", mnt->gid);
435 if (mnt->mounted_uid != 0)
436 seq_printf(s, ",mounted_uid=%d", mnt->mounted_uid);
437
438 /*
439 * Defaults for file_mode and dir_mode are unknown to us; they
440 * depend on the current umask of the user doing the mount.
441 */
442 if (mnt->flags & SMB_MOUNT_FMODE)
443 seq_printf(s, ",file_mode=%04o", mnt->file_mode & S_IRWXUGO);
444 if (mnt->flags & SMB_MOUNT_DMODE)
445 seq_printf(s, ",dir_mode=%04o", mnt->dir_mode & S_IRWXUGO);
446
447 if (strcmp(mnt->codepage.local_name, CONFIG_NLS_DEFAULT))
448 seq_printf(s, ",iocharset=%s", mnt->codepage.local_name);
449 if (strcmp(mnt->codepage.remote_name, SMB_NLS_REMOTE))
450 seq_printf(s, ",codepage=%s", mnt->codepage.remote_name);
451
452 if (mnt->ttl != SMB_TTL_DEFAULT)
453 seq_printf(s, ",ttl=%d", mnt->ttl);
454
455 return 0;
456}
457
458static void
459smb_unload_nls(struct smb_sb_info *server)
460{
461 unload_nls(server->remote_nls);
462 unload_nls(server->local_nls);
463}
464
465static void
466smb_put_super(struct super_block *sb)
467{
468 struct smb_sb_info *server = SMB_SB(sb);
469
470 lock_kernel();
471
472 smb_lock_server(server);
473 server->state = CONN_INVALID;
474 smbiod_unregister_server(server);
475
476 smb_close_socket(server);
477
478 if (server->conn_pid)
479 kill_pid(server->conn_pid, SIGTERM, 1);
480
481 bdi_destroy(&server->bdi);
482 kfree(server->ops);
483 smb_unload_nls(server);
484 sb->s_fs_info = NULL;
485 smb_unlock_server(server);
486 put_pid(server->conn_pid);
487 kfree(server);
488
489 unlock_kernel();
490}
491
492static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
493{
494 struct smb_sb_info *server;
495 struct smb_mount_data_kernel *mnt;
496 struct smb_mount_data *oldmnt;
497 struct inode *root_inode;
498 struct smb_fattr root;
499 int ver;
500 void *mem;
501 static int warn_count;
502
503 lock_kernel();
504
505 if (warn_count < 5) {
506 warn_count++;
507 printk(KERN_EMERG "smbfs is deprecated and will be removed"
508 " from the 2.6.27 kernel. Please migrate to cifs\n");
509 }
510
511 if (!raw_data)
512 goto out_no_data;
513
514 oldmnt = (struct smb_mount_data *) raw_data;
515 ver = oldmnt->version;
516 if (ver != SMB_MOUNT_OLDVERSION && cpu_to_be32(ver) != SMB_MOUNT_ASCII)
517 goto out_wrong_data;
518
519 sb->s_flags |= MS_NODIRATIME;
520 sb->s_blocksize = 1024; /* Eh... Is this correct? */
521 sb->s_blocksize_bits = 10;
522 sb->s_magic = SMB_SUPER_MAGIC;
523 sb->s_op = &smb_sops;
524 sb->s_time_gran = 100;
525
526 server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
527 if (!server)
528 goto out_no_server;
529 sb->s_fs_info = server;
530
531 if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
532 goto out_bdi;
533
534 sb->s_bdi = &server->bdi;
535
536 server->super_block = sb;
537 server->mnt = NULL;
538 server->sock_file = NULL;
539 init_waitqueue_head(&server->conn_wq);
540 init_MUTEX(&server->sem);
541 INIT_LIST_HEAD(&server->entry);
542 INIT_LIST_HEAD(&server->xmitq);
543 INIT_LIST_HEAD(&server->recvq);
544 server->conn_error = 0;
545 server->conn_pid = NULL;
546 server->state = CONN_INVALID; /* no connection yet */
547 server->generation = 0;
548
549 /* Allocate the global temp buffer and some superblock helper structs */
550 /* FIXME: move these to the smb_sb_info struct */
551 VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) +
552 sizeof(struct smb_mount_data_kernel));
553 mem = kmalloc(sizeof(struct smb_ops) +
554 sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
555 if (!mem)
556 goto out_no_mem;
557
558 server->ops = mem;
559 smb_install_null_ops(server->ops);
560 server->mnt = mem + sizeof(struct smb_ops);
561
562 /* Setup NLS stuff */
563 server->remote_nls = NULL;
564 server->local_nls = NULL;
565
566 mnt = server->mnt;
567
568 memset(mnt, 0, sizeof(struct smb_mount_data_kernel));
569 strlcpy(mnt->codepage.local_name, CONFIG_NLS_DEFAULT,
570 SMB_NLS_MAXNAMELEN);
571 strlcpy(mnt->codepage.remote_name, SMB_NLS_REMOTE,
572 SMB_NLS_MAXNAMELEN);
573
574 mnt->ttl = SMB_TTL_DEFAULT;
575 if (ver == SMB_MOUNT_OLDVERSION) {
576 mnt->version = oldmnt->version;
577
578 SET_UID(mnt->uid, oldmnt->uid);
579 SET_GID(mnt->gid, oldmnt->gid);
580
581 mnt->file_mode = (oldmnt->file_mode & S_IRWXUGO) | S_IFREG;
582 mnt->dir_mode = (oldmnt->dir_mode & S_IRWXUGO) | S_IFDIR;
583
584 mnt->flags = (oldmnt->file_mode >> 9) | SMB_MOUNT_UID |
585 SMB_MOUNT_GID | SMB_MOUNT_FMODE | SMB_MOUNT_DMODE;
586 } else {
587 mnt->file_mode = S_IRWXU | S_IRGRP | S_IXGRP |
588 S_IROTH | S_IXOTH | S_IFREG;
589 mnt->dir_mode = S_IRWXU | S_IRGRP | S_IXGRP |
590 S_IROTH | S_IXOTH | S_IFDIR;
591 if (parse_options(mnt, raw_data))
592 goto out_bad_option;
593 }
594 mnt->mounted_uid = current_uid();
595 smb_setcodepage(server, &mnt->codepage);
596
597 /*
598 * Display the enabled options
599 * Note: smb_proc_getattr uses these in 2.4 (but was changed in 2.2)
600 */
601 if (mnt->flags & SMB_MOUNT_OLDATTR)
602 printk("SMBFS: Using core getattr (Win 95 speedup)\n");
603 else if (mnt->flags & SMB_MOUNT_DIRATTR)
604 printk("SMBFS: Using dir ff getattr\n");
605
606 if (smbiod_register_server(server) < 0) {
607 printk(KERN_ERR "smbfs: failed to start smbiod\n");
608 goto out_no_smbiod;
609 }
610
611 /*
612 * Keep the super block locked while we get the root inode.
613 */
614 smb_init_root_dirent(server, &root, sb);
615 root_inode = smb_iget(sb, &root);
616 if (!root_inode)
617 goto out_no_root;
618
619 sb->s_root = d_alloc_root(root_inode);
620 if (!sb->s_root)
621 goto out_no_root;
622
623 smb_new_dentry(sb->s_root);
624
625 unlock_kernel();
626 return 0;
627
628out_no_root:
629 iput(root_inode);
630out_no_smbiod:
631 smb_unload_nls(server);
632out_bad_option:
633 kfree(mem);
634out_no_mem:
635 bdi_destroy(&server->bdi);
636out_bdi:
637 if (!server->mnt)
638 printk(KERN_ERR "smb_fill_super: allocation failure\n");
639 sb->s_fs_info = NULL;
640 kfree(server);
641 goto out_fail;
642out_wrong_data:
643 printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
644 goto out_fail;
645out_no_data:
646 printk(KERN_ERR "smb_fill_super: missing data argument\n");
647out_fail:
648 unlock_kernel();
649 return -EINVAL;
650out_no_server:
651 printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n");
652 unlock_kernel();
653 return -ENOMEM;
654}
655
656static int
657smb_statfs(struct dentry *dentry, struct kstatfs *buf)
658{
659 int result;
660
661 lock_kernel();
662
663 result = smb_proc_dskattr(dentry, buf);
664
665 unlock_kernel();
666
667 buf->f_type = SMB_SUPER_MAGIC;
668 buf->f_namelen = SMB_MAXPATHLEN;
669 return result;
670}
671
672int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
673{
674 int err = smb_revalidate_inode(dentry);
675 if (!err)
676 generic_fillattr(dentry->d_inode, stat);
677 return err;
678}
679
680int
681smb_notify_change(struct dentry *dentry, struct iattr *attr)
682{
683 struct inode *inode = dentry->d_inode;
684 struct smb_sb_info *server = server_from_dentry(dentry);
685 unsigned int mask = (S_IFREG | S_IFDIR | S_IRWXUGO);
686 int error, changed, refresh = 0;
687 struct smb_fattr fattr;
688
689 lock_kernel();
690
691 error = smb_revalidate_inode(dentry);
692 if (error)
693 goto out;
694
695 if ((error = inode_change_ok(inode, attr)) < 0)
696 goto out;
697
698 error = -EPERM;
699 if ((attr->ia_valid & ATTR_UID) && (attr->ia_uid != server->mnt->uid))
700 goto out;
701
702 if ((attr->ia_valid & ATTR_GID) && (attr->ia_uid != server->mnt->gid))
703 goto out;
704
705 if ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~mask))
706 goto out;
707
708 if ((attr->ia_valid & ATTR_SIZE) != 0) {
709 VERBOSE("changing %s/%s, old size=%ld, new size=%ld\n",
710 DENTRY_PATH(dentry),
711 (long) inode->i_size, (long) attr->ia_size);
712
713 filemap_write_and_wait(inode->i_mapping);
714
715 error = smb_open(dentry, O_WRONLY);
716 if (error)
717 goto out;
718 error = server->ops->truncate(inode, attr->ia_size);
719 if (error)
720 goto out;
721 truncate_setsize(inode, attr->ia_size);
722 refresh = 1;
723 }
724
725 if (server->opt.capabilities & SMB_CAP_UNIX) {
726 /* For now we don't want to set the size with setattr_unix */
727 attr->ia_valid &= ~ATTR_SIZE;
728 /* FIXME: only call if we actually want to set something? */
729 error = smb_proc_setattr_unix(dentry, attr, 0, 0);
730 if (!error)
731 refresh = 1;
732
733 goto out;
734 }
735
736 /*
737 * Initialize the fattr and check for changed fields.
738 * Note: CTIME under SMB is creation time rather than
739 * change time, so we don't attempt to change it.
740 */
741 smb_get_inode_attr(inode, &fattr);
742
743 changed = 0;
744 if ((attr->ia_valid & ATTR_MTIME) != 0) {
745 fattr.f_mtime = attr->ia_mtime;
746 changed = 1;
747 }
748 if ((attr->ia_valid & ATTR_ATIME) != 0) {
749 fattr.f_atime = attr->ia_atime;
750 /* Earlier protocols don't have an access time */
751 if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2)
752 changed = 1;
753 }
754 if (changed) {
755 error = smb_proc_settime(dentry, &fattr);
756 if (error)
757 goto out;
758 refresh = 1;
759 }
760
761 /*
762 * Check for mode changes ... we're extremely limited in
763 * what can be set for SMB servers: just the read-only bit.
764 */
765 if ((attr->ia_valid & ATTR_MODE) != 0) {
766 VERBOSE("%s/%s mode change, old=%x, new=%x\n",
767 DENTRY_PATH(dentry), fattr.f_mode, attr->ia_mode);
768 changed = 0;
769 if (attr->ia_mode & S_IWUSR) {
770 if (fattr.attr & aRONLY) {
771 fattr.attr &= ~aRONLY;
772 changed = 1;
773 }
774 } else {
775 if (!(fattr.attr & aRONLY)) {
776 fattr.attr |= aRONLY;
777 changed = 1;
778 }
779 }
780 if (changed) {
781 error = smb_proc_setattr(dentry, &fattr);
782 if (error)
783 goto out;
784 refresh = 1;
785 }
786 }
787 error = 0;
788
789out:
790 if (refresh)
791 smb_refresh_inode(dentry);
792 unlock_kernel();
793 return error;
794}
795
796static int smb_get_sb(struct file_system_type *fs_type,
797 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
798{
799 return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
800}
801
802static struct file_system_type smb_fs_type = {
803 .owner = THIS_MODULE,
804 .name = "smbfs",
805 .get_sb = smb_get_sb,
806 .kill_sb = kill_anon_super,
807 .fs_flags = FS_BINARY_MOUNTDATA,
808};
809
810static int __init init_smb_fs(void)
811{
812 int err;
813 DEBUG1("registering ...\n");
814
815 err = init_inodecache();
816 if (err)
817 goto out_inode;
818 err = smb_init_request_cache();
819 if (err)
820 goto out_request;
821 err = register_filesystem(&smb_fs_type);
822 if (err)
823 goto out;
824 return 0;
825out:
826 smb_destroy_request_cache();
827out_request:
828 destroy_inodecache();
829out_inode:
830 return err;
831}
832
833static void __exit exit_smb_fs(void)
834{
835 DEBUG1("unregistering ...\n");
836 unregister_filesystem(&smb_fs_type);
837 smb_destroy_request_cache();
838 destroy_inodecache();
839}
840
841module_init(init_smb_fs)
842module_exit(exit_smb_fs)
843MODULE_LICENSE("GPL");
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
deleted file mode 100644
index 07215312ad39..000000000000
--- a/fs/smbfs/ioctl.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * ioctl.c
3 *
4 * Copyright (C) 1995, 1996 by Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/ioctl.h>
13#include <linux/time.h>
14#include <linux/mm.h>
15#include <linux/highuid.h>
16#include <linux/smp_lock.h>
17#include <linux/net.h>
18
19#include <linux/smb_fs.h>
20#include <linux/smb_mount.h>
21
22#include <asm/uaccess.h>
23
24#include "proto.h"
25
26long
27smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
28{
29 struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
30 struct smb_conn_opt opt;
31 int result = -EINVAL;
32
33 lock_kernel();
34 switch (cmd) {
35 uid16_t uid16;
36 uid_t uid32;
37 case SMB_IOC_GETMOUNTUID:
38 SET_UID(uid16, server->mnt->mounted_uid);
39 result = put_user(uid16, (uid16_t __user *) arg);
40 break;
41 case SMB_IOC_GETMOUNTUID32:
42 SET_UID(uid32, server->mnt->mounted_uid);
43 result = put_user(uid32, (uid_t __user *) arg);
44 break;
45
46 case SMB_IOC_NEWCONN:
47 /* arg is smb_conn_opt, or NULL if no connection was made */
48 if (!arg) {
49 result = 0;
50 smb_lock_server(server);
51 server->state = CONN_RETRIED;
52 printk(KERN_ERR "Connection attempt failed! [%d]\n",
53 server->conn_error);
54 smbiod_flush(server);
55 smb_unlock_server(server);
56 break;
57 }
58
59 result = -EFAULT;
60 if (!copy_from_user(&opt, (void __user *)arg, sizeof(opt)))
61 result = smb_newconn(server, &opt);
62 break;
63 default:
64 break;
65 }
66 unlock_kernel();
67
68 return result;
69}
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
deleted file mode 100644
index 3dcf638d4d3a..000000000000
--- a/fs/smbfs/proc.c
+++ /dev/null
@@ -1,3503 +0,0 @@
1/*
2 * proc.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/types.h>
11#include <linux/capability.h>
12#include <linux/errno.h>
13#include <linux/slab.h>
14#include <linux/fs.h>
15#include <linux/file.h>
16#include <linux/stat.h>
17#include <linux/fcntl.h>
18#include <linux/dcache.h>
19#include <linux/nls.h>
20#include <linux/smp_lock.h>
21#include <linux/net.h>
22#include <linux/vfs.h>
23#include <linux/smb_fs.h>
24#include <linux/smbno.h>
25#include <linux/smb_mount.h>
26
27#include <net/sock.h>
28
29#include <asm/string.h>
30#include <asm/div64.h>
31
32#include "smb_debug.h"
33#include "proto.h"
34#include "request.h"
35
36
37/* Features. Undefine if they cause problems, this should perhaps be a
38 config option. */
39#define SMBFS_POSIX_UNLINK 1
40
41/* Allow smb_retry to be interrupted. */
42#define SMB_RETRY_INTR
43
44#define SMB_VWV(packet) ((packet) + SMB_HEADER_LEN)
45#define SMB_CMD(packet) (*(packet+8))
46#define SMB_WCT(packet) (*(packet+SMB_HEADER_LEN - 1))
47
48#define SMB_DIRINFO_SIZE 43
49#define SMB_STATUS_SIZE 21
50
51#define SMB_ST_BLKSIZE (PAGE_SIZE)
52#define SMB_ST_BLKSHIFT (PAGE_SHIFT)
53
54static struct smb_ops smb_ops_core;
55static struct smb_ops smb_ops_os2;
56static struct smb_ops smb_ops_win95;
57static struct smb_ops smb_ops_winNT;
58static struct smb_ops smb_ops_unix;
59static struct smb_ops smb_ops_null;
60
61static void
62smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
63static void
64smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
65static int
66smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
67 struct smb_fattr *fattr);
68static int
69smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
70 struct smb_fattr *fattr);
71static int
72smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
73 u16 attr);
74static int
75smb_proc_setattr_ext(struct smb_sb_info *server,
76 struct inode *inode, struct smb_fattr *fattr);
77static int
78smb_proc_query_cifsunix(struct smb_sb_info *server);
79static void
80install_ops(struct smb_ops *dst, struct smb_ops *src);
81
82
83static void
84str_upper(char *name, int len)
85{
86 while (len--)
87 {
88 if (*name >= 'a' && *name <= 'z')
89 *name -= ('a' - 'A');
90 name++;
91 }
92}
93
94#if 0
95static void
96str_lower(char *name, int len)
97{
98 while (len--)
99 {
100 if (*name >= 'A' && *name <= 'Z')
101 *name += ('a' - 'A');
102 name++;
103 }
104}
105#endif
106
107/* reverse a string inline. This is used by the dircache walking routines */
108static void reverse_string(char *buf, int len)
109{
110 char c;
111 char *end = buf+len-1;
112
113 while(buf < end) {
114 c = *buf;
115 *(buf++) = *end;
116 *(end--) = c;
117 }
118}
119
120/* no conversion, just a wrapper for memcpy. */
121static int convert_memcpy(unsigned char *output, int olen,
122 const unsigned char *input, int ilen,
123 struct nls_table *nls_from,
124 struct nls_table *nls_to)
125{
126 if (olen < ilen)
127 return -ENAMETOOLONG;
128 memcpy(output, input, ilen);
129 return ilen;
130}
131
132static inline int write_char(unsigned char ch, char *output, int olen)
133{
134 if (olen < 4)
135 return -ENAMETOOLONG;
136 sprintf(output, ":x%02x", ch);
137 return 4;
138}
139
140static inline int write_unichar(wchar_t ch, char *output, int olen)
141{
142 if (olen < 5)
143 return -ENAMETOOLONG;
144 sprintf(output, ":%04x", ch);
145 return 5;
146}
147
148/* convert from one "codepage" to another (possibly being utf8). */
149static int convert_cp(unsigned char *output, int olen,
150 const unsigned char *input, int ilen,
151 struct nls_table *nls_from,
152 struct nls_table *nls_to)
153{
154 int len = 0;
155 int n;
156 wchar_t ch;
157
158 while (ilen > 0) {
159 /* convert by changing to unicode and back to the new cp */
160 n = nls_from->char2uni(input, ilen, &ch);
161 if (n == -EINVAL) {
162 ilen--;
163 n = write_char(*input++, output, olen);
164 if (n < 0)
165 goto fail;
166 output += n;
167 olen -= n;
168 len += n;
169 continue;
170 } else if (n < 0)
171 goto fail;
172 input += n;
173 ilen -= n;
174
175 n = nls_to->uni2char(ch, output, olen);
176 if (n == -EINVAL)
177 n = write_unichar(ch, output, olen);
178 if (n < 0)
179 goto fail;
180 output += n;
181 olen -= n;
182
183 len += n;
184 }
185 return len;
186fail:
187 return n;
188}
189
190/* ----------------------------------------------------------- */
191
192/*
193 * nls_unicode
194 *
195 * This encodes/decodes little endian unicode format
196 */
197
198static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
199{
200 if (boundlen < 2)
201 return -EINVAL;
202 *out++ = uni & 0xff;
203 *out++ = uni >> 8;
204 return 2;
205}
206
207static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
208{
209 if (boundlen < 2)
210 return -EINVAL;
211 *uni = (rawstring[1] << 8) | rawstring[0];
212 return 2;
213}
214
215static struct nls_table unicode_table = {
216 .charset = "unicode",
217 .uni2char = uni2char,
218 .char2uni = char2uni,
219};
220
221/* ----------------------------------------------------------- */
222
223static int setcodepage(struct nls_table **p, char *name)
224{
225 struct nls_table *nls;
226
227 if (!name || !*name) {
228 nls = NULL;
229 } else if ( (nls = load_nls(name)) == NULL) {
230 printk (KERN_ERR "smbfs: failed to load nls '%s'\n", name);
231 return -EINVAL;
232 }
233
234 /* if already set, unload the previous one. */
235 if (*p && *p != &unicode_table)
236 unload_nls(*p);
237 *p = nls;
238
239 return 0;
240}
241
242/* Handles all changes to codepage settings. */
243int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp)
244{
245 int n = 0;
246
247 smb_lock_server(server);
248
249 /* Don't load any nls_* at all, if no remote is requested */
250 if (!*cp->remote_name)
251 goto out;
252
253 /* local */
254 n = setcodepage(&server->local_nls, cp->local_name);
255 if (n != 0)
256 goto out;
257
258 /* remote */
259 if (!strcmp(cp->remote_name, "unicode")) {
260 server->remote_nls = &unicode_table;
261 } else {
262 n = setcodepage(&server->remote_nls, cp->remote_name);
263 if (n != 0)
264 setcodepage(&server->local_nls, NULL);
265 }
266
267out:
268 if (server->local_nls != NULL && server->remote_nls != NULL)
269 server->ops->convert = convert_cp;
270 else
271 server->ops->convert = convert_memcpy;
272
273 smb_unlock_server(server);
274 return n;
275}
276
277
278/*****************************************************************************/
279/* */
280/* Encoding/Decoding section */
281/* */
282/*****************************************************************************/
283
284static __u8 *
285smb_encode_smb_length(__u8 * p, __u32 len)
286{
287 *p = 0;
288 *(p+1) = 0;
289 *(p+2) = (len & 0xFF00) >> 8;
290 *(p+3) = (len & 0xFF);
291 if (len > 0xFFFF)
292 {
293 *(p+1) = 1;
294 }
295 return p + 4;
296}
297
298/*
299 * smb_build_path: build the path to entry and name storing it in buf.
300 * The path returned will have the trailing '\0'.
301 */
302static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
303 int maxlen,
304 struct dentry *entry, struct qstr *name)
305{
306 unsigned char *path = buf;
307 int len;
308 int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE) != 0;
309
310 if (maxlen < (2<<unicode))
311 return -ENAMETOOLONG;
312
313 if (maxlen > SMB_MAXPATHLEN + 1)
314 maxlen = SMB_MAXPATHLEN + 1;
315
316 if (entry == NULL)
317 goto test_name_and_out;
318
319 /*
320 * If IS_ROOT, we have to do no walking at all.
321 */
322 if (IS_ROOT(entry) && !name) {
323 *path++ = '\\';
324 if (unicode) *path++ = '\0';
325 *path++ = '\0';
326 if (unicode) *path++ = '\0';
327 return path-buf;
328 }
329
330 /*
331 * Build the path string walking the tree backward from end to ROOT
332 * and store it in reversed order [see reverse_string()]
333 */
334 dget(entry);
335 while (!IS_ROOT(entry)) {
336 struct dentry *parent;
337
338 if (maxlen < (3<<unicode)) {
339 dput(entry);
340 return -ENAMETOOLONG;
341 }
342
343 spin_lock(&entry->d_lock);
344 len = server->ops->convert(path, maxlen-2,
345 entry->d_name.name, entry->d_name.len,
346 server->local_nls, server->remote_nls);
347 if (len < 0) {
348 spin_unlock(&entry->d_lock);
349 dput(entry);
350 return len;
351 }
352 reverse_string(path, len);
353 path += len;
354 if (unicode) {
355 /* Note: reverse order */
356 *path++ = '\0';
357 maxlen--;
358 }
359 *path++ = '\\';
360 maxlen -= len+1;
361 spin_unlock(&entry->d_lock);
362
363 parent = dget_parent(entry);
364 dput(entry);
365 entry = parent;
366 }
367 dput(entry);
368 reverse_string(buf, path-buf);
369
370 /* maxlen has space for at least one char */
371test_name_and_out:
372 if (name) {
373 if (maxlen < (3<<unicode))
374 return -ENAMETOOLONG;
375 *path++ = '\\';
376 if (unicode) {
377 *path++ = '\0';
378 maxlen--;
379 }
380 len = server->ops->convert(path, maxlen-2,
381 name->name, name->len,
382 server->local_nls, server->remote_nls);
383 if (len < 0)
384 return len;
385 path += len;
386 maxlen -= len+1;
387 }
388 /* maxlen has space for at least one char */
389 *path++ = '\0';
390 if (unicode) *path++ = '\0';
391 return path-buf;
392}
393
394static int smb_encode_path(struct smb_sb_info *server, char *buf, int maxlen,
395 struct dentry *dir, struct qstr *name)
396{
397 int result;
398
399 result = smb_build_path(server, buf, maxlen, dir, name);
400 if (result < 0)
401 goto out;
402 if (server->opt.protocol <= SMB_PROTOCOL_COREPLUS)
403 str_upper(buf, result);
404out:
405 return result;
406}
407
/* encode_path for non-trans2 request SMBs */
/*
 * Append an ASCII-data-format (0x04) path block for entry/name at *p,
 * advancing *p past the encoded path on success.  Returns 0 or a
 * negative errno.  Space is bounded by the end of the request buffer.
 */
static int smb_simple_encode_path(struct smb_request *req, char **p,
				  struct dentry * entry, struct qstr * name)
{
	struct smb_sb_info *server = req->rq_server;
	char *s = *p;
	int res;
	int maxlen = ((char *)req->rq_buffer + req->rq_bufsize) - s;
	int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);

	if (!maxlen)
		return -ENAMETOOLONG;
	*s++ = 4;	/* ASCII data format */

	/*
	 * SMB Unicode strings must be 16bit aligned relative the start of the
	 * packet. If they are not they must be padded with 0.
	 */
	if (unicode) {
		int align = s - (char *)req->rq_buffer;
		/* s is at an even offset => the string itself would start
		   odd-aligned, so insert one pad byte */
		if (!(align & 1)) {
			*s++ = '\0';
			maxlen--;
		}
	}

	res = smb_encode_path(server, s, maxlen-1, entry, name);
	if (res < 0)
		return res;
	*p = s + res;
	return 0;
}
440
/* The following are taken directly from msdos-fs */

/* Linear day numbers of the respective 1sts in non-leap years.
   Four trailing zero entries pad the table so an out-of-range month
   index (masked to 0..15 by the callers) stays inside the array. */

static int day_n[] =
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0};
		  /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */
448
449
450static time_t
451utc2local(struct smb_sb_info *server, time_t time)
452{
453 return time - server->opt.serverzone*60;
454}
455
456static time_t
457local2utc(struct smb_sb_info *server, time_t time)
458{
459 return time + server->opt.serverzone*60;
460}
461
/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
/*
 * date: bits 15-9 year since 1980, 8-5 month (1-12), 4-0 day (1-31).
 * time: bits 15-11 hours, 10-5 minutes, 4-0 seconds/2.
 * The result is converted from server-local time to UTC.
 */
static time_t
date_dos2unix(struct smb_sb_info *server, __u16 date, __u16 time)
{
	int month, year;
	time_t secs;

	/* first subtract and mask after that... Otherwise, if
	   date == 0, bad things happen */
	month = ((date >> 5) - 1) & 15;
	year = date >> 9;
	/* day_n[] gives days to the 1st of the month; (year/4) adds past
	   leap days, the "month < 2" term removes the current leap day if
	   it has not occurred yet; 3653 = days from 1.1.70 to 1.1.80 plus
	   the 1980 leap day */
	secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 *
	    ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 &&
						   month < 2 ? 1 : 0) + 3653);
	/* days since 1.1.70 plus 80's leap day */
	return local2utc(server, secs);
}
480
481
/* Convert linear UNIX date to a MS-DOS time/date pair. */
/*
 * Inverse of date_dos2unix(): converts UTC to server-local time first,
 * then packs *time as hours/minutes/2-second units and *date as
 * year-since-1980/month/day.  Dates before 1980-01-01 (315532800) are
 * clamped, since the DOS format cannot represent them.
 */
static void
date_unix2dos(struct smb_sb_info *server,
	      int unix_date, __u16 *date, __u16 *time)
{
	int day, year, nl_day, month;

	unix_date = utc2local(server, unix_date);
	/* clamp to the DOS epoch, 1980-01-01 00:00:00 */
	if (unix_date < 315532800)
		unix_date = 315532800;

	*time = (unix_date % 60) / 2 +
		(((unix_date / 60) % 60) << 5) +
		(((unix_date / 3600) % 24) << 11);

	/* 3652 = days from 1.1.70 to 1.1.80 */
	day = unix_date / 86400 - 3652;
	year = day / 365;
	/* correct the estimate for accumulated leap days */
	if ((year + 3) / 4 + 365 * year > day)
		year--;
	day -= (year + 3) / 4 + 365 * year;
	if (day == 59 && !(year & 3)) {
		/* Feb 29 of a leap year */
		nl_day = day;
		month = 2;
	} else {
		/* map to a non-leap day number before the table lookup */
		nl_day = (year & 3) || day <= 59 ? day : day - 1;
		for (month = 1; month < 12; month++)
			if (day_n[month] > nl_day)
				break;
	}
	*date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9);
}
514
/* The following are taken from fs/ntfs/util.c */

/* 100ns intervals between 1601-01-01 and 1970-01-01:
   369 years including 89 leap days */
#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)

/*
 * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
 * into Unix UTC (based 1970-01-01, in seconds).
 */
static struct timespec
smb_ntutc2unixutc(u64 ntutc)
{
	struct timespec ts;
	/* FIXME: what about the timezone difference? */
	/* Subtract the NTFS time offset, then convert to 1s intervals. */
	u64 t = ntutc - NTFS_TIME_OFFSET;
	/* do_div() divides t in place and returns the remainder,
	   i.e. the sub-second part in 100ns units */
	ts.tv_nsec = do_div(t, 10000000) * 100;
	ts.tv_sec = t;
	return ts;
}
534
535/* Convert the Unix UTC into NT time */
536static u64
537smb_unixutc2ntutc(struct timespec ts)
538{
539 /* Note: timezone conversion is probably wrong. */
540 /* return ((u64)utc2local(server, t)) * 10000000 + NTFS_TIME_OFFSET; */
541 return ((u64)ts.tv_sec) * 10000000 + ts.tv_nsec/100 + NTFS_TIME_OFFSET;
542}
543
544#define MAX_FILE_MODE 6
545static mode_t file_mode[] = {
546 S_IFREG, S_IFDIR, S_IFLNK, S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK
547};
548
549static int smb_filetype_to_mode(u32 filetype)
550{
551 if (filetype > MAX_FILE_MODE) {
552 PARANOIA("Filetype out of range: %d\n", filetype);
553 return S_IFREG;
554 }
555 return file_mode[filetype];
556}
557
558static u32 smb_filetype_from_mode(int mode)
559{
560 if (S_ISREG(mode))
561 return UNIX_TYPE_FILE;
562 if (S_ISDIR(mode))
563 return UNIX_TYPE_DIR;
564 if (S_ISLNK(mode))
565 return UNIX_TYPE_SYMLINK;
566 if (S_ISCHR(mode))
567 return UNIX_TYPE_CHARDEV;
568 if (S_ISBLK(mode))
569 return UNIX_TYPE_BLKDEV;
570 if (S_ISFIFO(mode))
571 return UNIX_TYPE_FIFO;
572 if (S_ISSOCK(mode))
573 return UNIX_TYPE_SOCKET;
574 return UNIX_TYPE_UNKNOWN;
575}
576
577
578/*****************************************************************************/
579/* */
580/* Support section. */
581/* */
582/*****************************************************************************/
583
584__u32
585smb_len(__u8 * p)
586{
587 return ((*(p+1) & 0x1) << 16L) | (*(p+2) << 8L) | *(p+3);
588}
589
590static __u16
591smb_bcc(__u8 * packet)
592{
593 int pos = SMB_HEADER_LEN + SMB_WCT(packet) * sizeof(__u16);
594 return WVAL(packet, pos);
595}
596
/* smb_valid_packet: We check if packet fulfills the basic
   requirements of a smb packet */
/*
 * Returns the truth value of the checks: nonzero when the packet has
 * the 0xFF 'S' 'M' 'B' magic and its NetBIOS length is consistent with
 * the word count and byte count, zero otherwise.
 * NOTE(review): callers must treat a ZERO return as "invalid" — verify
 * each call site uses this convention.
 */
static int
smb_valid_packet(__u8 * packet)
{
	return (packet[4] == 0xff
		&& packet[5] == 'S'
		&& packet[6] == 'M'
		&& packet[7] == 'B'
		&& (smb_len(packet) + 4 == SMB_HEADER_LEN
		    + SMB_WCT(packet) * 2 + smb_bcc(packet)));
}
610
611/* smb_verify: We check if we got the answer we expected, and if we
612 got enough data. If bcc == -1, we don't care. */
613
614static int
615smb_verify(__u8 * packet, int command, int wct, int bcc)
616{
617 if (SMB_CMD(packet) != command)
618 goto bad_command;
619 if (SMB_WCT(packet) < wct)
620 goto bad_wct;
621 if (bcc != -1 && smb_bcc(packet) < bcc)
622 goto bad_bcc;
623 return 0;
624
625bad_command:
626 printk(KERN_ERR "smb_verify: command=%x, SMB_CMD=%x??\n",
627 command, SMB_CMD(packet));
628 goto fail;
629bad_wct:
630 printk(KERN_ERR "smb_verify: command=%x, wct=%d, SMB_WCT=%d??\n",
631 command, wct, SMB_WCT(packet));
632 goto fail;
633bad_bcc:
634 printk(KERN_ERR "smb_verify: command=%x, bcc=%d, SMB_BCC=%d??\n",
635 command, bcc, smb_bcc(packet));
636fail:
637 return -EIO;
638}
639
640/*
641 * Returns the maximum read or write size for the "payload". Making all of the
642 * packet fit within the negotiated max_xmit size.
643 *
644 * N.B. Since this value is usually computed before locking the server,
645 * the server's packet size must never be decreased!
646 */
647static inline int
648smb_get_xmitsize(struct smb_sb_info *server, int overhead)
649{
650 return server->opt.max_xmit - overhead;
651}
652
653/*
654 * Calculate the maximum read size
655 */
656int
657smb_get_rsize(struct smb_sb_info *server)
658{
659 /* readX has 12 parameters, read has 5 */
660 int overhead = SMB_HEADER_LEN + 12 * sizeof(__u16) + 2 + 1 + 2;
661 int size = smb_get_xmitsize(server, overhead);
662
663 VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
664
665 return size;
666}
667
668/*
669 * Calculate the maximum write size
670 */
671int
672smb_get_wsize(struct smb_sb_info *server)
673{
674 /* writeX has 14 parameters, write has 5 */
675 int overhead = SMB_HEADER_LEN + 14 * sizeof(__u16) + 2 + 1 + 2;
676 int size = smb_get_xmitsize(server, overhead);
677
678 VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
679
680 return size;
681}
682
683/*
684 * Convert SMB error codes to -E... errno values.
685 */
686int
687smb_errno(struct smb_request *req)
688{
689 int errcls = req->rq_rcls;
690 int error = req->rq_err;
691 char *class = "Unknown";
692
693 VERBOSE("errcls %d code %d from command 0x%x\n",
694 errcls, error, SMB_CMD(req->rq_header));
695
696 if (errcls == ERRDOS) {
697 switch (error) {
698 case ERRbadfunc:
699 return -EINVAL;
700 case ERRbadfile:
701 case ERRbadpath:
702 return -ENOENT;
703 case ERRnofids:
704 return -EMFILE;
705 case ERRnoaccess:
706 return -EACCES;
707 case ERRbadfid:
708 return -EBADF;
709 case ERRbadmcb:
710 return -EREMOTEIO;
711 case ERRnomem:
712 return -ENOMEM;
713 case ERRbadmem:
714 return -EFAULT;
715 case ERRbadenv:
716 case ERRbadformat:
717 return -EREMOTEIO;
718 case ERRbadaccess:
719 return -EACCES;
720 case ERRbaddata:
721 return -E2BIG;
722 case ERRbaddrive:
723 return -ENXIO;
724 case ERRremcd:
725 return -EREMOTEIO;
726 case ERRdiffdevice:
727 return -EXDEV;
728 case ERRnofiles:
729 return -ENOENT;
730 case ERRbadshare:
731 return -ETXTBSY;
732 case ERRlock:
733 return -EDEADLK;
734 case ERRfilexists:
735 return -EEXIST;
736 case ERROR_INVALID_PARAMETER:
737 return -EINVAL;
738 case ERROR_DISK_FULL:
739 return -ENOSPC;
740 case ERROR_INVALID_NAME:
741 return -ENOENT;
742 case ERROR_DIR_NOT_EMPTY:
743 return -ENOTEMPTY;
744 case ERROR_NOT_LOCKED:
745 return -ENOLCK;
746 case ERROR_ALREADY_EXISTS:
747 return -EEXIST;
748 default:
749 class = "ERRDOS";
750 goto err_unknown;
751 }
752 } else if (errcls == ERRSRV) {
753 switch (error) {
754 /* N.B. This is wrong ... EIO ? */
755 case ERRerror:
756 return -ENFILE;
757 case ERRbadpw:
758 return -EINVAL;
759 case ERRbadtype:
760 case ERRtimeout:
761 return -EIO;
762 case ERRaccess:
763 return -EACCES;
764 /*
765 * This is a fatal error, as it means the "tree ID"
766 * for this connection is no longer valid. We map
767 * to a special error code and get a new connection.
768 */
769 case ERRinvnid:
770 return -EBADSLT;
771 default:
772 class = "ERRSRV";
773 goto err_unknown;
774 }
775 } else if (errcls == ERRHRD) {
776 switch (error) {
777 case ERRnowrite:
778 return -EROFS;
779 case ERRbadunit:
780 return -ENODEV;
781 case ERRnotready:
782 return -EUCLEAN;
783 case ERRbadcmd:
784 case ERRdata:
785 return -EIO;
786 case ERRbadreq:
787 return -ERANGE;
788 case ERRbadshare:
789 return -ETXTBSY;
790 case ERRlock:
791 return -EDEADLK;
792 case ERRdiskfull:
793 return -ENOSPC;
794 default:
795 class = "ERRHRD";
796 goto err_unknown;
797 }
798 } else if (errcls == ERRCMD) {
799 class = "ERRCMD";
800 } else if (errcls == SUCCESS) {
801 return 0; /* This is the only valid 0 return */
802 }
803
804err_unknown:
805 printk(KERN_ERR "smb_errno: class %s, code %d from command 0x%x\n",
806 class, error, SMB_CMD(req->rq_header));
807 return -EIO;
808}
809
810/* smb_request_ok: We expect the server to be locked. Then we do the
811 request and check the answer completely. When smb_request_ok
812 returns 0, you can be quite sure that everything went well. When
813 the answer is <=0, the returned number is a valid unix errno. */
814
815static int
816smb_request_ok(struct smb_request *req, int command, int wct, int bcc)
817{
818 int result;
819
820 req->rq_resp_wct = wct;
821 req->rq_resp_bcc = bcc;
822
823 result = smb_add_request(req);
824 if (result != 0) {
825 DEBUG1("smb_request failed\n");
826 goto out;
827 }
828
829 if (smb_valid_packet(req->rq_header) != 0) {
830 PARANOIA("invalid packet!\n");
831 goto out;
832 }
833
834 result = smb_verify(req->rq_header, command, wct, bcc);
835
836out:
837 return result;
838}
839
840/*
841 * This implements the NEWCONN ioctl. It installs the server pid,
842 * sets server->state to CONN_VALID, and wakes up the waiting process.
843 */
844int
845smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
846{
847 struct file *filp;
848 struct sock *sk;
849 int error;
850
851 VERBOSE("fd=%d, pid=%d\n", opt->fd, current->pid);
852
853 smb_lock_server(server);
854
855 /*
856 * Make sure we don't already have a valid connection ...
857 */
858 error = -EINVAL;
859 if (server->state == CONN_VALID)
860 goto out;
861
862 error = -EACCES;
863 if (current_uid() != server->mnt->mounted_uid &&
864 !capable(CAP_SYS_ADMIN))
865 goto out;
866
867 error = -EBADF;
868 filp = fget(opt->fd);
869 if (!filp)
870 goto out;
871 if (!smb_valid_socket(filp->f_path.dentry->d_inode))
872 goto out_putf;
873
874 server->sock_file = filp;
875 server->conn_pid = get_pid(task_pid(current));
876 server->opt = *opt;
877 server->generation += 1;
878 server->state = CONN_VALID;
879 error = 0;
880
881 if (server->conn_error) {
882 /*
883 * conn_error is the returncode we originally decided to
884 * drop the old connection on. This message should be positive
885 * and not make people ask questions on why smbfs is printing
886 * error messages ...
887 */
888 printk(KERN_INFO "SMB connection re-established (%d)\n",
889 server->conn_error);
890 server->conn_error = 0;
891 }
892
893 /*
894 * Store the server in sock user_data (Only used by sunrpc)
895 */
896 sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
897 sk->sk_user_data = server;
898
899 /* chain into the data_ready callback */
900 server->data_ready = xchg(&sk->sk_data_ready, smb_data_ready);
901
902 /* check if we have an old smbmount that uses seconds for the
903 serverzone */
904 if (server->opt.serverzone > 12*60 || server->opt.serverzone < -12*60)
905 server->opt.serverzone /= 60;
906
907 /* now that we have an established connection we can detect the server
908 type and enable bug workarounds */
909 if (server->opt.protocol < SMB_PROTOCOL_LANMAN2)
910 install_ops(server->ops, &smb_ops_core);
911 else if (server->opt.protocol == SMB_PROTOCOL_LANMAN2)
912 install_ops(server->ops, &smb_ops_os2);
913 else if (server->opt.protocol == SMB_PROTOCOL_NT1 &&
914 (server->opt.max_xmit < 0x1000) &&
915 !(server->opt.capabilities & SMB_CAP_NT_SMBS)) {
916 /* FIXME: can we kill the WIN95 flag now? */
917 server->mnt->flags |= SMB_MOUNT_WIN95;
918 VERBOSE("detected WIN95 server\n");
919 install_ops(server->ops, &smb_ops_win95);
920 } else {
921 /*
922 * Samba has max_xmit 65535
923 * NT4spX has max_xmit 4536 (or something like that)
924 * win2k has ...
925 */
926 VERBOSE("detected NT1 (Samba, NT4/5) server\n");
927 install_ops(server->ops, &smb_ops_winNT);
928 }
929
930 /* FIXME: the win9x code wants to modify these ... (seek/trunc bug) */
931 if (server->mnt->flags & SMB_MOUNT_OLDATTR) {
932 server->ops->getattr = smb_proc_getattr_core;
933 } else if (server->mnt->flags & SMB_MOUNT_DIRATTR) {
934 server->ops->getattr = smb_proc_getattr_ff;
935 }
936
937 /* Decode server capabilities */
938 if (server->opt.capabilities & SMB_CAP_LARGE_FILES) {
939 /* Should be ok to set this now, as no one can access the
940 mount until the connection has been established. */
941 SB_of(server)->s_maxbytes = ~0ULL >> 1;
942 VERBOSE("LFS enabled\n");
943 }
944 if (server->opt.capabilities & SMB_CAP_UNICODE) {
945 server->mnt->flags |= SMB_MOUNT_UNICODE;
946 VERBOSE("Unicode enabled\n");
947 } else {
948 server->mnt->flags &= ~SMB_MOUNT_UNICODE;
949 }
950#if 0
951 /* flags we may test for other patches ... */
952 if (server->opt.capabilities & SMB_CAP_LARGE_READX) {
953 VERBOSE("Large reads enabled\n");
954 }
955 if (server->opt.capabilities & SMB_CAP_LARGE_WRITEX) {
956 VERBOSE("Large writes enabled\n");
957 }
958#endif
959 if (server->opt.capabilities & SMB_CAP_UNIX) {
960 struct inode *inode;
961 VERBOSE("Using UNIX CIFS extensions\n");
962 install_ops(server->ops, &smb_ops_unix);
963 inode = SB_of(server)->s_root->d_inode;
964 if (inode)
965 inode->i_op = &smb_dir_inode_operations_unix;
966 }
967
968 VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
969 server->opt.protocol, server->opt.max_xmit,
970 pid_nr(server->conn_pid), server->opt.capabilities);
971
972 /* FIXME: this really should be done by smbmount. */
973 if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
974 server->opt.max_xmit = SMB_MAX_PACKET_SIZE;
975 }
976
977 smb_unlock_server(server);
978 smbiod_wake_up();
979 if (server->opt.capabilities & SMB_CAP_UNIX)
980 smb_proc_query_cifsunix(server);
981
982 server->conn_complete++;
983 wake_up_interruptible_all(&server->conn_wq);
984 return error;
985
986out:
987 smb_unlock_server(server);
988 smbiod_wake_up();
989 return error;
990
991out_putf:
992 fput(filp);
993 goto out;
994}
995
/* smb_setup_header: We completely set up the packet. You only have to
   insert the command-specific fields */
/*
 * Lays out: NetBIOS length, "\xffSMB" magic, command byte, zeroed
 * status/flags/tid/pid/uid/mid area, word count, wct parameter words
 * (left for the caller to fill), and the byte count.  Sets up iov[0]
 * to cover everything up to (but not including) the data bytes.
 * Returns a pointer to the data area (rq_buffer) where the caller
 * appends the "bytes" section.
 */
__u8 *
smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc)
{
	/* header + parameter words + bcc field (2 bytes) + data bytes */
	__u32 xmit_len = SMB_HEADER_LEN + wct * sizeof(__u16) + bcc + 2;
	__u8 *p = req->rq_header;
	struct smb_sb_info *server = req->rq_server;

	/* NetBIOS length excludes the 4-byte session header itself */
	p = smb_encode_smb_length(p, xmit_len - 4);

	*p++ = 0xff;
	*p++ = 'S';
	*p++ = 'M';
	*p++ = 'B';
	*p++ = command;

	/* zero status, flags, flags2, pidhigh, signature, reserved ... */
	memset(p, '\0', 19);
	p += 19;
	/* ... and skip tid/pid/uid/mid (filled in elsewhere) */
	p += 8;

	if (server->opt.protocol > SMB_PROTOCOL_CORE) {
		int flags = SMB_FLAGS_CASELESS_PATHNAMES;
		int flags2 = SMB_FLAGS2_LONG_PATH_COMPONENTS |
			SMB_FLAGS2_EXTENDED_ATTRIBUTES;	/* EA? not really ... */

		*(req->rq_header + smb_flg) = flags;
		if (server->mnt->flags & SMB_MOUNT_UNICODE)
			flags2 |= SMB_FLAGS2_UNICODE_STRINGS;
		WSET(req->rq_header, smb_flg2, flags2);
	}
	*p++ = wct;		/* wct */
	p += 2 * wct;		/* parameter words, caller fills them */
	WSET(p, 0, bcc);

	/* Include the header in the data to send */
	req->rq_iovlen = 1;
	req->rq_iov[0].iov_base = req->rq_header;
	req->rq_iov[0].iov_len  = xmit_len - bcc;

	return req->rq_buffer;
}
1039
/*
 * Finalize a request whose data bytes were appended into rq_buffer:
 * p points one past the last data byte.  Patches the byte count field
 * and the NetBIOS length, and adds iov[1] covering the data bytes.
 * Used when the data size was not known at smb_setup_header() time.
 */
static void
smb_setup_bcc(struct smb_request *req, __u8 *p)
{
	/* actual number of data bytes written by the caller */
	u16 bcc = p - req->rq_buffer;
	/* bcc field sits right after the wct parameter words */
	u8 *pbcc = req->rq_header + SMB_HEADER_LEN + 2*SMB_WCT(req->rq_header);

	WSET(pbcc, 0, bcc);

	smb_encode_smb_length(req->rq_header, SMB_HEADER_LEN +
			      2*SMB_WCT(req->rq_header) - 2 + bcc);

	/* Include the "bytes" in the data to send */
	req->rq_iovlen = 2;
	req->rq_iov[1].iov_base = req->rq_buffer;
	req->rq_iov[1].iov_len = bcc;
}
1056
/*
 * Issue an SMBlseek for the given file handle and return the resulting
 * absolute offset as reported by the server, or -ENOMEM if no request
 * could be allocated.
 *
 * NOTE(review): a failure from smb_request_ok() is squashed to 0 here,
 * which is indistinguishable from a successful seek to offset 0 —
 * confirm whether callers rely on this or whether the error should
 * propagate.
 */
static int
smb_proc_seek(struct smb_sb_info *server, __u16 fileid,
	      __u16 mode, off_t offset)
{
	int result;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	smb_setup_header(req, SMBlseek, 4, 0);
	WSET(req->rq_header, smb_vwv0, fileid);
	WSET(req->rq_header, smb_vwv1, mode);
	DSET(req->rq_header, smb_vwv2, offset);
	/* the file-id would be stale after a reconnect, so never retry */
	req->rq_flags |= SMB_REQ_NORETRY;

	result = smb_request_ok(req, SMBlseek, 2, 0);
	if (result < 0) {
		result = 0;
		goto out_free;
	}

	/* vwv0 holds the new absolute offset */
	result = DVAL(req->rq_header, smb_vwv0);
out_free:
	smb_rput(req);
out:
	return result;
}
1086
/*
 * Open the file behind dentry on the server with SMBopen and record the
 * returned file handle, attributes and granted access in the inode.
 * Tries read/write first (unless the mode bits forbid writing) and
 * retries read-only if the server refuses.  Returns 0 or a negative
 * errno.
 */
static int
smb_proc_open(struct smb_sb_info *server, struct dentry *dentry, int wish)
{
	struct inode *ino = dentry->d_inode;
	struct smb_inode_info *ei = SMB_I(ino);
	/* SMBopen sharing+access codes: 0x42 = r/w, 0x40 = read-only,
	   both with DENY_NONE sharing */
	int mode, read_write = 0x42, read_only = 0x40;
	int res;
	char *p;
	struct smb_request *req;

	/*
	 * Attempt to open r/w, unless there are no write privileges.
	 */
	mode = read_write;
	if (!(ino->i_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
		mode = read_only;
#if 0
	/* FIXME: why is this code not in? below we fix it so that a caller
	   wanting RO doesn't get RW. smb_revalidate_inode does some 
	   optimization based on access mode. tail -f needs it to be correct.

	   We must open rw since we don't do the open if called a second time
	   with different 'wish'. Is that not supported by smb servers? */
	if (!(wish & (O_WRONLY | O_RDWR)))
		mode = read_only;
#endif

	res = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

 retry:
	p = smb_setup_header(req, SMBopen, 2, 0);
	WSET(req->rq_header, smb_vwv0, mode);
	/* search attributes: also match hidden/system files and dirs */
	WSET(req->rq_header, smb_vwv1, aSYSTEM | aHIDDEN | aDIR);
	res = smb_simple_encode_path(req, &p, dentry, NULL);
	if (res < 0)
		goto out_free;
	smb_setup_bcc(req, p);

	res = smb_request_ok(req, SMBopen, 7, 0);
	if (res != 0) {
		/* r/w refused: fall back to a read-only open */
		if (mode == read_write &&
		    (res == -EACCES || res == -ETXTBSY || res == -EROFS))
		{
			VERBOSE("%s/%s R/W failed, error=%d, retrying R/O\n",
				DENTRY_PATH(dentry), res);
			mode = read_only;
			req->rq_flags = 0;
			goto retry;
		}
		goto out_free;
	}
	/* We should now have data in vwv[0..6]. */

	ei->fileid = WVAL(req->rq_header, smb_vwv0);
	ei->attr   = WVAL(req->rq_header, smb_vwv1);
	/* smb_vwv2 has mtime */
	/* smb_vwv4 has size */
	ei->access = (WVAL(req->rq_header, smb_vwv6) & SMB_ACCMASK);
	ei->open = server->generation;

out_free:
	smb_rput(req);
out:
	return res;
}
1154
1155/*
1156 * Make sure the file is open, and check that the access
1157 * is compatible with the desired access.
1158 */
1159int
1160smb_open(struct dentry *dentry, int wish)
1161{
1162 struct inode *inode = dentry->d_inode;
1163 int result;
1164 __u16 access;
1165
1166 result = -ENOENT;
1167 if (!inode) {
1168 printk(KERN_ERR "smb_open: no inode for dentry %s/%s\n",
1169 DENTRY_PATH(dentry));
1170 goto out;
1171 }
1172
1173 if (!smb_is_open(inode)) {
1174 struct smb_sb_info *server = server_from_inode(inode);
1175 result = 0;
1176 if (!smb_is_open(inode))
1177 result = smb_proc_open(server, dentry, wish);
1178 if (result)
1179 goto out;
1180 /*
1181 * A successful open means the path is still valid ...
1182 */
1183 smb_renew_times(dentry);
1184 }
1185
1186 /*
1187 * Check whether the access is compatible with the desired mode.
1188 */
1189 result = 0;
1190 access = SMB_I(inode)->access;
1191 if (access != wish && access != SMB_O_RDWR) {
1192 PARANOIA("%s/%s access denied, access=%x, wish=%x\n",
1193 DENTRY_PATH(dentry), access, wish);
1194 result = -EACCES;
1195 }
1196out:
1197 return result;
1198}
1199
/*
 * Send an SMBclose for the given file handle, posting "mtime"
 * (converted to server-local time) as the last-write time.
 * Returns 0 or a negative errno.
 */
static int
smb_proc_close(struct smb_sb_info *server, __u16 fileid, __u32 mtime)
{
	struct smb_request *req;
	int result = -ENOMEM;

	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	smb_setup_header(req, SMBclose, 3, 0);
	WSET(req->rq_header, smb_vwv0, fileid);
	DSET(req->rq_header, smb_vwv1, utc2local(server, mtime));
	/* the file-id would be stale after a reconnect, so never retry */
	req->rq_flags |= SMB_REQ_NORETRY;
	result = smb_request_ok(req, SMBclose, 0, 0);

	smb_rput(req);
out:
	return result;
}
1219
1220/*
1221 * Win NT 4.0 has an apparent bug in that it fails to update the
1222 * modify time when writing to a file. As a workaround, we update
1223 * both modify and access time locally, and post the times to the
1224 * server when closing the file.
1225 */
1226static int
1227smb_proc_close_inode(struct smb_sb_info *server, struct inode * ino)
1228{
1229 struct smb_inode_info *ei = SMB_I(ino);
1230 int result = 0;
1231 if (smb_is_open(ino))
1232 {
1233 /*
1234 * We clear the open flag in advance, in case another
1235 * process observes the value while we block below.
1236 */
1237 ei->open = 0;
1238
1239 /*
1240 * Kludge alert: SMB timestamps are accurate only to
1241 * two seconds ... round the times to avoid needless
1242 * cache invalidations!
1243 */
1244 if (ino->i_mtime.tv_sec & 1) {
1245 ino->i_mtime.tv_sec--;
1246 ino->i_mtime.tv_nsec = 0;
1247 }
1248 if (ino->i_atime.tv_sec & 1) {
1249 ino->i_atime.tv_sec--;
1250 ino->i_atime.tv_nsec = 0;
1251 }
1252 /*
1253 * If the file is open with write permissions,
1254 * update the time stamps to sync mtime and atime.
1255 */
1256 if ((server->opt.capabilities & SMB_CAP_UNIX) == 0 &&
1257 (server->opt.protocol >= SMB_PROTOCOL_LANMAN2) &&
1258 !(ei->access == SMB_O_RDONLY))
1259 {
1260 struct smb_fattr fattr;
1261 smb_get_inode_attr(ino, &fattr);
1262 smb_proc_setattr_ext(server, ino, &fattr);
1263 }
1264
1265 result = smb_proc_close(server, ei->fileid, ino->i_mtime.tv_sec);
1266 /*
1267 * Force a revalidation after closing ... some servers
1268 * don't post the size until the file has been closed.
1269 */
1270 if (server->opt.protocol < SMB_PROTOCOL_NT1)
1271 ei->oldmtime = 0;
1272 ei->closed = jiffies;
1273 }
1274 return result;
1275}
1276
/*
 * Close the server-side handle behind "ino" if one is open; a no-op
 * (returning 0) otherwise.
 */
int
smb_close(struct inode *ino)
{
	if (!smb_is_open(ino))
		return 0;

	return smb_proc_close_inode(server_from_inode(ino), ino);
}
1288
1289/*
1290 * This is used to close a file following a failed instantiate.
1291 * Since we don't have an inode, we can't use any of the above.
1292 */
1293int
1294smb_close_fileid(struct dentry *dentry, __u16 fileid)
1295{
1296 struct smb_sb_info *server = server_from_dentry(dentry);
1297 int result;
1298
1299 result = smb_proc_close(server, fileid, get_seconds());
1300 return result;
1301}
1302
1303/* In smb_proc_read and smb_proc_write we do not retry, because the
1304 file-id would not be valid after a reconnection. */
1305
/*
 * Receive callback for SMBread replies: split the incoming bytes so the
 * 3-byte data block header (format byte + 16-bit length) lands in the
 * small rq_buffer and the payload goes straight into the caller's page.
 */
static void
smb_proc_read_data(struct smb_request *req)
{
	req->rq_iov[0].iov_base = req->rq_buffer;
	req->rq_iov[0].iov_len  = 3;

	req->rq_iov[1].iov_base = req->rq_page;
	req->rq_iov[1].iov_len  = req->rq_rsize;
	req->rq_iovlen = 2;

	/* bytes still expected for this packet */
	req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
}
1318
/*
 * Read "count" bytes at "offset" from the server file behind "inode"
 * into "data" using the core SMBread command.  Returns the number of
 * bytes the server delivered or a negative errno.  Never retried: the
 * file-id would be stale after a reconnection.
 */
static int
smb_proc_read(struct inode *inode, loff_t offset, int count, char *data)
{
	struct smb_sb_info *server = server_from_inode(inode);
	__u16 returned_count, data_len;
	unsigned char *buf;
	int result;
	struct smb_request *req;
	/* receives the 3-byte data block header (see smb_proc_read_data) */
	u8 rbuf[4];

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	smb_setup_header(req, SMBread, 5, 0);
	buf = req->rq_header;
	WSET(buf, smb_vwv0, SMB_I(inode)->fileid);
	WSET(buf, smb_vwv1, count);
	DSET(buf, smb_vwv2, offset);
	WSET(buf, smb_vwv4, 0);

	/* route the payload directly into the caller's buffer */
	req->rq_page = data;
	req->rq_rsize = count;
	req->rq_callback = smb_proc_read_data;
	req->rq_buffer = rbuf;
	/* SMB_REQ_STATIC: rbuf lives on our stack, do not free it */
	req->rq_flags |= SMB_REQ_NORETRY | SMB_REQ_STATIC;

	result = smb_request_ok(req, SMBread, 5, -1);
	if (result < 0)
		goto out_free;
	returned_count = WVAL(req->rq_header, smb_vwv0);

	/* length from the data block header captured in rbuf */
	data_len = WVAL(rbuf, 1);

	/* the two length fields should agree; log when they do not */
	if (returned_count != data_len) {
		printk(KERN_NOTICE "smb_proc_read: returned != data_len\n");
		printk(KERN_NOTICE "smb_proc_read: ret_c=%d, data_len=%d\n",
		       returned_count, data_len);
	}
	result = data_len;

out_free:
	smb_rput(req);
out:
	VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
		inode->i_ino, SMB_I(inode)->fileid, count, result);
	return result;
}
1367
/*
 * Write "count" bytes at "offset" to the server file behind "inode"
 * using the core SMBwrite command.  Returns the number of bytes the
 * server accepted or a negative errno.  Never retried: the file-id
 * would be stale after a reconnection.
 */
static int
smb_proc_write(struct inode *inode, loff_t offset, int count, const char *data)
{
	struct smb_sb_info *server = server_from_inode(inode);
	int result;
	u16 fileid = SMB_I(inode)->fileid;
	/* 3-byte data block header sent ahead of the payload */
	u8 buf[4];
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
		inode->i_ino, fileid, count, offset);

	smb_setup_header(req, SMBwrite, 5, count + 3);
	WSET(req->rq_header, smb_vwv0, fileid);
	WSET(req->rq_header, smb_vwv1, count);
	DSET(req->rq_header, smb_vwv2, offset);
	WSET(req->rq_header, smb_vwv4, 0);

	buf[0] = 1;	/* data block format byte */
	WSET(buf, 1, count);	/* yes, again ... */
	req->rq_iov[1].iov_base = buf;
	req->rq_iov[1].iov_len = 3;
	/* payload is sent in place from the caller's buffer */
	req->rq_iov[2].iov_base = (char *) data;
	req->rq_iov[2].iov_len = count;
	req->rq_iovlen = 3;
	req->rq_flags |= SMB_REQ_NORETRY;

	result = smb_request_ok(req, SMBwrite, 1, 0);
	if (result >= 0)
		/* vwv0 of the reply = number of bytes written */
		result = WVAL(req->rq_header, smb_vwv0);

	smb_rput(req);
out:
	return result;
}
1407
1408/*
1409 * In smb_proc_readX and smb_proc_writeX we do not retry, because the
1410 * file-id would not be valid after a reconnection.
1411 */
1412
/* largest server-chosen padding we accept before the readX payload */
#define SMB_READX_MAX_PAD 64
/*
 * Receive callback for SMBreadX replies.  The server reports where the
 * payload starts (vwv6, offset from the packet start); anything between
 * the header and that offset is padding which we sink into rq_buffer,
 * while the payload itself goes into the caller's page.
 */
static void
smb_proc_readX_data(struct smb_request *req)
{
	/* header length, excluding the netbios length (-4) */
	int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
	int data_off = WVAL(req->rq_header, smb_vwv6);

	/*
	 * Some genius made the padding to the data bytes arbitrary.
	 * So we must first calculate the amount of padding used by the server.
	 */
	data_off -= hdrlen;
	if (data_off > SMB_READX_MAX_PAD || data_off < 0) {
		PARANOIA("offset is larger than SMB_READX_MAX_PAD or negative!\n");
		PARANOIA("%d > %d || %d < 0\n", data_off, SMB_READX_MAX_PAD, data_off);
		/* force an overflow so the request is failed upstream */
		req->rq_rlen = req->rq_bufsize + 1;
		return;
	}
	req->rq_iov[0].iov_base = req->rq_buffer;
	req->rq_iov[0].iov_len  = data_off;

	req->rq_iov[1].iov_base = req->rq_page;
	req->rq_iov[1].iov_len  = req->rq_rsize;
	req->rq_iovlen = 2;

	/* bytes still expected for this packet */
	req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
}
1441
/*
 * Read "count" bytes at the 64-bit "offset" from the server file behind
 * "inode" using SMBreadX (required for files beyond 4GB).  Returns the
 * number of bytes delivered or a negative errno.  Never retried: the
 * file-id would be stale after a reconnection.
 */
static int
smb_proc_readX(struct inode *inode, loff_t offset, int count, char *data)
{
	struct smb_sb_info *server = server_from_inode(inode);
	unsigned char *buf;
	int result;
	struct smb_request *req;
	/* sink for server-side padding; contents are discarded, so a
	   single static scratch buffer is fine */
	static char pad[SMB_READX_MAX_PAD];

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	smb_setup_header(req, SMBreadX, 12, 0);
	buf = req->rq_header;
	WSET(buf, smb_vwv0, 0x00ff);	/* andX: no chained command */
	WSET(buf, smb_vwv1, 0);
	WSET(buf, smb_vwv2, SMB_I(inode)->fileid);
	DSET(buf, smb_vwv3, (u32)offset);               /* low 32 bits */
	WSET(buf, smb_vwv5, count);
	WSET(buf, smb_vwv6, 0);
	DSET(buf, smb_vwv7, 0);
	WSET(buf, smb_vwv9, 0);
	DSET(buf, smb_vwv10, (u32)(offset >> 32));      /* high 32 bits */
	WSET(buf, smb_vwv11, 0);

	req->rq_page = data;
	req->rq_rsize = count;
	req->rq_callback = smb_proc_readX_data;
	req->rq_buffer = pad;
	req->rq_bufsize = SMB_READX_MAX_PAD;
	/* SMB_REQ_STATIC: pad is not heap-allocated, do not free it */
	req->rq_flags |= SMB_REQ_STATIC | SMB_REQ_NORETRY;

	result = smb_request_ok(req, SMBreadX, 12, -1);
	if (result < 0)
		goto out_free;
	/* vwv5 of the reply = number of data bytes returned */
	result = WVAL(req->rq_header, smb_vwv5);

out_free:
	smb_rput(req);
out:
	VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
		inode->i_ino, SMB_I(inode)->fileid, count, result);
	return result;
}
1487
/*
 * Write @count bytes from @data at @offset to the file open on @inode
 * using SMBwriteX (write-and-X, with 64-bit offset support).
 *
 * Returns the byte count the server reports written, or a negative
 * errno.  Never retried after a reconnect (SMB_REQ_NORETRY): the
 * fileid would no longer be valid.
 */
static int
smb_proc_writeX(struct inode *inode, loff_t offset, int count, const char *data)
{
	struct smb_sb_info *server = server_from_inode(inode);
	int result;
	u8 *p;
	static u8 pad[4];	/* source of the single alignment byte below */
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
		inode->i_ino, SMB_I(inode)->fileid, count, offset);

	/* 14 parameter words; bcc covers the pad byte plus the data */
	p = smb_setup_header(req, SMBwriteX, 14, count + 1);
	WSET(req->rq_header, smb_vwv0, 0x00ff);	/* AndX: no chained command */
	WSET(req->rq_header, smb_vwv1, 0);	/* AndX offset */
	WSET(req->rq_header, smb_vwv2, SMB_I(inode)->fileid);
	DSET(req->rq_header, smb_vwv3, (u32)offset);	/* low 32 bits */
	DSET(req->rq_header, smb_vwv5, 0);	/* timeout */
	WSET(req->rq_header, smb_vwv7, 0);	/* write mode */
	WSET(req->rq_header, smb_vwv8, 0);	/* bytes remaining */
	WSET(req->rq_header, smb_vwv9, 0);	/* data length, high part */
	WSET(req->rq_header, smb_vwv10, count);	/* data length */
	/* data offset: end of the word vector + bcc field + 1 pad byte */
	WSET(req->rq_header, smb_vwv11, smb_vwv12 + 2 + 1);
	DSET(req->rq_header, smb_vwv12, (u32)(offset >> 32));	/* high 32 bits */

	/* iov[1] is one alignment byte, iov[2] the payload itself */
	req->rq_iov[1].iov_base = pad;
	req->rq_iov[1].iov_len = 1;
	req->rq_iov[2].iov_base = (char *) data;
	req->rq_iov[2].iov_len = count;
	req->rq_iovlen = 3;
	req->rq_flags |= SMB_REQ_NORETRY;

	result = smb_request_ok(req, SMBwriteX, 6, 0);
	if (result >= 0)
		result = WVAL(req->rq_header, smb_vwv2);	/* count written */

	smb_rput(req);
out:
	return result;
}
1532
/*
 * Create a file on the server with the core SMBcreate request.
 *
 * @attr:   DOS attribute bits for the new file
 * @ctime:  creation time (UTC; converted to server-local time)
 * @fileid: on success, receives the file handle the server returns
 *
 * Returns 0 on success or a negative errno.
 */
int
smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid)
{
	struct smb_sb_info *server = server_from_dentry(dentry);
	char *p;
	int result;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

	/* 3 parameter words: attributes + 32-bit creation time */
	p = smb_setup_header(req, SMBcreate, 3, 0);
	WSET(req->rq_header, smb_vwv0, attr);
	DSET(req->rq_header, smb_vwv1, utc2local(server, ctime));
	result = smb_simple_encode_path(req, &p, dentry, NULL);
	if (result < 0)
		goto out_free;
	smb_setup_bcc(req, p);

	result = smb_request_ok(req, SMBcreate, 1, 0);
	if (result < 0)
		goto out_free;

	*fileid = WVAL(req->rq_header, smb_vwv0);	/* handle of the new file */
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
1565
1566int
1567smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry)
1568{
1569 struct smb_sb_info *server = server_from_dentry(old_dentry);
1570 char *p;
1571 int result;
1572 struct smb_request *req;
1573
1574 result = -ENOMEM;
1575 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1576 goto out;
1577
1578 p = smb_setup_header(req, SMBmv, 1, 0);
1579 WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN | aDIR);
1580 result = smb_simple_encode_path(req, &p, old_dentry, NULL);
1581 if (result < 0)
1582 goto out_free;
1583 result = smb_simple_encode_path(req, &p, new_dentry, NULL);
1584 if (result < 0)
1585 goto out_free;
1586 smb_setup_bcc(req, p);
1587
1588 if ((result = smb_request_ok(req, SMBmv, 0, 0)) < 0)
1589 goto out_free;
1590 result = 0;
1591
1592out_free:
1593 smb_rput(req);
1594out:
1595 return result;
1596}
1597
1598/*
1599 * Code common to mkdir and rmdir.
1600 */
1601static int
1602smb_proc_generic_command(struct dentry *dentry, __u8 command)
1603{
1604 struct smb_sb_info *server = server_from_dentry(dentry);
1605 char *p;
1606 int result;
1607 struct smb_request *req;
1608
1609 result = -ENOMEM;
1610 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
1611 goto out;
1612
1613 p = smb_setup_header(req, command, 0, 0);
1614 result = smb_simple_encode_path(req, &p, dentry, NULL);
1615 if (result < 0)
1616 goto out_free;
1617 smb_setup_bcc(req, p);
1618
1619 result = smb_request_ok(req, command, 0, 0);
1620 if (result < 0)
1621 goto out_free;
1622 result = 0;
1623
1624out_free:
1625 smb_rput(req);
1626out:
1627 return result;
1628}
1629
/* Create the directory @dentry on the server (core SMBmkdir). */
int
smb_proc_mkdir(struct dentry *dentry)
{
	return smb_proc_generic_command(dentry, SMBmkdir);
}
1635
/* Remove the directory @dentry on the server (core SMBrmdir). */
int
smb_proc_rmdir(struct dentry *dentry)
{
	return smb_proc_generic_command(dentry, SMBrmdir);
}
1641
1642#if SMBFS_POSIX_UNLINK
1643/*
1644 * Removes readonly attribute from a file. Used by unlink to give posix
1645 * semantics.
1646 */
/*
 * Clear the DOS read-only attribute on @dentry so that a subsequent
 * unlink can succeed (posix semantics).  Returns a negative errno on
 * failure; otherwise the (non-negative) result of the last operation.
 */
static int
smb_set_rw(struct dentry *dentry,struct smb_sb_info *server)
{
	int result;
	struct smb_fattr fattr;

	/* FIXME: cifsUE should allow removing a readonly file. */

	/* first get current attribute */
	smb_init_dirent(server, &fattr);
	result = server->ops->getattr(server, dentry, &fattr);
	smb_finish_dirent(server, &fattr);
	if (result < 0)
		return result;

	/* if RONLY attribute is set, remove it */
	if (fattr.attr & aRONLY) {  /* read only attribute is set */
		fattr.attr &= ~aRONLY;
		result = smb_proc_setattr_core(server, dentry, fattr.attr);
	}
	/* if aRONLY was not set, the getattr result is passed through */
	return result;
}
1669#endif
1670
/*
 * Remove the file named by @dentry with the core SMBunlink request.
 * If the server refuses with -EACCES and posix unlink emulation is
 * compiled in, clear the read-only attribute and retry once.
 * Returns 0 on success or a negative errno.
 */
int
smb_proc_unlink(struct dentry *dentry)
{
	struct smb_sb_info *server = server_from_dentry(dentry);
	int flag = 0;	/* set once the read-only workaround was attempted */
	char *p;
	int result;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

 retry:
	p = smb_setup_header(req, SMBunlink, 1, 0);
	/* also match hidden and system files */
	WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN);
	result = smb_simple_encode_path(req, &p, dentry, NULL);
	if (result < 0)
		goto out_free;
	smb_setup_bcc(req, p);

	if ((result = smb_request_ok(req, SMBunlink, 0, 0)) < 0) {
#if SMBFS_POSIX_UNLINK
		if (result == -EACCES && !flag) {
			/* Posix semantics is for the read-only state
			   of a file to be ignored in unlink(). In the
			   SMB world a unlink() is refused on a
			   read-only file. To make things easier for
			   unix users we try to override the files
			   permission if the unlink fails with the
			   right error.
			   This introduces a race condition that could
			   lead to a file being written by someone who
			   shouldn't have access, but as far as I can
			   tell that is unavoidable */

			/* remove RONLY attribute and try again */
			result = smb_set_rw(dentry,server);
			if (result == 0) {
				flag = 1;
				/* clear flags before reusing the request */
				req->rq_flags = 0;
				goto retry;
			}
		}
#endif
		goto out_free;
	}
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
1725
1726int
1727smb_proc_flush(struct smb_sb_info *server, __u16 fileid)
1728{
1729 int result;
1730 struct smb_request *req;
1731
1732 result = -ENOMEM;
1733 if (! (req = smb_alloc_request(server, 0)))
1734 goto out;
1735
1736 smb_setup_header(req, SMBflush, 1, 0);
1737 WSET(req->rq_header, smb_vwv0, fileid);
1738 req->rq_flags |= SMB_REQ_NORETRY;
1739 result = smb_request_ok(req, SMBflush, 0, 0);
1740
1741 smb_rput(req);
1742out:
1743 return result;
1744}
1745
/* Truncate the file on @inode to @length using a zero-byte write. */
static int
smb_proc_trunc32(struct inode *inode, loff_t length)
{
	/*
	 * Writing 0bytes is old-SMB magic for truncating files.
	 * MAX_NON_LFS should prevent this from being called with a too
	 * large offset.
	 */
	return smb_proc_write(inode, length, 0, NULL);
}
1756
/*
 * Truncate the file on @inode to a 64-bit @length via
 * TRANSACT2_SETFILEINFO with SMB_SET_FILE_END_OF_FILE_INFO.
 * Returns 0 on success or a negative errno.
 */
static int
smb_proc_trunc64(struct inode *inode, loff_t length)
{
	struct smb_sb_info *server = server_from_inode(inode);
	int result;
	char *param;
	char *data;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 14)))
		goto out;

	/* 6 parameter bytes followed by the 8-byte data block */
	param = req->rq_buffer;
	data = req->rq_buffer + 6;

	/* FIXME: must we also set allocation size? winNT seems to do that */
	WSET(param, 0, SMB_I(inode)->fileid);	/* open file handle */
	WSET(param, 2, SMB_SET_FILE_END_OF_FILE_INFO);	/* info level */
	WSET(param, 4, 0);	/* reserved */
	LSET(data, 0, length);	/* new end-of-file, 64 bits */

	req->rq_trans2_command = TRANSACT2_SETFILEINFO;
	req->rq_ldata = 8;
	req->rq_data = data;
	req->rq_lparm = 6;
	req->rq_parm = param;
	/* the file handle is stale after a reconnect — never retry */
	req->rq_flags |= SMB_REQ_NORETRY;
	result = smb_add_request(req);
	if (result < 0)
		goto out_free;

	result = 0;
	if (req->rq_rcls != 0)
		result = smb_errno(req);

out_free:
	smb_rput(req);
out:
	return result;
}
1798
1799static int
1800smb_proc_trunc95(struct inode *inode, loff_t length)
1801{
1802 struct smb_sb_info *server = server_from_inode(inode);
1803 int result = smb_proc_trunc32(inode, length);
1804
1805 /*
1806 * win9x doesn't appear to update the size immediately.
1807 * It will return the old file size after the truncate,
1808 * confusing smbfs. So we force an update.
1809 *
1810 * FIXME: is this still necessary?
1811 */
1812 smb_proc_flush(server, SMB_I(inode)->fileid);
1813 return result;
1814}
1815
1816static void
1817smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
1818{
1819 memset(fattr, 0, sizeof(*fattr));
1820
1821 fattr->f_nlink = 1;
1822 fattr->f_uid = server->mnt->uid;
1823 fattr->f_gid = server->mnt->gid;
1824 fattr->f_unix = 0;
1825}
1826
1827static void
1828smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
1829{
1830 if (fattr->f_unix)
1831 return;
1832
1833 fattr->f_mode = server->mnt->file_mode;
1834 if (fattr->attr & aDIR) {
1835 fattr->f_mode = server->mnt->dir_mode;
1836 fattr->f_size = SMB_ST_BLKSIZE;
1837 }
1838 /* Check the read-only flag */
1839 if (fattr->attr & aRONLY)
1840 fattr->f_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
1841
1842 /* How many 512 byte blocks do we need for this file? */
1843 fattr->f_blocks = 0;
1844 if (fattr->f_size != 0)
1845 fattr->f_blocks = 1 + ((fattr->f_size-1) >> 9);
1846 return;
1847}
1848
1849void
1850smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
1851 struct super_block *sb)
1852{
1853 smb_init_dirent(server, fattr);
1854 fattr->attr = aDIR;
1855 fattr->f_ino = 2; /* traditional root inode number */
1856 fattr->f_mtime = current_fs_time(sb);
1857 smb_finish_dirent(server, fattr);
1858}
1859
1860/*
1861 * Decode a dirent for old protocols
1862 *
1863 * qname is filled with the decoded, and possibly translated, name.
1864 * fattr receives decoded attributes
1865 *
1866 * Bugs Noted:
1867 * (1) Pathworks servers may pad the name with extra spaces.
1868 */
static char *
smb_decode_short_dirent(struct smb_sb_info *server, char *p,
			struct qstr *qname, struct smb_fattr *fattr,
			unsigned char *name_buf)
{
	int len;

	/*
	 * SMB doesn't have a concept of inode numbers ...
	 */
	smb_init_dirent(server, fattr);
	fattr->f_ino = 0;	/* FIXME: do we need this? */

	p += SMB_STATUS_SIZE;	/* reserved (search_status) */
	fattr->attr = *p;	/* attribute byte at offset 0 */
	/* DOS time at offset 1, DOS date at offset 3 */
	fattr->f_mtime.tv_sec = date_dos2unix(server, WVAL(p, 3), WVAL(p, 1));
	fattr->f_mtime.tv_nsec = 0;
	fattr->f_size = DVAL(p, 5);	/* 32-bit file size */
	/* the old protocol carries one timestamp only; reuse for all three */
	fattr->f_ctime = fattr->f_mtime;
	fattr->f_atime = fattr->f_mtime;
	qname->name = p + 9;	/* 8.3 name, at most 12 characters */
	len = strnlen(qname->name, 12);

	/*
	 * Trim trailing blanks for Pathworks servers
	 */
	while (len > 2 && qname->name[len-1] == ' ')
		len--;

	smb_finish_dirent(server, fattr);

#if 0
	/* FIXME: These only work for ascii chars, and recent smbmount doesn't
	   allow the flag to be set anyway. It kills const. Remove? */
	switch (server->opt.case_handling) {
	case SMB_CASE_UPPER:
		str_upper(entry->name, len);
		break;
	case SMB_CASE_LOWER:
		str_lower(entry->name, len);
		break;
	default:
		break;
	}
#endif

	/* Translate to the local charset; on a failed conversion
	   qname->len stays 0 and the caller skips the entry. */
	qname->len = 0;
	len = server->ops->convert(name_buf, SMB_MAXNAMELEN,
				   qname->name, len,
				   server->remote_nls, server->local_nls);
	if (len > 0) {
		qname->len = len;
		qname->name = name_buf;
		DEBUG1("len=%d, name=%.*s\n",qname->len,qname->len,qname->name);
	}

	return p + 22;	/* advance past this fixed-size record */
}
1927
1928/*
1929 * This routine is used to read in directory entries from the network.
1930 * Note that it is for short directory name seeks, i.e.: protocol <
1931 * SMB_PROTOCOL_LANMAN2
1932 */
/*
 * Enumerate a directory with the core SMBsearch request, batching
 * entries_asked entries per round trip and resuming each batch with
 * the server-supplied status key of the previous batch's last entry.
 * Entries are pushed into the page cache via smb_fill_cache().
 */
static int
smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
		       struct smb_cache_control *ctl)
{
	struct dentry *dir = filp->f_path.dentry;
	struct smb_sb_info *server = server_from_dentry(dir);
	struct qstr qname;
	struct smb_fattr fattr;
	char *p;
	int result;
	int i, first, entries_seen, entries;
	int entries_asked = (server->opt.max_xmit - 100) / SMB_DIRINFO_SIZE;
	__u16 bcc;
	__u16 count;
	char status[SMB_STATUS_SIZE];	/* resume key for the next batch */
	static struct qstr mask = {
		.name = "*.*",
		.len = 3,
	};
	unsigned char *last_status;
	struct smb_request *req;
	unsigned char *name_buf;

	VERBOSE("%s/%s\n", DENTRY_PATH(dir));

	lock_kernel();

	result = -ENOMEM;
	if (! (name_buf = kmalloc(SMB_MAXNAMELEN, GFP_KERNEL)))
		goto out;

	first = 1;
	entries = 0;
	entries_seen = 2;	/* implicit . and .. */

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
		goto out_name;

	while (1) {
		p = smb_setup_header(req, SMBsearch, 2, 0);
		WSET(req->rq_header, smb_vwv0, entries_asked);
		WSET(req->rq_header, smb_vwv1, aDIR);
		if (first == 1) {
			/* first round: path + empty resume key */
			result = smb_simple_encode_path(req, &p, dir, &mask);
			if (result < 0)
				goto out_free;
			if (p + 3 > (char *)req->rq_buffer + req->rq_bufsize) {
				result = -ENAMETOOLONG;
				goto out_free;
			}
			/* buffer format 5 (variable block), length 0 */
			*p++ = 5;
			WSET(p, 0, 0);
			p += 2;
			first = 0;
		} else {
			/* continuation: empty path + previous status key */
			if (p + 5 + SMB_STATUS_SIZE >
			    (char *)req->rq_buffer + req->rq_bufsize) {
				result = -ENAMETOOLONG;
				goto out_free;
			}

			*p++ = 4;	/* buffer format 4: empty ascii path */
			*p++ = 0;
			*p++ = 5;	/* buffer format 5: the resume key */
			WSET(p, 0, SMB_STATUS_SIZE);
			p += 2;
			memcpy(p, status, SMB_STATUS_SIZE);
			p += SMB_STATUS_SIZE;
		}

		smb_setup_bcc(req, p);

		result = smb_request_ok(req, SMBsearch, 1, -1);
		if (result < 0) {
			/* ERRnofiles just means end of listing */
			if ((req->rq_rcls == ERRDOS) &&
			    (req->rq_err == ERRnofiles))
				break;
			goto out_free;
		}
		count = WVAL(req->rq_header, smb_vwv0);
		if (count <= 0)
			break;

		result = -EIO;
		bcc = smb_bcc(req->rq_header);
		if (bcc != count * SMB_DIRINFO_SIZE + 3)
			goto out_free;
		p = req->rq_buffer + 3;


		/* Make sure the response fits in the buffer. Fixed sized
		   entries means we don't have to check in the decode loop. */

		last_status = req->rq_buffer + 3 + (count-1) * SMB_DIRINFO_SIZE;

		if (last_status + SMB_DIRINFO_SIZE >=
		    req->rq_buffer + req->rq_bufsize) {
			printk(KERN_ERR "smb_proc_readdir_short: "
			       "last dir entry outside buffer! "
			       "%d@%p %d@%p\n", SMB_DIRINFO_SIZE, last_status,
			       req->rq_bufsize, req->rq_buffer);
			goto out_free;
		}

		/* Read the last entry into the status field. */
		memcpy(status, last_status, SMB_STATUS_SIZE);


		/* Now we are ready to parse smb directory entries. */

		for (i = 0; i < count; i++) {
			p = smb_decode_short_dirent(server, p,
						    &qname, &fattr, name_buf);
			if (qname.len == 0)
				continue;

			/* skip the server's . and .. at the start */
			if (entries_seen == 2 && qname.name[0] == '.') {
				if (qname.len == 1)
					continue;
				if (qname.name[1] == '.' && qname.len == 2)
					continue;
			}
			if (!smb_fill_cache(filp, dirent, filldir, ctl,
					    &qname, &fattr))
				;	/* stop reading? */
			entries_seen++;
		}
	}
	/* NOTE(review): `entries` is never incremented, so success always
	   returns 0; callers appear to only test for negative values. */
	result = entries;

out_free:
	smb_rput(req);
out_name:
	kfree(name_buf);
out:
	unlock_kernel();
	return result;
}
2072
/*
 * Decode a unix-extensions basic attribute block at @p into @fattr
 * (field offsets listed below).  Mount-enforced uid/gid and dir/file
 * mode overrides are applied here as well.
 */
static void smb_decode_unix_basic(struct smb_fattr *fattr, struct smb_sb_info *server, char *p)
{
	u64 size, disk_bytes;

	/* FIXME: verify nls support. all is sent as utf8? */

	fattr->f_unix = 1;
	fattr->f_mode = 0;

	/* FIXME: use the uniqueID from the remote instead? */
	/* 0 L file size in bytes */
	/* 8 L file size on disk in bytes (block count) */
	/* 40 L uid */
	/* 48 L gid */
	/* 56 W file type */
	/* 60 L devmajor */
	/* 68 L devminor */
	/* 76 L unique ID (inode) */
	/* 84 L permissions */
	/* 92 L link count */

	size = LVAL(p, 0);
	disk_bytes = LVAL(p, 8);

	/*
	 * Some samba versions round up on-disk byte usage
	 * to 1MB boundaries, making it useless. When seeing
	 * that, use the size instead.
	 */
	if (!(disk_bytes & 0xfffff))
		disk_bytes = size+511;

	fattr->f_size = size;
	fattr->f_blocks = disk_bytes >> 9;	/* 512-byte blocks */
	fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 16));
	fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 24));
	fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 32));

	/* mount options may force uid/gid over the server's values */
	if (server->mnt->flags & SMB_MOUNT_UID)
		fattr->f_uid = server->mnt->uid;
	else
		fattr->f_uid = LVAL(p, 40);

	if (server->mnt->flags & SMB_MOUNT_GID)
		fattr->f_gid = server->mnt->gid;
	else
		fattr->f_gid = LVAL(p, 48);

	fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56));

	if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) {
		__u64 major = LVAL(p, 60);
		__u64 minor = LVAL(p, 68);

		/* drop device numbers that don't survive the MKDEV round-trip */
		fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff);
		if (MAJOR(fattr->f_rdev) != (major & 0xffffffff) ||
		    MINOR(fattr->f_rdev) != (minor & 0xffffffff))
			fattr->f_rdev = 0;
	}

	fattr->f_mode |= LVAL(p, 84);	/* permission bits */

	/* mount-specified dir/file modes override the permission bits */
	if ( (server->mnt->flags & SMB_MOUNT_DMODE) &&
	     (S_ISDIR(fattr->f_mode)) )
		fattr->f_mode = (server->mnt->dir_mode & S_IRWXUGO) | S_IFDIR;
	else if ( (server->mnt->flags & SMB_MOUNT_FMODE) &&
	          !(S_ISDIR(fattr->f_mode)) )
		fattr->f_mode = (server->mnt->file_mode & S_IRWXUGO) |
				(fattr->f_mode & S_IFMT);

}
2144
2145/*
2146 * Interpret a long filename structure using the specified info level:
2147 * level 1 for anything below NT1 protocol
2148 * level 260 for NT1 protocol
2149 *
2150 * qname is filled with the decoded, and possibly translated, name
2151 * fattr receives decoded attributes.
2152 *
2153 * Bugs Noted:
2154 * (1) Win NT 4.0 appends a null byte to names and counts it in the length!
2155 */
static char *
smb_decode_long_dirent(struct smb_sb_info *server, char *p, int level,
		       struct qstr *qname, struct smb_fattr *fattr,
		       unsigned char *name_buf)
{
	char *result;
	unsigned int len = 0;
	int n;
	__u16 date, time;
	int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);

	/*
	 * SMB doesn't have a concept of inode numbers ...
	 */
	smb_init_dirent(server, fattr);
	fattr->f_ino = 0;	/* FIXME: do we need this? */

	switch (level) {
	case 1:
		/* info level 1: fixed header, then length byte + name */
		len = *((unsigned char *) p + 22);
		qname->name = p + 23;
		result = p + 24 + len;	/* start of the next entry */

		date = WVAL(p, 0);
		time = WVAL(p, 2);
		fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
		fattr->f_ctime.tv_nsec = 0;

		date = WVAL(p, 4);
		time = WVAL(p, 6);
		fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
		fattr->f_atime.tv_nsec = 0;

		date = WVAL(p, 8);
		time = WVAL(p, 10);
		fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
		fattr->f_mtime.tv_nsec = 0;
		fattr->f_size = DVAL(p, 12);
		/* ULONG allocation size */
		fattr->attr = WVAL(p, 20);

		VERBOSE("info 1 at %p, len=%d, name=%.*s\n",
			p, len, len, qname->name);
		break;
	case 260:
		/* info level 260: offset 0 holds the next-entry offset */
		result = p + WVAL(p, 0);
		len = DVAL(p, 60);	/* name length in bytes */
		if (len > 255) len = 255;
		/* NT4 null terminates, unless we are using unicode ... */
		qname->name = p + 94;
		if (!unicode && len && qname->name[len-1] == '\0')
			len--;

		fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 8));
		fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 16));
		fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 24));
		/* change time (32) */
		fattr->f_size = LVAL(p, 40);
		/* alloc size (48) */
		fattr->attr = DVAL(p, 56);

		VERBOSE("info 260 at %p, len=%d, name=%.*s\n",
			p, len, len, qname->name);
		break;
	case SMB_FIND_FILE_UNIX:
		result = p + WVAL(p, 0);	/* next-entry offset */
		qname->name = p + 108;

		len = strlen(qname->name);
		/* FIXME: should we check the length?? */

		/* the unix basic block starts after the 8-byte header */
		p += 8;
		smb_decode_unix_basic(fattr, server, p);
		VERBOSE("info SMB_FIND_FILE_UNIX at %p, len=%d, name=%.*s\n",
			p, len, len, qname->name);
		break;
	default:
		PARANOIA("Unknown info level %d\n", level);
		result = p + WVAL(p, 0);
		goto out;
	}

	smb_finish_dirent(server, fattr);

#if 0
	/* FIXME: These only work for ascii chars, and recent smbmount doesn't
	   allow the flag to be set anyway. Remove? */
	switch (server->opt.case_handling) {
	case SMB_CASE_UPPER:
		str_upper(qname->name, len);
		break;
	case SMB_CASE_LOWER:
		str_lower(qname->name, len);
		break;
	default:
		break;
	}
#endif

	/* Translate the name to the local charset; on failure
	   qname->len stays 0. */
	qname->len = 0;
	n = server->ops->convert(name_buf, SMB_MAXNAMELEN,
				 qname->name, len,
				 server->remote_nls, server->local_nls);
	if (n > 0) {
		qname->len = n;
		qname->name = name_buf;
	}

out:
	return result;
}
2267
2268/* findfirst/findnext flags */
2269#define SMB_CLOSE_AFTER_FIRST (1<<0)
2270#define SMB_CLOSE_IF_END (1<<1)
2271#define SMB_REQUIRE_RESUME_KEY (1<<2)
2272#define SMB_CONTINUE_BIT (1<<3)
2273
2274/*
2275 * Note: samba-2.0.7 (at least) has a very similar routine, cli_list, in
2276 * source/libsmb/clilist.c. When looking for smb bugs in the readdir code,
 * go there for advice.
2278 *
2279 * Bugs Noted:
2280 * (1) When using Info Level 1 Win NT 4.0 truncates directory listings
2281 * for certain patterns of names and/or lengths. The breakage pattern
2282 * is completely reproducible and can be toggled by the creation of a
2283 * single file. (E.g. echo hi >foo breaks, rm -f foo works.)
2284 */
/*
 * Enumerate a directory with TRANSACT2_FINDFIRST/FINDNEXT at the best
 * info level the server supports, feeding decoded entries to
 * smb_fill_cache().  Returns a negative errno on failure, otherwise a
 * non-negative value.
 */
static int
smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
		      struct smb_cache_control *ctl)
{
	struct dentry *dir = filp->f_path.dentry;
	struct smb_sb_info *server = server_from_dentry(dir);
	struct qstr qname;
	struct smb_fattr fattr;

	unsigned char *p, *lastname;
	char *mask, *param;
	__u16 command;
	int first, entries_seen;

	/* Both NT and OS/2 accept info level 1 (but see note below). */
	int info_level = 260;
	const int max_matches = 512;

	unsigned int ff_searchcount = 0;
	unsigned int ff_eos = 0;		/* end-of-search flag */
	unsigned int ff_lastname = 0;		/* offset of the resume name */
	unsigned int ff_dir_handle = 0;		/* server search handle */
	unsigned int loop_count = 0;
	unsigned int mask_len, i;
	int result;
	struct smb_request *req;
	unsigned char *name_buf;
	static struct qstr star = {
		.name = "*",
		.len = 1,
	};

	lock_kernel();

	/*
	 * We always prefer unix style. Use info level 1 for older
	 * servers that don't do 260.
	 */
	if (server->opt.capabilities & SMB_CAP_UNIX)
		info_level = SMB_FIND_FILE_UNIX;
	else if (server->opt.protocol < SMB_PROTOCOL_NT1)
		info_level = 1;

	result = -ENOMEM;
	if (! (name_buf = kmalloc(SMB_MAXNAMELEN+2, GFP_KERNEL)))
		goto out;
	if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
		goto out_name;
	param = req->rq_buffer;

	/*
	 * Encode the initial path (the search mask follows the 12
	 * parameter bytes)
	 */
	mask = param + 12;

	result = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dir, &star);
	if (result <= 0)
		goto out_free;
	mask_len = result - 1;	/* mask_len is strlen, not #bytes */
	result = 0;
	first = 1;
	VERBOSE("starting mask_len=%d, mask=%s\n", mask_len, mask);

	entries_seen = 2;	/* implicit . and .. */
	ff_eos = 0;

	while (ff_eos == 0) {
		loop_count += 1;
		if (loop_count > 10) {
			printk(KERN_WARNING "smb_proc_readdir_long: "
			       "Looping in FIND_NEXT??\n");
			result = -EIO;
			break;
		}

		if (first != 0) {
			command = TRANSACT2_FINDFIRST;
			WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
			WSET(param, 2, max_matches);	/* max count */
			WSET(param, 4, SMB_CLOSE_IF_END);
			WSET(param, 6, info_level);
			DSET(param, 8, 0);
		} else {
			command = TRANSACT2_FINDNEXT;

			VERBOSE("handle=0x%X, lastname=%d, mask=%.*s\n",
				ff_dir_handle, ff_lastname, mask_len, mask);

			WSET(param, 0, ff_dir_handle);	/* search handle */
			WSET(param, 2, max_matches);	/* max count */
			WSET(param, 4, info_level);
			DSET(param, 6, 0);
			WSET(param, 10, SMB_CONTINUE_BIT|SMB_CLOSE_IF_END);
		}

		req->rq_trans2_command = command;
		req->rq_ldata = 0;
		req->rq_data = NULL;
		req->rq_lparm = 12 + mask_len + 1;
		req->rq_parm = param;
		req->rq_flags = 0;
		result = smb_add_request(req);
		if (result < 0) {
			PARANOIA("error=%d, breaking\n", result);
			break;
		}

		if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
			/* a damn Win95 bug - sometimes it clags if you
			   ask it too fast */
			schedule_timeout_interruptible(msecs_to_jiffies(200));
			continue;
		}

		if (req->rq_rcls != 0) {
			result = smb_errno(req);
			PARANOIA("name=%s, result=%d, rcls=%d, err=%d\n",
				 mask, result, req->rq_rcls, req->rq_err);
			break;
		}

		/* parse out some important return info */
		if (first != 0) {
			ff_dir_handle = WVAL(req->rq_parm, 0);
			ff_searchcount = WVAL(req->rq_parm, 2);
			ff_eos = WVAL(req->rq_parm, 4);
			ff_lastname = WVAL(req->rq_parm, 8);
		} else {
			ff_searchcount = WVAL(req->rq_parm, 0);
			ff_eos = WVAL(req->rq_parm, 2);
			ff_lastname = WVAL(req->rq_parm, 6);
		}

		if (ff_searchcount == 0)
			break;

		/* Now we are ready to parse smb directory entries. */

		/* point to the data bytes */
		p = req->rq_data;
		for (i = 0; i < ff_searchcount; i++) {
			/* make sure we stay within the buffer */
			if (p >= req->rq_data + req->rq_ldata) {
				printk(KERN_ERR "smb_proc_readdir_long: "
				       "dirent pointer outside buffer! "
				       "%p %d@%p\n",
				       p, req->rq_ldata, req->rq_data);
				result = -EIO; /* always a comm. error? */
				goto out_free;
			}

			p = smb_decode_long_dirent(server, p, info_level,
						   &qname, &fattr, name_buf);

			/* ignore . and .. from the server */
			if (entries_seen == 2 && qname.name[0] == '.') {
				if (qname.len == 1)
					continue;
				if (qname.name[1] == '.' && qname.len == 2)
					continue;
			}

			if (!smb_fill_cache(filp, dirent, filldir, ctl,
					    &qname, &fattr))
				;	/* stop reading? */
			entries_seen++;
		}

		VERBOSE("received %d entries, eos=%d\n", ff_searchcount,ff_eos);

		/*
		 * We might need the lastname for continuations.
		 *
		 * Note that some servers (win95?) point to the filename and
		 * others (NT4, Samba using NT1) to the dir entry. We assume
		 * here that those who do not point to a filename do not need
		 * this info to continue the listing.
		 *
		 * OS/2 needs this and talks infolevel 1.
		 * NetApps want lastname with infolevel 260.
		 * win2k want lastname with infolevel 260, and points to
		 * the record not to the name.
		 * Samba+CifsUnixExt doesn't need lastname.
		 *
		 * Both are happy if we return the data they point to. So we do.
		 * (FIXME: above is not true with win2k)
		 */
		mask_len = 0;
		if (info_level != SMB_FIND_FILE_UNIX &&
		    ff_lastname > 0 && ff_lastname < req->rq_ldata) {
			lastname = req->rq_data + ff_lastname;

			switch (info_level) {
			case 260:
				mask_len = req->rq_ldata - ff_lastname;
				break;
			case 1:
				/* lastname points to a length byte */
				mask_len = *lastname++;
				if (ff_lastname + 1 + mask_len > req->rq_ldata)
					mask_len = req->rq_ldata - ff_lastname - 1;
				break;
			}

			/*
			 * Update the mask string for the next message.
			 * (bounded copy; strnlen below fixes up the length)
			 */
			if (mask_len > 255)
				mask_len = 255;
			if (mask_len)
				strncpy(mask, lastname, mask_len);
		}
		mask_len = strnlen(mask, mask_len);
		VERBOSE("new mask, len=%d@%d of %d, mask=%.*s\n",
			mask_len, ff_lastname, req->rq_ldata, mask_len, mask);

		first = 0;
		loop_count = 0;
	}

out_free:
	smb_rput(req);
out_name:
	kfree(name_buf);
out:
	unlock_kernel();
	return result;
}
2513
2514/*
2515 * This version uses the trans2 TRANSACT2_FINDFIRST message
2516 * to get the attribute data.
2517 *
2518 * Bugs Noted:
2519 */
/*
 * Fetch attributes for @dentry by issuing a single-entry
 * TRANSACT2_FINDFIRST at info level 1 and decoding the one dirent the
 * server returns.  Returns 0 on success or a negative errno.
 */
static int
smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
			struct smb_fattr *fattr)
{
	char *param, *mask;
	__u16 date, time;
	int mask_len, result;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;
	param = req->rq_buffer;
	mask = param + 12;	/* search pattern follows the 12 param bytes */

	mask_len = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dentry,NULL);
	if (mask_len < 0) {
		result = mask_len;
		goto out_free;
	}
	VERBOSE("name=%s, len=%d\n", mask, mask_len);
	WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);	/* search attributes */
	WSET(param, 2, 1);	/* max count */
	WSET(param, 4, 1);	/* close after this call */
	WSET(param, 6, 1);	/* info_level */
	DSET(param, 8, 0);

	req->rq_trans2_command = TRANSACT2_FINDFIRST;
	req->rq_ldata = 0;
	req->rq_data = NULL;
	req->rq_lparm = 12 + mask_len;
	req->rq_parm = param;
	req->rq_flags = 0;
	result = smb_add_request(req);
	if (result < 0)
		goto out_free;
	if (req->rq_rcls != 0) {
		result = smb_errno(req);
#ifdef SMBFS_PARANOIA
		if (result != -ENOENT)
			PARANOIA("error for %s, rcls=%d, err=%d\n",
				 mask, req->rq_rcls, req->rq_err);
#endif
		goto out_free;
	}
	/* Make sure we got enough data ... */
	result = -EINVAL;
	if (req->rq_ldata < 22 || WVAL(req->rq_parm, 2) != 1) {
		PARANOIA("bad result for %s, len=%d, count=%d\n",
			 mask, req->rq_ldata, WVAL(req->rq_parm, 2));
		goto out_free;
	}

	/*
	 * Decode the response into the fattr ...
	 * (info level 1 layout: ctime, atime, mtime as DOS date/time
	 * pairs, then size, allocation size and attributes)
	 */
	date = WVAL(req->rq_data, 0);
	time = WVAL(req->rq_data, 2);
	fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
	fattr->f_ctime.tv_nsec = 0;

	date = WVAL(req->rq_data, 4);
	time = WVAL(req->rq_data, 6);
	fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
	fattr->f_atime.tv_nsec = 0;

	date = WVAL(req->rq_data, 8);
	time = WVAL(req->rq_data, 10);
	fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
	fattr->f_mtime.tv_nsec = 0;
	VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n",
		mask, date, time, fattr->f_mtime.tv_sec);
	fattr->f_size = DVAL(req->rq_data, 12);
	/* ULONG allocation size */
	fattr->attr = WVAL(req->rq_data, 20);
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
2602
/*
 * Fetch attributes with the core-protocol SMBgetatr request: DOS
 * attribute bits, a 32-bit size and a single timestamp, which is used
 * for mtime and copied to ctime and atime as well.
 */
static int
smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
		      struct smb_fattr *fattr)
{
	int result;
	char *p;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

	p = smb_setup_header(req, SMBgetatr, 0, 0);
	result = smb_simple_encode_path(req, &p, dir, NULL);
	if (result < 0)
		goto out_free;
	smb_setup_bcc(req, p);

	/* expect 10 parameter words in the reply */
	if ((result = smb_request_ok(req, SMBgetatr, 10, 0)) < 0)
		goto out_free;
	fattr->attr = WVAL(req->rq_header, smb_vwv0);
	/* the server reports local time; convert to utc */
	fattr->f_mtime.tv_sec = local2utc(server, DVAL(req->rq_header, smb_vwv1));
	fattr->f_mtime.tv_nsec = 0;
	fattr->f_size = DVAL(req->rq_header, smb_vwv3);
	fattr->f_ctime = fattr->f_mtime;
	fattr->f_atime = fattr->f_mtime;
#ifdef SMBFS_DEBUG_TIMESTAMP
	printk("getattr_core: %s/%s, mtime=%ld\n",
	       DENTRY_PATH(dir), fattr->f_mtime);
#endif
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
2640
2641/*
2642 * Bugs Noted:
2643 * (1) Win 95 swaps the date and time fields in the standard info level.
2644 */
/*
 * Issue a TRANSACT2_QPATHINFO request for the given dentry at the
 * requested info level, leaving the raw reply in req->rq_data for
 * the caller to decode.  The caller owns req (allocated with a
 * PAGE_SIZE buffer) and must release it with smb_rput().
 *
 * Returns 0 on success (with at least 22 bytes of reply data),
 * a negative errno otherwise.
 */
static int
smb_proc_getattr_trans2(struct smb_sb_info *server, struct dentry *dir,
			struct smb_request *req, int infolevel)
{
	char *p, *param;
	int result;

	/* parameter block: WORD info level, DWORD reserved, then the path */
	param = req->rq_buffer;
	WSET(param, 0, infolevel);
	DSET(param, 2, 0);
	result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
	if (result < 0)
		goto out;
	p = param + 6 + result;

	req->rq_trans2_command = TRANSACT2_QPATHINFO;
	req->rq_ldata = 0;
	req->rq_data = NULL;
	req->rq_lparm = p - param;
	req->rq_parm = param;
	req->rq_flags = 0;
	result = smb_add_request(req);
	if (result < 0)
		goto out;
	/* map an SMB-level error class into a negative errno */
	if (req->rq_rcls != 0) {
		VERBOSE("for %s: result=%d, rcls=%d, err=%d\n",
			&param[6], result, req->rq_rcls, req->rq_err);
		result = smb_errno(req);
		goto out;
	}
	/* 22 bytes is the smallest info level the callers decode */
	result = -ENOENT;
	if (req->rq_ldata < 22) {
		PARANOIA("not enough data for %s, len=%d\n",
			 &param[6], req->rq_ldata);
		goto out;
	}

	result = 0;
out:
	return result;
}
2686
2687static int
2688smb_proc_getattr_trans2_std(struct smb_sb_info *server, struct dentry *dir,
2689 struct smb_fattr *attr)
2690{
2691 u16 date, time;
2692 int off_date = 0, off_time = 2;
2693 int result;
2694 struct smb_request *req;
2695
2696 result = -ENOMEM;
2697 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2698 goto out;
2699
2700 result = smb_proc_getattr_trans2(server, dir, req, SMB_INFO_STANDARD);
2701 if (result < 0)
2702 goto out_free;
2703
2704 /*
2705 * Kludge alert: Win 95 swaps the date and time field,
2706 * contrary to the CIFS docs and Win NT practice.
2707 */
2708 if (server->mnt->flags & SMB_MOUNT_WIN95) {
2709 off_date = 2;
2710 off_time = 0;
2711 }
2712 date = WVAL(req->rq_data, off_date);
2713 time = WVAL(req->rq_data, off_time);
2714 attr->f_ctime.tv_sec = date_dos2unix(server, date, time);
2715 attr->f_ctime.tv_nsec = 0;
2716
2717 date = WVAL(req->rq_data, 4 + off_date);
2718 time = WVAL(req->rq_data, 4 + off_time);
2719 attr->f_atime.tv_sec = date_dos2unix(server, date, time);
2720 attr->f_atime.tv_nsec = 0;
2721
2722 date = WVAL(req->rq_data, 8 + off_date);
2723 time = WVAL(req->rq_data, 8 + off_time);
2724 attr->f_mtime.tv_sec = date_dos2unix(server, date, time);
2725 attr->f_mtime.tv_nsec = 0;
2726#ifdef SMBFS_DEBUG_TIMESTAMP
2727 printk(KERN_DEBUG "getattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
2728 DENTRY_PATH(dir), date, time, attr->f_mtime);
2729#endif
2730 attr->f_size = DVAL(req->rq_data, 12);
2731 attr->attr = WVAL(req->rq_data, 20);
2732
2733out_free:
2734 smb_rput(req);
2735out:
2736 return result;
2737}
2738
/*
 * Fetch attributes via trans2 info level SMB_QUERY_FILE_ALL_INFO,
 * which carries 64-bit NT timestamps and a 64-bit file size.
 *
 * Returns 0 on success or a negative errno.
 */
static int
smb_proc_getattr_trans2_all(struct smb_sb_info *server, struct dentry *dir,
			    struct smb_fattr *attr)
{
	struct smb_request *req;
	int result;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

	result = smb_proc_getattr_trans2(server, dir, req,
					 SMB_QUERY_FILE_ALL_INFO);
	if (result < 0)
		goto out_free;

	/* NT UTC (100ns since 1601) timestamps at fixed offsets */
	attr->f_ctime = smb_ntutc2unixutc(LVAL(req->rq_data, 0));
	attr->f_atime = smb_ntutc2unixutc(LVAL(req->rq_data, 8));
	attr->f_mtime = smb_ntutc2unixutc(LVAL(req->rq_data, 16));
	/* change (24) */
	attr->attr = WVAL(req->rq_data, 32);
	/* pad? (34) */
	/* allocated size (40) */
	attr->f_size = LVAL(req->rq_data, 48);

out_free:
	smb_rput(req);
out:
	return result;
}
2769
2770static int
2771smb_proc_getattr_unix(struct smb_sb_info *server, struct dentry *dir,
2772 struct smb_fattr *attr)
2773{
2774 struct smb_request *req;
2775 int result;
2776
2777 result = -ENOMEM;
2778 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
2779 goto out;
2780
2781 result = smb_proc_getattr_trans2(server, dir, req,
2782 SMB_QUERY_FILE_UNIX_BASIC);
2783 if (result < 0)
2784 goto out_free;
2785
2786 smb_decode_unix_basic(attr, server, req->rq_data);
2787
2788out_free:
2789 smb_rput(req);
2790out:
2791 return result;
2792}
2793
/*
 * Win95-flavored getattr.  Uses the "standard" trans2 level and then
 * works around a Win9x bug where none of the getattr calls report the
 * current size of a file that is open and being written.
 *
 * Returns 0 on success or a negative errno.
 */
static int
smb_proc_getattr_95(struct smb_sb_info *server, struct dentry *dir,
		    struct smb_fattr *attr)
{
	struct inode *inode = dir->d_inode;
	int result;

	/* FIXME: why not use the "all" version? */
	result = smb_proc_getattr_trans2_std(server, dir, attr);
	if (result < 0)
		goto out;

	/*
	 * None of the getattr versions here can make win9x return the right
	 * filesize if there are changes made to an open file.
	 * A seek-to-end does return the right size, but we only need to do
	 * that on files we have written.
	 */
	if (inode && SMB_I(inode)->flags & SMB_F_LOCALWRITE &&
	    smb_is_open(inode))
	{
		/* SEEK_END (whence=2) on the open fid yields the true size */
		__u16 fileid = SMB_I(inode)->fileid;
		attr->f_size = smb_proc_seek(server, fileid, 2, 0);
	}

out:
	return result;
}
2822
2823static int
2824smb_proc_ops_wait(struct smb_sb_info *server)
2825{
2826 int result;
2827
2828 result = wait_event_interruptible_timeout(server->conn_wq,
2829 server->conn_complete, 30*HZ);
2830
2831 if (!result || signal_pending(current))
2832 return -EIO;
2833
2834 return 0;
2835}
2836
2837static int
2838smb_proc_getattr_null(struct smb_sb_info *server, struct dentry *dir,
2839 struct smb_fattr *fattr)
2840{
2841 int result;
2842
2843 if (smb_proc_ops_wait(server) < 0)
2844 return -EIO;
2845
2846 smb_init_dirent(server, fattr);
2847 result = server->ops->getattr(server, dir, fattr);
2848 smb_finish_dirent(server, fattr);
2849
2850 return result;
2851}
2852
2853static int
2854smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
2855 struct smb_cache_control *ctl)
2856{
2857 struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
2858
2859 if (smb_proc_ops_wait(server) < 0)
2860 return -EIO;
2861
2862 return server->ops->readdir(filp, dirent, filldir, ctl);
2863}
2864
2865int
2866smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr)
2867{
2868 struct smb_sb_info *server = server_from_dentry(dir);
2869 int result;
2870
2871 smb_init_dirent(server, fattr);
2872 result = server->ops->getattr(server, dir, fattr);
2873 smb_finish_dirent(server, fattr);
2874
2875 return result;
2876}
2877
2878
2879/*
2880 * Because of bugs in the core protocol, we use this only to set
2881 * attributes. See smb_proc_settime() below for timestamp handling.
2882 *
2883 * Bugs Noted:
2884 * (1) If mtime is non-zero, both Win 3.1 and Win 95 fail
2885 * with an undocumented error (ERRDOS code 50). Setting
2886 * mtime to 0 allows the attributes to be set.
2887 * (2) The extra parameters following the name string aren't
2888 * in the CIFS docs, but seem to be necessary for operation.
2889 */
/*
 * Set DOS attribute bits with the core SMBsetatr request.  The mtime
 * field is deliberately sent as zero (see Bugs Noted above); timestamps
 * are handled separately by smb_proc_settime().
 *
 * Returns 0 on success or a negative errno.
 */
static int
smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
		      __u16 attr)
{
	char *p;
	int result;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
		goto out;

	p = smb_setup_header(req, SMBsetatr, 8, 0);
	WSET(req->rq_header, smb_vwv0, attr);
	DSET(req->rq_header, smb_vwv1, 0); /* mtime */
	WSET(req->rq_header, smb_vwv3, 0); /* reserved values */
	WSET(req->rq_header, smb_vwv4, 0);
	WSET(req->rq_header, smb_vwv5, 0);
	WSET(req->rq_header, smb_vwv6, 0);
	WSET(req->rq_header, smb_vwv7, 0);
	result = smb_simple_encode_path(req, &p, dentry, NULL);
	if (result < 0)
		goto out_free;
	/* need room for the trailing 2-byte buffer-format marker below */
	if (p + 2 > (char *)req->rq_buffer + req->rq_bufsize) {
		result = -ENAMETOOLONG;
		goto out_free;
	}
	/* undocumented but required: empty ASCII buffer (0x04, "") */
	*p++ = 4;
	*p++ = 0;
	smb_setup_bcc(req, p);

	result = smb_request_ok(req, SMBsetatr, 0, 0);
	if (result < 0)
		goto out_free;
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
2931
2932/*
2933 * Because of bugs in the trans2 setattr messages, we must set
2934 * attributes and timestamps separately. The core SMBsetatr
2935 * message seems to be the only reliable way to set attributes.
2936 */
2937int
2938smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr)
2939{
2940 struct smb_sb_info *server = server_from_dentry(dir);
2941 int result;
2942
2943 VERBOSE("setting %s/%s, open=%d\n",
2944 DENTRY_PATH(dir), smb_is_open(dir->d_inode));
2945 result = smb_proc_setattr_core(server, dir, fattr->attr);
2946 return result;
2947}
2948
2949/*
2950 * Sets the timestamps for an file open with write permissions.
2951 */
2952static int
2953smb_proc_setattr_ext(struct smb_sb_info *server,
2954 struct inode *inode, struct smb_fattr *fattr)
2955{
2956 __u16 date, time;
2957 int result;
2958 struct smb_request *req;
2959
2960 result = -ENOMEM;
2961 if (! (req = smb_alloc_request(server, 0)))
2962 goto out;
2963
2964 smb_setup_header(req, SMBsetattrE, 7, 0);
2965 WSET(req->rq_header, smb_vwv0, SMB_I(inode)->fileid);
2966 /* We don't change the creation time */
2967 WSET(req->rq_header, smb_vwv1, 0);
2968 WSET(req->rq_header, smb_vwv2, 0);
2969 date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
2970 WSET(req->rq_header, smb_vwv3, date);
2971 WSET(req->rq_header, smb_vwv4, time);
2972 date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
2973 WSET(req->rq_header, smb_vwv5, date);
2974 WSET(req->rq_header, smb_vwv6, time);
2975#ifdef SMBFS_DEBUG_TIMESTAMP
2976 printk(KERN_DEBUG "smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n",
2977 date, time, fattr->f_mtime);
2978#endif
2979
2980 req->rq_flags |= SMB_REQ_NORETRY;
2981 result = smb_request_ok(req, SMBsetattrE, 0, 0);
2982 if (result < 0)
2983 goto out_free;
2984 result = 0;
2985out_free:
2986 smb_rput(req);
2987out:
2988 return result;
2989}
2990
2991/*
2992 * Bugs Noted:
2993 * (1) The TRANSACT2_SETPATHINFO message under Win NT 4.0 doesn't
2994 * set the file's attribute flags.
2995 */
2996static int
2997smb_proc_setattr_trans2(struct smb_sb_info *server,
2998 struct dentry *dir, struct smb_fattr *fattr)
2999{
3000 __u16 date, time;
3001 char *p, *param;
3002 int result;
3003 char data[26];
3004 struct smb_request *req;
3005
3006 result = -ENOMEM;
3007 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3008 goto out;
3009 param = req->rq_buffer;
3010
3011 WSET(param, 0, 1); /* Info level SMB_INFO_STANDARD */
3012 DSET(param, 2, 0);
3013 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
3014 if (result < 0)
3015 goto out_free;
3016 p = param + 6 + result;
3017
3018 WSET(data, 0, 0); /* creation time */
3019 WSET(data, 2, 0);
3020 date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
3021 WSET(data, 4, date);
3022 WSET(data, 6, time);
3023 date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
3024 WSET(data, 8, date);
3025 WSET(data, 10, time);
3026#ifdef SMBFS_DEBUG_TIMESTAMP
3027 printk(KERN_DEBUG "setattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
3028 DENTRY_PATH(dir), date, time, fattr->f_mtime);
3029#endif
3030 DSET(data, 12, 0); /* size */
3031 DSET(data, 16, 0); /* blksize */
3032 WSET(data, 20, 0); /* attr */
3033 DSET(data, 22, 0); /* ULONG EA size */
3034
3035 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3036 req->rq_ldata = 26;
3037 req->rq_data = data;
3038 req->rq_lparm = p - param;
3039 req->rq_parm = param;
3040 req->rq_flags = 0;
3041 result = smb_add_request(req);
3042 if (result < 0)
3043 goto out_free;
3044 result = 0;
3045 if (req->rq_rcls != 0)
3046 result = smb_errno(req);
3047
3048out_free:
3049 smb_rput(req);
3050out:
3051 return result;
3052}
3053
3054/*
3055 * ATTR_MODE 0x001
3056 * ATTR_UID 0x002
3057 * ATTR_GID 0x004
3058 * ATTR_SIZE 0x008
3059 * ATTR_ATIME 0x010
3060 * ATTR_MTIME 0x020
3061 * ATTR_CTIME 0x040
3062 * ATTR_ATIME_SET 0x080
3063 * ATTR_MTIME_SET 0x100
3064 * ATTR_FORCE 0x200
3065 * ATTR_ATTR_FLAG 0x400
3066 *
3067 * major/minor should only be set by mknod.
3068 */
3069int
3070smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
3071 unsigned int major, unsigned int minor)
3072{
3073 struct smb_sb_info *server = server_from_dentry(d);
3074 u64 nttime;
3075 char *p, *param;
3076 int result;
3077 char data[100];
3078 struct smb_request *req;
3079
3080 result = -ENOMEM;
3081 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3082 goto out;
3083 param = req->rq_buffer;
3084
3085 DEBUG1("valid flags = 0x%04x\n", attr->ia_valid);
3086
3087 WSET(param, 0, SMB_SET_FILE_UNIX_BASIC);
3088 DSET(param, 2, 0);
3089 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
3090 if (result < 0)
3091 goto out_free;
3092 p = param + 6 + result;
3093
3094 /* 0 L file size in bytes */
3095 /* 8 L file size on disk in bytes (block count) */
3096 /* 40 L uid */
3097 /* 48 L gid */
3098 /* 56 W file type enum */
3099 /* 60 L devmajor */
3100 /* 68 L devminor */
3101 /* 76 L unique ID (inode) */
3102 /* 84 L permissions */
3103 /* 92 L link count */
3104 LSET(data, 0, SMB_SIZE_NO_CHANGE);
3105 LSET(data, 8, SMB_SIZE_NO_CHANGE);
3106 LSET(data, 16, SMB_TIME_NO_CHANGE);
3107 LSET(data, 24, SMB_TIME_NO_CHANGE);
3108 LSET(data, 32, SMB_TIME_NO_CHANGE);
3109 LSET(data, 40, SMB_UID_NO_CHANGE);
3110 LSET(data, 48, SMB_GID_NO_CHANGE);
3111 DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
3112 LSET(data, 60, major);
3113 LSET(data, 68, minor);
3114 LSET(data, 76, 0);
3115 LSET(data, 84, SMB_MODE_NO_CHANGE);
3116 LSET(data, 92, 0);
3117
3118 if (attr->ia_valid & ATTR_SIZE) {
3119 LSET(data, 0, attr->ia_size);
3120 LSET(data, 8, 0); /* can't set anyway */
3121 }
3122
3123 /*
3124 * FIXME: check the conversion function it the correct one
3125 *
3126 * we can't set ctime but we might as well pass this to the server
3127 * and let it ignore it.
3128 */
3129 if (attr->ia_valid & ATTR_CTIME) {
3130 nttime = smb_unixutc2ntutc(attr->ia_ctime);
3131 LSET(data, 16, nttime);
3132 }
3133 if (attr->ia_valid & ATTR_ATIME) {
3134 nttime = smb_unixutc2ntutc(attr->ia_atime);
3135 LSET(data, 24, nttime);
3136 }
3137 if (attr->ia_valid & ATTR_MTIME) {
3138 nttime = smb_unixutc2ntutc(attr->ia_mtime);
3139 LSET(data, 32, nttime);
3140 }
3141
3142 if (attr->ia_valid & ATTR_UID) {
3143 LSET(data, 40, attr->ia_uid);
3144 }
3145 if (attr->ia_valid & ATTR_GID) {
3146 LSET(data, 48, attr->ia_gid);
3147 }
3148
3149 if (attr->ia_valid & ATTR_MODE) {
3150 LSET(data, 84, attr->ia_mode);
3151 }
3152
3153 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3154 req->rq_ldata = 100;
3155 req->rq_data = data;
3156 req->rq_lparm = p - param;
3157 req->rq_parm = param;
3158 req->rq_flags = 0;
3159 result = smb_add_request(req);
3160
3161out_free:
3162 smb_rput(req);
3163out:
3164 return result;
3165}
3166
3167
3168/*
3169 * Set the modify and access timestamps for a file.
3170 *
3171 * Incredibly enough, in all of SMB there is no message to allow
3172 * setting both attributes and timestamps at once.
3173 *
3174 * Bugs Noted:
3175 * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message
3176 * with info level 1 (INFO_STANDARD).
3177 * (2) Win 95 seems not to support setting directory timestamps.
3178 * (3) Under the core protocol apparently the only way to set the
3179 * timestamp is to open and close the file.
3180 */
/*
 * Set the modify and access timestamps for a file.
 *
 * Incredibly enough, in all of SMB there is no message to allow
 * setting both attributes and timestamps at once.
 *
 * Bugs Noted:
 * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message
 * with info level 1 (INFO_STANDARD).
 * (2) Win 95 seems not to support setting directory timestamps.
 * (3) Under the core protocol apparently the only way to set the
 * timestamp is to open and close the file.
 */
int
smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
{
	struct smb_sb_info *server = server_from_dentry(dentry);
	struct inode *inode = dentry->d_inode;
	int result;

	VERBOSE("setting %s/%s, open=%d\n",
		DENTRY_PATH(dentry), smb_is_open(inode));

	/* setting the time on a Win95 server fails (tridge) */
	if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2 &&
	    !(server->mnt->flags & SMB_MOUNT_WIN95)) {
		/* writable open fid: use the fid-based call, else by path */
		if (smb_is_open(inode) && SMB_I(inode)->access != SMB_O_RDONLY)
			result = smb_proc_setattr_ext(server, inode, fattr);
		else
			result = smb_proc_setattr_trans2(server, dentry, fattr);
	} else {
		/*
		 * Fail silently on directories ... timestamp can't be set?
		 */
		result = 0;
		if (S_ISREG(inode->i_mode)) {
			/*
			 * Set the mtime by opening and closing the file.
			 * Note that the file is opened read-only, but this
			 * still allows us to set the date (tridge)
			 */
			result = -EACCES;
			if (!smb_is_open(inode))
				smb_proc_open(server, dentry, SMB_O_RDONLY);
			if (smb_is_open(inode)) {
				/* close_inode sends i_mtime to the server */
				inode->i_mtime = fattr->f_mtime;
				result = smb_proc_close_inode(server, inode);
			}
		}
	}

	return result;
}
3221
/*
 * Fill in filesystem statistics from the core SMBdskattr reply:
 * total units, sectors/unit, bytes/sector, free units (all 16-bit).
 *
 * NOTE(review): all four reply fields are 16-bit words, so the
 * products here presumably saturate/wrap for large disks — inherent
 * to the core-protocol call, not fixable locally; verify callers
 * tolerate this.
 *
 * Returns 0 on success or a negative errno.
 */
int
smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
{
	struct smb_sb_info *server = SMB_SB(dentry->d_sb);
	int result;
	char *p;
	long unit;
	struct smb_request *req;

	result = -ENOMEM;
	if (! (req = smb_alloc_request(server, 0)))
		goto out;

	smb_setup_header(req, SMBdskattr, 0, 0);
	if ((result = smb_request_ok(req, SMBdskattr, 5, 0)) < 0)
		goto out_free;
	p = SMB_VWV(req->rq_header);
	/* bytes per allocation unit, rescaled to SMB_ST_BLKSIZE blocks */
	unit = (WVAL(p, 2) * WVAL(p, 4)) >> SMB_ST_BLKSHIFT;
	attr->f_blocks = WVAL(p, 0) * unit;
	attr->f_bsize  = SMB_ST_BLKSIZE;
	attr->f_bavail = attr->f_bfree = WVAL(p, 6) * unit;
	result = 0;

out_free:
	smb_rput(req);
out:
	return result;
}
3250
3251int
3252smb_proc_read_link(struct smb_sb_info *server, struct dentry *d,
3253 char *buffer, int len)
3254{
3255 char *p, *param;
3256 int result;
3257 struct smb_request *req;
3258
3259 DEBUG1("readlink of %s/%s\n", DENTRY_PATH(d));
3260
3261 result = -ENOMEM;
3262 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3263 goto out;
3264 param = req->rq_buffer;
3265
3266 WSET(param, 0, SMB_QUERY_FILE_UNIX_LINK);
3267 DSET(param, 2, 0);
3268 result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
3269 if (result < 0)
3270 goto out_free;
3271 p = param + 6 + result;
3272
3273 req->rq_trans2_command = TRANSACT2_QPATHINFO;
3274 req->rq_ldata = 0;
3275 req->rq_data = NULL;
3276 req->rq_lparm = p - param;
3277 req->rq_parm = param;
3278 req->rq_flags = 0;
3279 result = smb_add_request(req);
3280 if (result < 0)
3281 goto out_free;
3282 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3283 &param[6], result, req->rq_rcls, req->rq_err);
3284
3285 /* copy data up to the \0 or buffer length */
3286 result = len;
3287 if (req->rq_ldata < len)
3288 result = req->rq_ldata;
3289 strncpy(buffer, req->rq_data, result);
3290
3291out_free:
3292 smb_rput(req);
3293out:
3294 return result;
3295}
3296
3297
3298/*
3299 * Create a symlink object called dentry which points to oldpath.
3300 * Samba does not permit dangling links but returns a suitable error message.
3301 */
3302int
3303smb_proc_symlink(struct smb_sb_info *server, struct dentry *d,
3304 const char *oldpath)
3305{
3306 char *p, *param;
3307 int result;
3308 struct smb_request *req;
3309
3310 result = -ENOMEM;
3311 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3312 goto out;
3313 param = req->rq_buffer;
3314
3315 WSET(param, 0, SMB_SET_FILE_UNIX_LINK);
3316 DSET(param, 2, 0);
3317 result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1, d, NULL);
3318 if (result < 0)
3319 goto out_free;
3320 p = param + 6 + result;
3321
3322 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3323 req->rq_ldata = strlen(oldpath) + 1;
3324 req->rq_data = (char *) oldpath;
3325 req->rq_lparm = p - param;
3326 req->rq_parm = param;
3327 req->rq_flags = 0;
3328 result = smb_add_request(req);
3329 if (result < 0)
3330 goto out_free;
3331
3332 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3333 &param[6], result, req->rq_rcls, req->rq_err);
3334 result = 0;
3335
3336out_free:
3337 smb_rput(req);
3338out:
3339 return result;
3340}
3341
3342/*
3343 * Create a hard link object called new_dentry which points to dentry.
3344 */
3345int
3346smb_proc_link(struct smb_sb_info *server, struct dentry *dentry,
3347 struct dentry *new_dentry)
3348{
3349 char *p, *param;
3350 int result;
3351 struct smb_request *req;
3352
3353 result = -ENOMEM;
3354 if (! (req = smb_alloc_request(server, PAGE_SIZE)))
3355 goto out;
3356 param = req->rq_buffer;
3357
3358 WSET(param, 0, SMB_SET_FILE_UNIX_HLINK);
3359 DSET(param, 2, 0);
3360 result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1,
3361 new_dentry, NULL);
3362 if (result < 0)
3363 goto out_free;
3364 p = param + 6 + result;
3365
3366 /* Grr, pointless separation of parameters and data ... */
3367 req->rq_data = p;
3368 req->rq_ldata = smb_encode_path(server, p, SMB_MAXPATHLEN+1,
3369 dentry, NULL);
3370
3371 req->rq_trans2_command = TRANSACT2_SETPATHINFO;
3372 req->rq_lparm = p - param;
3373 req->rq_parm = param;
3374 req->rq_flags = 0;
3375 result = smb_add_request(req);
3376 if (result < 0)
3377 goto out_free;
3378
3379 DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
3380 &param[6], result, req->rq_rcls, req->rq_err);
3381 result = 0;
3382
3383out_free:
3384 smb_rput(req);
3385out:
3386 return result;
3387}
3388
3389static int
3390smb_proc_query_cifsunix(struct smb_sb_info *server)
3391{
3392 int result;
3393 int major, minor;
3394 u64 caps;
3395 char param[2];
3396 struct smb_request *req;
3397
3398 result = -ENOMEM;
3399 if (! (req = smb_alloc_request(server, 100)))
3400 goto out;
3401
3402 WSET(param, 0, SMB_QUERY_CIFS_UNIX_INFO);
3403
3404 req->rq_trans2_command = TRANSACT2_QFSINFO;
3405 req->rq_ldata = 0;
3406 req->rq_data = NULL;
3407 req->rq_lparm = 2;
3408 req->rq_parm = param;
3409 req->rq_flags = 0;
3410 result = smb_add_request(req);
3411 if (result < 0)
3412 goto out_free;
3413
3414 if (req->rq_ldata < 12) {
3415 PARANOIA("Not enough data\n");
3416 goto out_free;
3417 }
3418 major = WVAL(req->rq_data, 0);
3419 minor = WVAL(req->rq_data, 2);
3420
3421 DEBUG1("Server implements CIFS Extensions for UNIX systems v%d.%d\n",
3422 major, minor);
3423 /* FIXME: verify that we are ok with this major/minor? */
3424
3425 caps = LVAL(req->rq_data, 4);
3426 DEBUG1("Server capabilities 0x%016llx\n", caps);
3427
3428out_free:
3429 smb_rput(req);
3430out:
3431 return result;
3432}
3433
3434
/*
 * Copy the first SMB_OPS_NUM_STATIC function pointers from src to dst.
 * NOTE(review): assumes struct smb_ops begins with exactly that many
 * pointer-sized members laid out contiguously — verify against the
 * struct definition if it changes.
 */
static void
install_ops(struct smb_ops *dst, struct smb_ops *src)
{
	memcpy(dst, src, sizeof(void *) * SMB_OPS_NUM_STATIC);
}
3440
/* Op tables selected per dialect once the protocol level is known. */

/* < LANMAN2: core protocol only */
static struct smb_ops smb_ops_core =
{
	.read		= smb_proc_read,
	.write		= smb_proc_write,
	.readdir	= smb_proc_readdir_short,
	.getattr	= smb_proc_getattr_core,
	.truncate	= smb_proc_trunc32,
};

/* LANMAN2, OS/2, others? */
static struct smb_ops smb_ops_os2 =
{
	.read		= smb_proc_read,
	.write		= smb_proc_write,
	.readdir	= smb_proc_readdir_long,
	.getattr	= smb_proc_getattr_trans2_std,
	.truncate	= smb_proc_trunc32,
};

/* Win95, and possibly some NetApp versions too */
static struct smb_ops smb_ops_win95 =
{
	.read		= smb_proc_read,	/* does not support 12word readX */
	.write		= smb_proc_write,
	.readdir	= smb_proc_readdir_long,
	.getattr	= smb_proc_getattr_95,	/* works around open-file size bug */
	.truncate	= smb_proc_trunc95,
};

/* Samba, NT4 and NT5 */
static struct smb_ops smb_ops_winNT =
{
	.read		= smb_proc_readX,
	.write		= smb_proc_writeX,
	.readdir	= smb_proc_readdir_long,
	.getattr	= smb_proc_getattr_trans2_all,
	.truncate	= smb_proc_trunc64,
};

/* Samba w/ unix extensions. Others? */
static struct smb_ops smb_ops_unix =
{
	.read		= smb_proc_readX,
	.write		= smb_proc_writeX,
	.readdir	= smb_proc_readdir_long,
	.getattr	= smb_proc_getattr_unix,
	/* FIXME: core/ext/time setattr needs to be cleaned up! */
	/* .setattr	= smb_proc_setattr_unix, */
	.truncate	= smb_proc_trunc64,
};

/* Place holder until real ops are in place; blocks until connected */
static struct smb_ops smb_ops_null =
{
	.readdir	= smb_proc_readdir_null,
	.getattr	= smb_proc_getattr_null,
};
3499
3500void smb_install_null_ops(struct smb_ops *ops)
3501{
3502 install_ops(ops, &smb_ops_null);
3503}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
deleted file mode 100644
index 05939a6f43e6..000000000000
--- a/fs/smbfs/proto.h
+++ /dev/null
@@ -1,87 +0,0 @@
1/*
2 * Autogenerated with cproto on: Sat Sep 13 17:18:51 CEST 2003
3 */
4
5struct smb_request;
6struct sock;
7struct statfs;
8
9/* proc.c */
10extern int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp);
11extern __u32 smb_len(__u8 *p);
12extern int smb_get_rsize(struct smb_sb_info *server);
13extern int smb_get_wsize(struct smb_sb_info *server);
14extern int smb_errno(struct smb_request *req);
15extern int smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt);
16extern __u8 *smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc);
17extern int smb_open(struct dentry *dentry, int wish);
18extern int smb_close(struct inode *ino);
19extern int smb_close_fileid(struct dentry *dentry, __u16 fileid);
20extern int smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid);
21extern int smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry);
22extern int smb_proc_mkdir(struct dentry *dentry);
23extern int smb_proc_rmdir(struct dentry *dentry);
24extern int smb_proc_unlink(struct dentry *dentry);
25extern int smb_proc_flush(struct smb_sb_info *server, __u16 fileid);
26extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
27 struct super_block *sb);
28extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
29extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
30extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
31extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
32extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
33extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
34extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
35extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
36extern void smb_install_null_ops(struct smb_ops *ops);
37/* dir.c */
38extern const struct file_operations smb_dir_operations;
39extern const struct inode_operations smb_dir_inode_operations;
40extern const struct inode_operations smb_dir_inode_operations_unix;
41extern void smb_new_dentry(struct dentry *dentry);
42extern void smb_renew_times(struct dentry *dentry);
43/* cache.c */
44extern void smb_invalid_dir_cache(struct inode *dir);
45extern void smb_invalidate_dircache_entries(struct dentry *parent);
46extern struct dentry *smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos);
47extern int smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct smb_cache_control *ctrl, struct qstr *qname, struct smb_fattr *entry);
48/* sock.c */
49extern void smb_data_ready(struct sock *sk, int len);
50extern int smb_valid_socket(struct inode *inode);
51extern void smb_close_socket(struct smb_sb_info *server);
52extern int smb_recv_available(struct smb_sb_info *server);
53extern int smb_receive_header(struct smb_sb_info *server);
54extern int smb_receive_drop(struct smb_sb_info *server);
55extern int smb_receive(struct smb_sb_info *server, struct smb_request *req);
56extern int smb_send_request(struct smb_request *req);
57/* inode.c */
58extern struct inode *smb_iget(struct super_block *sb, struct smb_fattr *fattr);
59extern void smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr);
60extern void smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr);
61extern void smb_invalidate_inodes(struct smb_sb_info *server);
62extern int smb_revalidate_inode(struct dentry *dentry);
63extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
64extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
65/* file.c */
66extern const struct address_space_operations smb_file_aops;
67extern const struct file_operations smb_file_operations;
68extern const struct inode_operations smb_file_inode_operations;
69/* ioctl.c */
70extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
71/* smbiod.c */
72extern void smbiod_wake_up(void);
73extern int smbiod_register_server(struct smb_sb_info *server);
74extern void smbiod_unregister_server(struct smb_sb_info *server);
75extern void smbiod_flush(struct smb_sb_info *server);
76extern int smbiod_retry(struct smb_sb_info *server);
77/* request.c */
78extern int smb_init_request_cache(void);
79extern void smb_destroy_request_cache(void);
80extern struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize);
81extern void smb_rput(struct smb_request *req);
82extern int smb_add_request(struct smb_request *req);
83extern int smb_request_send_server(struct smb_sb_info *server);
84extern int smb_request_recv(struct smb_sb_info *server);
85/* symlink.c */
86extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
87extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
deleted file mode 100644
index 45f45933e862..000000000000
--- a/fs/smbfs/request.c
+++ /dev/null
@@ -1,818 +0,0 @@
1/*
2 * request.c
3 *
4 * Copyright (C) 2001 by Urban Widmark
5 *
6 * Please add a note about your changes to smbfs in the ChangeLog file.
7 */
8
9#include <linux/kernel.h>
10#include <linux/types.h>
11#include <linux/fs.h>
12#include <linux/slab.h>
13#include <linux/net.h>
14#include <linux/sched.h>
15
16#include <linux/smb_fs.h>
17#include <linux/smbno.h>
18#include <linux/smb_mount.h>
19
20#include "smb_debug.h"
21#include "request.h"
22#include "proto.h"
23
24/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */
25#define SMB_SLAB_DEBUG 0
26
27/* cache for request structures */
28static struct kmem_cache *req_cachep;
29
30static int smb_request_send_req(struct smb_request *req);
31
32/*
33 /proc/slabinfo:
34 name, active, num, objsize, active_slabs, num_slaps, #pages
35*/
36
37
38int smb_init_request_cache(void)
39{
40 req_cachep = kmem_cache_create("smb_request",
41 sizeof(struct smb_request), 0,
42 SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN,
43 NULL);
44 if (req_cachep == NULL)
45 return -ENOMEM;
46
47 return 0;
48}
49
/* Tear down the slab cache created by smb_init_request_cache(). */
void smb_destroy_request_cache(void)
{
	kmem_cache_destroy(req_cachep);
}
54
55/*
56 * Allocate and initialise a request structure
57 */
58static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
59 int bufsize)
60{
61 struct smb_request *req;
62 unsigned char *buf = NULL;
63
64 req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
65 VERBOSE("allocating request: %p\n", req);
66 if (!req)
67 goto out;
68
69 if (bufsize > 0) {
70 buf = kmalloc(bufsize, GFP_NOFS);
71 if (!buf) {
72 kmem_cache_free(req_cachep, req);
73 return NULL;
74 }
75 }
76
77 req->rq_buffer = buf;
78 req->rq_bufsize = bufsize;
79 req->rq_server = server;
80 init_waitqueue_head(&req->rq_wait);
81 INIT_LIST_HEAD(&req->rq_queue);
82 atomic_set(&req->rq_count, 1);
83
84out:
85 return req;
86}
87
88struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
89{
90 struct smb_request *req = NULL;
91
92 for (;;) {
93 atomic_inc(&server->nr_requests);
94 if (atomic_read(&server->nr_requests) <= MAX_REQUEST_HARD) {
95 req = smb_do_alloc_request(server, bufsize);
96 if (req != NULL)
97 break;
98 }
99
100#if 0
101 /*
102 * Try to free up at least one request in order to stay
103 * below the hard limit
104 */
105 if (nfs_try_to_free_pages(server))
106 continue;
107
108 if (fatal_signal_pending(current))
109 return ERR_PTR(-ERESTARTSYS);
110 current->policy = SCHED_YIELD;
111 schedule();
112#else
113 /* FIXME: we want something like nfs does above, but that
114 requires changes to all callers and can wait. */
115 break;
116#endif
117 }
118 return req;
119}
120
121static void smb_free_request(struct smb_request *req)
122{
123 atomic_dec(&req->rq_server->nr_requests);
124 if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
125 kfree(req->rq_buffer);
126 kfree(req->rq_trans2buffer);
127 kmem_cache_free(req_cachep, req);
128}
129
/*
 * What prevents a rget to race with a rput? The count must never drop to zero
 * while it is in use. Only rput if it is ok that it is free'd.
 */
/* Take an additional reference on 'req'. */
static void smb_rget(struct smb_request *req)
{
	atomic_inc(&req->rq_count);
}
/*
 * Drop a reference on 'req'; when the last reference goes away the
 * request is unlinked from whatever queue it is on and freed.
 */
void smb_rput(struct smb_request *req)
{
	if (atomic_dec_and_test(&req->rq_count)) {
		list_del_init(&req->rq_queue);
		smb_free_request(req);
	}
}
145
146/* setup to receive the data part of the SMB */
147static int smb_setup_bcc(struct smb_request *req)
148{
149 int result = 0;
150 req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
151
152 if (req->rq_rlen > req->rq_bufsize) {
153 PARANOIA("Packet too large %d > %d\n",
154 req->rq_rlen, req->rq_bufsize);
155 return -ENOBUFS;
156 }
157
158 req->rq_iov[0].iov_base = req->rq_buffer;
159 req->rq_iov[0].iov_len = req->rq_rlen;
160 req->rq_iovlen = 1;
161
162 return result;
163}
164
165/*
166 * Prepare a "normal" request structure.
167 */
168static int smb_setup_request(struct smb_request *req)
169{
170 int len = smb_len(req->rq_header) + 4;
171 req->rq_slen = len;
172
173 /* if we expect a data part in the reply we set the iov's to read it */
174 if (req->rq_resp_bcc)
175 req->rq_setup_read = smb_setup_bcc;
176
177 /* This tries to support re-using the same request */
178 req->rq_bytes_sent = 0;
179 req->rq_rcls = 0;
180 req->rq_err = 0;
181 req->rq_errno = 0;
182 req->rq_fragment = 0;
183 kfree(req->rq_trans2buffer);
184 req->rq_trans2buffer = NULL;
185
186 return 0;
187}
188
189/*
190 * Prepare a transaction2 request structure
191 */
192static int smb_setup_trans2request(struct smb_request *req)
193{
194 struct smb_sb_info *server = req->rq_server;
195 int mparam, mdata;
196 static unsigned char padding[4];
197
198 /* I know the following is very ugly, but I want to build the
199 smb packet as efficiently as possible. */
200
201 const int smb_parameters = 15;
202 const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
203 const int oparam = ALIGN(header + 3, sizeof(u32));
204 const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32));
205 const int bcc = (req->rq_data ? odata + req->rq_ldata :
206 oparam + req->rq_lparm) - header;
207
208 if ((bcc + oparam) > server->opt.max_xmit)
209 return -ENOMEM;
210 smb_setup_header(req, SMBtrans2, smb_parameters, bcc);
211
212 /*
213 * max parameters + max data + max setup == bufsize to make NT4 happy
214 * and not abort the transfer or split into multiple responses. It also
215 * makes smbfs happy as handling packets larger than the buffer size
216 * is extra work.
217 *
218 * OS/2 is probably going to hate me for this ...
219 */
220 mparam = SMB_TRANS2_MAX_PARAM;
221 mdata = req->rq_bufsize - mparam;
222
223 mdata = server->opt.max_xmit - mparam - 100;
224 if (mdata < 1024) {
225 mdata = 1024;
226 mparam = 20;
227 }
228
229#if 0
230 /* NT/win2k has ~4k max_xmit, so with this we request more than it wants
231 to return as one SMB. Useful for testing the fragmented trans2
232 handling. */
233 mdata = 8192;
234#endif
235
236 WSET(req->rq_header, smb_tpscnt, req->rq_lparm);
237 WSET(req->rq_header, smb_tdscnt, req->rq_ldata);
238 WSET(req->rq_header, smb_mprcnt, mparam);
239 WSET(req->rq_header, smb_mdrcnt, mdata);
240 WSET(req->rq_header, smb_msrcnt, 0); /* max setup always 0 ? */
241 WSET(req->rq_header, smb_flags, 0);
242 DSET(req->rq_header, smb_timeout, 0);
243 WSET(req->rq_header, smb_pscnt, req->rq_lparm);
244 WSET(req->rq_header, smb_psoff, oparam - 4);
245 WSET(req->rq_header, smb_dscnt, req->rq_ldata);
246 WSET(req->rq_header, smb_dsoff, req->rq_data ? odata - 4 : 0);
247 *(req->rq_header + smb_suwcnt) = 0x01; /* setup count */
248 *(req->rq_header + smb_suwcnt + 1) = 0x00; /* reserved */
249 WSET(req->rq_header, smb_setup0, req->rq_trans2_command);
250
251 req->rq_iovlen = 2;
252 req->rq_iov[0].iov_base = (void *) req->rq_header;
253 req->rq_iov[0].iov_len = oparam;
254 req->rq_iov[1].iov_base = (req->rq_parm==NULL) ? padding : req->rq_parm;
255 req->rq_iov[1].iov_len = req->rq_lparm;
256 req->rq_slen = oparam + req->rq_lparm;
257
258 if (req->rq_data) {
259 req->rq_iovlen += 2;
260 req->rq_iov[2].iov_base = padding;
261 req->rq_iov[2].iov_len = odata - oparam - req->rq_lparm;
262 req->rq_iov[3].iov_base = req->rq_data;
263 req->rq_iov[3].iov_len = req->rq_ldata;
264 req->rq_slen = odata + req->rq_ldata;
265 }
266
267 /* always a data part for trans2 replies */
268 req->rq_setup_read = smb_setup_bcc;
269
270 return 0;
271}
272
/*
 * Add a request and tell smbiod to process it
 *
 * Queues 'req' for transmission (sending inline when the connection is
 * idle), then blocks up to 30 seconds waiting for the reply.  Returns
 * req->rq_errno: 0 on success, a negative errno on failure/timeout, or
 * -ERESTARTSYS if interrupted by a signal.
 */
int smb_add_request(struct smb_request *req)
{
	long timeleft;
	struct smb_sb_info *server = req->rq_server;
	int result = 0;

	smb_setup_request(req);
	if (req->rq_trans2_command) {
		/* trans2 replies are reassembled into rq_buffer, so it
		   must exist */
		if (req->rq_buffer == NULL) {
			PARANOIA("trans2 attempted without response buffer!\n");
			return -EIO;
		}
		result = smb_setup_trans2request(req);
	}
	if (result < 0)
		return result;

#ifdef SMB_DEBUG_PACKET_SIZE
	add_xmit_stats(req);
#endif

	/* add 'req' to the queue of requests */
	if (smb_lock_server_interruptible(server))
		return -EINTR;

	/*
	 * Try to send the request as the process. If that fails we queue the
	 * request and let smbiod send it later.
	 */

	/* FIXME: each server has a number on the maximum number of parallel
	   requests. 10, 50 or so. We should not allow more requests to be
	   active. */
	if (server->mid > 0xf000)
		server->mid = 0;
	req->rq_mid = server->mid++;
	WSET(req->rq_header, smb_mid, req->rq_mid);

	result = 0;
	if (server->state == CONN_VALID) {
		/* only send inline if nothing else is already queued,
		   to preserve transmit ordering */
		if (list_empty(&server->xmitq))
			result = smb_request_send_req(req);
		if (result < 0) {
			/* Connection lost? */
			server->conn_error = result;
			server->state = CONN_INVALID;
		}
	}
	/* result == 1 means fully sent and already moved to recvq */
	if (result != 1)
		list_add_tail(&req->rq_queue, &server->xmitq);
	/* the queue holds its own reference; dropped by smbiod or below */
	smb_rget(req);

	if (server->state != CONN_VALID)
		smbiod_retry(server);

	smb_unlock_server(server);

	smbiod_wake_up();

	timeleft = wait_event_interruptible_timeout(req->rq_wait,
		    req->rq_flags & SMB_REQ_RECEIVED, 30*HZ);
	if (!timeleft || signal_pending(current)) {
		/*
		 * On timeout or on interrupt we want to try and remove the
		 * request from the recvq/xmitq.
		 * First check if the request is still part of a queue. (May
		 * have been removed by some error condition)
		 */
		smb_lock_server(server);
		if (!list_empty(&req->rq_queue)) {
			list_del_init(&req->rq_queue);
			smb_rput(req);
		}
		smb_unlock_server(server);
	}

	if (!timeleft) {
		PARANOIA("request [%p, mid=%d] timed out!\n",
			 req, req->rq_mid);
		VERBOSE("smb_com:  %02x\n", *(req->rq_header + smb_com));
		VERBOSE("smb_rcls: %02x\n", *(req->rq_header + smb_rcls));
		VERBOSE("smb_flg:  %02x\n", *(req->rq_header + smb_flg));
		VERBOSE("smb_tid:  %04x\n", WVAL(req->rq_header, smb_tid));
		VERBOSE("smb_pid:  %04x\n", WVAL(req->rq_header, smb_pid));
		VERBOSE("smb_uid:  %04x\n", WVAL(req->rq_header, smb_uid));
		VERBOSE("smb_mid:  %04x\n", WVAL(req->rq_header, smb_mid));
		VERBOSE("smb_wct:  %02x\n", *(req->rq_header + smb_wct));

		req->rq_rcls = ERRSRV;
		req->rq_err = ERRtimeout;

		/* Just in case it was "stuck" */
		smbiod_wake_up();
	}
	VERBOSE("woke up, rcls=%d\n", req->rq_rcls);

	if (req->rq_rcls != 0)
		req->rq_errno = smb_errno(req);
	if (signal_pending(current))
		req->rq_errno = -ERESTARTSYS;
	return req->rq_errno;
}
378
379/*
380 * Send a request and place it on the recvq if successfully sent.
381 * Must be called with the server lock held.
382 */
383static int smb_request_send_req(struct smb_request *req)
384{
385 struct smb_sb_info *server = req->rq_server;
386 int result;
387
388 if (req->rq_bytes_sent == 0) {
389 WSET(req->rq_header, smb_tid, server->opt.tid);
390 WSET(req->rq_header, smb_pid, 1);
391 WSET(req->rq_header, smb_uid, server->opt.server_uid);
392 }
393
394 result = smb_send_request(req);
395 if (result < 0 && result != -EAGAIN)
396 goto out;
397
398 result = 0;
399 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
400 goto out;
401
402 list_move_tail(&req->rq_queue, &server->recvq);
403 result = 1;
404out:
405 return result;
406}
407
408/*
409 * Sends one request for this server. (smbiod)
410 * Must be called with the server lock held.
411 * Returns: <0 on error
412 * 0 if no request could be completely sent
413 * 1 if all data for one request was sent
414 */
415int smb_request_send_server(struct smb_sb_info *server)
416{
417 struct list_head *head;
418 struct smb_request *req;
419 int result;
420
421 if (server->state != CONN_VALID)
422 return 0;
423
424 /* dequeue first request, if any */
425 req = NULL;
426 head = server->xmitq.next;
427 if (head != &server->xmitq) {
428 req = list_entry(head, struct smb_request, rq_queue);
429 }
430 if (!req)
431 return 0;
432
433 result = smb_request_send_req(req);
434 if (result < 0) {
435 server->conn_error = result;
436 list_move(&req->rq_queue, &server->xmitq);
437 result = -EIO;
438 goto out;
439 }
440
441out:
442 return result;
443}
444
445/*
446 * Try to find a request matching this "mid". Typically the first entry will
447 * be the matching one.
448 */
449static struct smb_request *find_request(struct smb_sb_info *server, int mid)
450{
451 struct list_head *tmp;
452 struct smb_request *req = NULL;
453
454 list_for_each(tmp, &server->recvq) {
455 req = list_entry(tmp, struct smb_request, rq_queue);
456 if (req->rq_mid == mid) {
457 break;
458 }
459 req = NULL;
460 }
461
462 if (!req) {
463 VERBOSE("received reply with mid %d but no request!\n",
464 WVAL(server->header, smb_mid));
465 server->rstate = SMB_RECV_DROP;
466 }
467
468 return req;
469}
470
/*
 * Called when we have read the smb header and believe this is a response.
 * Copies the header into the matched request and arranges for the
 * parameter words + byte count to be read next.
 */
static int smb_init_request(struct smb_sb_info *server, struct smb_request *req)
{
	int hdrlen, wct;

	memcpy(req->rq_header, server->header, SMB_HEADER_LEN);

	wct = *(req->rq_header + smb_wct);
	if (wct > 20) {
		/* rq_header only has room for 20 parameter words */
		PARANOIA("wct too large, %d > 20\n", wct);
		server->rstate = SMB_RECV_DROP;
		return 0;
	}

	req->rq_resp_wct = wct;
	hdrlen = SMB_HEADER_LEN + wct*2 + 2;
	VERBOSE("header length: %d   smb_wct: %2d\n", hdrlen, wct);

	/* continue reading the rest of the header area into rq_header */
	req->rq_bytes_recvd = SMB_HEADER_LEN;
	req->rq_rlen = hdrlen;
	req->rq_iov[0].iov_base = req->rq_header;
	req->rq_iov[0].iov_len = hdrlen;
	req->rq_iovlen = 1;
	server->rstate = SMB_RECV_PARAM;

#ifdef SMB_DEBUG_PACKET_SIZE
	add_recv_stats(smb_len(server->header));
#endif
	return 0;
}
503
/*
 * Reads the SMB parameters
 *
 * Once the parameter section is complete, either invokes the request's
 * callback or its setup_read hook (which decides how much data follows)
 * and advances the receive state machine to SMB_RECV_DATA or
 * SMB_RECV_END accordingly.
 */
static int smb_recv_param(struct smb_sb_info *server, struct smb_request *req)
{
	int result;

	result = smb_receive(server, req);
	if (result < 0)
		return result;
	if (req->rq_bytes_recvd < req->rq_rlen)
		return 0;	/* parameter section not complete yet */

	VERBOSE("result: %d   smb_bcc: %04x\n", result,
		WVAL(req->rq_header, SMB_HEADER_LEN +
		     (*(req->rq_header + smb_wct) * 2)));

	result = 0;
	req->rq_iov[0].iov_base = NULL;
	req->rq_rlen = 0;
	/* the hooks may set rq_rlen/rq_iov for the data part that follows */
	if (req->rq_callback)
		req->rq_callback(req);
	else if (req->rq_setup_read)
		result = req->rq_setup_read(req);
	if (result < 0) {
		server->rstate = SMB_RECV_DROP;
		return result;
	}

	server->rstate = req->rq_rlen > 0 ? SMB_RECV_DATA : SMB_RECV_END;

	req->rq_bytes_recvd = 0;	// recvd out of the iov

	VERBOSE("rlen: %d\n", req->rq_rlen);
	if (req->rq_rlen < 0) {
		PARANOIA("Parameters read beyond end of packet!\n");
		server->rstate = SMB_RECV_END;
		return -EIO;
	}
	return 0;
}
545
546/*
547 * Reads the SMB data
548 */
549static int smb_recv_data(struct smb_sb_info *server, struct smb_request *req)
550{
551 int result;
552
553 result = smb_receive(server, req);
554 if (result < 0)
555 goto out;
556 if (req->rq_bytes_recvd < req->rq_rlen)
557 goto out;
558 server->rstate = SMB_RECV_END;
559out:
560 VERBOSE("result: %d\n", result);
561 return result;
562}
563
/*
 * Receive a transaction2 response
 * Return: 0 if the response has been fully read
 *         1 if there are further "fragments" to read
 *        <0 if there is an error
 *
 * Single-fragment responses are served directly out of rq_buffer;
 * multi-fragment ones are reassembled into rq_trans2buffer, laid out
 * as all parameters followed by all data.
 */
static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
{
	unsigned char *inbuf;
	unsigned int parm_disp, parm_offset, parm_count, parm_tot;
	unsigned int data_disp, data_offset, data_count, data_tot;
	int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;

	VERBOSE("handling trans2\n");

	/* pull the fragment bookkeeping out of the reply header */
	inbuf = req->rq_header;
	data_tot    = WVAL(inbuf, smb_tdrcnt);
	parm_tot    = WVAL(inbuf, smb_tprcnt);
	parm_disp   = WVAL(inbuf, smb_prdisp);
	parm_offset = WVAL(inbuf, smb_proff);
	parm_count  = WVAL(inbuf, smb_prcnt);
	data_disp   = WVAL(inbuf, smb_drdisp);
	data_offset = WVAL(inbuf, smb_droff);
	data_count  = WVAL(inbuf, smb_drcnt);

	/* Modify offset for the split header/buffer we use */
	if (data_count || data_offset) {
		if (unlikely(data_offset < hdrlen))
			goto out_bad_data;
		else
			data_offset -= hdrlen;
	}
	if (parm_count || parm_offset) {
		if (unlikely(parm_offset < hdrlen))
			goto out_bad_parm;
		else
			parm_offset -= hdrlen;
	}

	if (parm_count == parm_tot && data_count == data_tot) {
		/*
		 * This packet has all the trans2 data.
		 *
		 * We setup the request so that this will be the common
		 * case. It may be a server error to not return a
		 * response that fits.
		 */
		VERBOSE("single trans2 response  "
			"dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
			data_count, parm_count,
			data_offset, parm_offset);
		req->rq_ldata = data_count;
		req->rq_lparm = parm_count;
		req->rq_data = req->rq_buffer + data_offset;
		req->rq_parm = req->rq_buffer + parm_offset;
		/* bounds-check the offsets against what was actually read */
		if (unlikely(parm_offset + parm_count > req->rq_rlen))
			goto out_bad_parm;
		if (unlikely(data_offset + data_count > req->rq_rlen))
			goto out_bad_data;
		return 0;
	}

	VERBOSE("multi trans2 response  "
		"frag=%d, dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
		req->rq_fragment,
		data_count, parm_count,
		data_offset, parm_offset);

	if (!req->rq_fragment) {
		int buf_len;

		/* We got the first trans2 fragment */
		req->rq_fragment = 1;
		req->rq_total_data = data_tot;
		req->rq_total_parm = parm_tot;
		req->rq_ldata = 0;
		req->rq_lparm = 0;

		buf_len = data_tot + parm_tot;
		if (buf_len > SMB_MAX_PACKET_SIZE)
			goto out_too_long;

		req->rq_trans2bufsize = buf_len;
		req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
		if (!req->rq_trans2buffer)
			goto out_no_mem;

		/* reassembly layout: parameters first, then data */
		req->rq_parm = req->rq_trans2buffer;
		req->rq_data = req->rq_trans2buffer + parm_tot;
	} else if (unlikely(req->rq_total_data < data_tot ||
			    req->rq_total_parm < parm_tot))
		goto out_data_grew;

	/* validate against both the announced totals and the bytes read,
	   so a malicious/buggy server cannot make us write out of bounds */
	if (unlikely(parm_disp + parm_count > req->rq_total_parm ||
		     parm_offset + parm_count > req->rq_rlen))
		goto out_bad_parm;
	if (unlikely(data_disp + data_count > req->rq_total_data ||
		     data_offset + data_count > req->rq_rlen))
		goto out_bad_data;

	inbuf = req->rq_buffer;
	memcpy(req->rq_parm + parm_disp, inbuf + parm_offset, parm_count);
	memcpy(req->rq_data + data_disp, inbuf + data_offset, data_count);

	req->rq_ldata += data_count;
	req->rq_lparm += parm_count;

	/*
	 * Check whether we've received all of the data. Note that
	 * we use the packet totals -- total lengths might shrink!
	 */
	if (req->rq_ldata >= data_tot && req->rq_lparm >= parm_tot) {
		req->rq_ldata = data_tot;
		req->rq_lparm = parm_tot;
		return 0;
	}
	return 1;

out_too_long:
	printk(KERN_ERR "smb_trans2: data/param too long, data=%u, parm=%u\n",
		data_tot, parm_tot);
	goto out_EIO;
out_no_mem:
	printk(KERN_ERR "smb_trans2: couldn't allocate data area of %d bytes\n",
	       req->rq_trans2bufsize);
	req->rq_errno = -ENOMEM;
	goto out;
out_data_grew:
	printk(KERN_ERR "smb_trans2: data/params grew!\n");
	goto out_EIO;
out_bad_parm:
	printk(KERN_ERR "smb_trans2: invalid parms, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
	       parm_disp, parm_count, parm_tot, parm_offset);
	goto out_EIO;
out_bad_data:
	printk(KERN_ERR "smb_trans2: invalid data, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
	       data_disp, data_count, data_tot, data_offset);
out_EIO:
	req->rq_errno = -EIO;
out:
	return req->rq_errno;
}
706
/*
 * State machine for receiving responses. We handle the fact that we can't
 * read the full response in one try by having states telling us how much we
 * have read.
 *
 * Must be called with the server lock held (only called from smbiod).
 *
 * Return: <0 on error
 */
int smb_request_recv(struct smb_sb_info *server)
{
	struct smb_request *req = NULL;
	int result = 0;

	if (smb_recv_available(server) <= 0)
		return 0;

	VERBOSE("state: %d\n", server->rstate);
	switch (server->rstate) {
	case SMB_RECV_DROP:
		result = smb_receive_drop(server);
		if (result < 0)
			break;
		if (server->rstate == SMB_RECV_DROP)
			break;	/* more bytes still to drop */
		server->rstate = SMB_RECV_START;
		/* fallthrough */
	case SMB_RECV_START:
		server->smb_read = 0;
		server->rstate = SMB_RECV_HEADER;
		/* fallthrough */
	case SMB_RECV_HEADER:
		result = smb_receive_header(server);
		if (result < 0)
			break;
		if (server->rstate == SMB_RECV_HEADER)
			break;	/* header not complete yet */
		/* a packet without the REPLY flag is server-initiated */
		if (! (*(server->header + smb_flg) & SMB_FLAGS_REPLY) ) {
			server->rstate = SMB_RECV_REQUEST;
			break;
		}
		if (server->rstate != SMB_RECV_HCOMPLETE)
			break;
		/* fallthrough */
	case SMB_RECV_HCOMPLETE:
		req = find_request(server, WVAL(server->header, smb_mid));
		if (!req)
			break;
		smb_init_request(server, req);
		req->rq_rcls = *(req->rq_header + smb_rcls);
		req->rq_err = WVAL(req->rq_header, smb_err);
		if (server->rstate != SMB_RECV_PARAM)
			break;
		/* fallthrough */
	case SMB_RECV_PARAM:
		/* re-lookup: we may enter here directly on a later call */
		if (!req)
			req = find_request(server,WVAL(server->header,smb_mid));
		if (!req)
			break;
		result = smb_recv_param(server, req);
		if (result < 0)
			break;
		if (server->rstate != SMB_RECV_DATA)
			break;
		/* fallthrough */
	case SMB_RECV_DATA:
		if (!req)
			req = find_request(server,WVAL(server->header,smb_mid));
		if (!req)
			break;
		result = smb_recv_data(server, req);
		if (result < 0)
			break;
		break;

		/* We should never be called with any of these states */
	case SMB_RECV_END:
	case SMB_RECV_REQUEST:
		BUG();
	}

	if (result < 0) {
		/* We saw an error */
		return result;
	}

	if (server->rstate != SMB_RECV_END)
		return 0;

	result = 0;
	/* reassemble trans2 fragments; result 1 means more fragments follow */
	if (req->rq_trans2_command && req->rq_rcls == SUCCESS)
		result = smb_recv_trans2(server, req);

	/*
	 * Response completely read. Drop any extra bytes sent by the server.
	 * (Yes, servers sometimes add extra bytes to responses)
	 */
	VERBOSE("smb_len: %d   smb_read: %d\n",
		server->smb_len, server->smb_read);
	if (server->smb_read < server->smb_len)
		smb_receive_drop(server);

	server->rstate = SMB_RECV_START;

	if (!result) {
		/* fully received: detach from recvq and wake the waiter */
		list_del_init(&req->rq_queue);
		req->rq_flags |= SMB_REQ_RECEIVED;
		smb_rput(req);
		wake_up_interruptible(&req->rq_wait);
	}
	return 0;
}
diff --git a/fs/smbfs/request.h b/fs/smbfs/request.h
deleted file mode 100644
index efb21451e7c9..000000000000
--- a/fs/smbfs/request.h
+++ /dev/null
@@ -1,70 +0,0 @@
1#include <linux/list.h>
2#include <linux/types.h>
3#include <linux/uio.h>
4#include <linux/wait.h>
5
struct smb_request {
	struct list_head rq_queue;	/* recvq or xmitq for the server */

	atomic_t rq_count;		/* refcount; freed when it drops to 0 */

	wait_queue_head_t rq_wait;	/* woken when SMB_REQ_RECEIVED is set */
	int rq_flags;			/* SMB_REQ_* bits below */
	int rq_mid;	/* multiplex ID, set by request.c */

	struct smb_sb_info *rq_server;

	/* header + word count + parameter words + byte count */
	unsigned char rq_header[SMB_HEADER_LEN + 20*2 + 2];

	int rq_bufsize;			/* size of rq_buffer in bytes */
	unsigned char *rq_buffer;	/* response data area, may be NULL */

	/* FIXME: this is not good enough for merging IO requests. */
	unsigned char *rq_page;
	int rq_rsize;

	int rq_resp_wct;		/* word count of the received reply */
	int rq_resp_bcc;		/* nonzero: expect a data (BCC) part */

	int rq_rlen;			/* bytes expected in the current read */
	int rq_bytes_recvd;		/* bytes received into rq_iov so far */

	int rq_slen;			/* total bytes to send */
	int rq_bytes_sent;		/* bytes sent so far */

	int rq_iovlen;
	struct kvec rq_iov[4];

	/* hook that sets up the iov for the reply's data part */
	int (*rq_setup_read) (struct smb_request *);
	void (*rq_callback) (struct smb_request *);

	/* ------ trans2 stuff ------ */

	u16 rq_trans2_command;	/* 0 if not a trans2 request */
	unsigned int rq_ldata;		/* data bytes received so far */
	unsigned char *rq_data;
	unsigned int rq_lparm;		/* parameter bytes received so far */
	unsigned char *rq_parm;

	int rq_fragment;		/* nonzero once a fragment was seen */
	u32 rq_total_data;		/* totals announced by first fragment */
	u32 rq_total_parm;
	int rq_trans2bufsize;
	unsigned char *rq_trans2buffer;	/* reassembly: parms, then data */

	/* ------ response ------ */

	unsigned short rq_rcls;		/* SMB error class from the reply */
	unsigned short rq_err;		/* SMB error code from the reply */
	int rq_errno;			/* Linux errno returned to the caller */
};
62
63#define SMB_REQ_STATIC 0x0001 /* rq_buffer is static */
64#define SMB_REQ_NORETRY 0x0002 /* request is invalid after retry */
65
66#define SMB_REQ_TRANSMITTED 0x4000 /* all data has been sent */
67#define SMB_REQ_RECEIVED 0x8000 /* reply received, smbiod is done */
68
69#define xSMB_REQ_NOREPLY 0x0004 /* we don't want the reply (if any) */
70#define xSMB_REQ_NORECEIVER 0x0008 /* caller doesn't wait for response */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
deleted file mode 100644
index fc4b1a5dd755..000000000000
--- a/fs/smbfs/smb_debug.h
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * Defines some debug macros for smbfs.
3 */
4
/* This makes a dentry parent/child name pair. Useful for debugging printk's */
#define DENTRY_PATH(dentry) \
	(dentry)->d_parent->d_name.name,(dentry)->d_name.name

/*
 * safety checks that should never happen ???
 * these are normally enabled.
 * (Compiles to a no-op unless SMBFS_PARANOIA is defined.)
 */
#ifdef SMBFS_PARANOIA
# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
#else
# define PARANOIA(f, a...) do { ; } while(0)
#endif

/* lots of debug messages */
/* Enabled by defining SMBFS_DEBUG_VERBOSE; otherwise a no-op. */
#ifdef SMBFS_DEBUG_VERBOSE
# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
#else
# define VERBOSE(f, a...) do { ; } while(0)
#endif

/*
 * "normal" debug messages, but not with a normal DEBUG define ... way
 * too common name.
 */
#ifdef SMBFS_DEBUG
#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
#else
#define DEBUG1(f, a...) do { ; } while(0)
#endif
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
deleted file mode 100644
index 0e39a924f10a..000000000000
--- a/fs/smbfs/smbiod.c
+++ /dev/null
@@ -1,344 +0,0 @@
1/*
2 * smbiod.c
3 *
4 * Copyright (C) 2000, Charles Loep / Corel Corp.
5 * Copyright (C) 2001, Urban Widmark
6 */
7
8
9#include <linux/sched.h>
10#include <linux/kernel.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/stat.h>
14#include <linux/errno.h>
15#include <linux/init.h>
16#include <linux/file.h>
17#include <linux/dcache.h>
18#include <linux/module.h>
19#include <linux/net.h>
20#include <linux/kthread.h>
21#include <net/ip.h>
22
23#include <linux/smb_fs.h>
24#include <linux/smbno.h>
25#include <linux/smb_mount.h>
26
27#include <asm/system.h>
28#include <asm/uaccess.h>
29
30#include "smb_debug.h"
31#include "request.h"
32#include "proto.h"
33
34enum smbiod_state {
35 SMBIOD_DEAD,
36 SMBIOD_STARTING,
37 SMBIOD_RUNNING,
38};
39
40static enum smbiod_state smbiod_state = SMBIOD_DEAD;
41static struct task_struct *smbiod_thread;
42static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
43static LIST_HEAD(smb_servers);
44static DEFINE_SPINLOCK(servers_lock);
45
46#define SMBIOD_DATA_READY (1<<0)
47static unsigned long smbiod_flags;
48
49static int smbiod(void *);
50static int smbiod_start(void);
51
/*
 * called when there's work for us to do
 */
void smbiod_wake_up(void)
{
	/* NOTE(review): smbiod_state is read without servers_lock here;
	   presumably a stale read only costs a harmless missed or spurious
	   wakeup -- confirm. */
	if (smbiod_state == SMBIOD_DEAD)
		return;
	set_bit(SMBIOD_DATA_READY, &smbiod_flags);
	wake_up_interruptible(&smbiod_wait);
}
62
/*
 * start smbiod if none is running
 *
 * Called with servers_lock held; the lock is dropped around
 * kthread_run() (which may sleep) and re-taken before updating the
 * daemon state.  Returns 0 on success or if smbiod already runs.
 */
static int smbiod_start(void)
{
	struct task_struct *tsk;
	int err = 0;

	if (smbiod_state != SMBIOD_DEAD)
		return 0;
	smbiod_state = SMBIOD_STARTING;
	/* hold a module reference for the thread; released in smbiod()
	   via module_put_and_exit(), or below on spawn failure */
	__module_get(THIS_MODULE);
	spin_unlock(&servers_lock);
	tsk = kthread_run(smbiod, NULL, "smbiod");
	if (IS_ERR(tsk)) {
		err = PTR_ERR(tsk);
		module_put(THIS_MODULE);
	}

	spin_lock(&servers_lock);
	if (err < 0) {
		smbiod_state = SMBIOD_DEAD;
		smbiod_thread = NULL;
	} else {
		smbiod_state = SMBIOD_RUNNING;
		smbiod_thread = tsk;
	}
	return err;
}
92
93/*
94 * register a server & start smbiod if necessary
95 */
96int smbiod_register_server(struct smb_sb_info *server)
97{
98 int ret;
99 spin_lock(&servers_lock);
100 list_add(&server->entry, &smb_servers);
101 VERBOSE("%p\n", server);
102 ret = smbiod_start();
103 spin_unlock(&servers_lock);
104 return ret;
105}
106
/*
 * Unregister a server
 * Must be called with the server lock held.
 */
void smbiod_unregister_server(struct smb_sb_info *server)
{
	spin_lock(&servers_lock);
	list_del_init(&server->entry);
	VERBOSE("%p\n", server);
	spin_unlock(&servers_lock);

	/* kick smbiod so it notices the list change, then fail any
	   requests still queued for this server */
	smbiod_wake_up();
	smbiod_flush(server);
}
121
122void smbiod_flush(struct smb_sb_info *server)
123{
124 struct list_head *tmp, *n;
125 struct smb_request *req;
126
127 list_for_each_safe(tmp, n, &server->xmitq) {
128 req = list_entry(tmp, struct smb_request, rq_queue);
129 req->rq_errno = -EIO;
130 list_del_init(&req->rq_queue);
131 smb_rput(req);
132 wake_up_interruptible(&req->rq_wait);
133 }
134 list_for_each_safe(tmp, n, &server->recvq) {
135 req = list_entry(tmp, struct smb_request, rq_queue);
136 req->rq_errno = -EIO;
137 list_del_init(&req->rq_queue);
138 smb_rput(req);
139 wake_up_interruptible(&req->rq_wait);
140 }
141}
142
/*
 * Wake up smbmount and make it reconnect to the server.
 * This must be called with the server locked.
 *
 * FIXME: add smbconnect version to this
 */
int smbiod_retry(struct smb_sb_info *server)
{
	struct list_head *head;
	struct smb_request *req;
	struct pid *pid = get_pid(server->conn_pid);
	int result = 0;

	VERBOSE("state: %d\n", server->state);
	if (server->state == CONN_VALID || server->state == CONN_RETRYING)
		goto out;

	smb_invalidate_inodes(server);

	/*
	 * Some requests are meaningless after a retry, so we abort them.
	 * One example are all requests using 'fileid' since the files are
	 * closed on retry.
	 */
	head = server->xmitq.next;
	while (head != &server->xmitq) {
		req = list_entry(head, struct smb_request, rq_queue);
		/* advance before a possible list_del_init below */
		head = head->next;

		req->rq_bytes_sent = 0;
		if (req->rq_flags & SMB_REQ_NORETRY) {
			VERBOSE("aborting request %p on xmitq\n", req);
			req->rq_errno = -EIO;
			list_del_init(&req->rq_queue);
			smb_rput(req);
			wake_up_interruptible(&req->rq_wait);
		}
	}

	/*
	 * FIXME: test the code for retrying request we already sent
	 */
	head = server->recvq.next;
	while (head != &server->recvq) {
		req = list_entry(head, struct smb_request, rq_queue);
		head = head->next;
#if 0
		if (req->rq_flags & SMB_REQ_RETRY) {
			/* must move the request to the xmitq */
			VERBOSE("retrying request %p on recvq\n", req);
			list_move(&req->rq_queue, &server->xmitq);
			continue;
		}
#endif

		VERBOSE("aborting request %p on recvq\n", req);
		/* req->rq_rcls = ???; */ /* FIXME: set smb error code too? */
		req->rq_errno = -EIO;
		list_del_init(&req->rq_queue);
		smb_rput(req);
		wake_up_interruptible(&req->rq_wait);
	}

	smb_close_socket(server);

	if (!pid) {
		/* FIXME: this is fatal, umount? */
		printk(KERN_ERR "smb_retry: no connection process\n");
		server->state = CONN_RETRIED;
		goto out;
	}

	/*
	 * Change state so that only one retry per server will be started.
	 */
	server->state = CONN_RETRYING;

	/*
	 * Note: use the "priv" flag, as a user process may need to reconnect.
	 */
	result = kill_pid(pid, SIGUSR1, 1);
	if (result) {
		/* FIXME: this is most likely fatal, umount? */
		printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
		goto out;
	}
	VERBOSE("signalled pid %d\n", pid_nr(pid));

	/* FIXME: The retried requests should perhaps get a "time boost". */

out:
	put_pid(pid);
	return result;
}
237
/*
 * Currently handles lockingX packets.
 */
static void smbiod_handle_request(struct smb_sb_info *server)
{
	/* server-initiated requests (e.g. oplock breaks) are not
	   implemented; just drop the packet */
	PARANOIA("smbiod got a request ... and we don't implement oplocks!\n");
	server->rstate = SMB_RECV_DROP;
}
246
/*
 * Do some IO for one server.
 * Receives up to a bounded amount of data, then transmits queued
 * requests; on any hard error the connection is invalidated and a
 * reconnect is kicked off.  Sets SMBIOD_DATA_READY again whenever
 * work remains so smbiod comes back.
 */
static void smbiod_doio(struct smb_sb_info *server)
{
	int result;
	int maxwork = 7;	/* bound the receive work per wakeup */

	if (server->state != CONN_VALID)
		goto out;

	do {
		result = smb_request_recv(server);
		if (result < 0) {
			server->state = CONN_INVALID;
			smbiod_retry(server);
			goto out;	/* reconnecting is slow */
		} else if (server->rstate == SMB_RECV_REQUEST)
			smbiod_handle_request(server);
	} while (result > 0 && maxwork-- > 0);

	/*
	 * If there is more to read then we want to be sure to wake up again.
	 */
	if (server->state != CONN_VALID)
		goto out;
	if (smb_recv_available(server) > 0)
		set_bit(SMBIOD_DATA_READY, &smbiod_flags);

	do {
		result = smb_request_send_server(server);
		if (result < 0) {
			server->state = CONN_INVALID;
			smbiod_retry(server);
			goto out;	/* reconnecting is slow */
		}
	} while (result > 0);

	/*
	 * If the last request was not sent out we want to wake up again.
	 */
	if (!list_empty(&server->xmitq))
		set_bit(SMBIOD_DATA_READY, &smbiod_flags);

out:
	return;
}
294
/*
 * smbiod kernel thread
 *
 * Sleeps until SMBIOD_DATA_READY is set, then walks the registered
 * server list and does IO for every connected server.  Exits (marking
 * itself SMBIOD_DEAD) on a signal or when no servers remain.
 */
static int smbiod(void *unused)
{
	VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);

	for (;;) {
		struct smb_sb_info *server;
		struct list_head *pos, *n;

		/* FIXME: Use poll? */
		wait_event_interruptible(smbiod_wait,
			 test_bit(SMBIOD_DATA_READY, &smbiod_flags));
		if (signal_pending(current)) {
			spin_lock(&servers_lock);
			smbiod_state = SMBIOD_DEAD;
			spin_unlock(&servers_lock);
			break;
		}

		clear_bit(SMBIOD_DATA_READY, &smbiod_flags);

		spin_lock(&servers_lock);
		if (list_empty(&smb_servers)) {
			smbiod_state = SMBIOD_DEAD;
			spin_unlock(&servers_lock);
			break;
		}

		list_for_each_safe(pos, n, &smb_servers) {
			server = list_entry(pos, struct smb_sb_info, entry);
			VERBOSE("checking server %p\n", server);

			if (server->state == CONN_VALID) {
				/* drop the list lock while sleeping in
				   per-server IO, then re-take it */
				spin_unlock(&servers_lock);

				smb_lock_server(server);
				smbiod_doio(server);
				smb_unlock_server(server);

				spin_lock(&servers_lock);
			}
		}
		spin_unlock(&servers_lock);
	}

	VERBOSE("SMB Kernel thread exiting (%d) ...\n", current->pid);
	/* releases the module reference taken in smbiod_start() */
	module_put_and_exit(0);
}
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
deleted file mode 100644
index e37fe4deebd0..000000000000
--- a/fs/smbfs/sock.c
+++ /dev/null
@@ -1,386 +0,0 @@
1/*
2 * sock.c
3 *
4 * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
5 * Copyright (C) 1997 by Volker Lendecke
6 *
7 * Please add a note about your changes to smbfs in the ChangeLog file.
8 */
9
10#include <linux/fs.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/socket.h>
14#include <linux/fcntl.h>
15#include <linux/file.h>
16#include <linux/in.h>
17#include <linux/net.h>
18#include <linux/mm.h>
19#include <linux/netdevice.h>
20#include <linux/workqueue.h>
21#include <net/scm.h>
22#include <net/tcp_states.h>
23#include <net/ip.h>
24
25#include <linux/smb_fs.h>
26#include <linux/smb.h>
27#include <linux/smbno.h>
28
29#include <asm/uaccess.h>
30#include <asm/ioctls.h>
31
32#include "smb_debug.h"
33#include "proto.h"
34#include "request.h"
35
36
37static int
38_recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags)
39{
40 struct kvec iov = {ubuf, size};
41 struct msghdr msg = {.msg_flags = flags};
42 msg.msg_flags |= MSG_DONTWAIT | MSG_NOSIGNAL;
43 return kernel_recvmsg(socket, &msg, &iov, 1, size, msg.msg_flags);
44}
45
46/*
47 * Return the server this socket belongs to
48 */
49static struct smb_sb_info *
50server_from_socket(struct socket *socket)
51{
52 return socket->sk->sk_user_data;
53}
54
55/*
56 * Called when there is data on the socket.
57 */
58void
59smb_data_ready(struct sock *sk, int len)
60{
61 struct smb_sb_info *server = server_from_socket(sk->sk_socket);
62 void (*data_ready)(struct sock *, int) = server->data_ready;
63
64 data_ready(sk, len);
65 VERBOSE("(%p, %d)\n", sk, len);
66 smbiod_wake_up();
67}
68
69int
70smb_valid_socket(struct inode * inode)
71{
72 return (inode && S_ISSOCK(inode->i_mode) &&
73 SOCKET_I(inode)->type == SOCK_STREAM);
74}
75
76static struct socket *
77server_sock(struct smb_sb_info *server)
78{
79 struct file *file;
80
81 if (server && (file = server->sock_file))
82 {
83#ifdef SMBFS_PARANOIA
84 if (!smb_valid_socket(file->f_path.dentry->d_inode))
85 PARANOIA("bad socket!\n");
86#endif
87 return SOCKET_I(file->f_path.dentry->d_inode);
88 }
89 return NULL;
90}
91
92void
93smb_close_socket(struct smb_sb_info *server)
94{
95 struct file * file = server->sock_file;
96
97 if (file) {
98 struct socket *sock = server_sock(server);
99
100 VERBOSE("closing socket %p\n", sock);
101 sock->sk->sk_data_ready = server->data_ready;
102 server->sock_file = NULL;
103 fput(file);
104 }
105}
106
107static int
108smb_get_length(struct socket *socket, unsigned char *header)
109{
110 int result;
111
112 result = _recvfrom(socket, header, 4, MSG_PEEK);
113 if (result == -EAGAIN)
114 return -ENODATA;
115 if (result < 0) {
116 PARANOIA("recv error = %d\n", -result);
117 return result;
118 }
119 if (result < 4)
120 return -ENODATA;
121
122 switch (header[0]) {
123 case 0x00:
124 case 0x82:
125 break;
126
127 case 0x85:
128 DEBUG1("Got SESSION KEEP ALIVE\n");
129 _recvfrom(socket, header, 4, 0); /* read away */
130 return -ENODATA;
131
132 default:
133 PARANOIA("Invalid NBT packet, code=%x\n", header[0]);
134 return -EIO;
135 }
136
137 /* The length in the RFC NB header is the raw data length */
138 return smb_len(header);
139}
140
141int
142smb_recv_available(struct smb_sb_info *server)
143{
144 mm_segment_t oldfs;
145 int avail, err;
146 struct socket *sock = server_sock(server);
147
148 oldfs = get_fs();
149 set_fs(get_ds());
150 err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail);
151 set_fs(oldfs);
152 return (err >= 0) ? avail : err;
153}
154
155/*
156 * Adjust the kvec to move on 'n' bytes (from nfs/sunrpc)
157 */
158static int
159smb_move_iov(struct kvec **data, size_t *num, struct kvec *vec, unsigned amount)
160{
161 struct kvec *iv = *data;
162 int i;
163 int len;
164
165 /*
166 * Eat any sent kvecs
167 */
168 while (iv->iov_len <= amount) {
169 amount -= iv->iov_len;
170 iv++;
171 (*num)--;
172 }
173
174 /*
175 * And chew down the partial one
176 */
177 vec[0].iov_len = iv->iov_len-amount;
178 vec[0].iov_base =((unsigned char *)iv->iov_base)+amount;
179 iv++;
180
181 len = vec[0].iov_len;
182
183 /*
184 * And copy any others
185 */
186 for (i = 1; i < *num; i++) {
187 vec[i] = *iv++;
188 len += vec[i].iov_len;
189 }
190
191 *data = vec;
192 return len;
193}
194
195/*
196 * smb_receive_header
197 * Only called by the smbiod thread.
198 */
199int
200smb_receive_header(struct smb_sb_info *server)
201{
202 struct socket *sock;
203 int result = 0;
204 unsigned char peek_buf[4];
205
206 result = -EIO;
207 sock = server_sock(server);
208 if (!sock)
209 goto out;
210 if (sock->sk->sk_state != TCP_ESTABLISHED)
211 goto out;
212
213 if (!server->smb_read) {
214 result = smb_get_length(sock, peek_buf);
215 if (result < 0) {
216 if (result == -ENODATA)
217 result = 0;
218 goto out;
219 }
220 server->smb_len = result + 4;
221
222 if (server->smb_len < SMB_HEADER_LEN) {
223 PARANOIA("short packet: %d\n", result);
224 server->rstate = SMB_RECV_DROP;
225 result = -EIO;
226 goto out;
227 }
228 if (server->smb_len > SMB_MAX_PACKET_SIZE) {
229 PARANOIA("long packet: %d\n", result);
230 server->rstate = SMB_RECV_DROP;
231 result = -EIO;
232 goto out;
233 }
234 }
235
236 result = _recvfrom(sock, server->header + server->smb_read,
237 SMB_HEADER_LEN - server->smb_read, 0);
238 VERBOSE("_recvfrom: %d\n", result);
239 if (result < 0) {
240 VERBOSE("receive error: %d\n", result);
241 goto out;
242 }
243 server->smb_read += result;
244
245 if (server->smb_read == SMB_HEADER_LEN)
246 server->rstate = SMB_RECV_HCOMPLETE;
247out:
248 return result;
249}
250
251static char drop_buffer[PAGE_SIZE];
252
253/*
254 * smb_receive_drop - read and throw away the data
255 * Only called by the smbiod thread.
256 *
257 * FIXME: we are in the kernel, could we just tell the socket that we want
258 * to drop stuff from the buffer?
259 */
260int
261smb_receive_drop(struct smb_sb_info *server)
262{
263 struct socket *sock;
264 unsigned int flags;
265 struct kvec iov;
266 struct msghdr msg;
267 int rlen = smb_len(server->header) - server->smb_read + 4;
268 int result = -EIO;
269
270 if (rlen > PAGE_SIZE)
271 rlen = PAGE_SIZE;
272
273 sock = server_sock(server);
274 if (!sock)
275 goto out;
276 if (sock->sk->sk_state != TCP_ESTABLISHED)
277 goto out;
278
279 flags = MSG_DONTWAIT | MSG_NOSIGNAL;
280 iov.iov_base = drop_buffer;
281 iov.iov_len = PAGE_SIZE;
282 msg.msg_flags = flags;
283 msg.msg_name = NULL;
284 msg.msg_namelen = 0;
285 msg.msg_control = NULL;
286
287 result = kernel_recvmsg(sock, &msg, &iov, 1, rlen, flags);
288
289 VERBOSE("read: %d\n", result);
290 if (result < 0) {
291 VERBOSE("receive error: %d\n", result);
292 goto out;
293 }
294 server->smb_read += result;
295
296 if (server->smb_read >= server->smb_len)
297 server->rstate = SMB_RECV_END;
298
299out:
300 return result;
301}
302
303/*
304 * smb_receive
305 * Only called by the smbiod thread.
306 */
307int
308smb_receive(struct smb_sb_info *server, struct smb_request *req)
309{
310 struct socket *sock;
311 unsigned int flags;
312 struct kvec iov[4];
313 struct kvec *p = req->rq_iov;
314 size_t num = req->rq_iovlen;
315 struct msghdr msg;
316 int rlen;
317 int result = -EIO;
318
319 sock = server_sock(server);
320 if (!sock)
321 goto out;
322 if (sock->sk->sk_state != TCP_ESTABLISHED)
323 goto out;
324
325 flags = MSG_DONTWAIT | MSG_NOSIGNAL;
326 msg.msg_flags = flags;
327 msg.msg_name = NULL;
328 msg.msg_namelen = 0;
329 msg.msg_control = NULL;
330
331 /* Dont repeat bytes and count available bufferspace */
332 rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd),
333 (req->rq_rlen - req->rq_bytes_recvd));
334
335 result = kernel_recvmsg(sock, &msg, p, num, rlen, flags);
336
337 VERBOSE("read: %d\n", result);
338 if (result < 0) {
339 VERBOSE("receive error: %d\n", result);
340 goto out;
341 }
342 req->rq_bytes_recvd += result;
343 server->smb_read += result;
344
345out:
346 return result;
347}
348
349/*
350 * Try to send a SMB request. This may return after sending only parts of the
351 * request. SMB_REQ_TRANSMITTED will be set if a request was fully sent.
352 *
353 * Parts of this was taken from xprt_sendmsg from net/sunrpc/xprt.c
354 */
355int
356smb_send_request(struct smb_request *req)
357{
358 struct smb_sb_info *server = req->rq_server;
359 struct socket *sock;
360 struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
361 int slen = req->rq_slen - req->rq_bytes_sent;
362 int result = -EIO;
363 struct kvec iov[4];
364 struct kvec *p = req->rq_iov;
365 size_t num = req->rq_iovlen;
366
367 sock = server_sock(server);
368 if (!sock)
369 goto out;
370 if (sock->sk->sk_state != TCP_ESTABLISHED)
371 goto out;
372
373 /* Dont repeat bytes */
374 if (req->rq_bytes_sent)
375 smb_move_iov(&p, &num, iov, req->rq_bytes_sent);
376
377 result = kernel_sendmsg(sock, &msg, p, num, slen);
378
379 if (result >= 0) {
380 req->rq_bytes_sent += result;
381 if (req->rq_bytes_sent >= req->rq_slen)
382 req->rq_flags |= SMB_REQ_TRANSMITTED;
383 }
384out:
385 return result;
386}
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
deleted file mode 100644
index 00b2909bd469..000000000000
--- a/fs/smbfs/symlink.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * symlink.c
3 *
4 * Copyright (C) 2002 by John Newbigin
5 *
6 * Please add a note about your changes to smbfs in the ChangeLog file.
7 */
8
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/fcntl.h>
12#include <linux/stat.h>
13#include <linux/mm.h>
14#include <linux/slab.h>
15#include <linux/pagemap.h>
16#include <linux/net.h>
17#include <linux/namei.h>
18
19#include <asm/uaccess.h>
20#include <asm/system.h>
21
22#include <linux/smbno.h>
23#include <linux/smb_fs.h>
24
25#include "smb_debug.h"
26#include "proto.h"
27
28int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
29{
30 DEBUG1("create symlink %s -> %s/%s\n", oldname, DENTRY_PATH(dentry));
31
32 return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
33}
34
35static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
36{
37 char *link = __getname();
38 DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
39
40 if (!link) {
41 link = ERR_PTR(-ENOMEM);
42 } else {
43 int len = smb_proc_read_link(server_from_dentry(dentry),
44 dentry, link, PATH_MAX - 1);
45 if (len < 0) {
46 __putname(link);
47 link = ERR_PTR(len);
48 } else {
49 link[len] = 0;
50 }
51 }
52 nd_set_link(nd, link);
53 return NULL;
54}
55
56static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
57{
58 char *s = nd_get_link(nd);
59 if (!IS_ERR(s))
60 __putname(s);
61}
62
63const struct inode_operations smb_link_inode_operations =
64{
65 .readlink = generic_readlink,
66 .follow_link = smb_follow_link,
67 .put_link = smb_put_link,
68};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f1156048..24de30ba34c1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
370} 370}
371 371
372 372
373static int squashfs_get_sb(struct file_system_type *fs_type, int flags, 373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
374 const char *dev_name, void *data, 374 const char *dev_name, void *data)
375 struct vfsmount *mnt)
376{ 375{
377 return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, 376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
378 mnt);
379} 377}
380 378
381 379
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
451static struct file_system_type squashfs_fs_type = { 449static struct file_system_type squashfs_fs_type = {
452 .owner = THIS_MODULE, 450 .owner = THIS_MODULE,
453 .name = "squashfs", 451 .name = "squashfs",
454 .get_sb = squashfs_get_sb, 452 .mount = squashfs_mount,
455 .kill_sb = kill_block_super, 453 .kill_sb = kill_block_super,
456 .fs_flags = FS_REQUIRES_DEV 454 .fs_flags = FS_REQUIRES_DEV
457}; 455};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c6..3876c36699a1 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
158 strncmp(target, name, name_size) == 0) { 158 strncmp(target, name, name_size) == 0) {
159 /* found xattr */ 159 /* found xattr */
160 if (type & SQUASHFS_XATTR_VALUE_OOL) { 160 if (type & SQUASHFS_XATTR_VALUE_OOL) {
161 __le64 xattr; 161 __le64 xattr_val;
162 u64 xattr;
162 /* val is a reference to the real location */ 163 /* val is a reference to the real location */
163 err = squashfs_read_metadata(sb, &val, &start, 164 err = squashfs_read_metadata(sb, &val, &start,
164 &offset, sizeof(val)); 165 &offset, sizeof(val));
165 if (err < 0) 166 if (err < 0)
166 goto failed; 167 goto failed;
167 err = squashfs_read_metadata(sb, &xattr, &start, 168 err = squashfs_read_metadata(sb, &xattr_val,
168 &offset, sizeof(xattr)); 169 &start, &offset, sizeof(xattr_val));
169 if (err < 0) 170 if (err < 0)
170 goto failed; 171 goto failed;
171 xattr = le64_to_cpu(xattr); 172 xattr = le64_to_cpu(xattr_val);
172 start = SQUASHFS_XATTR_BLK(xattr) + 173 start = SQUASHFS_XATTR_BLK(xattr) +
173 msblk->xattr_table; 174 msblk->xattr_table;
174 offset = SQUASHFS_XATTR_OFFSET(xattr); 175 offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fbf..b634efce4bde 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, 25extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
26 u64 *, int *); 26 u64 *, int *);
27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, 27extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
28 int *, unsigned long long *); 28 unsigned int *, unsigned long long *);
29#else 29#else
30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, 30static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
31 u64 start, u64 *xattr_table_start, int *xattr_ids) 31 u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
35} 35}
36 36
37static inline int squashfs_xattr_lookup(struct super_block *sb, 37static inline int squashfs_xattr_lookup(struct super_block *sb,
38 unsigned int index, int *count, int *size, 38 unsigned int index, int *count, unsigned int *size,
39 unsigned long long *xattr) 39 unsigned long long *xattr)
40{ 40{
41 return 0; 41 return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098f..d33be5dd6c32 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
34#include "squashfs_fs_sb.h" 34#include "squashfs_fs_sb.h"
35#include "squashfs_fs_i.h" 35#include "squashfs_fs_i.h"
36#include "squashfs.h" 36#include "squashfs.h"
37#include "xattr.h"
37 38
38/* 39/*
39 * Map xattr id using the xattr id look up table 40 * Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index b9c9869165db..ca696155cd9a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
715 return set_anon_super(sb, NULL); 715 return set_anon_super(sb, NULL);
716} 716}
717 717
718int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, 718struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
719 int (*fill_super)(struct super_block *, void *, int), 719 void *data, int (*fill_super)(struct super_block *, void *, int))
720 struct vfsmount *mnt)
721{ 720{
722 struct super_block *sb; 721 struct super_block *sb;
723 722
724 sb = sget(fs_type, ns_test_super, ns_set_super, data); 723 sb = sget(fs_type, ns_test_super, ns_set_super, data);
725 if (IS_ERR(sb)) 724 if (IS_ERR(sb))
726 return PTR_ERR(sb); 725 return ERR_CAST(sb);
727 726
728 if (!sb->s_root) { 727 if (!sb->s_root) {
729 int err; 728 int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
731 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 730 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
732 if (err) { 731 if (err) {
733 deactivate_locked_super(sb); 732 deactivate_locked_super(sb);
734 return err; 733 return ERR_PTR(err);
735 } 734 }
736 735
737 sb->s_flags |= MS_ACTIVE; 736 sb->s_flags |= MS_ACTIVE;
738 } 737 }
739 738
740 simple_set_mnt(mnt, sb); 739 return dget(sb->s_root);
741 return 0;
742} 740}
743 741
744EXPORT_SYMBOL(get_sb_ns); 742EXPORT_SYMBOL(mount_ns);
745 743
746#ifdef CONFIG_BLOCK 744#ifdef CONFIG_BLOCK
747static int set_bdev_super(struct super_block *s, void *data) 745static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
762 return (void *)s->s_bdev == data; 760 return (void *)s->s_bdev == data;
763} 761}
764 762
765int get_sb_bdev(struct file_system_type *fs_type, 763struct dentry *mount_bdev(struct file_system_type *fs_type,
766 int flags, const char *dev_name, void *data, 764 int flags, const char *dev_name, void *data,
767 int (*fill_super)(struct super_block *, void *, int), 765 int (*fill_super)(struct super_block *, void *, int))
768 struct vfsmount *mnt)
769{ 766{
770 struct block_device *bdev; 767 struct block_device *bdev;
771 struct super_block *s; 768 struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
777 774
778 bdev = open_bdev_exclusive(dev_name, mode, fs_type); 775 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
779 if (IS_ERR(bdev)) 776 if (IS_ERR(bdev))
780 return PTR_ERR(bdev); 777 return ERR_CAST(bdev);
781 778
782 /* 779 /*
783 * once the super is inserted into the list by sget, s_umount 780 * once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
829 bdev->bd_super = s; 826 bdev->bd_super = s;
830 } 827 }
831 828
832 simple_set_mnt(mnt, s); 829 return dget(s->s_root);
833 return 0;
834 830
835error_s: 831error_s:
836 error = PTR_ERR(s); 832 error = PTR_ERR(s);
837error_bdev: 833error_bdev:
838 close_bdev_exclusive(bdev, mode); 834 close_bdev_exclusive(bdev, mode);
839error: 835error:
840 return error; 836 return ERR_PTR(error);
837}
838EXPORT_SYMBOL(mount_bdev);
839
840int get_sb_bdev(struct file_system_type *fs_type,
841 int flags, const char *dev_name, void *data,
842 int (*fill_super)(struct super_block *, void *, int),
843 struct vfsmount *mnt)
844{
845 struct dentry *root;
846
847 root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
848 if (IS_ERR(root))
849 return PTR_ERR(root);
850 mnt->mnt_root = root;
851 mnt->mnt_sb = root->d_sb;
852 return 0;
841} 853}
842 854
843EXPORT_SYMBOL(get_sb_bdev); 855EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
856EXPORT_SYMBOL(kill_block_super); 868EXPORT_SYMBOL(kill_block_super);
857#endif 869#endif
858 870
859int get_sb_nodev(struct file_system_type *fs_type, 871struct dentry *mount_nodev(struct file_system_type *fs_type,
860 int flags, void *data, 872 int flags, void *data,
861 int (*fill_super)(struct super_block *, void *, int), 873 int (*fill_super)(struct super_block *, void *, int))
862 struct vfsmount *mnt)
863{ 874{
864 int error; 875 int error;
865 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 876 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
866 877
867 if (IS_ERR(s)) 878 if (IS_ERR(s))
868 return PTR_ERR(s); 879 return ERR_CAST(s);
869 880
870 s->s_flags = flags; 881 s->s_flags = flags;
871 882
872 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 883 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
873 if (error) { 884 if (error) {
874 deactivate_locked_super(s); 885 deactivate_locked_super(s);
875 return error; 886 return ERR_PTR(error);
876 } 887 }
877 s->s_flags |= MS_ACTIVE; 888 s->s_flags |= MS_ACTIVE;
878 simple_set_mnt(mnt, s); 889 return dget(s->s_root);
879 return 0;
880} 890}
891EXPORT_SYMBOL(mount_nodev);
881 892
893int get_sb_nodev(struct file_system_type *fs_type,
894 int flags, void *data,
895 int (*fill_super)(struct super_block *, void *, int),
896 struct vfsmount *mnt)
897{
898 struct dentry *root;
899
900 root = mount_nodev(fs_type, flags, data, fill_super);
901 if (IS_ERR(root))
902 return PTR_ERR(root);
903 mnt->mnt_root = root;
904 mnt->mnt_sb = root->d_sb;
905 return 0;
906}
882EXPORT_SYMBOL(get_sb_nodev); 907EXPORT_SYMBOL(get_sb_nodev);
883 908
884static int compare_single(struct super_block *s, void *p) 909static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
886 return 1; 911 return 1;
887} 912}
888 913
889int get_sb_single(struct file_system_type *fs_type, 914struct dentry *mount_single(struct file_system_type *fs_type,
890 int flags, void *data, 915 int flags, void *data,
891 int (*fill_super)(struct super_block *, void *, int), 916 int (*fill_super)(struct super_block *, void *, int))
892 struct vfsmount *mnt)
893{ 917{
894 struct super_block *s; 918 struct super_block *s;
895 int error; 919 int error;
896 920
897 s = sget(fs_type, compare_single, set_anon_super, NULL); 921 s = sget(fs_type, compare_single, set_anon_super, NULL);
898 if (IS_ERR(s)) 922 if (IS_ERR(s))
899 return PTR_ERR(s); 923 return ERR_CAST(s);
900 if (!s->s_root) { 924 if (!s->s_root) {
901 s->s_flags = flags; 925 s->s_flags = flags;
902 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 926 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
903 if (error) { 927 if (error) {
904 deactivate_locked_super(s); 928 deactivate_locked_super(s);
905 return error; 929 return ERR_PTR(error);
906 } 930 }
907 s->s_flags |= MS_ACTIVE; 931 s->s_flags |= MS_ACTIVE;
908 } else { 932 } else {
909 do_remount_sb(s, flags, data, 0); 933 do_remount_sb(s, flags, data, 0);
910 } 934 }
911 simple_set_mnt(mnt, s); 935 return dget(s->s_root);
936}
937EXPORT_SYMBOL(mount_single);
938
939int get_sb_single(struct file_system_type *fs_type,
940 int flags, void *data,
941 int (*fill_super)(struct super_block *, void *, int),
942 struct vfsmount *mnt)
943{
944 struct dentry *root;
945 root = mount_single(fs_type, flags, data, fill_super);
946 if (IS_ERR(root))
947 return PTR_ERR(root);
948 mnt->mnt_root = root;
949 mnt->mnt_sb = root->d_sb;
912 return 0; 950 return 0;
913} 951}
914 952
@@ -918,6 +956,7 @@ struct vfsmount *
918vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 956vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
919{ 957{
920 struct vfsmount *mnt; 958 struct vfsmount *mnt;
959 struct dentry *root;
921 char *secdata = NULL; 960 char *secdata = NULL;
922 int error; 961 int error;
923 962
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
942 goto out_free_secdata; 981 goto out_free_secdata;
943 } 982 }
944 983
945 error = type->get_sb(type, flags, name, data, mnt); 984 if (type->mount) {
946 if (error < 0) 985 root = type->mount(type, flags, name, data);
947 goto out_free_secdata; 986 if (IS_ERR(root)) {
987 error = PTR_ERR(root);
988 goto out_free_secdata;
989 }
990 mnt->mnt_root = root;
991 mnt->mnt_sb = root->d_sb;
992 } else {
993 error = type->get_sb(type, flags, name, data, mnt);
994 if (error < 0)
995 goto out_free_secdata;
996 }
948 BUG_ON(!mnt->mnt_sb); 997 BUG_ON(!mnt->mnt_sb);
949 WARN_ON(!mnt->mnt_sb->s_bdi); 998 WARN_ON(!mnt->mnt_sb->s_bdi);
950 mnt->mnt_sb->s_flags |= MS_BORN; 999 mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c50..266895783b47 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
23#include "sysfs.h" 23#include "sysfs.h"
24 24
25 25
26static struct vfsmount *sysfs_mount; 26static struct vfsmount *sysfs_mnt;
27struct kmem_cache *sysfs_dir_cachep; 27struct kmem_cache *sysfs_dir_cachep;
28 28
29static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
95 return error; 95 return error;
96} 96}
97 97
98static int sysfs_get_sb(struct file_system_type *fs_type, 98static struct dentry *sysfs_mount(struct file_system_type *fs_type,
99 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 99 int flags, const char *dev_name, void *data)
100{ 100{
101 struct sysfs_super_info *info; 101 struct sysfs_super_info *info;
102 enum kobj_ns_type type; 102 enum kobj_ns_type type;
103 struct super_block *sb; 103 struct super_block *sb;
104 int error; 104 int error;
105 105
106 error = -ENOMEM;
107 info = kzalloc(sizeof(*info), GFP_KERNEL); 106 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info) 107 if (!info)
109 goto out; 108 return ERR_PTR(-ENOMEM);
110 109
111 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
112 info->ns[type] = kobj_ns_current(type); 111 info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
114 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
115 if (IS_ERR(sb) || sb->s_fs_info != info) 114 if (IS_ERR(sb) || sb->s_fs_info != info)
116 kfree(info); 115 kfree(info);
117 if (IS_ERR(sb)) { 116 if (IS_ERR(sb))
118 error = PTR_ERR(sb); 117 return ERR_CAST(sb);
119 goto out;
120 }
121 if (!sb->s_root) { 118 if (!sb->s_root) {
122 sb->s_flags = flags; 119 sb->s_flags = flags;
123 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 120 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
124 if (error) { 121 if (error) {
125 deactivate_locked_super(sb); 122 deactivate_locked_super(sb);
126 goto out; 123 return ERR_PTR(error);
127 } 124 }
128 sb->s_flags |= MS_ACTIVE; 125 sb->s_flags |= MS_ACTIVE;
129 } 126 }
130 127
131 simple_set_mnt(mnt, sb); 128 return dget(sb->s_root);
132 error = 0;
133out:
134 return error;
135} 129}
136 130
137static void sysfs_kill_sb(struct super_block *sb) 131static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
147 141
148static struct file_system_type sysfs_fs_type = { 142static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 143 .name = "sysfs",
150 .get_sb = sysfs_get_sb, 144 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 145 .kill_sb = sysfs_kill_sb,
152}; 146};
153 147
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
189 183
190 err = register_filesystem(&sysfs_fs_type); 184 err = register_filesystem(&sysfs_fs_type);
191 if (!err) { 185 if (!err) {
192 sysfs_mount = kern_mount(&sysfs_fs_type); 186 sysfs_mnt = kern_mount(&sysfs_fs_type);
193 if (IS_ERR(sysfs_mount)) { 187 if (IS_ERR(sysfs_mnt)) {
194 printk(KERN_ERR "sysfs: could not mount!\n"); 188 printk(KERN_ERR "sysfs: could not mount!\n");
195 err = PTR_ERR(sysfs_mount); 189 err = PTR_ERR(sysfs_mnt);
196 sysfs_mount = NULL; 190 sysfs_mnt = NULL;
197 unregister_filesystem(&sysfs_fs_type); 191 unregister_filesystem(&sysfs_fs_type);
198 goto out_err; 192 goto out_err;
199 } 193 }
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927e..3d9c62be0c10 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
526 526
527/* Every kernel module contains stuff like this. */ 527/* Every kernel module contains stuff like this. */
528 528
529static int sysv_get_sb(struct file_system_type *fs_type, 529static struct dentry *sysv_mount(struct file_system_type *fs_type,
530 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 530 int flags, const char *dev_name, void *data)
531{ 531{
532 return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, 532 return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
533 mnt);
534} 533}
535 534
536static int v7_get_sb(struct file_system_type *fs_type, 535static struct dentry *v7_mount(struct file_system_type *fs_type,
537 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 536 int flags, const char *dev_name, void *data)
538{ 537{
539 return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); 538 return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
540} 539}
541 540
542static struct file_system_type sysv_fs_type = { 541static struct file_system_type sysv_fs_type = {
543 .owner = THIS_MODULE, 542 .owner = THIS_MODULE,
544 .name = "sysv", 543 .name = "sysv",
545 .get_sb = sysv_get_sb, 544 .mount = sysv_mount,
546 .kill_sb = kill_block_super, 545 .kill_sb = kill_block_super,
547 .fs_flags = FS_REQUIRES_DEV, 546 .fs_flags = FS_REQUIRES_DEV,
548}; 547};
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
550static struct file_system_type v7_fs_type = { 549static struct file_system_type v7_fs_type = {
551 .owner = THIS_MODULE, 550 .owner = THIS_MODULE,
552 .name = "v7", 551 .name = "v7",
553 .get_sb = v7_get_sb, 552 .mount = v7_mount,
554 .kill_sb = kill_block_super, 553 .kill_sb = kill_block_super,
555 .fs_flags = FS_REQUIRES_DEV, 554 .fs_flags = FS_REQUIRES_DEV,
556}; 555};
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad07..91fac54c70e3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
2038 return c->vi.cdev == *dev; 2038 return c->vi.cdev == *dev;
2039} 2039}
2040 2040
2041static int ubifs_get_sb(struct file_system_type *fs_type, int flags, 2041static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2042 const char *name, void *data, struct vfsmount *mnt) 2042 const char *name, void *data)
2043{ 2043{
2044 struct ubi_volume_desc *ubi; 2044 struct ubi_volume_desc *ubi;
2045 struct ubi_volume_info vi; 2045 struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2057 if (IS_ERR(ubi)) { 2057 if (IS_ERR(ubi)) {
2058 dbg_err("cannot open \"%s\", error %d", 2058 dbg_err("cannot open \"%s\", error %d",
2059 name, (int)PTR_ERR(ubi)); 2059 name, (int)PTR_ERR(ubi));
2060 return PTR_ERR(ubi); 2060 return ERR_CAST(ubi);
2061 } 2061 }
2062 ubi_get_volume_info(ubi, &vi); 2062 ubi_get_volume_info(ubi, &vi);
2063 2063
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2095 /* 'fill_super()' opens ubi again so we must close it here */ 2095 /* 'fill_super()' opens ubi again so we must close it here */
2096 ubi_close_volume(ubi); 2096 ubi_close_volume(ubi);
2097 2097
2098 simple_set_mnt(mnt, sb); 2098 return dget(sb->s_root);
2099 return 0;
2100 2099
2101out_deact: 2100out_deact:
2102 deactivate_locked_super(sb); 2101 deactivate_locked_super(sb);
2103out_close: 2102out_close:
2104 ubi_close_volume(ubi); 2103 ubi_close_volume(ubi);
2105 return err; 2104 return ERR_PTR(err);
2106} 2105}
2107 2106
2108static struct file_system_type ubifs_fs_type = { 2107static struct file_system_type ubifs_fs_type = {
2109 .name = "ubifs", 2108 .name = "ubifs",
2110 .owner = THIS_MODULE, 2109 .owner = THIS_MODULE,
2111 .get_sb = ubifs_get_sb, 2110 .mount = ubifs_mount,
2112 .kill_sb = kill_anon_super, 2111 .kill_sb = kill_anon_super,
2113}; 2112};
2114 2113
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b40..4a5c7c61836a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
107} 107}
108 108
109/* UDF filesystem type */ 109/* UDF filesystem type */
110static int udf_get_sb(struct file_system_type *fs_type, 110static struct dentry *udf_mount(struct file_system_type *fs_type,
111 int flags, const char *dev_name, void *data, 111 int flags, const char *dev_name, void *data)
112 struct vfsmount *mnt)
113{ 112{
114 return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); 113 return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
115} 114}
116 115
117static struct file_system_type udf_fstype = { 116static struct file_system_type udf_fstype = {
118 .owner = THIS_MODULE, 117 .owner = THIS_MODULE,
119 .name = "udf", 118 .name = "udf",
120 .get_sb = udf_get_sb, 119 .mount = udf_mount,
121 .kill_sb = kill_block_super, 120 .kill_sb = kill_block_super,
122 .fs_flags = FS_REQUIRES_DEV, 121 .fs_flags = FS_REQUIRES_DEV,
123}; 122};
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7d..2c47daed56da 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
1454 .show_options = ufs_show_options, 1454 .show_options = ufs_show_options,
1455}; 1455};
1456 1456
1457static int ufs_get_sb(struct file_system_type *fs_type, 1457static struct dentry *ufs_mount(struct file_system_type *fs_type,
1458 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1458 int flags, const char *dev_name, void *data)
1459{ 1459{
1460 return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); 1460 return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
1461} 1461}
1462 1462
1463static struct file_system_type ufs_fs_type = { 1463static struct file_system_type ufs_fs_type = {
1464 .owner = THIS_MODULE, 1464 .owner = THIS_MODULE,
1465 .name = "ufs", 1465 .name = "ufs",
1466 .get_sb = ufs_get_sb, 1466 .mount = ufs_mount,
1467 .kill_sb = kill_block_super, 1467 .kill_sb = kill_block_super,
1468 .fs_flags = FS_REQUIRES_DEV, 1468 .fs_flags = FS_REQUIRES_DEV,
1469}; 1469};
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
22config XFS_QUOTA 22config XFS_QUOTA
23 bool "XFS Quota support" 23 bool "XFS Quota support"
24 depends on XFS_FS 24 depends on XFS_FS
25 select QUOTACTL
25 help 26 help
26 If you say Y here, you will be able to set limits for disk usage on 27 If you say Y here, you will be able to set limits for disk usage on
27 a per user and/or a per group basis under XFS. XFS considers quota 28 a per user and/or a per group basis under XFS. XFS considers quota
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cf808782c065..9f3a78fe6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1609,16 +1609,14 @@ xfs_fs_fill_super(
1609 goto out_free_sb; 1609 goto out_free_sb;
1610} 1610}
1611 1611
1612STATIC int 1612STATIC struct dentry *
1613xfs_fs_get_sb( 1613xfs_fs_mount(
1614 struct file_system_type *fs_type, 1614 struct file_system_type *fs_type,
1615 int flags, 1615 int flags,
1616 const char *dev_name, 1616 const char *dev_name,
1617 void *data, 1617 void *data)
1618 struct vfsmount *mnt)
1619{ 1618{
1620 return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, 1619 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1621 mnt);
1622} 1620}
1623 1621
1624static const struct super_operations xfs_super_operations = { 1622static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1637,7 @@ static const struct super_operations xfs_super_operations = {
1639static struct file_system_type xfs_fs_type = { 1637static struct file_system_type xfs_fs_type = {
1640 .owner = THIS_MODULE, 1638 .owner = THIS_MODULE,
1641 .name = "xfs", 1639 .name = "xfs",
1642 .get_sb = xfs_fs_get_sb, 1640 .mount = xfs_fs_mount,
1643 .kill_sb = kill_block_super, 1641 .kill_sb = kill_block_super,
1644 .fs_flags = FS_REQUIRES_DEV, 1642 .fs_flags = FS_REQUIRES_DEV,
1645}; 1643};