Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/Kconfig | 5
-rw-r--r--  fs/9p/Makefile | 1
-rw-r--r--  fs/9p/acl.c | 4
-rw-r--r--  fs/9p/v9fs.h | 42
-rw-r--r--  fs/9p/vfs_inode.c | 871
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 824
-rw-r--r--  fs/9p/xattr.c | 2
-rw-r--r--  fs/cifs/cache.c | 16
-rw-r--r--  fs/cifs/cifs_debug.c | 22
-rw-r--r--  fs/cifs/cifs_spnego.c | 10
-rw-r--r--  fs/cifs/cifsencrypt.c | 6
-rw-r--r--  fs/cifs/cifsfs.c | 17
-rw-r--r--  fs/cifs/cifsglob.h | 9
-rw-r--r--  fs/cifs/cifssmb.c | 5
-rw-r--r--  fs/cifs/connect.c | 462
-rw-r--r--  fs/cifs/dir.c | 6
-rw-r--r--  fs/cifs/file.c | 233
-rw-r--r--  fs/cifs/inode.c | 6
-rw-r--r--  fs/cifs/readdir.c | 1
-rw-r--r--  fs/cifs/sess.c | 135
-rw-r--r--  fs/cifs/transport.c | 2
-rw-r--r--  fs/dlm/lowcomms.c | 63
-rw-r--r--  fs/ext2/dir.c | 19
-rw-r--r--  fs/ext2/namei.c | 2
-rw-r--r--  fs/ext2/super.c | 25
-rw-r--r--  fs/ext2/xattr.c | 10
-rw-r--r--  fs/ext3/balloc.c | 266
-rw-r--r--  fs/ext3/dir.c | 15
-rw-r--r--  fs/ext3/inode.c | 6
-rw-r--r--  fs/ext3/ioctl.c | 22
-rw-r--r--  fs/ext3/namei.c | 138
-rw-r--r--  fs/ext3/resize.c | 65
-rw-r--r--  fs/ext3/super.c | 64
-rw-r--r--  fs/ext3/xattr.c | 2
-rw-r--r--  fs/ext4/balloc.c | 3
-rw-r--r--  fs/ext4/dir.c | 56
-rw-r--r--  fs/ext4/ext4.h | 93
-rw-r--r--  fs/ext4/ext4_extents.h | 8
-rw-r--r--  fs/ext4/ext4_jbd2.h | 2
-rw-r--r--  fs/ext4/extents.c | 88
-rw-r--r--  fs/ext4/file.c | 22
-rw-r--r--  fs/ext4/fsync.c | 4
-rw-r--r--  fs/ext4/ialloc.c | 2
-rw-r--r--  fs/ext4/inode.c | 74
-rw-r--r--  fs/ext4/mballoc.c | 55
-rw-r--r--  fs/ext4/migrate.c | 2
-rw-r--r--  fs/ext4/namei.c | 69
-rw-r--r--  fs/ext4/page-io.c | 7
-rw-r--r--  fs/ext4/resize.c | 64
-rw-r--r--  fs/ext4/super.c | 288
-rw-r--r--  fs/ext4/xattr.c | 28
-rw-r--r--  fs/fuse/dev.c | 156
-rw-r--r--  fs/fuse/dir.c | 53
-rw-r--r--  fs/fuse/file.c | 66
-rw-r--r--  fs/fuse/fuse_i.h | 27
-rw-r--r--  fs/fuse/inode.c | 30
-rw-r--r--  fs/gfs2/incore.h | 1
-rw-r--r--  fs/jbd2/journal.c | 34
-rw-r--r--  fs/jbd2/recovery.c | 2
-rw-r--r--  fs/jbd2/transaction.c | 6
-rw-r--r--  fs/lockd/Makefile | 6
-rw-r--r--  fs/lockd/clnt4xdr.c | 605
-rw-r--r--  fs/lockd/clntlock.c | 4
-rw-r--r--  fs/lockd/clntproc.c | 18
-rw-r--r--  fs/lockd/clntxdr.c | 627
-rw-r--r--  fs/lockd/host.c | 409
-rw-r--r--  fs/lockd/mon.c | 110
-rw-r--r--  fs/lockd/svc4proc.c | 20
-rw-r--r--  fs/lockd/svclock.c | 34
-rw-r--r--  fs/lockd/svcproc.c | 28
-rw-r--r--  fs/lockd/xdr.c | 287
-rw-r--r--  fs/lockd/xdr4.c | 255
-rw-r--r--  fs/mbcache.c | 12
-rw-r--r--  fs/namei.c | 8
-rw-r--r--  fs/nfs/callback.c | 83
-rw-r--r--  fs/nfs/callback.h | 59
-rw-r--r--  fs/nfs/callback_proc.c | 326
-rw-r--r--  fs/nfs/callback_xdr.c | 143
-rw-r--r--  fs/nfs/client.c | 302
-rw-r--r--  fs/nfs/delegation.c | 362
-rw-r--r--  fs/nfs/delegation.h | 1
-rw-r--r--  fs/nfs/dir.c | 72
-rw-r--r--  fs/nfs/idmap.c | 2
-rw-r--r--  fs/nfs/inode.c | 3
-rw-r--r--  fs/nfs/internal.h | 19
-rw-r--r--  fs/nfs/mount_clnt.c | 83
-rw-r--r--  fs/nfs/nfs2xdr.c | 1294
-rw-r--r--  fs/nfs/nfs3xdr.c | 2817
-rw-r--r--  fs/nfs/nfs4_fs.h | 13
-rw-r--r--  fs/nfs/nfs4filelayout.c | 6
-rw-r--r--  fs/nfs/nfs4proc.c | 188
-rw-r--r--  fs/nfs/nfs4renewd.c | 11
-rw-r--r--  fs/nfs/nfs4state.c | 293
-rw-r--r--  fs/nfs/nfs4xdr.c | 1426
-rw-r--r--  fs/nfs/pagelist.c | 7
-rw-r--r--  fs/nfs/pnfs.c | 524
-rw-r--r--  fs/nfs/pnfs.h | 76
-rw-r--r--  fs/nfs/proc.c | 5
-rw-r--r--  fs/nfs/super.c | 18
-rw-r--r--  fs/nfs/unlink.c | 2
-rw-r--r--  fs/nfsd/nfs4callback.c | 690
-rw-r--r--  fs/nilfs2/bmap.c | 47
-rw-r--r--  fs/nilfs2/btnode.c | 3
-rw-r--r--  fs/nilfs2/dir.c | 3
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/nilfs2/ifile.c | 11
-rw-r--r--  fs/nilfs2/inode.c | 180
-rw-r--r--  fs/nilfs2/ioctl.c | 12
-rw-r--r--  fs/nilfs2/mdt.c | 32
-rw-r--r--  fs/nilfs2/namei.c | 1
-rw-r--r--  fs/nilfs2/nilfs.h | 13
-rw-r--r--  fs/nilfs2/page.c | 86
-rw-r--r--  fs/nilfs2/page.h | 3
-rw-r--r--  fs/nilfs2/recovery.c | 2
-rw-r--r--  fs/nilfs2/sb.h | 8
-rw-r--r--  fs/nilfs2/segment.c | 43
-rw-r--r--  fs/nilfs2/super.c | 30
-rw-r--r--  fs/nilfs2/the_nilfs.c | 6
-rw-r--r--  fs/nilfs2/the_nilfs.h | 3
-rw-r--r--  fs/ocfs2/Kconfig | 2
-rw-r--r--  fs/ocfs2/alloc.c | 77
-rw-r--r--  fs/ocfs2/alloc.h | 4
-rw-r--r--  fs/ocfs2/aops.c | 59
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 246
-rw-r--r--  fs/ocfs2/cluster/netdebug.c | 286
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 145
-rw-r--r--  fs/ocfs2/cluster/tcp_internal.h | 33
-rw-r--r--  fs/ocfs2/dlm/dlmast.c | 76
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h | 86
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c | 200
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.h | 5
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 10
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c | 3
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c | 132
-rw-r--r--  fs/ocfs2/namei.c | 5
-rw-r--r--  fs/ocfs2/ocfs2.h | 5
-rw-r--r--  fs/quota/dquot.c | 18
-rw-r--r--  fs/quota/quota_tree.c | 9
-rw-r--r--  fs/sysfs/group.c | 10
-rw-r--r--  fs/sysfs/inode.c | 1
-rw-r--r--  fs/sysfs/sysfs.h | 1
-rw-r--r--  fs/udf/Kconfig | 1
-rw-r--r--  fs/udf/balloc.c | 3
-rw-r--r--  fs/udf/dir.c | 5
-rw-r--r--  fs/udf/file.c | 11
-rw-r--r--  fs/udf/ialloc.c | 21
-rw-r--r--  fs/udf/inode.c | 51
-rw-r--r--  fs/udf/namei.c | 107
-rw-r--r--  fs/udf/partition.c | 27
-rw-r--r--  fs/udf/super.c | 67
-rw-r--r--  fs/udf/symlink.c | 12
-rw-r--r--  fs/udf/udf_i.h | 13
-rw-r--r--  fs/udf/udf_sb.h | 22
-rw-r--r--  fs/udf/udfdecl.h | 4
-rw-r--r--  fs/xfs/linux-2.6/sv.h | 59
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 425
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h | 16
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 235
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 22
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 12
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 22
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 92
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 59
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 1
-rw-r--r--  fs/xfs/xfs_ag.h | 2
-rw-r--r--  fs/xfs/xfs_alloc.c | 351
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 4
-rw-r--r--  fs/xfs/xfs_btree.c | 9
-rw-r--r--  fs/xfs/xfs_buf_item.c | 32
-rw-r--r--  fs/xfs/xfs_buf_item.h | 11
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 97
-rw-r--r--  fs/xfs/xfs_extfree_item.h | 11
-rw-r--r--  fs/xfs/xfs_fsops.c | 1
-rw-r--r--  fs/xfs/xfs_iget.c | 79
-rw-r--r--  fs/xfs/xfs_inode.c | 54
-rw-r--r--  fs/xfs/xfs_inode.h | 15
-rw-r--r--  fs/xfs/xfs_inode_item.c | 90
-rw-r--r--  fs/xfs/xfs_iomap.c | 233
-rw-r--r--  fs/xfs/xfs_iomap.h | 27
-rw-r--r--  fs/xfs/xfs_log.c | 739
-rw-r--r--  fs/xfs/xfs_log_cil.c | 17
-rw-r--r--  fs/xfs/xfs_log_priv.h | 127
-rw-r--r--  fs/xfs/xfs_log_recover.c | 620
-rw-r--r--  fs/xfs/xfs_mount.c | 23
-rw-r--r--  fs/xfs/xfs_mount.h | 14
-rw-r--r--  fs/xfs/xfs_trans.c | 79
-rw-r--r--  fs/xfs/xfs_trans.h | 2
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 232
-rw-r--r--  fs/xfs/xfs_trans_extfree.c | 8
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 35
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 61
192 files changed, 13426 insertions(+), 8596 deletions(-)
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 7e0511476797..814ac4e213a8 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -9,6 +9,8 @@ config 9P_FS
 
 	  If unsure, say N.
 
+if 9P_FS
+
 config 9P_FSCACHE
 	bool "Enable 9P client caching support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
@@ -20,7 +22,6 @@ config 9P_FSCACHE
 
 config 9P_FS_POSIX_ACL
 	bool "9P POSIX Access Control Lists"
-	depends on 9P_FS
 	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -30,3 +31,5 @@ config 9P_FS_POSIX_ACL
 	  Linux website <http://acl.bestbits.at/>.
 
 	  If you don't know what Access Control Lists are, say N
+
+endif
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index f8ba37effd1b..ab8c12780634 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_9P_FS) := 9p.o
 9p-objs := \
 	vfs_super.o \
 	vfs_inode.o \
+	vfs_inode_dotl.o \
 	vfs_addr.o \
 	vfs_file.o \
 	vfs_dir.o \
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 6e58c4ca1e6e..02a2cf616318 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -28,7 +28,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
 {
 	ssize_t size;
 	void *value = NULL;
-	struct posix_acl *acl = NULL;;
+	struct posix_acl *acl = NULL;
 
 	size = v9fs_fid_xattr_get(fid, name, NULL, 0);
 	if (size > 0) {
@@ -365,7 +365,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
 		if (!S_ISDIR(inode->i_mode)) {
-			retval = -EINVAL;
+			retval = acl ? -EINVAL : 0;
 			goto err_out;
 		}
 		break;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index cb6396855e2d..c4b5d8864f0d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -113,9 +113,27 @@ struct v9fs_session_info {
 
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
 								char *);
-void v9fs_session_close(struct v9fs_session_info *v9ses);
-void v9fs_session_cancel(struct v9fs_session_info *v9ses);
-void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_close(struct v9fs_session_info *v9ses);
+extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+			struct nameidata *nameidata);
+extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry);
+extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
+			void *p);
+extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
+				struct p9_fid *fid,
+				struct super_block *sb);
+
+extern const struct inode_operations v9fs_dir_inode_operations_dotl;
+extern const struct inode_operations v9fs_file_inode_operations_dotl;
+extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
+extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
+					struct p9_fid *fid,
+					struct super_block *sb);
 
 /* other default globals */
 #define V9FS_PORT	564
@@ -138,3 +156,21 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
 {
 	return v9ses->flags & V9FS_PROTO_2000L;
 }
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+		    struct super_block *sb)
+{
+	if (v9fs_proto_dotl(v9ses))
+		return v9fs_inode_dotl(v9ses, fid, sb);
+	else
+		return v9fs_inode(v9ses, fid, sb);
+}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 59782981b225..5076eeb95502 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -49,15 +49,8 @@
 
 static const struct inode_operations v9fs_dir_inode_operations;
 static const struct inode_operations v9fs_dir_inode_operations_dotu;
-static const struct inode_operations v9fs_dir_inode_operations_dotl;
 static const struct inode_operations v9fs_file_inode_operations;
-static const struct inode_operations v9fs_file_inode_operations_dotl;
 static const struct inode_operations v9fs_symlink_inode_operations;
-static const struct inode_operations v9fs_symlink_inode_operations_dotl;
-
-static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
-		    dev_t rdev);
 
 /**
  * unixmode2p9mode - convert unix mode bits to plan 9
@@ -251,41 +244,6 @@ void v9fs_destroy_inode(struct inode *inode)
 #endif
 
 /**
- * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
- * new file system object. This checks the S_ISGID to determine the owning
- * group of the new file system object.
- */
-
-static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
-{
-	BUG_ON(dir_inode == NULL);
-
-	if (dir_inode->i_mode & S_ISGID) {
-		/* set_gid bit is set.*/
-		return dir_inode->i_gid;
-	}
-	return current_fsgid();
-}
-
-/**
- * v9fs_dentry_from_dir_inode - helper function to get the dentry from
- * dir inode.
- *
- */
-
-static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
-{
-	struct dentry *dentry;
-
-	spin_lock(&inode->i_lock);
-	/* Directory should have only one entry. */
-	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
-	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	spin_unlock(&inode->i_lock);
-	return dentry;
-}
-
-/**
  * v9fs_get_inode - helper function to setup an inode
  * @sb: superblock
  * @mode: mode to setup inode with
@@ -454,7 +412,7 @@ void v9fs_evict_inode(struct inode *inode)
 #endif
 }
 
-static struct inode *
+struct inode *
 v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	   struct super_block *sb)
 {
@@ -489,60 +447,6 @@ error:
 	return ERR_PTR(err);
 }
 
-static struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-		struct super_block *sb)
-{
-	struct inode *ret = NULL;
-	int err;
-	struct p9_stat_dotl *st;
-
-	st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
-	if (IS_ERR(st))
-		return ERR_CAST(st);
-
-	ret = v9fs_get_inode(sb, st->st_mode);
-	if (IS_ERR(ret)) {
-		err = PTR_ERR(ret);
-		goto error;
-	}
-
-	v9fs_stat2inode_dotl(st, ret);
-	ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
-	v9fs_vcookie_set_qid(ret, &st->qid);
-	v9fs_cache_inode_get_cookie(ret);
-#endif
-	err = v9fs_get_acl(ret, fid);
-	if (err) {
-		iput(ret);
-		goto error;
-	}
-	kfree(st);
-	return ret;
-error:
-	kfree(st);
-	return ERR_PTR(err);
-}
-
-/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
- * issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-		    struct super_block *sb)
-{
-	if (v9fs_proto_dotl(v9ses))
-		return v9fs_inode_dotl(v9ses, fid, sb);
-	else
-		return v9fs_inode(v9ses, fid, sb);
-}
-
 /**
  * v9fs_remove - helper function to remove files and directories
  * @dir: directory inode that is being deleted
@@ -633,12 +537,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-
-	if (v9ses->cache)
-		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
-	else
-		d_set_d_op(dentry, &v9fs_dentry_operations);
-
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
@@ -657,144 +555,6 @@ error:
 }
 
 /**
- * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
- * @dir: directory inode that is being created
- * @dentry: dentry that is being deleted
- * @mode: create permissions
- * @nd: path information
- *
- */
-
-static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
-		struct nameidata *nd)
-{
-	int err = 0;
-	char *name = NULL;
-	gid_t gid;
-	int flags;
-	mode_t mode;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid = NULL;
-	struct p9_fid *dfid, *ofid;
-	struct file *filp;
-	struct p9_qid qid;
-	struct inode *inode;
-	struct posix_acl *pacl = NULL, *dacl = NULL;
-
-	v9ses = v9fs_inode2v9ses(dir);
-	if (nd && nd->flags & LOOKUP_OPEN)
-		flags = nd->intent.open.flags - 1;
-	else {
-		/*
-		 * create call without LOOKUP_OPEN is due
-		 * to mknod of regular files. So use mknod
-		 * operation.
-		 */
-		return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
-	}
-
-	name = (char *) dentry->d_name.name;
-	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
-			"mode:0x%x\n", name, flags, omode);
-
-	dfid = v9fs_fid_lookup(dentry->d_parent);
-	if (IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		return err;
-	}
-
-	/* clone a fid to use for creation */
-	ofid = p9_client_walk(dfid, 0, NULL, 1);
-	if (IS_ERR(ofid)) {
-		err = PTR_ERR(ofid);
-		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		return err;
-	}
-
-	gid = v9fs_get_fsgid_for_create(dir);
-
-	mode = omode;
-	/* Update mode based on ACL value */
-	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
-	if (err) {
-		P9_DPRINTK(P9_DEBUG_VFS,
-			"Failed to get acl values in creat %d\n", err);
-		goto error;
-	}
-	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS,
-			"p9_client_open_dotl failed in creat %d\n",
-			err);
-		goto error;
-	}
-	/* instantiate inode and assign the unopened fid to the dentry */
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
-			(nd && nd->flags & LOOKUP_OPEN)) {
-		fid = p9_client_walk(dfid, 1, &name, 1);
-		if (IS_ERR(fid)) {
-			err = PTR_ERR(fid);
-			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
-				err);
-			fid = NULL;
-			goto error;
-		}
-
-		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
-				err);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
-		d_instantiate(dentry, inode);
-		err = v9fs_fid_add(dentry, fid);
-		if (err < 0)
-			goto error;
-		/* The fid would get clunked via a dput */
-		fid = NULL;
-	} else {
-		/*
-		 * Not in cached mode. No need to populate
-		 * inode with stat. We need to get an inode
-		 * so that we can set the acl with dentry
-		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_dentry_operations);
-		d_instantiate(dentry, inode);
-	}
-	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
-
-	/* if we are opening a file, assign the open fid to the file */
-	if (nd && nd->flags & LOOKUP_OPEN) {
-		filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
-		if (IS_ERR(filp)) {
-			p9_client_clunk(ofid);
-			return PTR_ERR(filp);
-		}
-		filp->private_data = ofid;
-	} else
-		p9_client_clunk(ofid);
-
-	return 0;
-
-error:
-	if (ofid)
-		p9_client_clunk(ofid);
-	if (fid)
-		p9_client_clunk(fid);
-	return err;
-}
-
-/**
  * v9fs_vfs_create - VFS hook to create files
  * @dir: directory inode that is being created
  * @dentry: dentry that is being deleted
@@ -884,107 +644,6 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return err;
 }
 
-
-/**
- * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
- * @dir: inode that is being unlinked
- * @dentry: dentry that is being unlinked
- * @mode: mode for new directory
- *
- */
-
-static int v9fs_vfs_mkdir_dotl(struct inode *dir,
-			struct dentry *dentry, int omode)
-{
-	int err;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid = NULL, *dfid = NULL;
-	gid_t gid;
-	char *name;
-	mode_t mode;
-	struct inode *inode;
-	struct p9_qid qid;
-	struct dentry *dir_dentry;
-	struct posix_acl *dacl = NULL, *pacl = NULL;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
-	err = 0;
-	v9ses = v9fs_inode2v9ses(dir);
-
-	omode |= S_IFDIR;
-	if (dir->i_mode & S_ISGID)
-		omode |= S_ISGID;
-
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
-	dfid = v9fs_fid_lookup(dir_dentry);
-	if (IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		dfid = NULL;
-		goto error;
-	}
-
-	gid = v9fs_get_fsgid_for_create(dir);
-	mode = omode;
-	/* Update mode based on ACL value */
-	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
-	if (err) {
-		P9_DPRINTK(P9_DEBUG_VFS,
-			"Failed to get acl values in mkdir %d\n", err);
-		goto error;
-	}
-	name = (char *) dentry->d_name.name;
-	err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
-	if (err < 0)
-		goto error;
-
-	/* instantiate inode and assign the unopened fid to the dentry */
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-		fid = p9_client_walk(dfid, 1, &name, 1);
-		if (IS_ERR(fid)) {
-			err = PTR_ERR(fid);
-			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
-				err);
-			fid = NULL;
-			goto error;
-		}
-
-		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
-				err);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
-		d_instantiate(dentry, inode);
-		err = v9fs_fid_add(dentry, fid);
-		if (err < 0)
-			goto error;
-		fid = NULL;
-	} else {
-		/*
-		 * Not in cached mode. No need to populate
-		 * inode with stat. We need to get an inode
-		 * so that we can set the acl with dentry
-		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_dentry_operations);
-		d_instantiate(dentry, inode);
-	}
-	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
-
-error:
-	if (fid)
-		p9_client_clunk(fid);
-	return err;
-}
-
 /**
  * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
  * @dir: inode that is being walked from
@@ -993,7 +652,7 @@ error:
  *
  */
 
-static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 				      struct nameidata *nameidata)
 {
 	struct super_block *sb;
@@ -1063,7 +722,7 @@ error:
  *
  */
 
-static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
 {
 	return v9fs_remove(i, d, 0);
 }
@@ -1075,7 +734,7 @@ static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
  *
  */
 
-static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
 {
 	return v9fs_remove(i, d, 1);
 }
@@ -1089,7 +748,7 @@ static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
  *
  */
 
-static int
+int
 v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -1196,42 +855,6 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
-static int
-v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
-		 struct kstat *stat)
-{
-	int err;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid;
-	struct p9_stat_dotl *st;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
-	err = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-		return simple_getattr(mnt, dentry, stat);
-
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid))
-		return PTR_ERR(fid);
-
-	/* Ask for all the fields in stat structure. Server will return
-	 * whatever it supports
-	 */
-
-	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
-	if (IS_ERR(st))
-		return PTR_ERR(st);
-
-	v9fs_stat2inode_dotl(st, dentry->d_inode);
-	generic_fillattr(dentry->d_inode, stat);
-	/* Change block size to what the server returned */
-	stat->blksize = st->st_blksize;
-
-	kfree(st);
-	return 0;
-}
-
 /**
  * v9fs_vfs_setattr - set file metadata
  * @dentry: file whose metadata to set
@@ -1291,64 +914,6 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 }
 
 /**
- * v9fs_vfs_setattr_dotl - set file metadata
- * @dentry: file whose metadata to set
- * @iattr: metadata assignment structure
- *
- */
-
-int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
-{
-	int retval;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid;
-	struct p9_iattr_dotl p9attr;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "\n");
-
-	retval = inode_change_ok(dentry->d_inode, iattr);
-	if (retval)
-		return retval;
-
-	p9attr.valid = iattr->ia_valid;
-	p9attr.mode = iattr->ia_mode;
-	p9attr.uid = iattr->ia_uid;
-	p9attr.gid = iattr->ia_gid;
-	p9attr.size = iattr->ia_size;
-	p9attr.atime_sec = iattr->ia_atime.tv_sec;
-	p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
-	p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
-	p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
-
-	retval = -EPERM;
-	v9ses = v9fs_inode2v9ses(dentry->d_inode);
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid))
-		return PTR_ERR(fid);
-
-	retval = p9_client_setattr(fid, &p9attr);
-	if (retval < 0)
-		return retval;
-
-	if ((iattr->ia_valid & ATTR_SIZE) &&
-	    iattr->ia_size != i_size_read(dentry->d_inode)) {
-		retval = vmtruncate(dentry->d_inode, iattr->ia_size);
-		if (retval)
-			return retval;
-	}
-
-	setattr_copy(dentry->d_inode, iattr);
-	mark_inode_dirty(dentry->d_inode);
-	if (iattr->ia_valid & ATTR_MODE) {
-		/* We also want to update ACL when we update mode bits */
-		retval = v9fs_acl_chmod(dentry);
-		if (retval < 0)
-			return retval;
-	}
-	return 0;
-}
-
-/**
  * v9fs_stat2inode - populate an inode structure with mistat info
  * @stat: Plan 9 metadata (mistat) structure
  * @inode: inode to populate
@@ -1426,77 +991,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 }
 
 /**
- * v9fs_stat2inode_dotl - populate an inode structure with stat info
- * @stat: stat structure
- * @inode: inode to populate
- * @sb: superblock of filesystem
- *
- */
-
-void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
-{
-
-	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
-		inode->i_atime.tv_sec = stat->st_atime_sec;
-		inode->i_atime.tv_nsec = stat->st_atime_nsec;
-		inode->i_mtime.tv_sec = stat->st_mtime_sec;
-		inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
-		inode->i_ctime.tv_sec = stat->st_ctime_sec;
-		inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
-		inode->i_uid = stat->st_uid;
-		inode->i_gid = stat->st_gid;
-		inode->i_nlink = stat->st_nlink;
-		inode->i_mode = stat->st_mode;
-		inode->i_rdev = new_decode_dev(stat->st_rdev);
-
-		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
-
-		i_size_write(inode, stat->st_size);
-		inode->i_blocks = stat->st_blocks;
-	} else {
-		if (stat->st_result_mask & P9_STATS_ATIME) {
-			inode->i_atime.tv_sec = stat->st_atime_sec;
-			inode->i_atime.tv_nsec = stat->st_atime_nsec;
-		}
-		if (stat->st_result_mask & P9_STATS_MTIME) {
-			inode->i_mtime.tv_sec = stat->st_mtime_sec;
-			inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
-		}
-		if (stat->st_result_mask & P9_STATS_CTIME) {
-			inode->i_ctime.tv_sec = stat->st_ctime_sec;
-			inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
-		}
-		if (stat->st_result_mask & P9_STATS_UID)
-			inode->i_uid = stat->st_uid;
-		if (stat->st_result_mask & P9_STATS_GID)
-			inode->i_gid = stat->st_gid;
-		if (stat->st_result_mask & P9_STATS_NLINK)
-			inode->i_nlink = stat->st_nlink;
-		if (stat->st_result_mask & P9_STATS_MODE) {
-			inode->i_mode = stat->st_mode;
-			if ((S_ISBLK(inode->i_mode)) ||
-						(S_ISCHR(inode->i_mode)))
-				init_special_inode(inode, inode->i_mode,
-								inode->i_rdev);
-		}
-		if (stat->st_result_mask & P9_STATS_RDEV)
-			inode->i_rdev = new_decode_dev(stat->st_rdev);
-		if (stat->st_result_mask & P9_STATS_SIZE)
-			i_size_write(inode, stat->st_size);
-		if (stat->st_result_mask & P9_STATS_BLOCKS)
-			inode->i_blocks = stat->st_blocks;
-	}
-	if (stat->st_result_mask & P9_STATS_GEN)
-		inode->i_generation = stat->st_gen;
-
-	/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
-	 * because the inode structure does not have fields for them.
-	 */
-}
-
-/**
  * v9fs_qid2ino - convert qid into inode number
  * @qid: qid to hash
  *
@@ -1602,7 +1096,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  *
  */
 
-static void
+void
 v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 {
 	char *s = nd_get_link(nd);
@@ -1646,94 +1140,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 }
 
 /**
- * v9fs_vfs_symlink_dotl - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See Also: 9P2000.L RFC for more information
- *
- */
-
-static int
-v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
-		const char *symname)
-{
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *dfid;
-	struct p9_fid *fid = NULL;
-	struct inode *inode;
-	struct p9_qid qid;
-	char *name;
-	int err;
-	gid_t gid;
-
-	name = (char *) dentry->d_name.name;
-	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
-			dir->i_ino, name, symname);
-	v9ses = v9fs_inode2v9ses(dir);
-
-	dfid = v9fs_fid_lookup(dentry->d_parent);
-	if (IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		return err;
-	}
-
-	gid = v9fs_get_fsgid_for_create(dir);
-
-	/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
-	err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
-
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
-		goto error;
-	}
-
-	if (v9ses->cache) {
-		/* Now walk from the parent so we can get an unopened fid. */
-		fid = p9_client_walk(dfid, 1, &name, 1);
-		if (IS_ERR(fid)) {
-			err = PTR_ERR(fid);
-			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
-				err);
-			fid = NULL;
-			goto error;
-		}
-
-		/* instantiate inode and assign the unopened fid to dentry */
-		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
-				err);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
-		d_instantiate(dentry, inode);
-		err = v9fs_fid_add(dentry, fid);
-		if (err < 0)
-			goto error;
-		fid = NULL;
-	} else {
-		/* Not in cached mode. No need to populate inode with stat */
-		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_dentry_operations);
-		d_instantiate(dentry, inode);
-	}
-
-error:
-	if (fid)
-		p9_client_clunk(fid);
-
-	return err;
-}
-
-/**
  * v9fs_vfs_symlink - helper function to create symlinks
  * @dir: directory inode containing symlink
  * @dentry: dentry for symlink
@@ -1792,77 +1198,6 @@ clunk_fid:
 }
 
 /**
- * v9fs_vfs_link_dotl - create a hardlink for dotl
- * @old_dentry: dentry for file to link to
- * @dir: inode destination for new link
- * @dentry: dentry for link
- *
- */
-
-static int
-v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
-		struct dentry *dentry)
-{
-	int err;
-	struct p9_fid *dfid, *oldfid;
-	char *name;
-	struct v9fs_session_info *v9ses;
-	struct dentry *dir_dentry;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
-			dir->i_ino, old_dentry->d_name.name,
-			dentry->d_name.name);
-
-	v9ses = v9fs_inode2v9ses(dir);
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
-	dfid = v9fs_fid_lookup(dir_dentry);
-	if (IS_ERR(dfid))
-		return PTR_ERR(dfid);
-
-	oldfid = v9fs_fid_lookup(old_dentry);
-	if (IS_ERR(oldfid))
-		return PTR_ERR(oldfid);
-
-	name = (char *) dentry->d_name.name;
-
-	err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
-
-	if (err < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
-		return err;
-	}
-
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-		/* Get the latest stat info from server. */
-		struct p9_fid *fid;
-		struct p9_stat_dotl *st;
-
-		fid = v9fs_fid_lookup(old_dentry);
-		if (IS_ERR(fid))
-			return PTR_ERR(fid);
-
-		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
-		if (IS_ERR(st))
-			return PTR_ERR(st);
-
-		v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
-		kfree(st);
-	} else {
-		/* Caching disabled. No need to get upto date stat info.
-		 * This dentry will be released immediately. So, just hold the
-		 * inode
-		 */
-		ihold(old_dentry->d_inode);
-	}
-
-	d_set_d_op(dentry, old_dentry->d_op);
-	d_instantiate(dentry, old_dentry->d_inode);
-
-	return err;
-}
-
-/**
  * v9fs_vfs_mknod - create a special file
  * @dir: inode destination for new link
  * @dentry: dentry for file
@@ -1907,160 +1242,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	return retval;
 }
 
-/**
- * v9fs_vfs_mknod_dotl - create a special file
- * @dir: inode destination for new link
- * @dentry: dentry for file
- * @mode: mode for creation
- * @rdev: device associated with special file
- *
- */
-static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
-		dev_t rdev)
-{
-	int err;
-	char *name;
-	mode_t mode;
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid = NULL, *dfid = NULL;
-	struct inode *inode;
-	gid_t gid;
-	struct p9_qid qid;
-	struct dentry *dir_dentry;
-	struct posix_acl *dacl = NULL, *pacl = NULL;
-
-	P9_DPRINTK(P9_DEBUG_VFS,
-		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
-		dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
-
-	if (!new_valid_dev(rdev))
-		return -EINVAL;
-
-	v9ses = v9fs_inode2v9ses(dir);
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
-	dfid = v9fs_fid_lookup(dir_dentry);
-	if (IS_ERR(dfid)) {
-		err = PTR_ERR(dfid);
-		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-		dfid = NULL;
-		goto error;
-	}
-
-	gid = v9fs_get_fsgid_for_create(dir);
-	mode = omode;
-	/* Update mode based on ACL value */
-	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
-	if (err) {
-		P9_DPRINTK(P9_DEBUG_VFS,
-			"Failed to get acl values in mknod %d\n", err);
-		goto error;
-	}
-	name = (char *) dentry->d_name.name;
-
-	err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
-	if (err < 0)
-		goto error;
-
-	/* instantiate inode and assign the unopened fid to the dentry */
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-		fid = p9_client_walk(dfid, 1, &name, 1);
-		if (IS_ERR(fid)) {
-			err = PTR_ERR(fid);
-			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
-				err);
-			fid = NULL;
-			goto error;
-		}
-
-		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
-				err);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
-		d_instantiate(dentry, inode);
-		err = v9fs_fid_add(dentry, fid);
-		if (err < 0)
-			goto error;
-		fid = NULL;
-	} else {
-		/*
-		 * Not in cached mode. No need to populate inode with stat.
-		 * socket syscall returns a fd, so we need instantiate
-		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
-		if (IS_ERR(inode)) {
-			err = PTR_ERR(inode);
-			goto error;
-		}
-		d_set_d_op(dentry, &v9fs_dentry_operations);
-		d_instantiate(dentry, inode);
-	}
-	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
-error:
-	if (fid)
-		p9_client_clunk(fid);
-	return err;
-}
-
-static int
-v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
-{
-	int retval;
-	struct p9_fid *fid;
-	char *target = NULL;
-
-	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
-	retval = -EPERM;
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid))
-		return PTR_ERR(fid);
-
-	retval = p9_client_readlink(fid, &target);
-	if (retval < 0)
-		return retval;
-
-	strncpy(buffer, target, buflen);
-	P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
-
-	retval = strnlen(buffer, buflen);
-	return retval;
-}
-
-/**
- * v9fs_vfs_follow_link_dotl - follow a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- *
- */
-
-static void *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
-{
-	int len = 0;
-	char *link = __getname();
-
-	P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
-
-	if (!link)
-		link = ERR_PTR(-ENOMEM);
-	else {
-		len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
-		if (len < 0) {
-			__putname(link);
-			link = ERR_PTR(len);
-		} else
-			link[min(len, PATH_MAX-1)] = 0;
-	}
-	nd_set_link(nd, link);
-
-	return NULL;
-}
-
 static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
@@ -2075,25 +1256,6 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.setattr = v9fs_vfs_setattr,
 };
 
-static const struct inode_operations v9fs_dir_inode_operations_dotl = {
-	.create = v9fs_vfs_create_dotl,
-	.lookup = v9fs_vfs_lookup,
-	.link = v9fs_vfs_link_dotl,
-	.symlink = v9fs_vfs_symlink_dotl,
-	.unlink = v9fs_vfs_unlink,
-	.mkdir = v9fs_vfs_mkdir_dotl,
-	.rmdir = v9fs_vfs_rmdir,
-	.mknod = v9fs_vfs_mknod_dotl,
-	.rename = v9fs_vfs_rename,
-	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr_dotl,
-	.setxattr = generic_setxattr,
-	.getxattr = generic_getxattr,
-	.removexattr = generic_removexattr,
-	.listxattr = v9fs_listxattr,
-	.check_acl = v9fs_check_acl,
-};
-
 static const struct inode_operations v9fs_dir_inode_operations = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
@@ -2111,16 +1273,6 @@ static const struct inode_operations v9fs_file_inode_operations = {
 	.setattr = v9fs_vfs_setattr,
 };
 
-static const struct inode_operations v9fs_file_inode_operations_dotl = {
-	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr_dotl,
-	.setxattr = generic_setxattr,
-	.getxattr = generic_getxattr,
-	.removexattr = generic_removexattr,
-	.listxattr = v9fs_listxattr,
-	.check_acl = v9fs_check_acl,
-};
-
 static const struct inode_operations v9fs_symlink_inode_operations = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
@@ -2129,14 +1281,3 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
 	.setattr = v9fs_vfs_setattr,
 };
 
-static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
-	.readlink = v9fs_vfs_readlink_dotl,
-	.follow_link = v9fs_vfs_follow_link_dotl,
-	.put_link = v9fs_vfs_put_link,
-	.getattr = v9fs_vfs_getattr_dotl,
-	.setattr = v9fs_vfs_setattr_dotl,
-	.setxattr = generic_setxattr,
-	.getxattr = generic_getxattr,
-	.removexattr = generic_removexattr,
-	.listxattr = v9fs_listxattr,
-};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
new file mode 100644
index 000000000000..fe3ffa9aace4
--- /dev/null
+++ b/fs/9p/vfs_inode_dotl.c
@@ -0,0 +1,824 @@
1/*
2 * linux/fs/9p/vfs_inode_dotl.c
3 *
4 * This file contains vfs inode ops for the 9P2000.L protocol.
5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to:
20 * Free Software Foundation
21 * 51 Franklin Street, Fifth Floor
22 * Boston, MA 02111-1301 USA
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/file.h>
30#include <linux/pagemap.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/inet.h>
34#include <linux/namei.h>
35#include <linux/idr.h>
36#include <linux/sched.h>
37#include <linux/slab.h>
38#include <linux/xattr.h>
39#include <linux/posix_acl.h>
40#include <net/9p/9p.h>
41#include <net/9p/client.h>
42
43#include "v9fs.h"
44#include "v9fs_vfs.h"
45#include "fid.h"
46#include "cache.h"
47#include "xattr.h"
48#include "acl.h"
49
50static int
51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
52 dev_t rdev);
53
54/**
55 * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
56 * new file system object. This checks the S_ISGID to determine the owning
57 * group of the new file system object.
58 */
59
60static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
61{
62 BUG_ON(dir_inode == NULL);
63
64 if (dir_inode->i_mode & S_ISGID) {
65 /* set_gid bit is set.*/
66 return dir_inode->i_gid;
67 }
68 return current_fsgid();
69}
70
71/**
72 * v9fs_dentry_from_dir_inode - helper function to get the dentry from
73 * dir inode.
74 *
75 */
76
77static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
78{
79 struct dentry *dentry;
80
81 spin_lock(&inode->i_lock);
82 /* Directory should have only one entry. */
83 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
84 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
85 spin_unlock(&inode->i_lock);
86 return dentry;
87}
88
89struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb)
92{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st;
96
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st))
99 return ERR_CAST(st);
100
101 ret = v9fs_get_inode(sb, st->st_mode);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st);
119 return ret;
120error:
121 kfree(st);
122 return ERR_PTR(err);
123}
124
125/**
126 * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
127 * @dir: directory inode that is being created
128 * @dentry: dentry that is being deleted
129 * @mode: create permissions
130 * @nd: path information
131 *
132 */
133
134static int
135v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd)
137{
138 int err = 0;
139 char *name = NULL;
140 gid_t gid;
141 int flags;
142 mode_t mode;
143 struct v9fs_session_info *v9ses;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp;
147 struct p9_qid qid;
148 struct inode *inode;
149 struct posix_acl *pacl = NULL, *dacl = NULL;
150
151 v9ses = v9fs_inode2v9ses(dir);
152 if (nd && nd->flags & LOOKUP_OPEN)
153 flags = nd->intent.open.flags - 1;
154 else {
155 /*
156 * create call without LOOKUP_OPEN is due
157 * to mknod of regular files. So use mknod
158 * operation.
159 */
160 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
161 }
162
163 name = (char *) dentry->d_name.name;
164 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
165 "mode:0x%x\n", name, flags, omode);
166
167 dfid = v9fs_fid_lookup(dentry->d_parent);
168 if (IS_ERR(dfid)) {
169 err = PTR_ERR(dfid);
170 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
171 return err;
172 }
173
174 /* clone a fid to use for creation */
175 ofid = p9_client_walk(dfid, 0, NULL, 1);
176 if (IS_ERR(ofid)) {
177 err = PTR_ERR(ofid);
178 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
179 return err;
180 }
181
182 gid = v9fs_get_fsgid_for_create(dir);
183
184 mode = omode;
185 /* Update mode based on ACL value */
186 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
187 if (err) {
188 P9_DPRINTK(P9_DEBUG_VFS,
189 "Failed to get acl values in creat %d\n", err);
190 goto error;
191 }
192 err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
193 if (err < 0) {
194 P9_DPRINTK(P9_DEBUG_VFS,
195 "p9_client_open_dotl failed in creat %d\n",
196 err);
197 goto error;
198 }
199
200 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1);
202 if (IS_ERR(fid)) {
203 err = PTR_ERR(fid);
204 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
205 fid = NULL;
206 goto error;
207 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
212 goto error;
213 }
214 d_instantiate(dentry, inode);
215 err = v9fs_fid_add(dentry, fid);
216 if (err < 0)
217 goto error;
218
219 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl);
221
222 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) {
225 p9_client_clunk(ofid);
226 return PTR_ERR(filp);
227 }
228 filp->private_data = ofid;
229 return 0;
230
231error:
232 if (ofid)
233 p9_client_clunk(ofid);
234 if (fid)
235 p9_client_clunk(fid);
236 return err;
237}
238
239/**
240 * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
241 * @dir: inode that is being unlinked
242 * @dentry: dentry that is being unlinked
243 * @mode: mode for new directory
244 *
245 */
246
247static int v9fs_vfs_mkdir_dotl(struct inode *dir,
248 struct dentry *dentry, int omode)
249{
250 int err;
251 struct v9fs_session_info *v9ses;
252 struct p9_fid *fid = NULL, *dfid = NULL;
253 gid_t gid;
254 char *name;
255 mode_t mode;
256 struct inode *inode;
257 struct p9_qid qid;
258 struct dentry *dir_dentry;
259 struct posix_acl *dacl = NULL, *pacl = NULL;
260
261 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
262 err = 0;
263 v9ses = v9fs_inode2v9ses(dir);
264
265 omode |= S_IFDIR;
266 if (dir->i_mode & S_ISGID)
267 omode |= S_ISGID;
268
269 dir_dentry = v9fs_dentry_from_dir_inode(dir);
270 dfid = v9fs_fid_lookup(dir_dentry);
271 if (IS_ERR(dfid)) {
272 err = PTR_ERR(dfid);
273 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
274 dfid = NULL;
275 goto error;
276 }
277
278 gid = v9fs_get_fsgid_for_create(dir);
279 mode = omode;
280 /* Update mode based on ACL value */
281 err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
282 if (err) {
283 P9_DPRINTK(P9_DEBUG_VFS,
284 "Failed to get acl values in mkdir %d\n", err);
285 goto error;
286 }
287 name = (char *) dentry->d_name.name;
288 err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
289 if (err < 0)
290 goto error;
291
292 /* instantiate inode and assign the unopened fid to the dentry */
293 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
294 fid = p9_client_walk(dfid, 1, &name, 1);
295 if (IS_ERR(fid)) {
296 err = PTR_ERR(fid);
297 P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
298 err);
299 fid = NULL;
300 goto error;
301 }
302
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
307 err);
308 goto error;
309 }
310 d_instantiate(dentry, inode);
311 err = v9fs_fid_add(dentry, fid);
312 if (err < 0)
313 goto error;
314 fid = NULL;
315 } else {
316 /*
317 * Not in cached mode. No need to populate
318 * inode with stat. We need to get an inode
319 * so that we can set the acl with dentry
320 */
321 inode = v9fs_get_inode(dir->i_sb, mode);
322 if (IS_ERR(inode)) {
323 err = PTR_ERR(inode);
324 goto error;
325 }
326 d_instantiate(dentry, inode);
327 }
328 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl);
330
331error:
332 if (fid)
333 p9_client_clunk(fid);
334 return err;
335}
336
337static int
338v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
339 struct kstat *stat)
340{
341 int err;
342 struct v9fs_session_info *v9ses;
343 struct p9_fid *fid;
344 struct p9_stat_dotl *st;
345
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
350 return simple_getattr(mnt, dentry, stat);
351
352 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid))
354 return PTR_ERR(fid);
355
356 /* Ask for all the fields in stat structure. Server will return
357 * whatever it supports
358 */
359
360 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
361 if (IS_ERR(st))
362 return PTR_ERR(st);
363
364 v9fs_stat2inode_dotl(st, dentry->d_inode);
365 generic_fillattr(dentry->d_inode, stat);
366 /* Change block size to what the server returned */
367 stat->blksize = st->st_blksize;
368
369 kfree(st);
370 return 0;
371}
372
373/**
374 * v9fs_vfs_setattr_dotl - set file metadata
375 * @dentry: file whose metadata to set
376 * @iattr: metadata assignment structure
377 *
378 */
379
380int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
381{
382 int retval;
383 struct v9fs_session_info *v9ses;
384 struct p9_fid *fid;
385 struct p9_iattr_dotl p9attr;
386
387 P9_DPRINTK(P9_DEBUG_VFS, "\n");
388
389 retval = inode_change_ok(dentry->d_inode, iattr);
390 if (retval)
391 return retval;
392
393 p9attr.valid = iattr->ia_valid;
394 p9attr.mode = iattr->ia_mode;
395 p9attr.uid = iattr->ia_uid;
396 p9attr.gid = iattr->ia_gid;
397 p9attr.size = iattr->ia_size;
398 p9attr.atime_sec = iattr->ia_atime.tv_sec;
399 p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
400 p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
401 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
402
403 retval = -EPERM;
404 v9ses = v9fs_inode2v9ses(dentry->d_inode);
405 fid = v9fs_fid_lookup(dentry);
406 if (IS_ERR(fid))
407 return PTR_ERR(fid);
408
409 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0)
411 return retval;
412
413 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) {
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
416 if (retval)
417 return retval;
418 }
419
420 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode);
422 if (iattr->ia_valid & ATTR_MODE) {
423 /* We also want to update ACL when we update mode bits */
424 retval = v9fs_acl_chmod(dentry);
425 if (retval < 0)
426 return retval;
427 }
428 return 0;
429}
430
431/**
432 * v9fs_stat2inode_dotl - populate an inode structure with stat info
433 * @stat: stat structure
434 * @inode: inode to populate
435 * @sb: superblock of filesystem
436 *
437 */
438
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
		inode->i_atime.tv_sec = stat->st_atime_sec;
		inode->i_atime.tv_nsec = stat->st_atime_nsec;
		inode->i_mtime.tv_sec = stat->st_mtime_sec;
		inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
		inode->i_ctime.tv_sec = stat->st_ctime_sec;
		inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
		inode->i_uid = stat->st_uid;
		inode->i_gid = stat->st_gid;
		inode->i_nlink = stat->st_nlink;
		inode->i_mode = stat->st_mode;
		inode->i_rdev = new_decode_dev(stat->st_rdev);

		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
			init_special_inode(inode, inode->i_mode, inode->i_rdev);

		i_size_write(inode, stat->st_size);
		inode->i_blocks = stat->st_blocks;
	} else {
		if (stat->st_result_mask & P9_STATS_ATIME) {
			inode->i_atime.tv_sec = stat->st_atime_sec;
			inode->i_atime.tv_nsec = stat->st_atime_nsec;
		}
		if (stat->st_result_mask & P9_STATS_MTIME) {
			inode->i_mtime.tv_sec = stat->st_mtime_sec;
			inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
		}
		if (stat->st_result_mask & P9_STATS_CTIME) {
			inode->i_ctime.tv_sec = stat->st_ctime_sec;
			inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
		}
		if (stat->st_result_mask & P9_STATS_UID)
			inode->i_uid = stat->st_uid;
		if (stat->st_result_mask & P9_STATS_GID)
			inode->i_gid = stat->st_gid;
		if (stat->st_result_mask & P9_STATS_NLINK)
			inode->i_nlink = stat->st_nlink;
		if (stat->st_result_mask & P9_STATS_MODE) {
			inode->i_mode = stat->st_mode;
			if ((S_ISBLK(inode->i_mode)) ||
						(S_ISCHR(inode->i_mode)))
				init_special_inode(inode, inode->i_mode,
							inode->i_rdev);
		}
		if (stat->st_result_mask & P9_STATS_RDEV)
			inode->i_rdev = new_decode_dev(stat->st_rdev);
		if (stat->st_result_mask & P9_STATS_SIZE)
			i_size_write(inode, stat->st_size);
		if (stat->st_result_mask & P9_STATS_BLOCKS)
			inode->i_blocks = stat->st_blocks;
	}
	if (stat->st_result_mask & P9_STATS_GEN)
		inode->i_generation = stat->st_gen;

	/*
	 * Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
	 * because the inode structure does not have fields for them.
	 */
}

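/**
 * v9fs_vfs_symlink_dotl - create a symbolic link
 * @dir: inode destination for new symlink
 * @dentry: dentry for symlink
 * @symname: target of the symlink
 *
 */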
static int
v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
		const char *symname)
{
	struct v9fs_session_info *v9ses;
	struct p9_fid *dfid;
	struct p9_fid *fid = NULL;
	struct inode *inode;
	struct p9_qid qid;
	char *name;
	int err;
	gid_t gid;

	name = (char *) dentry->d_name.name;
	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
			dir->i_ino, name, symname);
	v9ses = v9fs_inode2v9ses(dir);

	dfid = v9fs_fid_lookup(dentry->d_parent);
	if (IS_ERR(dfid)) {
		err = PTR_ERR(dfid);
		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
		return err;
	}

	gid = v9fs_get_fsgid_for_create(dir);

	/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
	err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);

	if (err < 0) {
		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
		goto error;
	}

	if (v9ses->cache) {
		/* Now walk from the parent so we can get an unopened fid. */
		fid = p9_client_walk(dfid, 1, &name, 1);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
					err);
			fid = NULL;
			goto error;
		}

		/* instantiate inode and assign the unopened fid to dentry */
		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
					err);
			goto error;
		}
		d_instantiate(dentry, inode);
		err = v9fs_fid_add(dentry, fid);
		if (err < 0)
			goto error;
		fid = NULL;
	} else {
		/* Not in cached mode. No need to populate inode with stat */
		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			goto error;
		}
		d_instantiate(dentry, inode);
	}

error:
	if (fid)
		p9_client_clunk(fid);

	return err;
}

/**
 * v9fs_vfs_link_dotl - create a hardlink for dotl
 * @old_dentry: dentry for file to link to
 * @dir: inode destination for new link
 * @dentry: dentry for link
 *
 */

static int
v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
		struct dentry *dentry)
{
	int err;
	struct p9_fid *dfid, *oldfid;
	char *name;
	struct v9fs_session_info *v9ses;
	struct dentry *dir_dentry;

	P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
			dir->i_ino, old_dentry->d_name.name,
			dentry->d_name.name);

	v9ses = v9fs_inode2v9ses(dir);
	dir_dentry = v9fs_dentry_from_dir_inode(dir);
	dfid = v9fs_fid_lookup(dir_dentry);
	if (IS_ERR(dfid))
		return PTR_ERR(dfid);

	oldfid = v9fs_fid_lookup(old_dentry);
	if (IS_ERR(oldfid))
		return PTR_ERR(oldfid);

	name = (char *) dentry->d_name.name;

	err = p9_client_link(dfid, oldfid, name);

	if (err < 0) {
		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
		return err;
	}

	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
		/* Get the latest stat info from server. */
		struct p9_fid *fid;
		struct p9_stat_dotl *st;

		fid = v9fs_fid_lookup(old_dentry);
		if (IS_ERR(fid))
			return PTR_ERR(fid);

		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
		if (IS_ERR(st))
			return PTR_ERR(st);

		v9fs_stat2inode_dotl(st, old_dentry->d_inode);

		kfree(st);
	} else {
		/*
		 * Caching disabled. No need to get up-to-date stat info.
		 * This dentry will be released immediately, so just hold
		 * the inode.
		 */
		ihold(old_dentry->d_inode);
	}
	d_instantiate(dentry, old_dentry->d_inode);

	return err;
}

/**
 * v9fs_vfs_mknod_dotl - create a special file
 * @dir: inode destination for new special file
 * @dentry: dentry for file
 * @omode: mode for creation
 * @rdev: device associated with special file
 *
 */
static int
v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
		dev_t rdev)
{
	int err;
	char *name;
	mode_t mode;
	struct v9fs_session_info *v9ses;
	struct p9_fid *fid = NULL, *dfid = NULL;
	struct inode *inode;
	gid_t gid;
	struct p9_qid qid;
	struct dentry *dir_dentry;
	struct posix_acl *dacl = NULL, *pacl = NULL;

	P9_DPRINTK(P9_DEBUG_VFS,
		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
		dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));

	if (!new_valid_dev(rdev))
		return -EINVAL;

	v9ses = v9fs_inode2v9ses(dir);
	dir_dentry = v9fs_dentry_from_dir_inode(dir);
	dfid = v9fs_fid_lookup(dir_dentry);
	if (IS_ERR(dfid)) {
		err = PTR_ERR(dfid);
		P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
		dfid = NULL;
		goto error;
	}

	gid = v9fs_get_fsgid_for_create(dir);
	mode = omode;
	/* Update mode based on ACL value */
	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
	if (err) {
		P9_DPRINTK(P9_DEBUG_VFS,
			"Failed to get acl values in mknod %d\n", err);
		goto error;
	}
	name = (char *) dentry->d_name.name;

	err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
	if (err < 0)
		goto error;

	/* instantiate inode and assign the unopened fid to the dentry */
	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
		fid = p9_client_walk(dfid, 1, &name, 1);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
				err);
			fid = NULL;
			goto error;
		}

		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
				err);
			goto error;
		}
		d_instantiate(dentry, inode);
		err = v9fs_fid_add(dentry, fid);
		if (err < 0)
			goto error;
		fid = NULL;
	} else {
		/*
		 * Not in cached mode. No need to populate the inode with
		 * stat info; the socket syscall returns an fd, so we still
		 * need to instantiate the dentry.
		 */
		inode = v9fs_get_inode(dir->i_sb, mode);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			goto error;
		}
		d_instantiate(dentry, inode);
	}
	/* Now set the ACL based on the default value */
	v9fs_set_create_acl(dentry, dacl, pacl);
error:
	if (fid)
		p9_client_clunk(fid);
	return err;
}

/**
 * v9fs_vfs_follow_link_dotl - follow a symlink path
 * @dentry: dentry for symlink
 * @nd: nameidata
 *
 */

static void *
v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
{
	int retval;
	struct p9_fid *fid;
	char *link = __getname();
	char *target;

	P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);

	if (!link) {
		link = ERR_PTR(-ENOMEM);
		goto ndset;
	}
	fid = v9fs_fid_lookup(dentry);
	if (IS_ERR(fid)) {
		__putname(link);
		link = ERR_PTR(PTR_ERR(fid));
		goto ndset;
	}
	retval = p9_client_readlink(fid, &target);
	if (!retval) {
		strcpy(link, target);
		kfree(target);
		goto ndset;
	}
	__putname(link);
	link = ERR_PTR(retval);
ndset:
	nd_set_link(nd, link);
	return NULL;
}

const struct inode_operations v9fs_dir_inode_operations_dotl = {
	.create = v9fs_vfs_create_dotl,
	.lookup = v9fs_vfs_lookup,
	.link = v9fs_vfs_link_dotl,
	.symlink = v9fs_vfs_symlink_dotl,
	.unlink = v9fs_vfs_unlink,
	.mkdir = v9fs_vfs_mkdir_dotl,
	.rmdir = v9fs_vfs_rmdir,
	.mknod = v9fs_vfs_mknod_dotl,
	.rename = v9fs_vfs_rename,
	.getattr = v9fs_vfs_getattr_dotl,
	.setattr = v9fs_vfs_setattr_dotl,
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
	.removexattr = generic_removexattr,
	.listxattr = v9fs_listxattr,
	.check_acl = v9fs_check_acl,
};

const struct inode_operations v9fs_file_inode_operations_dotl = {
	.getattr = v9fs_vfs_getattr_dotl,
	.setattr = v9fs_vfs_setattr_dotl,
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
	.removexattr = generic_removexattr,
	.listxattr = v9fs_listxattr,
	.check_acl = v9fs_check_acl,
};

const struct inode_operations v9fs_symlink_inode_operations_dotl = {
	.readlink = generic_readlink,
	.follow_link = v9fs_vfs_follow_link_dotl,
	.put_link = v9fs_vfs_put_link,
	.getattr = v9fs_vfs_getattr_dotl,
	.setattr = v9fs_vfs_setattr_dotl,
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
	.removexattr = generic_removexattr,
	.listxattr = v9fs_listxattr,
};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 43ec7df84336..d288773871b3 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -133,7 +133,7 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
 				"p9_client_xattrcreate failed %d\n", retval);
 		goto error;
 	}
-	msize = fid->clnt->msize;;
+	msize = fid->clnt->msize;
 	while (value_len) {
 		if (value_len > (msize - P9_IOHDRSZ))
 			write_count = msize - P9_IOHDRSZ;
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 224d7bbd1fcc..e654dfd092c3 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -64,7 +64,9 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
 			   void *buffer, uint16_t maxbuf)
 {
 	const struct TCP_Server_Info *server = cookie_netfs_data;
-	const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
+	const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
+	const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
+	const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
 	struct cifs_server_key *key = buffer;
 	uint16_t key_len = sizeof(struct cifs_server_key);
 
@@ -76,16 +78,16 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
 	 */
 	switch (sa->sa_family) {
 	case AF_INET:
-		key->family = server->addr.sockAddr.sin_family;
-		key->port = server->addr.sockAddr.sin_port;
-		key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
+		key->family = sa->sa_family;
+		key->port = addr->sin_port;
+		key->addr[0].ipv4_addr = addr->sin_addr;
 		key_len += sizeof(key->addr[0].ipv4_addr);
 		break;
 
 	case AF_INET6:
-		key->family = server->addr.sockAddr6.sin6_family;
-		key->port = server->addr.sockAddr6.sin6_port;
-		key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
+		key->family = sa->sa_family;
+		key->port = addr6->sin6_port;
+		key->addr[0].ipv6_addr = addr6->sin6_addr;
 		key_len += sizeof(key->addr[0].ipv6_addr);
 		break;
 
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 103ab8b605b0..ede98300a8cd 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,29 +119,27 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 		    "Display Internal CIFS Data Structures for Debugging\n"
 		    "---------------------------------------------------\n");
 	seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
-	seq_printf(m, "Features: ");
+	seq_printf(m, "Features:");
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	seq_printf(m, "dfs");
-	seq_putc(m, ' ');
+	seq_printf(m, " dfs");
 #endif
 #ifdef CONFIG_CIFS_FSCACHE
-	seq_printf(m, "fscache");
-	seq_putc(m, ' ');
+	seq_printf(m, " fscache");
 #endif
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-	seq_printf(m, "lanman");
-	seq_putc(m, ' ');
+	seq_printf(m, " lanman");
 #endif
 #ifdef CONFIG_CIFS_POSIX
-	seq_printf(m, "posix");
-	seq_putc(m, ' ');
+	seq_printf(m, " posix");
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-	seq_printf(m, "spnego");
-	seq_putc(m, ' ');
+	seq_printf(m, " spnego");
 #endif
 #ifdef CONFIG_CIFS_XATTR
-	seq_printf(m, "xattr");
+	seq_printf(m, " xattr");
+#endif
+#ifdef CONFIG_CIFS_ACL
+	seq_printf(m, " acl");
 #endif
 	seq_putc(m, '\n');
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 87044906cd1f..4dfba8283165 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -98,6 +98,8 @@ struct key *
 cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 {
 	struct TCP_Server_Info *server = sesInfo->server;
+	struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
+	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
 	char *description, *dp;
 	size_t desc_len;
 	struct key *spnego_key;
@@ -127,10 +129,10 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	dp = description + strlen(description);
 
 	/* add the server address */
-	if (server->addr.sockAddr.sin_family == AF_INET)
-		sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
-	else if (server->addr.sockAddr.sin_family == AF_INET6)
-		sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr);
+	if (server->dstaddr.ss_family == AF_INET)
+		sprintf(dp, "ip4=%pI4", &sa->sin_addr);
+	else if (server->dstaddr.ss_family == AF_INET6)
+		sprintf(dp, "ip6=%pI6", &sa6->sin6_addr);
 	else
 		goto out;
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index f856732161ab..66f3d50d0676 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -72,6 +72,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
 	return 0;
 }
 
+/* must be called with server->srv_mutex held */
 int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 		  __u32 *pexpected_response_sequence_number)
 {
@@ -84,14 +85,12 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 	if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
 		return rc;
 
-	spin_lock(&GlobalMid_Lock);
 	cifs_pdu->Signature.Sequence.SequenceNumber =
 			cpu_to_le32(server->sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
 	*pexpected_response_sequence_number = server->sequence_number++;
 	server->sequence_number++;
-	spin_unlock(&GlobalMid_Lock);
 
 	rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
 	if (rc)
@@ -149,6 +148,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 	return rc;
 }
 
+/* must be called with server->srv_mutex held */
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 		   __u32 *pexpected_response_sequence_number)
 {
@@ -162,14 +162,12 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
 		return rc;
 
-	spin_lock(&GlobalMid_Lock);
 	cifs_pdu->Signature.Sequence.SequenceNumber =
 			cpu_to_le32(server->sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
 	*pexpected_response_sequence_number = server->sequence_number++;
 	server->sequence_number++;
-	spin_unlock(&GlobalMid_Lock);
 
 	rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
 	if (rc)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8e21e0fe65d5..5e7075d5f139 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -329,6 +329,8 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->invalid_mapping = false;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
 	cifs_inode->server_eof = 0;
+	cifs_inode->uniqueid = 0;
+	cifs_inode->createtime = 0;
 
 	/* Can not set i_flags here - they get immediately overwritten
 	   to zero by the VFS */
@@ -361,18 +363,19 @@ cifs_evict_inode(struct inode *inode)
 static void
 cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
 {
+	struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
+	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
+
 	seq_printf(s, ",addr=");
 
-	switch (server->addr.sockAddr.sin_family) {
+	switch (server->dstaddr.ss_family) {
 	case AF_INET:
-		seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
+		seq_printf(s, "%pI4", &sa->sin_addr.s_addr);
 		break;
 	case AF_INET6:
-		seq_printf(s, "%pI6",
-			   &server->addr.sockAddr6.sin6_addr.s6_addr);
-		if (server->addr.sockAddr6.sin6_scope_id)
-			seq_printf(s, "%%%u",
-				   server->addr.sockAddr6.sin6_scope_id);
+		seq_printf(s, "%pI6", &sa6->sin6_addr.s6_addr);
+		if (sa6->sin6_scope_id)
+			seq_printf(s, "%%%u", sa6->sin6_scope_id);
 		break;
 	default:
 		seq_printf(s, "(unknown)");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7136c0c3e2f9..606ca8bb7102 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -163,10 +163,7 @@ struct TCP_Server_Info {
 	char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
 	char *hostname; /* hostname portion of UNC string */
 	struct socket *ssocket;
-	union {
-		struct sockaddr_in sockAddr;
-		struct sockaddr_in6 sockAddr6;
-	} addr;
+	struct sockaddr_storage dstaddr;
 	struct sockaddr_storage srcaddr; /* locally bind to this IP */
 	wait_queue_head_t response_q;
 	wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
@@ -210,7 +207,7 @@ struct TCP_Server_Info {
 	char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
-	__u32 sequence_number; /* needed for CIFS PDU signature */
+	__u32 sequence_number; /* for signing, protected by srv_mutex */
 	struct session_key session_key;
 	unsigned long lstrp; /* when we got last response from this server */
 	u16 dialect; /* dialect index that server chose */
@@ -456,6 +453,7 @@ struct cifsInodeInfo {
 	bool invalid_mapping:1;		/* pagecache is invalid */
 	u64 server_eof;			/* current file size on server */
 	u64 uniqueid;			/* server inode number */
+	u64 createtime;			/* creation time on server */
#ifdef CONFIG_CIFS_FSCACHE
 	struct fscache_cookie *fscache;
#endif
@@ -576,6 +574,7 @@ struct cifs_fattr {
 	u64		cf_uniqueid;
 	u64		cf_eof;
 	u64		cf_bytes;
+	u64		cf_createtime;
 	uid_t		cf_uid;
 	gid_t		cf_gid;
 	umode_t		cf_mode;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 67acfb3acad2..2f6795e524d3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -401,15 +401,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
 		cFYI(1, "Kerberos only mechanism, enable extended security");
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
-	}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
+	} else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
 		cFYI(1, "NTLMSSP only mechanism, enable extended security");
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
 	}
-#endif
 
 	count = 0;
 	for (i = 0; i < CIFS_NUM_PROT; i++) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index cc1a8604a790..a65d311d163a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -64,8 +64,8 @@ struct smb_vol {
 	char *UNC;
 	char *UNCip;
 	char *iocharset;  /* local code page for mapping to and from Unicode */
-	char source_rfc1001_name[16]; /* netbios name of client */
-	char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
+	char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
+	char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
 	uid_t cred_uid;
 	uid_t linux_uid;
 	gid_t linux_gid;
@@ -115,8 +115,8 @@ struct smb_vol {
#define TLINK_ERROR_EXPIRE	(1 * HZ)
#define TLINK_IDLE_EXPIRE	(600 * HZ)
 
-static int ipv4_connect(struct TCP_Server_Info *server);
-static int ipv6_connect(struct TCP_Server_Info *server);
+static int ip_connect(struct TCP_Server_Info *server);
+static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
 
@@ -200,10 +200,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	while ((server->tcpStatus != CifsExiting) &&
 	       (server->tcpStatus != CifsGood)) {
 		try_to_freeze();
-		if (server->addr.sockAddr6.sin6_family == AF_INET6)
-			rc = ipv6_connect(server);
-		else
-			rc = ipv4_connect(server);
+
+		/* we should try only the port we connected to before */
+		rc = generic_ip_connect(server);
 		if (rc) {
 			cFYI(1, "reconnect error %d", rc);
 			msleep(3000);
@@ -477,7 +476,7 @@ incomplete_rcv:
 			 * initialize frame)
 			 */
 			cifs_set_port((struct sockaddr *)
-					&server->addr.sockAddr, CIFS_PORT);
+					&server->dstaddr, CIFS_PORT);
 			cifs_reconnect(server);
 			csocket = server->ssocket;
 			wake_up(&server->response_q);
@@ -817,11 +816,11 @@ cifs_parse_mount_options(char *options, const char *devname,
 	 * informational, only used for servers that do not support
 	 * port 445 and it can be overridden at mount time
 	 */
-	memset(vol->source_rfc1001_name, 0x20, 15);
-	for (i = 0; i < strnlen(nodename, 15); i++)
+	memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
+	for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
 		vol->source_rfc1001_name[i] = toupper(nodename[i]);
 
-	vol->source_rfc1001_name[15] = 0;
+	vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
 	/* null target name indicates to use *SMBSERVR default called name
 	   if we end up sending RFC1001 session initialize */
 	vol->target_rfc1001_name[0] = 0;
@@ -985,13 +984,11 @@ cifs_parse_mount_options(char *options, const char *devname,
 				return 1;
 			} else if (strnicmp(value, "krb5", 4) == 0) {
 				vol->secFlg |= CIFSSEC_MAY_KRB5;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 			} else if (strnicmp(value, "ntlmsspi", 8) == 0) {
 				vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
 					CIFSSEC_MUST_SIGN;
 			} else if (strnicmp(value, "ntlmssp", 7) == 0) {
 				vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
-#endif
 			} else if (strnicmp(value, "ntlmv2i", 7) == 0) {
 				vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
 					CIFSSEC_MUST_SIGN;
@@ -1168,22 +1165,22 @@ cifs_parse_mount_options(char *options, const char *devname,
 			if (!value || !*value || (*value == ' ')) {
 				cFYI(1, "invalid (empty) netbiosname");
 			} else {
-				memset(vol->source_rfc1001_name, 0x20, 15);
-				for (i = 0; i < 15; i++) {
-				/* BB are there cases in which a comma can be
-				   valid in this workstation netbios name (and need
-				   special handling)? */
-
-				/* We do not uppercase netbiosname for user */
+				memset(vol->source_rfc1001_name, 0x20,
+					RFC1001_NAME_LEN);
+				/*
+				 * FIXME: are there cases in which a comma can
+				 * be valid in workstation netbios name (and
+				 * need special handling)?
+				 */
+				for (i = 0; i < RFC1001_NAME_LEN; i++) {
+					/* don't ucase netbiosname for user */
 					if (value[i] == 0)
 						break;
-					else
-						vol->source_rfc1001_name[i] =
-							value[i];
+					vol->source_rfc1001_name[i] = value[i];
 				}
 				/* The string has 16th byte zero still from
 				   set at top of the function  */
-				if ((i == 15) && (value[i] != 0))
+				if (i == RFC1001_NAME_LEN && value[i] != 0)
 					printk(KERN_WARNING "CIFS: netbiosname"
 						" longer than 15 truncated.\n");
 			}
@@ -1193,7 +1190,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 				cFYI(1, "empty server netbiosname specified");
 			} else {
 				/* last byte, type, is 0x20 for servr type */
-				memset(vol->target_rfc1001_name, 0x20, 16);
+				memset(vol->target_rfc1001_name, 0x20,
+					RFC1001_NAME_LEN_WITH_NULL);
 
 				for (i = 0; i < 15; i++) {
 				/* BB are there cases in which a comma can be
@@ -1210,7 +1208,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 				}
 				/* The string has 16th byte zero still from
 				   set at top of the function  */
-				if ((i == 15) && (value[i] != 0))
+				if (i == RFC1001_NAME_LEN && value[i] != 0)
 					printk(KERN_WARNING "CIFS: server net"
 						"biosname longer than 15 truncated.\n");
 			}
@@ -1341,10 +1339,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 			vol->no_psx_acl = 0;
 		} else if (strnicmp(data, "noacl", 5) == 0) {
 			vol->no_psx_acl = 1;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 		} else if (strnicmp(data, "locallease", 6) == 0) {
 			vol->local_lease = 1;
-#endif
 		} else if (strnicmp(data, "sign", 4) == 0) {
 			vol->secFlg |= CIFSSEC_MUST_SIGN;
 		} else if (strnicmp(data, "seal", 4) == 0) {
@@ -1454,35 +1450,71 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
 	}
 }
 
+/*
+ * If no port is specified in addr structure, we try to match with 445 port
+ * and if it fails - with 139 ports. It should be called only if address
+ * families of server and addr are equal.
+ */
+static bool
+match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
+{
+	unsigned short int port, *sport;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		sport = &((struct sockaddr_in *) &server->dstaddr)->sin_port;
+		port = ((struct sockaddr_in *) addr)->sin_port;
+		break;
+	case AF_INET6:
+		sport = &((struct sockaddr_in6 *) &server->dstaddr)->sin6_port;
+		port = ((struct sockaddr_in6 *) addr)->sin6_port;
+		break;
+	default:
+		WARN_ON(1);
+		return false;
+	}
+
+	if (!port) {
+		port = htons(CIFS_PORT);
+		if (port == *sport)
+			return true;
+
+		port = htons(RFC1001_PORT);
+	}
+
+	return port == *sport;
+}
 
 static bool
 match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
 	      struct sockaddr *srcaddr)
 {
-	struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
-	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
-
 	switch (addr->sa_family) {
-	case AF_INET:
-		if (addr4->sin_addr.s_addr !=
-		    server->addr.sockAddr.sin_addr.s_addr)
-			return false;
-		if (addr4->sin_port &&
-		    addr4->sin_port != server->addr.sockAddr.sin_port)
-			return false;
-		break;
-	case AF_INET6:
+	case AF_INET: {
+		struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+		struct sockaddr_in *srv_addr4 =
+					(struct sockaddr_in *)&server->dstaddr;
+
+		if (addr4->sin_addr.s_addr != srv_addr4->sin_addr.s_addr)
+			return false;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+		struct sockaddr_in6 *srv_addr6 =
+					(struct sockaddr_in6 *)&server->dstaddr;
+
 		if (!ipv6_addr_equal(&addr6->sin6_addr,
-				     &server->addr.sockAddr6.sin6_addr))
+				     &srv_addr6->sin6_addr))
 			return false;
-		if (addr6->sin6_scope_id !=
-		    server->addr.sockAddr6.sin6_scope_id)
-			return false;
-		if (addr6->sin6_port &&
-		    addr6->sin6_port != server->addr.sockAddr6.sin6_port)
+		if (addr6->sin6_scope_id != srv_addr6->sin6_scope_id)
 			return false;
 		break;
 	}
+	default:
+		WARN_ON(1);
+		return false; /* don't expect to be here */
+	}
 
 	if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr))
 		return false;
@@ -1549,6 +1581,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
 				  (struct sockaddr *)&vol->srcaddr))
 			continue;
 
+		if (!match_port(server, addr))
+			continue;
+
 		if (!match_security(server, vol))
 			continue;
 
@@ -1681,14 +1716,13 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		cFYI(1, "attempting ipv6 connect");
 		/* BB should we allow ipv6 on port 139? */
 		/* other OS never observed in Wild doing 139 with v6 */
-		memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
-			sizeof(struct sockaddr_in6));
-		rc = ipv6_connect(tcp_ses);
-	} else {
-		memcpy(&tcp_ses->addr.sockAddr, sin_server,
-			sizeof(struct sockaddr_in));
-		rc = ipv4_connect(tcp_ses);
-	}
+		memcpy(&tcp_ses->dstaddr, sin_server6,
+		       sizeof(struct sockaddr_in6));
+	} else
+		memcpy(&tcp_ses->dstaddr, sin_server,
+		       sizeof(struct sockaddr_in));
+
+	rc = ip_connect(tcp_ses);
 	if (rc < 0) {
 		cERROR(1, "Error connecting to socket. Aborting operation");
 		goto out_err_crypto_release;
@@ -1793,6 +1827,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 {
 	int rc = -ENOMEM, xid;
 	struct cifsSesInfo *ses;
+	struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
+	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
 
 	xid = GetXid();
 
@@ -1836,12 +1872,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 
 	/* new SMB session uses our server ref */
 	ses->server = server;
-	if (server->addr.sockAddr6.sin6_family == AF_INET6)
-		sprintf(ses->serverName, "%pI6",
-			&server->addr.sockAddr6.sin6_addr);
+	if (server->dstaddr.ss_family == AF_INET6)
+		sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
 	else
-		sprintf(ses->serverName, "%pI4",
-			&server->addr.sockAddr.sin_addr.s_addr);
+		sprintf(ses->serverName, "%pI4", &addr->sin_addr);
 
 	if (volume_info->username)
 		strncpy(ses->userName, volume_info->username,
@@ -2136,19 +2170,106 @@ bind_socket(struct TCP_Server_Info *server)
 }
 
 static int
-ipv4_connect(struct TCP_Server_Info *server)
+ip_rfc1001_connect(struct TCP_Server_Info *server)
+{
+	int rc = 0;
+	/*
+	 * some servers require RFC1001 sessinit before sending
+	 * negprot - BB check reconnection in case where second
+	 * sessinit is sent but no second negprot
+	 */
+	struct rfc1002_session_packet *ses_init_buf;
+	struct smb_hdr *smb_buf;
+	ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
+			       GFP_KERNEL);
+	if (ses_init_buf) {
+		ses_init_buf->trailer.session_req.called_len = 32;
+
+		if (server->server_RFC1001_name &&
+		    server->server_RFC1001_name[0] != 0)
+			rfc1002mangle(ses_init_buf->trailer.
+				      session_req.called_name,
+				      server->server_RFC1001_name,
+				      RFC1001_NAME_LEN_WITH_NULL);
+		else
+			rfc1002mangle(ses_init_buf->trailer.
+				      session_req.called_name,
+				      DEFAULT_CIFS_CALLED_NAME,
+				      RFC1001_NAME_LEN_WITH_NULL);
+
+		ses_init_buf->trailer.session_req.calling_len = 32;
+
+		/*
+		 * calling name ends in null (byte 16) from old smb
+		 * convention.
+		 */
+		if (server->workstation_RFC1001_name &&
+		    server->workstation_RFC1001_name[0] != 0)
+			rfc1002mangle(ses_init_buf->trailer.
+				      session_req.calling_name,
+				      server->workstation_RFC1001_name,
+				      RFC1001_NAME_LEN_WITH_NULL);
+		else
+			rfc1002mangle(ses_init_buf->trailer.
+				      session_req.calling_name,
+				      "LINUX_CIFS_CLNT",
+				      RFC1001_NAME_LEN_WITH_NULL);
+
+		ses_init_buf->trailer.session_req.scope1 = 0;
+		ses_init_buf->trailer.session_req.scope2 = 0;
+		smb_buf = (struct smb_hdr *)ses_init_buf;
+
+		/* sizeof RFC1002_SESSION_REQUEST with no scope */
+		smb_buf->smb_buf_length = 0x81000044;
+		rc = smb_send(server, smb_buf, 0x44);
+		kfree(ses_init_buf);
+		/*
+		 * RFC1001 layer in at least one server
+		 * requires very short break before negprot
+		 * presumably because not expecting negprot
+		 * to follow so fast.  This is a simple
+		 * solution that works without
+		 * complicating the code and causes no
+		 * significant slowing down on mount
+		 * for everyone else
+		 */
+		usleep_range(1000, 2000);
+	}
+	/*
+	 * else the negprot may still work without this
+	 * even though malloc failed
+	 */
+
+	return rc;
+}
+
+static int
+generic_ip_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
-	int val;
-	bool connected = false;
-	__be16 orig_port = 0;
+	unsigned short int sport;
+	int slen, sfamily;
 	struct socket *socket = server->ssocket;
+	struct sockaddr *saddr;
+
+	saddr = (struct sockaddr *) &server->dstaddr;
+
+	if (server->dstaddr.ss_family == AF_INET6) {
+		sport = ((struct sockaddr_in6 *) saddr)->sin6_port;
+		slen = sizeof(struct sockaddr_in6);
+		sfamily = AF_INET6;
+	} else {
+		sport = ((struct sockaddr_in *) saddr)->sin_port;
+		slen = sizeof(struct sockaddr_in);
+		sfamily = AF_INET;
+	}
 
 	if (socket == NULL) {
-		rc = sock_create_kern(PF_INET, SOCK_STREAM,
+		rc = sock_create_kern(sfamily, SOCK_STREAM,
 				      IPPROTO_TCP, &socket);
 		if (rc < 0) {
 			cERROR(1, "Error %d creating socket", rc);
+			server->ssocket = NULL;
 			return rc;
 		}
 
@@ -2156,63 +2277,28 @@ ipv4_connect(struct TCP_Server_Info *server)
 		cFYI(1, "Socket created");
 		server->ssocket = socket;
 		socket->sk->sk_allocation = GFP_NOFS;
-		cifs_reclassify_socket4(socket);
+		if (sfamily == AF_INET6)
+			cifs_reclassify_socket6(socket);
+		else
+			cifs_reclassify_socket4(socket);
 	}
 
 	rc = bind_socket(server);
 	if (rc < 0)
 		return rc;
 
-	/* user overrode default port */
-	if (server->addr.sockAddr.sin_port) {
-		rc = socket->ops->connect(socket, (struct sockaddr *)
-					  &server->addr.sockAddr,
-					  sizeof(struct sockaddr_in), 0);
-		if (rc >= 0)
-			connected = true;
-	}
-
-	if (!connected) {
-		/* save original port so we can retry user specified port
-			later if fall back ports fail this time  */
-		orig_port = server->addr.sockAddr.sin_port;
-
-		/* do not retry on the same port we just failed on */
-		if (server->addr.sockAddr.sin_port != htons(CIFS_PORT)) {
-			server->addr.sockAddr.sin_port = htons(CIFS_PORT);
-			rc = socket->ops->connect(socket,
-					(struct sockaddr *)
-					&server->addr.sockAddr,
-					sizeof(struct sockaddr_in), 0);
-			if (rc >= 0)
-				connected = true;
-		}
-	}
-	if (!connected) {
-		server->addr.sockAddr.sin_port = htons(RFC1001_PORT);
-		rc = socket->ops->connect(socket, (struct sockaddr *)
-					  &server->addr.sockAddr,
-					  sizeof(struct sockaddr_in), 0);
-		if (rc >= 0)
-			connected = true;
-	}
-
-	/* give up here - unless we want to retry on different
-	   protocol families some day */
-	if (!connected) {
-		if (orig_port)
-			server->addr.sockAddr.sin_port = orig_port;
-		cFYI(1, "Error %d connecting to server via ipv4", rc);
+	rc = socket->ops->connect(socket, saddr, slen, 0);
+	if (rc < 0) {
+		cFYI(1, "Error %d connecting to server", rc);
 		sock_release(socket);
 		server->ssocket = NULL;
 		return rc;
 	}
 
-
 	/*
 	 * Eventually check for other socket options to change from
 	 * the default. sock_setsockopt not used because it expects
 	 * user space buffer
 	 */
 	socket->sk->sk_rcvtimeo = 7 * HZ;
 	socket->sk->sk_sndtimeo = 5 * HZ;
@@ -2226,7 +2312,7 @@ ipv4_connect(struct TCP_Server_Info *server)
 	}
 
 	if (server->tcp_nodelay) {
-		val = 1;
+		int val = 1;
 		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
 				(char *)&val, sizeof(val));
 		if (rc)
@@ -2237,161 +2323,39 @@ ipv4_connect(struct TCP_Server_Info *server)
 		 socket->sk->sk_sndbuf,
 		 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
 
-	/* send RFC1001 sessinit */
-	if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) {
-		/* some servers require RFC1001 sessinit before sending
-		negprot - BB check reconnection in case where second
-		sessinit is sent but no second negprot */
-		struct rfc1002_session_packet *ses_init_buf;
-		struct smb_hdr *smb_buf;
-		ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet),
-				       GFP_KERNEL);
-		if (ses_init_buf) {
-			ses_init_buf->trailer.session_req.called_len = 32;
-			if (server->server_RFC1001_name &&
-			    server->server_RFC1001_name[0] != 0)
-				rfc1002mangle(ses_init_buf->trailer.
-					      session_req.called_name,
-					      server->server_RFC1001_name,
-					      RFC1001_NAME_LEN_WITH_NULL);
-			else
-				rfc1002mangle(ses_init_buf->trailer.
-					      session_req.called_name,
-					      DEFAULT_CIFS_CALLED_NAME,
-					      RFC1001_NAME_LEN_WITH_NULL);
-
-			ses_init_buf->trailer.session_req.calling_len = 32;
-
-			/* calling name ends in null (byte 16) from old smb
-			convention. */
-			if (server->workstation_RFC1001_name &&
-			    server->workstation_RFC1001_name[0] != 0)
-				rfc1002mangle(ses_init_buf->trailer.
-					      session_req.calling_name,
-					      server->workstation_RFC1001_name,
-					      RFC1001_NAME_LEN_WITH_NULL);
-			else
-				rfc1002mangle(ses_init_buf->trailer.
-					      session_req.calling_name,
-					      "LINUX_CIFS_CLNT",
-					      RFC1001_NAME_LEN_WITH_NULL);
-
-			ses_init_buf->trailer.session_req.scope1 = 0;
-			ses_init_buf->trailer.session_req.scope2 = 0;
-			smb_buf = (struct smb_hdr *)ses_init_buf;
-			/* sizeof RFC1002_SESSION_REQUEST with no scope */
-			smb_buf->smb_buf_length = 0x81000044;
-			rc = smb_send(server, smb_buf, 0x44);
-			kfree(ses_init_buf);
-			msleep(1); /* RFC1001 layer in at least one server
-				      requires very short break before negprot
-				      presumably because not expecting negprot
-				      to follow so fast.  This is a simple
-				      solution that works without
-				      complicating the code and causes no
-				      significant slowing down on mount
-				      for everyone else */
-		}
-		/* else the negprot may still work without this
-		even though malloc failed */
-
-	}
+	if (sport == htons(RFC1001_PORT))
+		rc = ip_rfc1001_connect(server);
 
 	return rc;
 }
 
 static int
-ipv6_connect(struct TCP_Server_Info *server)
+ip_connect(struct TCP_Server_Info *server)
 {
-	int rc = 0;
-	int val;
-	bool connected = false;
-	__be16 orig_port = 0;
-	struct socket *socket = server->ssocket;
+	unsigned short int *sport;
+	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
+	struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
 
-	if (socket == NULL) {
-		rc = sock_create_kern(PF_INET6, SOCK_STREAM,
-				      IPPROTO_TCP, &socket);
-		if (rc < 0) {
-			cERROR(1, "Error %d creating ipv6 socket", rc);
-			socket = NULL;
-			return rc;
-		}
+	if (server->dstaddr.ss_family == AF_INET6)
+		sport = &addr6->sin6_port;
+	else
+		sport = &addr->sin_port;
 
-		/* BB other socket options to set KEEPALIVE, NODELAY? */
-		cFYI(1, "ipv6 Socket created");
-		server->ssocket = socket;
-		socket->sk->sk_allocation = GFP_NOFS;
-		cifs_reclassify_socket6(socket);
-	}
+	if (*sport == 0) {
+		int rc;
 
-	rc = bind_socket(server);
-	if (rc < 0)
-		return rc;
+		/* try with 445 port at first */
+		*sport = htons(CIFS_PORT);
 
-	/* user overrode default port */
-	if (server->addr.sockAddr6.sin6_port) {
-		rc = socket->ops->connect(socket,
-				(struct sockaddr *) &server->addr.sockAddr6,
-				sizeof(struct sockaddr_in6), 0);
+		rc = generic_ip_connect(server);
 		if (rc >= 0)
-			connected = true;
-	}
-
-	if (!connected) {
-		/* save original port so we can retry user specified port
-			later if fall back ports fail this time  */
-
-		orig_port = server->addr.sockAddr6.sin6_port;
-		/* do not retry on the same port we just failed on */
-		if (server->addr.sockAddr6.sin6_port != htons(CIFS_PORT)) {
-			server->addr.sockAddr6.sin6_port = htons(CIFS_PORT);
-			rc = socket->ops->connect(socket, (struct sockaddr *)
-					&server->addr.sockAddr6,
-					sizeof(struct sockaddr_in6), 0);
-			if (rc >= 0)
-				connected = true;
-		}
-	}
-	if (!connected) {
-		server->addr.sockAddr6.sin6_port = htons(RFC1001_PORT);
-		rc = socket->ops->connect(socket, (struct sockaddr *)
-				&server->addr.sockAddr6,
-				sizeof(struct sockaddr_in6), 0);
-		if (rc >= 0)
-			connected = true;
-	}
-
-	/* give up here - unless we want to retry on different
-	   protocol families some day */
-	if (!connected) {
-		if (orig_port)
-			server->addr.sockAddr6.sin6_port = orig_port;
-		cFYI(1, "Error %d connecting to server via ipv6", rc);
-		sock_release(socket);
-		server->ssocket = NULL;
-		return rc;
-	}
-
-	/*
-	 * Eventually check for other socket options to change from
-	 * the default. sock_setsockopt not used because it expects
-	 * user space buffer
-	 */
-	socket->sk->sk_rcvtimeo = 7 * HZ;
-	socket->sk->sk_sndtimeo = 5 * HZ;
+			return rc;
 
-	if (server->tcp_nodelay) {
-		val = 1;
-		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
-				(char *)&val, sizeof(val));
-		if (rc)
-			cFYI(1, "set TCP_NODELAY socket option error %d", rc);
+		/* if it failed, try with 139 port */
+		*sport = htons(RFC1001_PORT);
 	}
 
-	server->ssocket = socket;
-
-	return rc;
+	return generic_ip_connect(server);
 }
 
 void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index db2a58c00f7b..2e773825835e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -293,10 +293,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
-				       cifs_sb->local_nls,
-				       cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
+				       current->tgid);
 	} else {
 		/* BB implement mode setting via Windows security
 		   descriptors e.g. */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5a28660ca2b5..d843631c028d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -104,53 +104,6 @@ static inline int cifs_get_disposition(unsigned int flags)
 		return FILE_OPEN;
 }
 
-static inline int cifs_open_inode_helper(struct inode *inode,
-	struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
-	char *full_path, int xid)
-{
-	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
-	struct timespec temp;
-	int rc;
-
-	if (pCifsInode->clientCanCacheRead) {
-		/* we have the inode open somewhere else
-		   no need to discard cache data */
-		goto client_can_cache;
-	}
-
-	/* BB need same check in cifs_create too? */
-	/* if not oplocked, invalidate inode pages if mtime or file
-	   size changed */
-	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
-	if (timespec_equal(&inode->i_mtime, &temp) &&
-			   (inode->i_size ==
-			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
-		cFYI(1, "inode unchanged on server");
-	} else {
-		if (inode->i_mapping) {
-			/* BB no need to lock inode until after invalidate
-			   since namei code should already have it locked? */
-			rc = filemap_write_and_wait(inode->i_mapping);
-			mapping_set_error(inode->i_mapping, rc);
-		}
-		cFYI(1, "invalidating remote inode since open detected it "
-			 "changed");
-		invalidate_remote_inode(inode);
-	}
-
-client_can_cache:
-	if (pTcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
-					      xid);
-	else
-		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
-					 xid, NULL);
-
-	cifs_set_oplock_level(pCifsInode, oplock);
-
-	return rc;
-}
-
 int cifs_posix_open(char *full_path, struct inode **pinode,
 		    struct super_block *sb, int mode, unsigned int f_flags,
 		    __u32 *poplock, __u16 *pnetfid, int xid)
@@ -213,6 +166,76 @@ posix_open_ret:
 	return rc;
 }
 
+static int
+cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
+	     struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock,
+	     __u16 *pnetfid, int xid)
+{
+	int rc;
+	int desiredAccess;
+	int disposition;
+	FILE_ALL_INFO *buf;
+
+	desiredAccess = cifs_convert_flags(f_flags);
+
+/*********************************************************************
+ *  open flag mapping table:
+ *
+ *	POSIX Flag            CIFS Disposition
+ *	----------            ----------------
+ *	O_CREAT               FILE_OPEN_IF
+ *	O_CREAT | O_EXCL      FILE_CREATE
+ *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
+ *	O_TRUNC               FILE_OVERWRITE
+ *	none of the above     FILE_OPEN
+ *
+ *	Note that there is not a direct match between disposition
+ *	FILE_SUPERSEDE (ie create whether or not file exists although
+ *	O_CREAT | O_TRUNC is similar but truncates the existing
+ *	file rather than creating a new file as FILE_SUPERSEDE does
+ *	(which uses the attributes / metadata passed in on open call)
+ *?
+ *?  O_SYNC is a reasonable match to CIFS writethrough flag
+ *?  and the read write flags match reasonably.  O_LARGEFILE
+ *?  is irrelevant because largefile support is always used
+ *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
+ *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
+ *********************************************************************/
+
+	disposition = cifs_get_disposition(f_flags);
+
+	/* BB pass O_SYNC flag through on file attributes .. BB */
+
+	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (tcon->ses->capabilities & CAP_NT_SMBS)
+		rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
+			 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
+			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	else
+		rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
+			desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
+			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+				& CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (rc)
+		goto out;
+
+	if (tcon->unix_ext)
+		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+					      xid);
+	else
+		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+					 xid, pnetfid);
+
+out:
+	kfree(buf);
+	return rc;
+}
+
 struct cifsFileInfo *
 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 		  struct tcon_link *tlink, __u32 oplock)
@@ -317,10 +340,8 @@ int cifs_open(struct inode *inode, struct file *file)
 	struct cifsFileInfo *pCifsFile = NULL;
 	struct cifsInodeInfo *pCifsInode;
 	char *full_path = NULL;
-	int desiredAccess;
-	int disposition;
+	bool posix_open_ok = false;
 	__u16 netfid;
-	FILE_ALL_INFO *buf = NULL;
 
 	xid = GetXid();
 
@@ -358,17 +379,7 @@ int cifs_open(struct inode *inode, struct file *file)
 				file->f_flags, &oplock, &netfid, xid);
 		if (rc == 0) {
 			cFYI(1, "posix open succeeded");
-
-			pCifsFile = cifs_new_fileinfo(netfid, file, tlink,
-						      oplock);
-			if (pCifsFile == NULL) {
-				CIFSSMBClose(xid, tcon, netfid);
-				rc = -ENOMEM;
-			}
-
-			cifs_fscache_set_inode_cookie(inode, file);
-
-			goto out;
+			posix_open_ok = true;
 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			if (tcon->ses->serverNOS)
 				cERROR(1, "server %s of type %s returned"
@@ -385,103 +396,39 @@ int cifs_open(struct inode *inode, struct file *file)
385 or DFS errors */ 396 or DFS errors */
386 } 397 }
387 398
388 desiredAccess = cifs_convert_flags(file->f_flags); 399 if (!posix_open_ok) {
389 400 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
390/********************************************************************* 401 file->f_flags, &oplock, &netfid, xid);
391 * open flag mapping table: 402 if (rc)
392 * 403 goto out;
393 * POSIX Flag CIFS Disposition
394 * ---------- ----------------
395 * O_CREAT FILE_OPEN_IF
396 * O_CREAT | O_EXCL FILE_CREATE
397 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
398 * O_TRUNC FILE_OVERWRITE
399 * none of the above FILE_OPEN
400 *
401 * Note that there is not a direct match between disposition
402 * FILE_SUPERSEDE (ie create whether or not file exists although
403 * O_CREAT | O_TRUNC is similar but truncates the existing
404 * file rather than creating a new file as FILE_SUPERSEDE does
405 * (which uses the attributes / metadata passed in on open call)
406 *?
407 *? O_SYNC is a reasonable match to CIFS writethrough flag
408 *? and the read write flags match reasonably. O_LARGEFILE
409 *? is irrelevant because largefile support is always used
410 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
411 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
412 *********************************************************************/
413
414 disposition = cifs_get_disposition(file->f_flags);
415
416 /* BB pass O_SYNC flag through on file attributes .. BB */
417
418 /* Also refresh inode by passing in file_info buf returned by SMBOpen
419 and calling get_inode_info with returned buf (at least helps
420 non-Unix server case) */
421
422 /* BB we can not do this if this is the second open of a file
423 and the first handle has writebehind data, we might be
424 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
425 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
426 if (!buf) {
427 rc = -ENOMEM;
428 goto out;
429 }
430
431 if (tcon->ses->capabilities & CAP_NT_SMBS)
432 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
433 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
434 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
435 & CIFS_MOUNT_MAP_SPECIAL_CHR);
436 else
437 rc = -EIO; /* no NT SMB support fall into legacy open below */
438
439 if (rc == -EIO) {
440 /* Old server, try legacy style OpenX */
441 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
442 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
443 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
444 & CIFS_MOUNT_MAP_SPECIAL_CHR);
445 }
446 if (rc) {
447 cFYI(1, "cifs_open returned 0x%x", rc);
448 goto out;
449 } 404 }
450 405
451 rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
452 if (rc != 0)
453 goto out;
454
455 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); 406 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
456 if (pCifsFile == NULL) { 407 if (pCifsFile == NULL) {
408 CIFSSMBClose(xid, tcon, netfid);
457 rc = -ENOMEM; 409 rc = -ENOMEM;
458 goto out; 410 goto out;
459 } 411 }
460 412
461 cifs_fscache_set_inode_cookie(inode, file); 413 cifs_fscache_set_inode_cookie(inode, file);
462 414
463 if (oplock & CIFS_CREATE_ACTION) { 415 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
464 /* time to set mode which we can not set earlier due to 416 /* time to set mode which we can not set earlier due to
465 problems creating new read-only files */ 417 problems creating new read-only files */
466 if (tcon->unix_ext) { 418 struct cifs_unix_set_info_args args = {
467 struct cifs_unix_set_info_args args = { 419 .mode = inode->i_mode,
468 .mode = inode->i_mode, 420 .uid = NO_CHANGE_64,
469 .uid = NO_CHANGE_64, 421 .gid = NO_CHANGE_64,
470 .gid = NO_CHANGE_64, 422 .ctime = NO_CHANGE_64,
471 .ctime = NO_CHANGE_64, 423 .atime = NO_CHANGE_64,
472 .atime = NO_CHANGE_64, 424 .mtime = NO_CHANGE_64,
473 .mtime = NO_CHANGE_64, 425 .device = 0,
474 .device = 0, 426 };
475 }; 427 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
476 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 428 pCifsFile->pid);
477 cifs_sb->local_nls,
478 cifs_sb->mnt_cifs_flags &
479 CIFS_MOUNT_MAP_SPECIAL_CHR);
480 }
481 } 429 }
482 430
483out: 431out:
484 kfree(buf);
485 kfree(full_path); 432 kfree(full_path);
486 FreeXid(xid); 433 FreeXid(xid);
487 cifs_put_tlink(tlink); 434 cifs_put_tlink(tlink);
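[Note] The disposition table that moved into cifs_nt_open() above maps directly onto a short chain of flag tests. A minimal sketch of that mapping, assuming the CIFS kernel context (FILE_CREATE and friends come from cifspdu.h); the in-tree cifs_get_disposition() named in the hunk is the authoritative version:

static inline int disposition_example(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;		/* fail if it already exists */
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;	/* create or truncate */
	else if (flags & O_CREAT)
		return FILE_OPEN_IF;		/* open, create if missing */
	else if (flags & O_TRUNC)
		return FILE_OVERWRITE;		/* truncate existing file */
	else
		return FILE_OPEN;		/* none of the above */
}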
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a853a89857a5..0c7e36910e31 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -518,6 +518,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
518 518
519 fattr->cf_eof = le64_to_cpu(info->EndOfFile); 519 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
520 fattr->cf_bytes = le64_to_cpu(info->AllocationSize); 520 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
521 fattr->cf_createtime = le64_to_cpu(info->CreationTime);
521 522
522 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 523 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
523 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 524 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
@@ -779,6 +780,10 @@ cifs_find_inode(struct inode *inode, void *opaque)
779 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 780 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
780 return 0; 781 return 0;
781 782
783 /* use createtime like an i_generation field */
784 if (CIFS_I(inode)->createtime != fattr->cf_createtime)
785 return 0;
786
782 /* don't match inode of different type */ 787 /* don't match inode of different type */
783 if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) 788 if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
784 return 0; 789 return 0;
@@ -796,6 +801,7 @@ cifs_init_inode(struct inode *inode, void *opaque)
796 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; 801 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
797 802
798 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid; 803 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
804 CIFS_I(inode)->createtime = fattr->cf_createtime;
799 return 0; 805 return 0;
800} 806}
801 807
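[Note] The createtime checks added above tighten inode matching: a CIFS uniqueid can be recycled when a file is deleted and recreated on the server, and the creation time then acts as a cheap generation number. A condensed, illustrative sketch of the resulting iget5_locked() test callback (the real one is cifs_find_inode() in the hunk above):

static int find_inode_example(struct inode *inode, void *opaque)
{
	struct cifs_fattr *fattr = opaque;

	if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
		return 0;	/* a different file altogether */
	if (CIFS_I(inode)->createtime != fattr->cf_createtime)
		return 0;	/* uniqueid recycled: treat as a new inode */
	return 1;		/* same file, same generation: reuse it */
}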
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ec5b68e3b928..76b1b37c9e6b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -160,6 +160,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
160 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); 160 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
161 fattr->cf_eof = le64_to_cpu(info->EndOfFile); 161 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
162 fattr->cf_bytes = le64_to_cpu(info->AllocationSize); 162 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
163 fattr->cf_createtime = le64_to_cpu(info->CreationTime);
163 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); 164 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
164 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); 165 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
165 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); 166 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7b01d3f6eed6..eb746486e49e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -420,7 +420,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
420 return 0; 420 return 0;
421} 421}
422 422
423#ifdef CONFIG_CIFS_EXPERIMENTAL
424/* BB Move to ntlmssp.c eventually */ 423/* BB Move to ntlmssp.c eventually */
425 424
426/* We do not malloc the blob, it is passed in pbuffer, because 425/* We do not malloc the blob, it is passed in pbuffer, because
@@ -431,13 +430,14 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
431 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; 430 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
432 __u32 flags; 431 __u32 flags;
433 432
433 memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
434 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 434 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
435 sec_blob->MessageType = NtLmNegotiate; 435 sec_blob->MessageType = NtLmNegotiate;
436 436
437 /* BB is NTLMV2 session security format easier to use here? */ 437 /* BB is NTLMV2 session security format easier to use here? */
438 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | 438 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 439 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
440 NTLMSSP_NEGOTIATE_NTLM; 440 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
441 if (ses->server->secMode & 441 if (ses->server->secMode &
442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 442 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
443 flags |= NTLMSSP_NEGOTIATE_SIGN; 443 flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -446,7 +446,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
446 NTLMSSP_NEGOTIATE_EXTENDED_SEC; 446 NTLMSSP_NEGOTIATE_EXTENDED_SEC;
447 } 447 }
448 448
449 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 449 sec_blob->NegotiateFlags = cpu_to_le32(flags);
450 450
451 sec_blob->WorkstationName.BufferOffset = 0; 451 sec_blob->WorkstationName.BufferOffset = 0;
452 sec_blob->WorkstationName.Length = 0; 452 sec_blob->WorkstationName.Length = 0;
@@ -477,7 +477,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
477 flags = NTLMSSP_NEGOTIATE_56 | 477 flags = NTLMSSP_NEGOTIATE_56 |
478 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | 478 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
479 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 479 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
480 NTLMSSP_NEGOTIATE_NTLM; 480 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
481 if (ses->server->secMode & 481 if (ses->server->secMode &
482 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 482 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
483 flags |= NTLMSSP_NEGOTIATE_SIGN; 483 flags |= NTLMSSP_NEGOTIATE_SIGN;
@@ -485,7 +485,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
485 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 485 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
486 486
487 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); 487 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
488 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 488 sec_blob->NegotiateFlags = cpu_to_le32(flags);
489 489
490 sec_blob->LmChallengeResponse.BufferOffset = 490 sec_blob->LmChallengeResponse.BufferOffset =
491 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE)); 491 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE));
@@ -544,8 +544,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
544 sec_blob->WorkstationName.MaximumLength = 0; 544 sec_blob->WorkstationName.MaximumLength = 0;
545 tmp += 2; 545 tmp += 2;
546 546
547 if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && 547 if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
548 !calc_seckey(ses)) { 548 (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
549 && !calc_seckey(ses)) {
549 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); 550 memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
550 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); 551 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
551 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); 552 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
@@ -563,17 +564,6 @@ setup_ntlmv2_ret:
563 return rc; 564 return rc;
564} 565}
565 566
566
567static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
568 struct cifsSesInfo *ses)
569{
570 build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses);
571 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
572
573 return;
574}
575#endif
576
577int 567int
578CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 568CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
579 const struct nls_table *nls_cp) 569 const struct nls_table *nls_cp)
@@ -814,71 +804,70 @@ ssetup_ntlmssp_authenticate:
814 rc = -ENOSYS; 804 rc = -ENOSYS;
815 goto ssetup_exit; 805 goto ssetup_exit;
816#endif /* CONFIG_CIFS_UPCALL */ 806#endif /* CONFIG_CIFS_UPCALL */
817 } else { 807 } else if (type == RawNTLMSSP) {
818#ifdef CONFIG_CIFS_EXPERIMENTAL 808 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
819 if (type == RawNTLMSSP) { 809 cERROR(1, "NTLMSSP requires Unicode support");
820 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 810 rc = -ENOSYS;
821 cERROR(1, "NTLMSSP requires Unicode support"); 811 goto ssetup_exit;
822 rc = -ENOSYS; 812 }
813
814 cFYI(1, "ntlmssp session setup phase %d", phase);
815 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
816 capabilities |= CAP_EXTENDED_SECURITY;
817 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
818 switch(phase) {
819 case NtLmNegotiate:
820 build_ntlmssp_negotiate_blob(
821 pSMB->req.SecurityBlob, ses);
822 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
823 iov[1].iov_base = pSMB->req.SecurityBlob;
824 pSMB->req.SecurityBlobLength =
825 cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
826 break;
827 case NtLmAuthenticate:
828 /*
829 * 5 is an empirical value, large enough to hold
 830 * authenticate message plus max 10 of av pairs,
831 * domain, user, workstation names, flags, etc.
832 */
833 ntlmsspblob = kzalloc(
834 5*sizeof(struct _AUTHENTICATE_MESSAGE),
835 GFP_KERNEL);
836 if (!ntlmsspblob) {
837 cERROR(1, "Can't allocate NTLMSSP blob");
838 rc = -ENOMEM;
823 goto ssetup_exit; 839 goto ssetup_exit;
824 } 840 }
825 841
826 cFYI(1, "ntlmssp session setup phase %d", phase); 842 rc = build_ntlmssp_auth_blob(ntlmsspblob,
827 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 843 &blob_len, ses, nls_cp);
828 capabilities |= CAP_EXTENDED_SECURITY; 844 if (rc)
829 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
830 if (phase == NtLmNegotiate) {
831 setup_ntlmssp_neg_req(pSMB, ses);
832 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
833 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
834 } else if (phase == NtLmAuthenticate) {
835 /* 5 is an empirical value, large enought to
836 * hold authenticate message, max 10 of
837 * av paris, doamin,user,workstation mames,
838 * flags etc..
839 */
840 ntlmsspblob = kmalloc(
841 5*sizeof(struct _AUTHENTICATE_MESSAGE),
842 GFP_KERNEL);
843 if (!ntlmsspblob) {
844 cERROR(1, "Can't allocate NTLMSSP");
845 rc = -ENOMEM;
846 goto ssetup_exit;
847 }
848
849 rc = build_ntlmssp_auth_blob(ntlmsspblob,
850 &blob_len, ses, nls_cp);
851 if (rc)
852 goto ssetup_exit;
853 iov[1].iov_len = blob_len;
854 iov[1].iov_base = ntlmsspblob;
855 pSMB->req.SecurityBlobLength =
856 cpu_to_le16(blob_len);
857 /* Make sure that we tell the server that we
858 are using the uid that it just gave us back
859 on the response (challenge) */
860 smb_buf->Uid = ses->Suid;
861 } else {
862 cERROR(1, "invalid phase %d", phase);
863 rc = -ENOSYS;
864 goto ssetup_exit; 845 goto ssetup_exit;
865 } 846 iov[1].iov_len = blob_len;
866 /* unicode strings must be word aligned */ 847 iov[1].iov_base = ntlmsspblob;
867 if ((iov[0].iov_len + iov[1].iov_len) % 2) { 848 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
868 *bcc_ptr = 0; 849 /*
869 bcc_ptr++; 850 * Make sure that we tell the server that we are using
870 } 851 * the uid that it just gave us back on the response
871 unicode_oslm_strings(&bcc_ptr, nls_cp); 852 * (challenge)
872 } else { 853 */
873 cERROR(1, "secType %d not supported!", type); 854 smb_buf->Uid = ses->Suid;
855 break;
856 default:
857 cERROR(1, "invalid phase %d", phase);
874 rc = -ENOSYS; 858 rc = -ENOSYS;
875 goto ssetup_exit; 859 goto ssetup_exit;
876 } 860 }
877#else 861 /* unicode strings must be word aligned */
862 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
863 *bcc_ptr = 0;
864 bcc_ptr++;
865 }
866 unicode_oslm_strings(&bcc_ptr, nls_cp);
867 } else {
878 cERROR(1, "secType %d not supported!", type); 868 cERROR(1, "secType %d not supported!", type);
879 rc = -ENOSYS; 869 rc = -ENOSYS;
880 goto ssetup_exit; 870 goto ssetup_exit;
881#endif
882 } 871 }
883 872
884 iov[2].iov_base = str_area; 873 iov[2].iov_base = str_area;
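[Note] Two of the sess.c hunks above share one theme: the blob is now zeroed with memset() and NegotiateFlags is set by plain assignment rather than '|='. OR-ing into a field of a never-initialized buffer keeps whatever bits happened to be there, so the server could be sent flags that were never negotiated. A minimal sketch of the corrected pattern, assuming the cifs NEGOTIATE_MESSAGE type:

static void fill_negotiate_flags(unsigned char *pbuffer, __u32 flags)
{
	NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;

	memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));	/* no stale bits */
	sec_blob->NegotiateFlags = cpu_to_le32(flags);	/* '=', not '|=' */
}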
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cdf4cc5..59ca81b16919 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,7 +119,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
119 if (ssocket == NULL) 119 if (ssocket == NULL)
120 return -ENOTSOCK; /* BB eventually add reconnect code here */ 120 return -ENOTSOCK; /* BB eventually add reconnect code here */
121 121
122 smb_msg.msg_name = (struct sockaddr *) &server->addr.sockAddr; 122 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
123 smb_msg.msg_namelen = sizeof(struct sockaddr); 123 smb_msg.msg_namelen = sizeof(struct sockaddr);
124 smb_msg.msg_control = NULL; 124 smb_msg.msg_control = NULL;
125 smb_msg.msg_controllen = 0; 125 smb_msg.msg_controllen = 0;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 37a34c2c622a..9c64ae9e4c1a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -63,6 +63,9 @@
63#define NEEDED_RMEM (4*1024*1024) 63#define NEEDED_RMEM (4*1024*1024)
64#define CONN_HASH_SIZE 32 64#define CONN_HASH_SIZE 32
65 65
66/* Number of messages to send before rescheduling */
67#define MAX_SEND_MSG_COUNT 25
68
66struct cbuf { 69struct cbuf {
67 unsigned int base; 70 unsigned int base;
68 unsigned int len; 71 unsigned int len;
@@ -108,6 +111,7 @@ struct connection {
108#define CF_INIT_PENDING 4 111#define CF_INIT_PENDING 4
109#define CF_IS_OTHERCON 5 112#define CF_IS_OTHERCON 5
110#define CF_CLOSE 6 113#define CF_CLOSE 6
114#define CF_APP_LIMITED 7
111 struct list_head writequeue; /* List of outgoing writequeue_entries */ 115 struct list_head writequeue; /* List of outgoing writequeue_entries */
112 spinlock_t writequeue_lock; 116 spinlock_t writequeue_lock;
113 int (*rx_action) (struct connection *); /* What to do when active */ 117 int (*rx_action) (struct connection *); /* What to do when active */
@@ -295,7 +299,17 @@ static void lowcomms_write_space(struct sock *sk)
295{ 299{
296 struct connection *con = sock2con(sk); 300 struct connection *con = sock2con(sk);
297 301
298 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 302 if (!con)
303 return;
304
305 clear_bit(SOCK_NOSPACE, &con->sock->flags);
306
307 if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
308 con->sock->sk->sk_write_pending--;
309 clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
310 }
311
312 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
299 queue_work(send_workqueue, &con->swork); 313 queue_work(send_workqueue, &con->swork);
300} 314}
301 315
@@ -915,6 +929,7 @@ static void tcp_connect_to_sock(struct connection *con)
915 struct sockaddr_storage saddr, src_addr; 929 struct sockaddr_storage saddr, src_addr;
916 int addr_len; 930 int addr_len;
917 struct socket *sock = NULL; 931 struct socket *sock = NULL;
932 int one = 1;
918 933
919 if (con->nodeid == 0) { 934 if (con->nodeid == 0) {
920 log_print("attempt to connect sock 0 foiled"); 935 log_print("attempt to connect sock 0 foiled");
@@ -960,6 +975,11 @@ static void tcp_connect_to_sock(struct connection *con)
960 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); 975 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
961 976
962 log_print("connecting to %d", con->nodeid); 977 log_print("connecting to %d", con->nodeid);
978
979 /* Turn off Nagle's algorithm */
980 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
981 sizeof(one));
982
963 result = 983 result =
964 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 984 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
965 O_NONBLOCK); 985 O_NONBLOCK);
@@ -1011,6 +1031,10 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
1011 goto create_out; 1031 goto create_out;
1012 } 1032 }
1013 1033
1034 /* Turn off Nagle's algorithm */
1035 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
1036 sizeof(one));
1037
1014 result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 1038 result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
1015 (char *)&one, sizeof(one)); 1039 (char *)&one, sizeof(one));
1016 1040
@@ -1297,6 +1321,7 @@ static void send_to_sock(struct connection *con)
1297 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 1321 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
1298 struct writequeue_entry *e; 1322 struct writequeue_entry *e;
1299 int len, offset; 1323 int len, offset;
1324 int count = 0;
1300 1325
1301 mutex_lock(&con->sock_mutex); 1326 mutex_lock(&con->sock_mutex);
1302 if (con->sock == NULL) 1327 if (con->sock == NULL)
@@ -1319,14 +1344,27 @@ static void send_to_sock(struct connection *con)
1319 ret = kernel_sendpage(con->sock, e->page, offset, len, 1344 ret = kernel_sendpage(con->sock, e->page, offset, len,
1320 msg_flags); 1345 msg_flags);
1321 if (ret == -EAGAIN || ret == 0) { 1346 if (ret == -EAGAIN || ret == 0) {
1347 if (ret == -EAGAIN &&
1348 test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
1349 !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
1350 /* Notify TCP that we're limited by the
1351 * application window size.
1352 */
1353 set_bit(SOCK_NOSPACE, &con->sock->flags);
1354 con->sock->sk->sk_write_pending++;
1355 }
1322 cond_resched(); 1356 cond_resched();
1323 goto out; 1357 goto out;
1324 } 1358 }
1325 if (ret <= 0) 1359 if (ret <= 0)
1326 goto send_error; 1360 goto send_error;
1327 } 1361 }
1328 /* Don't starve people filling buffers */ 1362
1363 /* Don't starve people filling buffers */
1364 if (++count >= MAX_SEND_MSG_COUNT) {
1329 cond_resched(); 1365 cond_resched();
1366 count = 0;
1367 }
1330 1368
1331 spin_lock(&con->writequeue_lock); 1369 spin_lock(&con->writequeue_lock);
1332 e->offset += ret; 1370 e->offset += ret;
@@ -1430,20 +1468,19 @@ static void work_stop(void)
1430 1468
1431static int work_start(void) 1469static int work_start(void)
1432{ 1470{
1433 int error; 1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
1434 recv_workqueue = create_workqueue("dlm_recv"); 1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1435 error = IS_ERR(recv_workqueue); 1473 if (!recv_workqueue) {
1436 if (error) { 1474 log_print("can't start dlm_recv");
1437 log_print("can't start dlm_recv %d", error); 1475 return -ENOMEM;
1438 return error;
1439 } 1476 }
1440 1477
1441 send_workqueue = create_singlethread_workqueue("dlm_send"); 1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
1442 error = IS_ERR(send_workqueue); 1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1443 if (error) { 1480 if (!send_workqueue) {
1444 log_print("can't start dlm_send %d", error); 1481 log_print("can't start dlm_send");
1445 destroy_workqueue(recv_workqueue); 1482 destroy_workqueue(recv_workqueue);
1446 return error; 1483 return -ENOMEM;
1447 } 1484 }
1448 1485
1449 return 0; 1486 return 0;
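[Note] The MAX_SEND_MSG_COUNT change above replaces a cond_resched() after every message with one per batch of 25, so a busy writequeue stays fair to other tasks without rescheduling on each send. The shape of the loop, reduced to a sketch (have_more_msgs() and send_one_msg() are hypothetical stand-ins for the writequeue walk):

extern bool have_more_msgs(void);	/* hypothetical writequeue test */
extern void send_one_msg(void);		/* hypothetical single send */

static void drain_writequeue_example(void)
{
	int count = 0;

	while (have_more_msgs()) {
		send_one_msg();
		/* don't starve others, but don't yield on every message */
		if (++count >= MAX_SEND_MSG_COUNT) {
			cond_resched();
			count = 0;
		}
	}
}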
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2709b34206ab..47cda410b548 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -28,21 +28,30 @@
28 28
29typedef struct ext2_dir_entry_2 ext2_dirent; 29typedef struct ext2_dir_entry_2 ext2_dirent;
30 30
31/*
32 * Tests against MAX_REC_LEN etc were put in place for 64k block
33 * sizes; if that is not possible on this arch, we can skip
34 * those tests and speed things up.
35 */
31static inline unsigned ext2_rec_len_from_disk(__le16 dlen) 36static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
32{ 37{
33 unsigned len = le16_to_cpu(dlen); 38 unsigned len = le16_to_cpu(dlen);
34 39
40#if (PAGE_CACHE_SIZE >= 65536)
35 if (len == EXT2_MAX_REC_LEN) 41 if (len == EXT2_MAX_REC_LEN)
36 return 1 << 16; 42 return 1 << 16;
43#endif
37 return len; 44 return len;
38} 45}
39 46
40static inline __le16 ext2_rec_len_to_disk(unsigned len) 47static inline __le16 ext2_rec_len_to_disk(unsigned len)
41{ 48{
49#if (PAGE_CACHE_SIZE >= 65536)
42 if (len == (1 << 16)) 50 if (len == (1 << 16))
43 return cpu_to_le16(EXT2_MAX_REC_LEN); 51 return cpu_to_le16(EXT2_MAX_REC_LEN);
44 else 52 else
45 BUG_ON(len > (1 << 16)); 53 BUG_ON(len > (1 << 16));
54#endif
46 return cpu_to_le16(len); 55 return cpu_to_le16(len);
47} 56}
48 57
@@ -129,15 +138,15 @@ static void ext2_check_page(struct page *page, int quiet)
129 p = (ext2_dirent *)(kaddr + offs); 138 p = (ext2_dirent *)(kaddr + offs);
130 rec_len = ext2_rec_len_from_disk(p->rec_len); 139 rec_len = ext2_rec_len_from_disk(p->rec_len);
131 140
132 if (rec_len < EXT2_DIR_REC_LEN(1)) 141 if (unlikely(rec_len < EXT2_DIR_REC_LEN(1)))
133 goto Eshort; 142 goto Eshort;
134 if (rec_len & 3) 143 if (unlikely(rec_len & 3))
135 goto Ealign; 144 goto Ealign;
136 if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) 145 if (unlikely(rec_len < EXT2_DIR_REC_LEN(p->name_len)))
137 goto Enamelen; 146 goto Enamelen;
138 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) 147 if (unlikely(((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)))
139 goto Espan; 148 goto Espan;
140 if (le32_to_cpu(p->inode) > max_inumber) 149 if (unlikely(le32_to_cpu(p->inode) > max_inumber))
141 goto Einumber; 150 goto Einumber;
142 } 151 }
143 if (offs != limit) 152 if (offs != limit)
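[Note] The #if guards above exist because rec_len is a 16-bit on-disk field: on architectures with 64KiB pages a single directory entry can span the whole block, and 65536 does not fit in a __le16, so EXT2_MAX_REC_LEN is stored as a sentinel. A round-trip sketch on such an architecture (kernel context assumed):

static void rec_len_roundtrip_example(void)
{
	unsigned len = 1 << 16;				/* entry fills a 64KiB block */
	__le16 disk = ext2_rec_len_to_disk(len);	/* stored as EXT2_MAX_REC_LEN */

	BUG_ON(ext2_rec_len_from_disk(disk) != len);	/* decodes back to 65536 */
}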
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f8aecd2e3297..2e1d8341d827 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,7 +67,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
67 inode = NULL; 67 inode = NULL;
68 if (ino) { 68 if (ino) {
69 inode = ext2_iget(dir->i_sb, ino); 69 inode = ext2_iget(dir->i_sb, ino);
70 if (unlikely(IS_ERR(inode))) { 70 if (IS_ERR(inode)) {
71 if (PTR_ERR(inode) == -ESTALE) { 71 if (PTR_ERR(inode) == -ESTALE) {
72 ext2_error(dir->i_sb, __func__, 72 ext2_error(dir->i_sb, __func__,
73 "deleted inode referenced: %lu", 73 "deleted inode referenced: %lu",
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e0c6380ff992..7731695e65d9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -43,9 +43,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data);
43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
44static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
45 45
46void ext2_error (struct super_block * sb, const char * function, 46void ext2_error(struct super_block *sb, const char *function,
47 const char * fmt, ...) 47 const char *fmt, ...)
48{ 48{
49 struct va_format vaf;
49 va_list args; 50 va_list args;
50 struct ext2_sb_info *sbi = EXT2_SB(sb); 51 struct ext2_sb_info *sbi = EXT2_SB(sb);
51 struct ext2_super_block *es = sbi->s_es; 52 struct ext2_super_block *es = sbi->s_es;
@@ -59,9 +60,13 @@ void ext2_error (struct super_block * sb, const char * function,
59 } 60 }
60 61
61 va_start(args, fmt); 62 va_start(args, fmt);
62 printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function); 63
63 vprintk(fmt, args); 64 vaf.fmt = fmt;
64 printk("\n"); 65 vaf.va = &args;
66
67 printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n",
68 sb->s_id, function, &vaf);
69
65 va_end(args); 70 va_end(args);
66 71
67 if (test_opt(sb, ERRORS_PANIC)) 72 if (test_opt(sb, ERRORS_PANIC))
@@ -76,12 +81,16 @@ void ext2_error (struct super_block * sb, const char * function,
76void ext2_msg(struct super_block *sb, const char *prefix, 81void ext2_msg(struct super_block *sb, const char *prefix,
77 const char *fmt, ...) 82 const char *fmt, ...)
78{ 83{
84 struct va_format vaf;
79 va_list args; 85 va_list args;
80 86
81 va_start(args, fmt); 87 va_start(args, fmt);
82 printk("%sEXT2-fs (%s): ", prefix, sb->s_id); 88
83 vprintk(fmt, args); 89 vaf.fmt = fmt;
84 printk("\n"); 90 vaf.va = &args;
91
92 printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
93
85 va_end(args); 94 va_end(args);
86} 95}
87 96
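[Note] Both ext2 hunks above adopt the %pV printk extension: the caller's format string and va_list are wrapped in a struct va_format and emitted by a single printk(), so the prefix, message body, and newline can no longer be interleaved with output from concurrent printk callers. A minimal sketch of the pattern with hypothetical names:

static void myfs_msg_example(struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* one atomic line instead of printk() + vprintk() + printk() */
	printk(KERN_INFO "myfs (%s): %pV\n", sb->s_id, &vaf);
	va_end(args);
}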
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f84700be3274..c2e4dce984d2 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -199,14 +199,6 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
199 goto found; 199 goto found;
200 entry = next; 200 entry = next;
201 } 201 }
202 /* Check the remaining name entries */
203 while (!IS_LAST_ENTRY(entry)) {
204 struct ext2_xattr_entry *next =
205 EXT2_XATTR_NEXT(entry);
206 if ((char *)next >= end)
207 goto bad_block;
208 entry = next;
209 }
210 if (ext2_xattr_cache_insert(bh)) 202 if (ext2_xattr_cache_insert(bh))
211 ea_idebug(inode, "cache insert failed"); 203 ea_idebug(inode, "cache insert failed");
212 error = -ENODATA; 204 error = -ENODATA;
@@ -355,7 +347,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
355/* 347/*
356 * ext2_xattr_set() 348 * ext2_xattr_set()
357 * 349 *
358 * Create, replace or remove an extended attribute for this inode. Buffer 350 * Create, replace or remove an extended attribute for this inode. Value
359 * is NULL to remove an existing extended attribute, and non-NULL to 351 * is NULL to remove an existing extended attribute, and non-NULL to
360 * either replace an existing extended attribute, or create a new extended 352 * either replace an existing extended attribute, or create a new extended
361 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 353 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b3db22649426..045995c8ce5a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,7 @@
20#include <linux/ext3_jbd.h> 20#include <linux/ext3_jbd.h>
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/blkdev.h>
23 24
24/* 25/*
25 * balloc.c contains the blocks allocation and deallocation routines 26 * balloc.c contains the blocks allocation and deallocation routines
@@ -39,6 +40,21 @@
39 40
40#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 41#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
41 42
43/*
44 * Calculate the block group number and offset, given a block number
45 */
46static void ext3_get_group_no_and_offset(struct super_block *sb,
47 ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
48{
49 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
50
51 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
52 if (offsetp)
53 *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
54 if (blockgrpp)
55 *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
56}
57
42/** 58/**
43 * ext3_get_group_desc() -- load group descriptor from disk 59 * ext3_get_group_desc() -- load group descriptor from disk
44 * @sb: super block 60 * @sb: super block
@@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1885 return ext3_bg_num_gdb_meta(sb,group); 1901 return ext3_bg_num_gdb_meta(sb,group);
1886 1902
1887} 1903}
1904
1905/**
1906 * ext3_trim_all_free -- function to trim all free space in alloc. group
1907 * @sb: super block for file system
1908 * @group: allocation group to trim
1909 * @start: first group block to examine
1910 * @max: last group block to examine
1911 * @gdp: allocation group description structure
1912 * @minblocks: minimum extent block count
1913 *
1914 * ext3_trim_all_free walks through the group's block bitmap searching for free
1915 * blocks. When a free block is found, it tries to allocate this block and the
1916 * consecutive free blocks to get the biggest free extent possible, until it
1917 * reaches any used block. It then issues a TRIM command on this extent and frees
1918 * the extent in the block bitmap. This is done until the whole group is scanned.
1919 */
1920ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1921 ext3_grpblk_t start, ext3_grpblk_t max,
1922 ext3_grpblk_t minblocks)
1923{
1924 handle_t *handle;
1925 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
1926 ext3_fsblk_t discard_block;
1927 struct ext3_sb_info *sbi;
1928 struct buffer_head *gdp_bh, *bitmap_bh = NULL;
1929 struct ext3_group_desc *gdp;
1930 int err = 0, ret = 0;
1931
1932 /*
1933 * We will update one block bitmap, and one group descriptor
1934 */
1935 handle = ext3_journal_start_sb(sb, 2);
1936 if (IS_ERR(handle))
1937 return PTR_ERR(handle);
1938
1939 bitmap_bh = read_block_bitmap(sb, group);
1940 if (!bitmap_bh) {
1941 err = -EIO;
1942 goto err_out;
1943 }
1944
1945 BUFFER_TRACE(bitmap_bh, "getting undo access");
1946 err = ext3_journal_get_undo_access(handle, bitmap_bh);
1947 if (err)
1948 goto err_out;
1949
1950 gdp = ext3_get_group_desc(sb, group, &gdp_bh);
1951 if (!gdp) {
1952 err = -EIO;
1953 goto err_out;
1954 }
1955
1956 BUFFER_TRACE(gdp_bh, "get_write_access");
1957 err = ext3_journal_get_write_access(handle, gdp_bh);
1958 if (err)
1959 goto err_out;
1960
1961 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1962 sbi = EXT3_SB(sb);
1963
1964 /* Walk through the whole group */
1965 while (start < max) {
1966 start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1967 if (start < 0)
1968 break;
1969 next = start;
1970
1971 /*
1972 * Allocate contiguous free extents by setting bits in the
1973 * block bitmap
1974 */
1975 while (next < max
1976 && claim_block(sb_bgl_lock(sbi, group),
1977 next, bitmap_bh)) {
1978 next++;
1979 }
1980
1981 /* We did not claim any blocks */
1982 if (next == start)
1983 continue;
1984
1985 discard_block = (ext3_fsblk_t)start +
1986 ext3_group_first_block_no(sb, group);
1987
1988 /* Update counters */
1989 spin_lock(sb_bgl_lock(sbi, group));
1990 le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
1991 spin_unlock(sb_bgl_lock(sbi, group));
1992 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
1993
1994 /* Do not issue a TRIM on extents smaller than minblocks */
1995 if ((next - start) < minblocks)
1996 goto free_extent;
1997
1998 /* Send the TRIM command down to the device */
1999 err = sb_issue_discard(sb, discard_block, next - start,
2000 GFP_NOFS, 0);
2001 count += (next - start);
2002free_extent:
2003 freed = 0;
2004
2005 /*
2006 * Clear bits in the bitmap
2007 */
2008 for (bit = start; bit < next; bit++) {
2009 BUFFER_TRACE(bitmap_bh, "clear bit");
2010 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
2011 bit, bitmap_bh->b_data)) {
2012 ext3_error(sb, __func__,
2013 "bit already cleared for block "E3FSBLK,
2014 (unsigned long)bit);
2015 BUFFER_TRACE(bitmap_bh, "bit already cleared");
2016 } else {
2017 freed++;
2018 }
2019 }
2020
2021 /* Update counters */
2022 spin_lock(sb_bgl_lock(sbi, group));
2023 le16_add_cpu(&gdp->bg_free_blocks_count, freed);
2024 spin_unlock(sb_bgl_lock(sbi, group));
2025 percpu_counter_add(&sbi->s_freeblocks_counter, freed);
2026
2027 start = next;
2028 if (err < 0) {
2029 if (err != -EOPNOTSUPP)
2030 ext3_warning(sb, __func__, "Discard command "
2031 "returned error %d\n", err);
2032 break;
2033 }
2034
2035 if (fatal_signal_pending(current)) {
2036 err = -ERESTARTSYS;
2037 break;
2038 }
2039
2040 cond_resched();
2041
2042 /* No more suitable extents */
2043 if ((free_blocks - count) < minblocks)
2044 break;
2045 }
2046
2047 /* We dirtied the bitmap block */
2048 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2049 ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
2050 if (!err)
2051 err = ret;
2052
2053 /* And the group descriptor block */
2054 BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
2055 ret = ext3_journal_dirty_metadata(handle, gdp_bh);
2056 if (!err)
2057 err = ret;
2058
2059 ext3_debug("trimmed %d blocks in the group %d\n",
2060 count, group);
2061
2062err_out:
2063 if (err)
2064 count = err;
2065 ext3_journal_stop(handle);
2066 brelse(bitmap_bh);
2067
2068 return count;
2069}
2070
2071/**
2072 * ext3_trim_fs() -- trim ioctl handle function
2073 * @sb: superblock for filesystem
2074 * @start: First Byte to trim
2075 * @len: number of Bytes to trim from start
2076 * @minlen: minimum extent length in Bytes
2077 *
2078 * ext3_trim_fs goes through all allocation groups containing Bytes from
2079 * start to start+len. For each such group the ext3_trim_all_free function
2080 * is invoked to trim all free space.
2081 */
2082int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2083{
2084 ext3_grpblk_t last_block, first_block, free_blocks;
2085 unsigned long first_group, last_group;
2086 unsigned long group, ngroups;
2087 struct ext3_group_desc *gdp;
2088 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2089 uint64_t start, len, minlen, trimmed;
2090 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2091 int ret = 0;
2092
2093 start = range->start >> sb->s_blocksize_bits;
2094 len = range->len >> sb->s_blocksize_bits;
2095 minlen = range->minlen >> sb->s_blocksize_bits;
2096 trimmed = 0;
2097
2098 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
2099 return -EINVAL;
2100 if (start >= max_blks)
2101 goto out;
2102 if (start < le32_to_cpu(es->s_first_data_block)) {
2103 len -= le32_to_cpu(es->s_first_data_block) - start;
2104 start = le32_to_cpu(es->s_first_data_block);
2105 }
2106 if (start + len > max_blks)
2107 len = max_blks - start;
2108
2109 ngroups = EXT3_SB(sb)->s_groups_count;
2110 smp_rmb();
2111
2112 /* Determine first and last group to examine based on start and len */
2113 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2114 &first_group, &first_block);
2115 ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
2116 &last_group, &last_block);
2117 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
2118 last_block = EXT3_BLOCKS_PER_GROUP(sb);
2119
2120 if (first_group > last_group)
2121 return -EINVAL;
2122
2123 for (group = first_group; group <= last_group; group++) {
2124 gdp = ext3_get_group_desc(sb, group, NULL);
2125 if (!gdp)
2126 break;
2127
2128 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
2129 if (free_blocks < minlen)
2130 continue;
2131
2132 if (len >= EXT3_BLOCKS_PER_GROUP(sb))
2133 len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block);
2134 else
2135 last_block = first_block + len;
2136
2137 ret = ext3_trim_all_free(sb, group, first_block,
2138 last_block, minlen);
2139 if (ret < 0)
2140 break;
2141
2142 trimmed += ret;
2143 first_block = 0;
2144 }
2145
2146 if (ret >= 0)
2147 ret = 0;
2148
2149out:
2150 range->len = trimmed * sb->s_blocksize;
2151
2152 return ret;
2153}
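[Note] The helper added at the top of balloc.c is plain div/mod arithmetic relative to s_first_data_block. A worked example under assumed geometry (1KiB blocks, s_first_data_block = 1, 8192 blocks per group): block 10000 is relative block 9999, giving group 9999 / 8192 = 1 and offset 9999 % 8192 = 1807. In code (usable only within balloc.c, where the helper is static):

/* somewhere in balloc.c, with sb already set up */
unsigned long group;
ext3_grpblk_t offset;

ext3_get_group_no_and_offset(sb, 10000, &group, &offset);
/* with the geometry above: group == 1, offset == 1807 */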
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e2e72c367cf6..34f0a072b935 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
69 const char * error_msg = NULL; 69 const char * error_msg = NULL;
70 const int rlen = ext3_rec_len_from_disk(de->rec_len); 70 const int rlen = ext3_rec_len_from_disk(de->rec_len);
71 71
72 if (rlen < EXT3_DIR_REC_LEN(1)) 72 if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
73 error_msg = "rec_len is smaller than minimal"; 73 error_msg = "rec_len is smaller than minimal";
74 else if (rlen % 4 != 0) 74 else if (unlikely(rlen % 4 != 0))
75 error_msg = "rec_len % 4 != 0"; 75 error_msg = "rec_len % 4 != 0";
76 else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) 76 else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
77 error_msg = "rec_len is too small for name_len"; 77 error_msg = "rec_len is too small for name_len";
78 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 78 else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
79 error_msg = "directory entry across blocks"; 79 error_msg = "directory entry across blocks";
80 else if (le32_to_cpu(de->inode) > 80 else if (unlikely(le32_to_cpu(de->inode) >
81 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) 81 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
82 error_msg = "inode out of bounds"; 82 error_msg = "inode out of bounds";
83 83
84 if (error_msg != NULL) 84 if (unlikely(error_msg != NULL))
85 ext3_error (dir->i_sb, function, 85 ext3_error (dir->i_sb, function,
86 "bad entry in directory #%lu: %s - " 86 "bad entry in directory #%lu: %s - "
87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
88 dir->i_ino, error_msg, offset, 88 dir->i_ino, error_msg, offset,
89 (unsigned long) le32_to_cpu(de->inode), 89 (unsigned long) le32_to_cpu(de->inode),
 90 rlen, de->name_len); 90 rlen, de->name_len);
91
91 return error_msg == NULL ? 1 : 0; 92 return error_msg == NULL ? 1 : 0;
92} 93}
93 94
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a9580617edd2..ae94f6d949f5 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2145,13 +2145,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2145 if (try_to_extend_transaction(handle, inode)) { 2145 if (try_to_extend_transaction(handle, inode)) {
2146 if (bh) { 2146 if (bh) {
2147 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 2147 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
2148 ext3_journal_dirty_metadata(handle, bh); 2148 if (ext3_journal_dirty_metadata(handle, bh))
2149 return;
2149 } 2150 }
2150 ext3_mark_inode_dirty(handle, inode); 2151 ext3_mark_inode_dirty(handle, inode);
2151 truncate_restart_transaction(handle, inode); 2152 truncate_restart_transaction(handle, inode);
2152 if (bh) { 2153 if (bh) {
2153 BUFFER_TRACE(bh, "retaking write access"); 2154 BUFFER_TRACE(bh, "retaking write access");
2154 ext3_journal_get_write_access(handle, bh); 2155 if (ext3_journal_get_write_access(handle, bh))
2156 return;
2155 } 2157 }
2156 } 2158 }
2157 2159
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 88974814783a..fc080dd561f7 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -276,7 +276,29 @@ group_add_out:
276 mnt_drop_write(filp->f_path.mnt); 276 mnt_drop_write(filp->f_path.mnt);
277 return err; 277 return err;
278 } 278 }
279 case FITRIM: {
279 280
281 struct super_block *sb = inode->i_sb;
282 struct fstrim_range range;
283 int ret = 0;
284
285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM;
287
288 if (copy_from_user(&range, (struct fstrim_range *)arg,
289 sizeof(range)))
290 return -EFAULT;
291
292 ret = ext3_trim_fs(sb, &range);
293 if (ret < 0)
294 return ret;
295
296 if (copy_to_user((struct fstrim_range *)arg, &range,
297 sizeof(range)))
298 return -EFAULT;
299
300 return 0;
301 }
280 302
281 default: 303 default:
282 return -ENOTTY; 304 return -ENOTTY;
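[Note] From userspace, the new case above is reached via ioctl(fd, FITRIM, &range) on any file descriptor within the mounted filesystem. A minimal sketch (the mount point /mnt is an assumption; FITRIM and struct fstrim_range come from linux/fs.h, and CAP_SYS_ADMIN is required):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range = {
		.start  = 0,
		.len    = ~0ULL,	/* whole filesystem */
		.minlen = 4096,		/* skip extents under 4KiB */
	};
	int fd = open("/mnt", O_RDONLY);

	if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
		perror("FITRIM");
		return 1;
	}
	/* the kernel rewrites range.len to the bytes actually trimmed */
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}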
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index bce9dce639b8..b27ba71810ec 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -858,6 +858,7 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
858 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 858 struct buffer_head * bh_use[NAMEI_RA_SIZE];
859 struct buffer_head * bh, *ret = NULL; 859 struct buffer_head * bh, *ret = NULL;
860 unsigned long start, block, b; 860 unsigned long start, block, b;
861 const u8 *name = entry->name;
861 int ra_max = 0; /* Number of bh's in the readahead 862 int ra_max = 0; /* Number of bh's in the readahead
862 buffer, bh_use[] */ 863 buffer, bh_use[] */
863 int ra_ptr = 0; /* Current index into readahead 864 int ra_ptr = 0; /* Current index into readahead
@@ -871,6 +872,16 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
871 namelen = entry->len; 872 namelen = entry->len;
872 if (namelen > EXT3_NAME_LEN) 873 if (namelen > EXT3_NAME_LEN)
873 return NULL; 874 return NULL;
875 if ((namelen <= 2) && (name[0] == '.') &&
876 (name[1] == '.' || name[1] == 0)) {
877 /*
878 * "." or ".." will only be in the first block
879 * NFS may look up ".."; "." should be handled by the VFS
880 */
881 block = start = 0;
882 nblocks = 1;
883 goto restart;
884 }
874 if (is_dx(dir)) { 885 if (is_dx(dir)) {
875 bh = ext3_dx_find_entry(dir, entry, res_dir, &err); 886 bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
876 /* 887 /*
@@ -961,55 +972,35 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
961 struct qstr *entry, struct ext3_dir_entry_2 **res_dir, 972 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
962 int *err) 973 int *err)
963{ 974{
964 struct super_block * sb; 975 struct super_block *sb = dir->i_sb;
965 struct dx_hash_info hinfo; 976 struct dx_hash_info hinfo;
966 u32 hash;
967 struct dx_frame frames[2], *frame; 977 struct dx_frame frames[2], *frame;
968 struct ext3_dir_entry_2 *de, *top;
969 struct buffer_head *bh; 978 struct buffer_head *bh;
970 unsigned long block; 979 unsigned long block;
971 int retval; 980 int retval;
972 int namelen = entry->len;
973 const u8 *name = entry->name;
974 981
975 sb = dir->i_sb; 982 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
976 /* NFS may look up ".." - look at dx_root directory block */ 983 return NULL;
977 if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
978 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
979 return NULL;
980 } else {
981 frame = frames;
982 frame->bh = NULL; /* for dx_release() */
983 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
984 dx_set_block(frame->at, 0); /* dx_root block is 0 */
985 }
986 hash = hinfo.hash;
987 do { 984 do {
988 block = dx_get_block(frame->at); 985 block = dx_get_block(frame->at);
989 if (!(bh = ext3_bread (NULL,dir, block, 0, err))) 986 if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
990 goto errout; 987 goto errout;
991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) {
995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 + ((char *) de - bh->b_data);
997
998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 brelse(bh);
1000 *err = ERR_BAD_DX_DIR;
1001 goto errout;
1002 }
1003 988
1004 if (ext3_match(namelen, name, de)) { 989 retval = search_dirblock(bh, dir, entry,
1005 *res_dir = de; 990 block << EXT3_BLOCK_SIZE_BITS(sb),
1006 dx_release(frames); 991 res_dir);
1007 return bh; 992 if (retval == 1) {
1008 } 993 dx_release(frames);
994 return bh;
1009 } 995 }
1010 brelse (bh); 996 brelse(bh);
997 if (retval == -1) {
998 *err = ERR_BAD_DX_DIR;
999 goto errout;
1000 }
1001
1011 /* Check to see if we should continue to search */ 1002 /* Check to see if we should continue to search */
1012 retval = ext3_htree_next_block(dir, hash, frame, 1003 retval = ext3_htree_next_block(dir, hinfo.hash, frame,
1013 frames, NULL); 1004 frames, NULL);
1014 if (retval < 0) { 1005 if (retval < 0) {
1015 ext3_warning(sb, __func__, 1006 ext3_warning(sb, __func__,
@@ -1047,7 +1038,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1047 return ERR_PTR(-EIO); 1038 return ERR_PTR(-EIO);
1048 } 1039 }
1049 inode = ext3_iget(dir->i_sb, ino); 1040 inode = ext3_iget(dir->i_sb, ino);
1050 if (unlikely(IS_ERR(inode))) { 1041 if (IS_ERR(inode)) {
1051 if (PTR_ERR(inode) == -ESTALE) { 1042 if (PTR_ERR(inode) == -ESTALE) {
1052 ext3_error(dir->i_sb, __func__, 1043 ext3_error(dir->i_sb, __func__,
1053 "deleted inode referenced: %lu", 1044 "deleted inode referenced: %lu",
@@ -1607,7 +1598,9 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1607 if (err) 1598 if (err)
1608 goto journal_error; 1599 goto journal_error;
1609 } 1600 }
1610 ext3_journal_dirty_metadata(handle, frames[0].bh); 1601 err = ext3_journal_dirty_metadata(handle, frames[0].bh);
1602 if (err)
1603 goto journal_error;
1611 } 1604 }
1612 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 1605 de = do_split(handle, dir, &bh, frame, &hinfo, &err);
1613 if (!de) 1606 if (!de)
@@ -1644,8 +1637,13 @@ static int ext3_delete_entry (handle_t *handle,
1644 if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) 1637 if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
1645 return -EIO; 1638 return -EIO;
1646 if (de == de_del) { 1639 if (de == de_del) {
1640 int err;
1641
1647 BUFFER_TRACE(bh, "get_write_access"); 1642 BUFFER_TRACE(bh, "get_write_access");
1648 ext3_journal_get_write_access(handle, bh); 1643 err = ext3_journal_get_write_access(handle, bh);
1644 if (err)
1645 goto journal_error;
1646
1649 if (pde) 1647 if (pde)
1650 pde->rec_len = ext3_rec_len_to_disk( 1648 pde->rec_len = ext3_rec_len_to_disk(
1651 ext3_rec_len_from_disk(pde->rec_len) + 1649 ext3_rec_len_from_disk(pde->rec_len) +
@@ -1654,7 +1652,12 @@ static int ext3_delete_entry (handle_t *handle,
1654 de->inode = 0; 1652 de->inode = 0;
1655 dir->i_version++; 1653 dir->i_version++;
1656 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 1654 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
1657 ext3_journal_dirty_metadata(handle, bh); 1655 err = ext3_journal_dirty_metadata(handle, bh);
1656 if (err) {
1657journal_error:
1658 ext3_std_error(dir->i_sb, err);
1659 return err;
1660 }
1658 return 0; 1661 return 0;
1659 } 1662 }
1660 i += ext3_rec_len_from_disk(de->rec_len); 1663 i += ext3_rec_len_from_disk(de->rec_len);
@@ -1762,7 +1765,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1762{ 1765{
1763 handle_t *handle; 1766 handle_t *handle;
1764 struct inode * inode; 1767 struct inode * inode;
1765 struct buffer_head * dir_block; 1768 struct buffer_head * dir_block = NULL;
1766 struct ext3_dir_entry_2 * de; 1769 struct ext3_dir_entry_2 * de;
1767 int err, retries = 0; 1770 int err, retries = 0;
1768 1771
@@ -1790,15 +1793,14 @@ retry:
1790 inode->i_fop = &ext3_dir_operations; 1793 inode->i_fop = &ext3_dir_operations;
1791 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1794 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1792 dir_block = ext3_bread (handle, inode, 0, 1, &err); 1795 dir_block = ext3_bread (handle, inode, 0, 1, &err);
1793 if (!dir_block) { 1796 if (!dir_block)
1794 drop_nlink(inode); /* is this nlink == 0? */ 1797 goto out_clear_inode;
1795 unlock_new_inode(inode); 1798
1796 ext3_mark_inode_dirty(handle, inode);
1797 iput (inode);
1798 goto out_stop;
1799 }
1800 BUFFER_TRACE(dir_block, "get_write_access"); 1799 BUFFER_TRACE(dir_block, "get_write_access");
1801 ext3_journal_get_write_access(handle, dir_block); 1800 err = ext3_journal_get_write_access(handle, dir_block);
1801 if (err)
1802 goto out_clear_inode;
1803
1802 de = (struct ext3_dir_entry_2 *) dir_block->b_data; 1804 de = (struct ext3_dir_entry_2 *) dir_block->b_data;
1803 de->inode = cpu_to_le32(inode->i_ino); 1805 de->inode = cpu_to_le32(inode->i_ino);
1804 de->name_len = 1; 1806 de->name_len = 1;
@@ -1814,11 +1816,16 @@ retry:
1814 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1816 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1815 inode->i_nlink = 2; 1817 inode->i_nlink = 2;
1816 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1818 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1817 ext3_journal_dirty_metadata(handle, dir_block); 1819 err = ext3_journal_dirty_metadata(handle, dir_block);
1818 brelse (dir_block); 1820 if (err)
1819 ext3_mark_inode_dirty(handle, inode); 1821 goto out_clear_inode;
1820 err = ext3_add_entry (handle, dentry, inode); 1822
1823 err = ext3_mark_inode_dirty(handle, inode);
1824 if (!err)
1825 err = ext3_add_entry (handle, dentry, inode);
1826
1821 if (err) { 1827 if (err) {
1828out_clear_inode:
1822 inode->i_nlink = 0; 1829 inode->i_nlink = 0;
1823 unlock_new_inode(inode); 1830 unlock_new_inode(inode);
1824 ext3_mark_inode_dirty(handle, inode); 1831 ext3_mark_inode_dirty(handle, inode);
@@ -1827,10 +1834,14 @@ retry:
1827 } 1834 }
1828 inc_nlink(dir); 1835 inc_nlink(dir);
1829 ext3_update_dx_flag(dir); 1836 ext3_update_dx_flag(dir);
1830 ext3_mark_inode_dirty(handle, dir); 1837 err = ext3_mark_inode_dirty(handle, dir);
1838 if (err)
1839 goto out_clear_inode;
1840
1831 d_instantiate(dentry, inode); 1841 d_instantiate(dentry, inode);
1832 unlock_new_inode(inode); 1842 unlock_new_inode(inode);
1833out_stop: 1843out_stop:
1844 brelse(dir_block);
1834 ext3_journal_stop(handle); 1845 ext3_journal_stop(handle);
1835 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 1846 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
1836 goto retry; 1847 goto retry;
@@ -2353,7 +2364,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2353 goto end_rename; 2364 goto end_rename;
2354 } else { 2365 } else {
2355 BUFFER_TRACE(new_bh, "get write access"); 2366 BUFFER_TRACE(new_bh, "get write access");
2356 ext3_journal_get_write_access(handle, new_bh); 2367 retval = ext3_journal_get_write_access(handle, new_bh);
2368 if (retval)
2369 goto journal_error;
2357 new_de->inode = cpu_to_le32(old_inode->i_ino); 2370 new_de->inode = cpu_to_le32(old_inode->i_ino);
2358 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, 2371 if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2359 EXT3_FEATURE_INCOMPAT_FILETYPE)) 2372 EXT3_FEATURE_INCOMPAT_FILETYPE))
@@ -2362,7 +2375,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2362 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC; 2375 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
2363 ext3_mark_inode_dirty(handle, new_dir); 2376 ext3_mark_inode_dirty(handle, new_dir);
2364 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); 2377 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
2365 ext3_journal_dirty_metadata(handle, new_bh); 2378 retval = ext3_journal_dirty_metadata(handle, new_bh);
2379 if (retval)
2380 goto journal_error;
2366 brelse(new_bh); 2381 brelse(new_bh);
2367 new_bh = NULL; 2382 new_bh = NULL;
2368 } 2383 }
@@ -2411,10 +2426,17 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2411 ext3_update_dx_flag(old_dir); 2426 ext3_update_dx_flag(old_dir);
2412 if (dir_bh) { 2427 if (dir_bh) {
2413 BUFFER_TRACE(dir_bh, "get_write_access"); 2428 BUFFER_TRACE(dir_bh, "get_write_access");
2414 ext3_journal_get_write_access(handle, dir_bh); 2429 retval = ext3_journal_get_write_access(handle, dir_bh);
2430 if (retval)
2431 goto journal_error;
2415 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2432 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
2416 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); 2433 BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
2417 ext3_journal_dirty_metadata(handle, dir_bh); 2434 retval = ext3_journal_dirty_metadata(handle, dir_bh);
2435 if (retval) {
2436journal_error:
2437 ext3_std_error(new_dir->i_sb, retval);
2438 goto end_rename;
2439 }
2418 drop_nlink(old_dir); 2440 drop_nlink(old_dir);
2419 if (new_inode) { 2441 if (new_inode) {
2420 drop_nlink(new_inode); 2442 drop_nlink(new_inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index e746d30b1232..108b142e11ed 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -249,7 +249,11 @@ static int setup_new_group_blocks(struct super_block *sb,
 			memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
 			set_buffer_uptodate(gdb);
 			unlock_buffer(gdb);
-			ext3_journal_dirty_metadata(handle, gdb);
+			err = ext3_journal_dirty_metadata(handle, gdb);
+			if (err) {
+				brelse(gdb);
+				goto exit_bh;
+			}
 			ext3_set_bit(bit, bh->b_data);
 			brelse(gdb);
 		}
@@ -269,7 +273,11 @@ static int setup_new_group_blocks(struct super_block *sb,
 			err = PTR_ERR(gdb);
 			goto exit_bh;
 		}
-		ext3_journal_dirty_metadata(handle, gdb);
+		err = ext3_journal_dirty_metadata(handle, gdb);
+		if (err) {
+			brelse(gdb);
+			goto exit_bh;
+		}
 		ext3_set_bit(bit, bh->b_data);
 		brelse(gdb);
 	}
@@ -295,7 +303,11 @@ static int setup_new_group_blocks(struct super_block *sb,
 			err = PTR_ERR(it);
 			goto exit_bh;
 		}
-		ext3_journal_dirty_metadata(handle, it);
+		err = ext3_journal_dirty_metadata(handle, it);
+		if (err) {
+			brelse(it);
+			goto exit_bh;
+		}
 		brelse(it);
 		ext3_set_bit(bit, bh->b_data);
 	}
@@ -306,7 +318,9 @@ static int setup_new_group_blocks(struct super_block *sb,
 
 	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
 			bh->b_data);
-	ext3_journal_dirty_metadata(handle, bh);
+	err = ext3_journal_dirty_metadata(handle, bh);
+	if (err)
+		goto exit_bh;
 	brelse(bh);
 
 	/* Mark unused entries in inode bitmap used */
@@ -319,7 +333,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 
 	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
 			bh->b_data);
-	ext3_journal_dirty_metadata(handle, bh);
+	err = ext3_journal_dirty_metadata(handle, bh);
 exit_bh:
 	brelse(bh);
 
@@ -503,12 +517,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	 * reserved inode, and will become GDT blocks (primary and backup).
 	 */
 	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
-	ext3_journal_dirty_metadata(handle, dind);
+	err = ext3_journal_dirty_metadata(handle, dind);
+	if (err)
+		goto exit_group_desc;
 	brelse(dind);
+	dind = NULL;
 	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
-	ext3_mark_iloc_dirty(handle, inode, &iloc);
+	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	if (err)
+		goto exit_group_desc;
 	memset((*primary)->b_data, 0, sb->s_blocksize);
-	ext3_journal_dirty_metadata(handle, *primary);
+	err = ext3_journal_dirty_metadata(handle, *primary);
+	if (err)
+		goto exit_group_desc;
 
 	o_group_desc = EXT3_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
@@ -519,10 +540,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	kfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	if (err)
+		goto exit_inode;
 
 	return 0;
 
+exit_group_desc:
+	kfree(n_group_desc);
exit_inode:
 	//ext3_journal_release_buffer(handle, iloc.bh);
 	brelse(iloc.bh);
@@ -706,16 +731,20 @@ static void update_backups(struct super_block *sb,
 		}
 		ext3_debug("update metadata backup %#04lx\n",
 			   (unsigned long)bh->b_blocknr);
-		if ((err = ext3_journal_get_write_access(handle, bh)))
+		if ((err = ext3_journal_get_write_access(handle, bh))) {
+			brelse(bh);
 			break;
+		}
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
 		if (rest)
 			memset(bh->b_data + size, 0, rest);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
-		ext3_journal_dirty_metadata(handle, bh);
+		err = ext3_journal_dirty_metadata(handle, bh);
 		brelse(bh);
+		if (err)
+			break;
 	}
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
@@ -922,7 +951,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	/* Update the global fs size fields */
 	sbi->s_groups_count++;
 
-	ext3_journal_dirty_metadata(handle, primary);
+	err = ext3_journal_dirty_metadata(handle, primary);
+	if (err)
+		goto exit_journal;
 
 	/* Update the reserved block counts only once the new group is
 	 * active. */
@@ -934,7 +965,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	percpu_counter_add(&sbi->s_freeinodes_counter,
 			   EXT3_INODES_PER_GROUP(sb));
 
-	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+	err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 
exit_journal:
 	mutex_unlock(&sbi->s_resize_lock);
@@ -1064,8 +1095,14 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
+	if (err) {
+		ext3_warning(sb, __func__,
+			     "error %d on journal dirty metadata", err);
+		ext3_journal_stop(handle);
+		goto exit_put;
+	}
 	ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
 		   o_blocks_count, o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 77ce1616f725..b7d0554631e4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -143,12 +143,16 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
 void ext3_msg(struct super_block *sb, const char *prefix,
 	      const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
-	vprintk(fmt, args);
-	printk("\n");
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
 	va_end(args);
 }
 
@@ -195,15 +199,20 @@ static void ext3_handle_error(struct super_block *sb)
 		sb->s_id);
 }
 
-void ext3_error (struct super_block * sb, const char * function,
-		 const char * fmt, ...)
+void ext3_error(struct super_block *sb, const char *function,
+		const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
-	vprintk(fmt, args);
-	printk("\n");
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
+	       sb->s_id, function, &vaf);
+
 	va_end(args);
 
 	ext3_handle_error(sb);
@@ -274,15 +283,20 @@ void __ext3_std_error (struct super_block * sb, const char * function,
  * case we take the easy way out and panic immediately.
  */
 
-void ext3_abort (struct super_block * sb, const char * function,
-		 const char * fmt, ...)
+void ext3_abort(struct super_block *sb, const char *function,
+		const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
-	vprintk(fmt, args);
-	printk("\n");
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
+	       sb->s_id, function, &vaf);
+
 	va_end(args);
 
 	if (test_opt(sb, ERRORS_PANIC))
@@ -300,16 +314,20 @@ void ext3_abort (struct super_block * sb, const char * function,
 	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 }
 
-void ext3_warning (struct super_block * sb, const char * function,
-		   const char * fmt, ...)
+void ext3_warning(struct super_block *sb, const char *function,
+		  const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
-	       sb->s_id, function);
-	vprintk(fmt, args);
-	printk("\n");
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
+	       sb->s_id, function, &vaf);
+
 	va_end(args);
 }
 
@@ -1848,13 +1866,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	if (generic_check_addressable(sb->s_blocksize_bits,
-				      le32_to_cpu(es->s_blocks_count))) {
+	err = generic_check_addressable(sb->s_blocksize_bits,
+					le32_to_cpu(es->s_blocks_count));
+	if (err) {
 		ext3_msg(sb, KERN_ERR,
 			"error: filesystem is too large to mount safely");
 		if (sizeof(sector_t) < 8)
 			ext3_msg(sb, KERN_ERR,
 				"error: CONFIG_LBDAF not enabled");
+		ret = err;
 		goto failed_mount;
 	}
 
@@ -2297,7 +2317,7 @@ static int ext3_load_journal(struct super_block *sb,
 	EXT3_SB(sb)->s_journal = journal;
 	ext3_clear_journal_err(sb, es);
 
-	if (journal_devnum &&
+	if (!really_read_only && journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
 
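The hunks above replace a printk/vprintk/printk triple with one printk using the kernel's %pV extension and struct va_format, so each message reaches the log as a single atomic line instead of three interleavable pieces. A rough userspace approximation follows; libc has no %pV, so this sketch formats into a buffer first, and ext3_msg_like() is an invented name:

#include <stdarg.h>
#include <stdio.h>

static void ext3_msg_like(const char *prefix, const char *dev,
			  const char *fmt, ...)
{
	char body[256];
	va_list args;

	va_start(args, fmt);
	vsnprintf(body, sizeof(body), fmt, args);
	va_end(args);

	/* one call, so concurrent messages can't interleave mid-line */
	fprintf(stderr, "%sEXT3-fs (%s): %s\n", prefix, dev, body);
}

int main(void)
{
	ext3_msg_like("<3>", "sda1", "error %d on journal dirty metadata", -5);
	return 0;
}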
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index e69dc6dfaa89..32e6cc23bd9a 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -925,7 +925,7 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
 /*
  * ext3_xattr_set_handle()
  *
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
  * is NULL to remove an existing extended attribute, and non-NULL to
  * either replace an existing extended attribute, or create a new extended
  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 14c3af26c671..adf96b822781 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -592,7 +592,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	 * Account for the allocated meta blocks.  We will never
 	 * fail EDQUOT for metdata, but we do account for it.
	 */
-	if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
+	if (!(*errp) &&
+	    ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ece76fb6a40c..164c56092e58 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -60,9 +60,13 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
 	return (ext4_filetype_table[filetype]);
 }
 
-
+/*
+ * Return 0 if the directory entry is OK, and 1 if there is a problem
+ *
+ * Note: this is the opposite of what ext2 and ext3 historically returned...
+ */
 int __ext4_check_dir_entry(const char *function, unsigned int line,
-			   struct inode *dir,
+			   struct inode *dir, struct file *filp,
			   struct ext4_dir_entry_2 *de,
			   struct buffer_head *bh,
			   unsigned int offset)
@@ -71,26 +75,37 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 	const int rlen = ext4_rec_len_from_disk(de->rec_len,
 						dir->i_sb->s_blocksize);
 
-	if (rlen < EXT4_DIR_REC_LEN(1))
+	if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
 		error_msg = "rec_len is smaller than minimal";
-	else if (rlen % 4 != 0)
+	else if (unlikely(rlen % 4 != 0))
 		error_msg = "rec_len % 4 != 0";
-	else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
+	else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
 		error_msg = "rec_len is too small for name_len";
-	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+	else if (unlikely(((char *) de - bh->b_data) + rlen >
+			  dir->i_sb->s_blocksize))
 		error_msg = "directory entry across blocks";
-	else if (le32_to_cpu(de->inode) >
-		 le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
+	else if (unlikely(le32_to_cpu(de->inode) >
+			  le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
 		error_msg = "inode out of bounds";
+	else
+		return 0;
 
-	if (error_msg != NULL)
-		ext4_error_inode(dir, function, line, bh->b_blocknr,
-			"bad entry in directory: %s - "
-			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-			error_msg, (unsigned) (offset%bh->b_size), offset,
-			le32_to_cpu(de->inode),
-			rlen, de->name_len);
-	return error_msg == NULL ? 1 : 0;
+	if (filp)
+		ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+				"bad entry in directory: %s - offset=%u(%u), "
+				"inode=%u, rec_len=%d, name_len=%d",
+				error_msg, (unsigned) (offset%bh->b_size),
+				offset, le32_to_cpu(de->inode),
+				rlen, de->name_len);
+	else
+		ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+				"bad entry in directory: %s - offset=%u(%u), "
+				"inode=%u, rec_len=%d, name_len=%d",
+				error_msg, (unsigned) (offset%bh->b_size),
+				offset, le32_to_cpu(de->inode),
+				rlen, de->name_len);
+
+	return 1;
 }
 
 static int ext4_readdir(struct file *filp,
@@ -152,8 +167,9 @@ static int ext4_readdir(struct file *filp,
		 */
		if (!bh) {
			if (!dir_has_error) {
-				EXT4_ERROR_INODE(inode, "directory "
-					   "contains a hole at offset %Lu",
+				EXT4_ERROR_FILE(filp, 0,
+						"directory contains a "
+						"hole at offset %llu",
					   (unsigned long long) filp->f_pos);
				dir_has_error = 1;
			}
@@ -194,8 +210,8 @@ revalidate:
		while (!error && filp->f_pos < inode->i_size
		       && offset < sb->s_blocksize) {
			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
-			if (!ext4_check_dir_entry(inode, de,
-						  bh, offset)) {
+			if (ext4_check_dir_entry(inode, filp, de,
+						 bh, offset)) {
				/*
				 * On error, skip the f_pos to the next block
				 */
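After this change __ext4_check_dir_entry() returns 0 for a good entry and 1 for a corrupt one, the opposite of the old ext2/ext3 convention, and callers wrap the check in unlikely(). A compilable sketch of the validation cascade follows; the record layout and limits are simplified, made-up stand-ins for the on-disk ext4 format:

#include <stdint.h>
#include <stdio.h>

struct dir_entry {
	uint32_t inode;
	uint16_t rec_len;
	uint8_t  name_len;
};

/* 8-byte header plus the name, rounded up to a 4-byte boundary */
#define REC_LEN(name_len) (8 + (((name_len) + 3) & ~3u))

static int check_dir_entry(const struct dir_entry *de,
			   unsigned offset, unsigned blocksize,
			   uint32_t inodes_count)
{
	const char *error_msg;

	if (de->rec_len < REC_LEN(1))
		error_msg = "rec_len is smaller than minimal";
	else if (de->rec_len % 4 != 0)
		error_msg = "rec_len % 4 != 0";
	else if (de->rec_len < REC_LEN(de->name_len))
		error_msg = "rec_len is too small for name_len";
	else if (offset + de->rec_len > blocksize)
		error_msg = "directory entry across blocks";
	else if (de->inode > inodes_count)
		error_msg = "inode out of bounds";
	else
		return 0;	/* entry is OK */

	fprintf(stderr, "bad entry in directory: %s\n", error_msg);
	return 1;
}

int main(void)
{
	struct dir_entry good = { 11, 16, 5 };
	struct dir_entry bad  = { 11, 6, 5 };

	printf("good -> %d, bad -> %d\n",
	       check_dir_entry(&good, 0, 4096, 1024),
	       check_dir_entry(&bad, 0, 4096, 1024));
	return 0;
}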
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94ce3d7a1c4b..bab2387fba43 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -62,8 +62,8 @@
 #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
	ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
 
-#define EXT4_ERROR_FILE(file, fmt, a...) \
-	ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
+#define EXT4_ERROR_FILE(file, block, fmt, a...) \
+	ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
 
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -561,22 +561,6 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
 #endif
 
-
-/*
- * Mount options
- */
-struct ext4_mount_options {
-	unsigned long s_mount_opt;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned long s_commit_interval;
-	u32 s_min_batch_time, s_max_batch_time;
-#ifdef CONFIG_QUOTA
-	int s_jquota_fmt;
-	char *s_qf_names[MAXQUOTAS];
-#endif
-};
-
 /* Max physical block we can addres w/o extents */
 #define EXT4_MAX_BLOCK_FILE_PHYS	0xFFFFFFFF
 
@@ -709,6 +693,8 @@ do { \
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))	\
		ext4_decode_extra_time(&(inode)->xtime,				\
				       raw_inode->xtime ## _extra);		\
+	else									\
+		(inode)->xtime.tv_nsec = 0;					\
 } while (0)
 
 #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)				\
@@ -719,6 +705,8 @@ do { \
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))		\
		ext4_decode_extra_time(&(einode)->xtime,			\
				       raw_inode->xtime ## _extra);		\
+	else									\
+		(einode)->xtime.tv_nsec = 0;					\
 } while (0)
 
 #define i_disk_version osd1.linux1.l_i_version
@@ -750,12 +738,13 @@ do { \
 
 /*
  * storage for cached extent
+ * If ec_len == 0, then the cache is invalid.
+ * If ec_start == 0, then the cache represents a gap (null mapping)
  */
 struct ext4_ext_cache {
	ext4_fsblk_t	ec_start;
	ext4_lblk_t	ec_block;
	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
 };
 
 /*
@@ -774,10 +763,12 @@ struct ext4_inode_info {
	 * near to their parent directory's inode.
	 */
	ext4_group_t	i_block_group;
+	ext4_lblk_t	i_dir_start_lookup;
+#if (BITS_PER_LONG < 64)
	unsigned long	i_state_flags;		/* Dynamic state flags */
+#endif
	unsigned long	i_flags;
 
-	ext4_lblk_t		i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
@@ -820,7 +811,7 @@ struct ext4_inode_info {
	 */
	struct rw_semaphore i_data_sem;
	struct inode vfs_inode;
-	struct jbd2_inode jinode;
+	struct jbd2_inode *jinode;
 
	struct ext4_ext_cache i_cached_extent;
	/*
@@ -840,14 +831,12 @@ struct ext4_inode_info {
	unsigned int i_reserved_data_blocks;
	unsigned int i_reserved_meta_blocks;
	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-	sector_t i_da_metadata_calc_last_lblock;
+	ext4_lblk_t i_da_metadata_calc_last_lblock;
	int i_da_metadata_calc_len;
 
	/* on-disk additional length */
	__u16 i_extra_isize;
 
-	spinlock_t i_block_reservation_lock;
 #ifdef CONFIG_QUOTA
	/* quota space reservation, managed internally by quota code */
	qsize_t i_reserved_quota;
@@ -856,9 +845,11 @@ struct ext4_inode_info {
	/* completed IOs that might need unwritten extents handling */
	struct list_head i_completed_io_list;
	spinlock_t i_completed_io_lock;
+	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
	/* current io_end structure for async DIO write*/
	ext4_io_end_t *cur_aio_dio;
-	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
+
+	spinlock_t i_block_reservation_lock;
 
	/*
	 * Transactions that contain inode's metadata needed to complete
@@ -917,11 +908,20 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
-#define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
-#define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
+#define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
+						~EXT4_MOUNT_##opt
+#define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
+						EXT4_MOUNT_##opt
 #define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
					 EXT4_MOUNT_##opt)
 
+#define clear_opt2(sb, opt)		EXT4_SB(sb)->s_mount_opt2 &= \
+						~EXT4_MOUNT2_##opt
+#define set_opt2(sb, opt)		EXT4_SB(sb)->s_mount_opt2 |= \
+						EXT4_MOUNT2_##opt
+#define test_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 & \
+					 EXT4_MOUNT2_##opt)
+
 #define ext4_set_bit			ext2_set_bit
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
 #define ext4_clear_bit			ext2_clear_bit
@@ -1087,6 +1087,7 @@ struct ext4_sb_info {
	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
	struct buffer_head **s_group_desc;
	unsigned int s_mount_opt;
+	unsigned int s_mount_opt2;
	unsigned int s_mount_flags;
	ext4_fsblk_t s_sb_block;
	uid_t s_resuid;
@@ -1237,24 +1238,39 @@ enum {
	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
	EXT4_STATE_NEWENTRY,		/* File just added to dir */
+	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
 };
 
-#define EXT4_INODE_BIT_FNS(name, field)					\
+#define EXT4_INODE_BIT_FNS(name, field, offset)				\
 static inline int ext4_test_inode_##name(struct inode *inode, int bit)	\
 {									\
-	return test_bit(bit, &EXT4_I(inode)->i_##field);		\
+	return test_bit(bit + (offset), &EXT4_I(inode)->i_##field);	\
 }									\
 static inline void ext4_set_inode_##name(struct inode *inode, int bit)	\
 {									\
-	set_bit(bit, &EXT4_I(inode)->i_##field);			\
+	set_bit(bit + (offset), &EXT4_I(inode)->i_##field);		\
 }									\
 static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
 {									\
-	clear_bit(bit, &EXT4_I(inode)->i_##field);			\
+	clear_bit(bit + (offset), &EXT4_I(inode)->i_##field);		\
 }
 
-EXT4_INODE_BIT_FNS(flag, flags)
-EXT4_INODE_BIT_FNS(state, state_flags)
+EXT4_INODE_BIT_FNS(flag, flags, 0)
+#if (BITS_PER_LONG < 64)
+EXT4_INODE_BIT_FNS(state, state_flags, 0)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+	(ei)->i_state_flags = 0;
+}
+#else
+EXT4_INODE_BIT_FNS(state, flags, 32)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+	/* We depend on the fact that callers will set i_flags */
+}
+#endif
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -1642,10 +1658,12 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
 
 /* dir.c */
 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+				  struct file *,
				  struct ext4_dir_entry_2 *,
				  struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, de, bh, offset) \
-	__ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
+#define ext4_check_dir_entry(dir, filp, de, bh, offset) \
+	unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
+					(de), (bh), (offset)))
 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
				   __u32 minor_hash,
				   struct ext4_dir_entry_2 *dirent);
@@ -1653,6 +1671,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, int);
+extern int ext4_flush_completed_IO(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1752,8 +1771,8 @@ extern void ext4_error_inode(struct inode *, const char *, unsigned int,
			     ext4_fsblk_t, const char *, ...)
	__attribute__ ((format (printf, 5, 6)));
 extern void ext4_error_file(struct file *, const char *, unsigned int,
-			    const char *, ...)
-	__attribute__ ((format (printf, 4, 5)));
+			    ext4_fsblk_t, const char *, ...)
+	__attribute__ ((format (printf, 5, 6)));
 extern void __ext4_std_error(struct super_block *, const char *,
			     unsigned int, int);
 extern void __ext4_abort(struct super_block *, const char *, unsigned int,
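The offset parameter added to EXT4_INODE_BIT_FNS() is what lets 64-bit kernels pack the dynamic state bits into the upper half of the same unsigned long that holds i_flags, while 32-bit kernels keep a separate i_state_flags word. A small userspace model of that layout trick; plain bit ops stand in for the kernel's atomic test_bit()/set_bit(), and all names here are illustrative:

#include <limits.h>
#include <stdio.h>

struct inode_info {
	unsigned long flags;		/* persistent flag bits 0..31 */
#if ULONG_MAX <= 0xFFFFFFFFUL
	unsigned long state_flags;	/* 32-bit: states need their own word */
#endif
};

#if ULONG_MAX <= 0xFFFFFFFFUL
#define STATE_OFFSET 0
static unsigned long *state_word(struct inode_info *ei)
{
	return &ei->state_flags;
}
#else
#define STATE_OFFSET 32			/* states ride above the flags */
static unsigned long *state_word(struct inode_info *ei)
{
	return &ei->flags;
}
#endif

static void set_state(struct inode_info *ei, int bit)
{
	*state_word(ei) |= 1UL << (bit + STATE_OFFSET);
}

static int test_state(struct inode_info *ei, int bit)
{
	return (*state_word(ei) >> (bit + STATE_OFFSET)) & 1;
}

int main(void)
{
	struct inode_info ei = { 0 };

	set_state(&ei, 3);
	printf("state bit 3: %d, flags word: %#lx\n",
	       test_state(&ei, 3), ei.flags);
	return 0;
}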
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 28ce70fd9cd0..2e29abb30f76 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -119,10 +119,6 @@ struct ext4_ext_path {
  * structure for external API
  */
 
-#define EXT4_EXT_CACHE_NO	0
-#define EXT4_EXT_CACHE_GAP	1
-#define EXT4_EXT_CACHE_EXTENT	2
-
 /*
  * to be called by ext4_ext_walk_space()
  * negative retcode - error
@@ -197,7 +193,7 @@ static inline unsigned short ext_depth(struct inode *inode)
 static inline void
 ext4_ext_invalidate_cache(struct inode *inode)
 {
-	EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
+	EXT4_I(inode)->i_cached_extent.ec_len = 0;
 }
 
 static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
@@ -278,7 +274,7 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
 }
 
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
-					 sector_t lblocks);
+					 ext4_lblk_t lblocks);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
						   int num,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b0bd792c58c5..d8b992e658c1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal)
 static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
 {
	if (ext4_handle_valid(handle))
-		return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+		return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
	return 0;
 }
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0554c48cb1fd..e910720e8bb8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
		struct ext4_extent *ex;
		depth = path->p_depth;
 
-		/* try to predict block placement */
+		/*
+		 * Try to predict block placement assuming that we are
+		 * filling in a file which will eventually be
+		 * non-sparse --- i.e., in the case of libbfd writing
+		 * an ELF object sections out-of-order but in a way
+		 * the eventually results in a contiguous object or
+		 * executable file, or some database extending a table
+		 * space file.  However, this is actually somewhat
+		 * non-ideal if we are writing a sparse file such as
+		 * qemu or KVM writing a raw image file that is going
+		 * to stay fairly sparse, since it will end up
+		 * fragmenting the file system's free space.  Maybe we
+		 * should have some hueristics or some way to allow
+		 * userspace to pass a hint to file system,
+		 * especiially if the latter case turns out to be
+		 * common.
+		 */
		ex = path[depth].p_ext;
-		if (ex)
-			return (ext4_ext_pblock(ex) +
-				(block - le32_to_cpu(ex->ee_block)));
+		if (ex) {
+			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
+			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+
+			if (block > ext_block)
+				return ext_pblk + (block - ext_block);
+			else
+				return ext_pblk - (ext_block - block);
+		}
 
		/* it looks like index is empty;
		 * try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
+int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 {
	struct ext4_inode_info *ei = EXT4_I(inode);
	int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
			cbex.ec_block = start;
			cbex.ec_len = end - start;
			cbex.ec_start = 0;
-			cbex.ec_type = EXT4_EXT_CACHE_GAP;
		} else {
			cbex.ec_block = le32_to_cpu(ex->ee_block);
			cbex.ec_len = ext4_ext_get_actual_len(ex);
			cbex.ec_start = ext4_ext_pblock(ex);
-			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
		}
 
		if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 
 static void
 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
-			__u32 len, ext4_fsblk_t start, int type)
+			__u32 len, ext4_fsblk_t start)
 {
	struct ext4_ext_cache *cex;
	BUG_ON(len == 0);
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;
-	cex->ec_type = type;
	cex->ec_block = block;
	cex->ec_len = len;
	cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
	}
 
	ext_debug(" -> %u:%lu\n", lblock, len);
-	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+	ext4_ext_put_in_cache(inode, lblock, len, 0);
 }
 
+/*
+ * Return 0 if cache is invalid; 1 if the cache is valid
+ */
 static int
 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
			struct ext4_extent *ex)
 {
	struct ext4_ext_cache *cex;
-	int ret = EXT4_EXT_CACHE_NO;
+	int ret = 0;
 
	/*
	 * We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
	cex = &EXT4_I(inode)->i_cached_extent;
 
	/* has cache valid data? */
-	if (cex->ec_type == EXT4_EXT_CACHE_NO)
+	if (cex->ec_len == 0)
		goto errout;
 
-	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
-			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
	if (in_range(block, cex->ec_block, cex->ec_len)) {
		ex->ee_block = cpu_to_le32(cex->ec_block);
		ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
		ext_debug("%u cached by %u:%u:%llu\n",
				block,
				cex->ec_block, cex->ec_len, cex->ec_start);
-		ret = cex->ec_type;
+		ret = 1;
	}
errout:
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
  * Handle EOFBLOCKS_FL flag, clearing it if necessary
  */
 static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
-			      struct ext4_map_blocks *map,
+			      ext4_lblk_t lblk,
			      struct ext4_ext_path *path,
			      unsigned int len)
 {
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
	 * this turns out to be false, we can bail out from this
	 * function immediately.
	 */
-	if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+	if (lblk + len < le32_to_cpu(last_ex->ee_block) +
	    ext4_ext_get_actual_len(last_ex))
		return 0;
	/*
@@ -3168,8 +3188,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
							path);
		if (ret >= 0) {
			ext4_update_inode_fsync_trans(handle, inode, 1);
-			err = check_eofblocks_fl(handle, inode, map, path,
-						 map->m_len);
+			err = check_eofblocks_fl(handle, inode, map->m_lblk,
+						 path, map->m_len);
		} else
			err = ret;
		goto out2;
@@ -3199,7 +3219,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
	if (ret >= 0) {
		ext4_update_inode_fsync_trans(handle, inode, 1);
-		err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+					 map->m_len);
		if (err < 0)
			goto out2;
	}
@@ -3276,7 +3297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	struct ext4_extent_header *eh;
	struct ext4_extent newex, *ex;
	ext4_fsblk_t newblock;
-	int err = 0, depth, ret, cache_type;
+	int err = 0, depth, ret;
	unsigned int allocated = 0;
	struct ext4_allocation_request ar;
	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3306,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		  map->m_lblk, map->m_len, inode->i_ino);
 
	/* check in cache */
-	cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
-	if (cache_type) {
-		if (cache_type == EXT4_EXT_CACHE_GAP) {
+	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+		if (!newex.ee_start_lo && !newex.ee_start_hi) {
			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
				/*
				 * block isn't allocated yet and
@@ -3296,7 +3316,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
				goto out2;
			}
			/* we should allocate requested block */
-		} else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
+		} else {
			/* block is already allocated */
			newblock = map->m_lblk
				   - le32_to_cpu(newex.ee_block)
@@ -3305,8 +3325,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			allocated = ext4_ext_get_actual_len(&newex) -
				(map->m_lblk - le32_to_cpu(newex.ee_block));
			goto out;
-		} else {
-			BUG();
		}
	}
 
@@ -3357,8 +3375,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			/* Do not put uninitialized extent in the cache */
			if (!ext4_ext_is_uninitialized(ex)) {
				ext4_ext_put_in_cache(inode, ee_block,
-							ee_len, ee_start,
-							EXT4_EXT_CACHE_EXTENT);
+							ee_len, ee_start);
				goto out;
			}
			ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3456,7 +3473,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		map->m_flags |= EXT4_MAP_UNINIT;
	}
 
-	err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
	if (err)
		goto out2;
 
@@ -3490,8 +3507,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	 * when it is _not_ an uninitialized extent.
	 */
	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
-						EXT4_EXT_CACHE_EXTENT);
+		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
		ext4_update_inode_fsync_trans(handle, inode, 1);
	} else
		ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3519,6 +3535,12 @@ void ext4_ext_truncate(struct inode *inode)
	int err = 0;
 
	/*
+	 * finish any pending end_io work so we won't run the risk of
+	 * converting any truncated blocks to initialized later
+	 */
+	ext4_flush_completed_IO(inode);
+
+	/*
	 * probably first extent we're gonna free will be last in block
	 */
	err = ext4_writepage_trans_blocks(inode);
@@ -3767,7 +3789,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 
	logical = (__u64)newex->ec_block << blksize_bits;
 
-	if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+	if (newex->ec_start == 0) {
		pgoff_t offset;
		struct page *page;
		struct buffer_head *bh = NULL;
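With ec_type gone, the extent cache above encodes its state in the remaining fields: ec_len == 0 means the cache is invalid, and ec_start == 0 marks a cached gap. A self-contained sketch of a lookup against such a cache, with simplified stand-in types for ext4_lblk_t/ext4_fsblk_t:

#include <stdint.h>
#include <stdio.h>

struct ext_cache {
	uint64_t ec_start;	/* physical start; 0 => gap */
	uint32_t ec_block;	/* logical start */
	uint32_t ec_len;	/* 0 => cache invalid */
};

static void cache_invalidate(struct ext_cache *cex)
{
	cex->ec_len = 0;
}

/* Return 1 and describe the hit if block is covered, else 0. */
static int cache_lookup(const struct ext_cache *cex, uint32_t block)
{
	if (cex->ec_len == 0)
		return 0;	/* nothing cached */
	if (block < cex->ec_block || block >= cex->ec_block + cex->ec_len)
		return 0;	/* cached range doesn't cover block */
	if (cex->ec_start == 0)
		printf("block %u: cached gap\n", (unsigned)block);
	else
		printf("block %u: cached at %llu\n", (unsigned)block,
		       (unsigned long long)(cex->ec_start +
					    (block - cex->ec_block)));
	return 1;
}

int main(void)
{
	struct ext_cache cex = { 9000, 100, 8 };	/* blocks 100..107 */

	cache_lookup(&cex, 103);	/* hit, mapped */
	cex.ec_start = 0;
	cache_lookup(&cex, 103);	/* hit, gap */
	cache_invalidate(&cex);
	printf("after invalidate: %d\n", cache_lookup(&cex, 103));
	return 0;
}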
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55ddceef..bb003dc9ffff 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 {
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
	struct vfsmount *mnt = filp->f_path.mnt;
	struct path path;
	char buf[64], *cp;
@@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
			ext4_mark_super_dirty(sb);
		}
	}
+	/*
+	 * Set up the jbd2_inode if we are opening the inode for
+	 * writing and the journal is present
+	 */
+	if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
+		struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
+
+		spin_lock(&inode->i_lock);
+		if (!ei->jinode) {
+			if (!jinode) {
+				spin_unlock(&inode->i_lock);
+				return -ENOMEM;
+			}
+			ei->jinode = jinode;
+			jbd2_journal_init_jbd_inode(ei->jinode, inode);
+			jinode = NULL;
+		}
+		spin_unlock(&inode->i_lock);
+		if (unlikely(jinode != NULL))
+			jbd2_free_inode(jinode);
+	}
	return dquot_file_open(inode, filp);
 }
 
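ext4_file_open() now allocates the jbd2_inode lazily, on the first open for write: allocate outside the lock, install under i_lock only if nobody beat us to it, and free the copy that lost the race. The same double-checked pattern in a userspace sketch, with pthreads and malloc standing in for i_lock and jbd2_alloc_inode():

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct jinode { int dummy; };

struct inode {
	pthread_mutex_t lock;
	struct jinode *jinode;	/* NULL until the first writer opens */
};

static int setup_jinode(struct inode *inode)
{
	struct jinode *jinode = NULL;

	if (inode->jinode)	/* fast path: already set up */
		return 0;

	jinode = malloc(sizeof(*jinode));	/* allocate outside the lock */

	pthread_mutex_lock(&inode->lock);
	if (!inode->jinode) {
		if (!jinode) {
			pthread_mutex_unlock(&inode->lock);
			return -1;	/* -ENOMEM in the real code */
		}
		inode->jinode = jinode;
		jinode = NULL;	/* ownership transferred */
	}
	pthread_mutex_unlock(&inode->lock);

	free(jinode);	/* non-NULL only if we lost the race */
	return 0;
}

int main(void)
{
	struct inode inode = { PTHREAD_MUTEX_INITIALIZER, NULL };

	printf("first:  %d (jinode %p)\n", setup_jinode(&inode),
	       (void *)inode.jinode);
	printf("second: %d (jinode %p)\n", setup_jinode(&inode),
	       (void *)inode.jinode);
	return 0;
}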
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index c1a7bc923cf6..7829b287822a 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
  * to written.
  * The function return the number of pending IOs on success.
  */
-static int flush_completed_IO(struct inode *inode)
+extern int ext4_flush_completed_IO(struct inode *inode)
 {
	ext4_io_end_t *io;
	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -169,7 +169,7 @@ int ext4_sync_file(struct file *file, int datasync)
	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;
 
-	ret = flush_completed_IO(inode);
+	ret = ext4_flush_completed_IO(inode);
	if (ret < 0)
		return ret;
 
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1ce240a23ebb..eb9097aec6f0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1027,7 +1027,7 @@ got:
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state_flags = 0;
+	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
	ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e659597b690b..e80fc513eacc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -40,6 +40,7 @@
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/ratelimit.h>
43 44
44#include "ext4_jbd2.h" 45#include "ext4_jbd2.h"
45#include "xattr.h" 46#include "xattr.h"
@@ -54,10 +55,17 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
54 loff_t new_size) 55 loff_t new_size)
55{ 56{
56 trace_ext4_begin_ordered_truncate(inode, new_size); 57 trace_ext4_begin_ordered_truncate(inode, new_size);
57 return jbd2_journal_begin_ordered_truncate( 58 /*
58 EXT4_SB(inode->i_sb)->s_journal, 59 * If jinode is zero, then we never opened the file for
59 &EXT4_I(inode)->jinode, 60 * writing, so there's no need to call
60 new_size); 61 * jbd2_journal_begin_ordered_truncate() since there's no
62 * outstanding writes we need to flush.
63 */
64 if (!EXT4_I(inode)->jinode)
65 return 0;
66 return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
67 EXT4_I(inode)->jinode,
68 new_size);
61} 69}
62 70
63static void ext4_invalidatepage(struct page *page, unsigned long offset); 71static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -552,7 +560,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
552} 560}
553 561
554/** 562/**
555 * ext4_blks_to_allocate: Look up the block map and count the number 563 * ext4_blks_to_allocate - Look up the block map and count the number
556 * of direct blocks need to be allocated for the given branch. 564 * of direct blocks need to be allocated for the given branch.
557 * 565 *
558 * @branch: chain of indirect blocks 566 * @branch: chain of indirect blocks
@@ -591,13 +599,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
591 599
592/** 600/**
593 * ext4_alloc_blocks: multiple allocate blocks needed for a branch 601 * ext4_alloc_blocks: multiple allocate blocks needed for a branch
602 * @handle: handle for this transaction
603 * @inode: inode which needs allocated blocks
604 * @iblock: the logical block to start allocated at
605 * @goal: preferred physical block of allocation
594 * @indirect_blks: the number of blocks need to allocate for indirect 606 * @indirect_blks: the number of blocks need to allocate for indirect
595 * blocks 607 * blocks
596 * 608 * @blks: number of desired blocks
597 * @new_blocks: on return it will store the new block numbers for 609 * @new_blocks: on return it will store the new block numbers for
598 * the indirect blocks(if needed) and the first direct block, 610 * the indirect blocks(if needed) and the first direct block,
599 * @blks: on return it will store the total number of allocated 611 * @err: on return it will store the error code
600 * direct blocks 612 *
613 * This function will return the number of blocks allocated as
614 * requested by the passed-in parameters.
601 */ 615 */
602static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, 616static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
603 ext4_lblk_t iblock, ext4_fsblk_t goal, 617 ext4_lblk_t iblock, ext4_fsblk_t goal,
@@ -711,9 +725,11 @@ failed_out:
711 725
712/** 726/**
713 * ext4_alloc_branch - allocate and set up a chain of blocks. 727 * ext4_alloc_branch - allocate and set up a chain of blocks.
728 * @handle: handle for this transaction
714 * @inode: owner 729 * @inode: owner
715 * @indirect_blks: number of allocated indirect blocks 730 * @indirect_blks: number of allocated indirect blocks
716 * @blks: number of allocated direct blocks 731 * @blks: number of allocated direct blocks
732 * @goal: preferred place for allocation
717 * @offsets: offsets (in the blocks) to store the pointers to next. 733 * @offsets: offsets (in the blocks) to store the pointers to next.
718 * @branch: place to store the chain in. 734 * @branch: place to store the chain in.
719 * 735 *
@@ -826,6 +842,7 @@ failed:
826 842
827/** 843/**
828 * ext4_splice_branch - splice the allocated branch onto inode. 844 * ext4_splice_branch - splice the allocated branch onto inode.
845 * @handle: handle for this transaction
829 * @inode: owner 846 * @inode: owner
830 * @block: (logical) number of block we are adding 847 * @block: (logical) number of block we are adding
831 * @chain: chain of indirect blocks (with a missing link - see 848 * @chain: chain of indirect blocks (with a missing link - see
@@ -1081,7 +1098,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1081 * Calculate the number of metadata blocks need to reserve 1098 * Calculate the number of metadata blocks need to reserve
1082 * to allocate a block located at @lblock 1099 * to allocate a block located at @lblock
1083 */ 1100 */
1084static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) 1101static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
1085{ 1102{
1086 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1103 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1087 return ext4_ext_calc_metadata_amount(inode, lblock); 1104 return ext4_ext_calc_metadata_amount(inode, lblock);
@@ -1320,7 +1337,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1320 * avoid double accounting 1337 * avoid double accounting
1321 */ 1338 */
1322 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1339 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1323 EXT4_I(inode)->i_delalloc_reserved_flag = 1; 1340 ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
1324 /* 1341 /*
1325 * We need to check for EXT4 here because migrate 1342 * We need to check for EXT4 here because migrate
1326 * could have changed the inode type in between 1343 * could have changed the inode type in between
@@ -1350,7 +1367,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
1350 ext4_da_update_reserve_space(inode, retval, 1); 1367 ext4_da_update_reserve_space(inode, retval, 1);
1351 } 1368 }
1352 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 1369 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1353 EXT4_I(inode)->i_delalloc_reserved_flag = 0; 1370 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
1354 1371
1355 up_write((&EXT4_I(inode)->i_data_sem)); 1372 up_write((&EXT4_I(inode)->i_data_sem));
1356 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 1373 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -1878,7 +1895,7 @@ static int ext4_journalled_write_end(struct file *file,
1878/* 1895/*
1879 * Reserve a single block located at lblock 1896 * Reserve a single block located at lblock
1880 */ 1897 */
1881static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) 1898static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1882{ 1899{
1883 int retries = 0; 1900 int retries = 0;
1884 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1901 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2239,7 +2256,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2239 * affects functions in many different parts of the allocation 2256 * affects functions in many different parts of the allocation
2240 * call path. This flag exists primarily because we don't 2257 * call path. This flag exists primarily because we don't
2241 * want to change *many* call functions, so ext4_map_blocks() 2258 * want to change *many* call functions, so ext4_map_blocks()
2242 * will set the magic i_delalloc_reserved_flag once the 2259 * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
2243 * inode's allocation semaphore is taken. 2260 * inode's allocation semaphore is taken.
2244 * 2261 *
2245 * If the blocks in questions were delalloc blocks, set 2262 * If the blocks in questions were delalloc blocks, set
@@ -3720,8 +3737,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3720retry: 3737retry:
3721 io_end = ext4_init_io_end(inode, GFP_ATOMIC); 3738 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3722 if (!io_end) { 3739 if (!io_end) {
3723 if (printk_ratelimit()) 3740 pr_warning_ratelimited("%s: allocation fail\n", __func__);
3724 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3725 schedule(); 3741 schedule();
3726 goto retry; 3742 goto retry;
3727 } 3743 }
@@ -4045,7 +4061,7 @@ int ext4_block_truncate_page(handle_t *handle,
4045 if (ext4_should_journal_data(inode)) { 4061 if (ext4_should_journal_data(inode)) {
4046 err = ext4_handle_dirty_metadata(handle, inode, bh); 4062 err = ext4_handle_dirty_metadata(handle, inode, bh);
4047 } else { 4063 } else {
4048 if (ext4_should_order_data(inode)) 4064 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
4049 err = ext4_jbd2_file_inode(handle, inode); 4065 err = ext4_jbd2_file_inode(handle, inode);
4050 mark_buffer_dirty(bh); 4066 mark_buffer_dirty(bh);
4051 } 4067 }
@@ -4169,6 +4185,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 {
 	__le32 *p;
 	int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
+	int err;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
@@ -4184,11 +4201,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 	if (try_to_extend_transaction(handle, inode)) {
 		if (bh) {
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			ext4_handle_dirty_metadata(handle, inode, bh);
+			err = ext4_handle_dirty_metadata(handle, inode, bh);
+			if (unlikely(err)) {
+				ext4_std_error(inode->i_sb, err);
+				return 1;
+			}
+		}
+		err = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(err)) {
+			ext4_std_error(inode->i_sb, err);
+			return 1;
+		}
+		err = ext4_truncate_restart_trans(handle, inode,
+						  blocks_for_truncate(inode));
+		if (unlikely(err)) {
+			ext4_std_error(inode->i_sb, err);
+			return 1;
 		}
-		ext4_mark_inode_dirty(handle, inode);
-		ext4_truncate_restart_trans(handle, inode,
-					    blocks_for_truncate(inode));
 		if (bh) {
 			BUFFER_TRACE(bh, "retaking write access");
 			ext4_journal_get_write_access(handle, bh);
@@ -4349,6 +4378,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 					   (__le32 *) bh->b_data,
 					   (__le32 *) bh->b_data + addr_per_block,
 					   depth);
+			brelse(bh);
 
 			/*
 			 * Everything below this this pointer has been
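
The one-line hunk above fixes a buffer_head leak in ext4_free_branches(): sb_bread() returns a referenced buffer, and the reference taken for each indirect block was never dropped, so every indirect block touched by a large truncate stayed pinned in memory. The pairing the fix restores, in sketch form (walk_entries() is a hypothetical stand-in for the recursive descent):

	struct buffer_head *bh = sb_bread(sb, nr);	/* takes a reference */
	if (!bh)
		return;
	walk_entries((__le32 *) bh->b_data);	/* hypothetical: consume the block */
	brelse(bh);				/* drop the reference as soon as possible */
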
@@ -4859,7 +4889,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	}
 	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
 
-	ei->i_state_flags = 0;
+	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
@@ -5118,7 +5148,7 @@ static int ext4_do_update_inode(handle_t *handle,
 	if (ext4_inode_blocks_set(handle, raw_inode, ei))
 		goto out_brelse;
 	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
-	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_HURD))
 		raw_inode->i_file_acl_high =
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5b4d4e3a4d58..851f49b2f9d2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2608,18 +2608,12 @@ int ext4_mb_release(struct super_block *sb)
 static inline int ext4_issue_discard(struct super_block *sb,
 		ext4_group_t block_group, ext4_grpblk_t block, int count)
 {
-	int ret;
 	ext4_fsblk_t discard_block;
 
 	discard_block = block + ext4_group_first_block_no(sb, block_group);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
-	ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
-	if (ret == -EOPNOTSUPP) {
-		ext4_warning(sb, "discard not supported, disabling");
-		clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
-	}
-	return ret;
+	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 }
 
 /*
@@ -2631,7 +2625,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 	struct super_block *sb = journal->j_private;
 	struct ext4_buddy e4b;
 	struct ext4_group_info *db;
-	int err, count = 0, count2 = 0;
+	int err, ret, count = 0, count2 = 0;
 	struct ext4_free_data *entry;
 	struct list_head *l, *ltmp;
 
@@ -2641,9 +2635,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			 entry->count, entry->group, entry);
 
-		if (test_opt(sb, DISCARD))
-			ext4_issue_discard(sb, entry->group,
-					entry->start_blk, entry->count);
+		if (test_opt(sb, DISCARD)) {
+			ret = ext4_issue_discard(sb, entry->group,
+					entry->start_blk, entry->count);
+			if (unlikely(ret == -EOPNOTSUPP)) {
+				ext4_warning(sb, "discard not supported, "
+					     "disabling");
+				clear_opt(sb, DISCARD);
+			}
+		}
 
 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
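
With the -EOPNOTSUPP handling hoisted out, ext4_issue_discard() is reduced to translating a group-relative block number into a filesystem block and calling sb_issue_discard(); each caller now applies its own policy (the commit callback above disables the discard mount option, while the FITRIM path further down returns the error to the ioctl caller). For orientation, sb_issue_discard() is approximately the following wrapper; treat the body as a sketch of the block-to-sector conversion rather than the verbatim helper:

	static inline int sb_issue_discard(struct super_block *sb, sector_t block,
					   sector_t nr_blocks, gfp_t gfp_mask,
					   unsigned long flags)
	{
		/* filesystem blocks to 512-byte sectors */
		return blkdev_issue_discard(sb->s_bdev,
				block << (sb->s_blocksize_bits - 9),
				nr_blocks << (sb->s_blocksize_bits - 9),
				gfp_mask, flags);
	}
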
@@ -3881,19 +3881,6 @@ repeat:
 	}
 }
 
-/*
- * finds all preallocated spaces and return blocks being freed to them
- * if preallocated space becomes full (no block is used from the space)
- * then the function frees space in buddy
- * XXX: at the moment, truncate (which is the only way to free blocks)
- * discards all preallocations
- */
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-					struct ext4_buddy *e4b,
-					sector_t block, int count)
-{
-	BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
-}
 #ifdef CONFIG_EXT4_DEBUG
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
@@ -4283,7 +4270,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	 * EDQUOT check, as blocks and quotas have been already
 	 * reserved when data being copied into pagecache.
 	 */
-	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+	if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
 		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
 	else {
 		/* Without delayed allocation we need to verify
@@ -4380,7 +4367,8 @@ out:
 	if (inquota && ar->len < inquota)
 		dquot_free_block(ar->inode, inquota - ar->len);
 	if (!ar->len) {
-		if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		if (!ext4_test_inode_state(ar->inode,
+					   EXT4_STATE_DELALLOC_RESERVED))
 			/* release all the reserved blocks if non delalloc */
 			percpu_counter_sub(&sbi->s_dirtyblocks_counter,
 						reserv_blks);
@@ -4626,7 +4614,11 @@ do_more:
 	 * blocks being freed are metadata. these blocks shouldn't
 	 * be used until this transaction is committed
 	 */
 	new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+	if (!new_entry) {
+		err = -ENOMEM;
+		goto error_return;
+	}
 	new_entry->start_blk = bit;
 	new_entry->group = block_group;
 	new_entry->count = count;
@@ -4643,7 +4635,6 @@ do_more:
 		ext4_lock_group(sb, block_group);
 		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
-		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
 	}
 
 	ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4718,8 +4709,6 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
 	ext4_unlock_group(sb, group);
 
 	ret = ext4_issue_discard(sb, group, start, count);
-	if (ret)
-		ext4_std_error(sb, ret);
 
 	ext4_lock_group(sb, group);
 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
@@ -4819,6 +4808,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
 	ext4_grpblk_t cnt = 0, first_block, last_block;
 	uint64_t start, len, minlen, trimmed;
+	ext4_fsblk_t first_data_blk =
+			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 	int ret = 0;
 
 	start = range->start >> sb->s_blocksize_bits;
@@ -4828,6 +4819,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
 	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
 		return -EINVAL;
+	if (start < first_data_blk) {
+		len -= first_data_blk - start;
+		start = first_data_blk;
+	}
 
 	/* Determine first and last group to examine based on start and len */
 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
@@ -4851,7 +4846,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		if (len >= EXT4_BLOCKS_PER_GROUP(sb))
 			len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
 		else
-			last_block = len;
+			last_block = first_block + len;
 
 		if (e4b.bd_info->bb_free >= minlen) {
 			cnt = ext4_trim_all_free(sb, &e4b, first_block,
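
Two fixes in ext4_trim_fs() deserve spelling out. First, on a filesystem with 1 KiB blocks s_first_data_block is 1, so a FITRIM range starting at block 0 must be clamped before the group lookup, or the first group is searched for a block that does not exist. Second, in the final, partially covered group, the end of the range is relative to where the range entered the group, not to the group start; a worked example:

	/*
	 * Suppose the trim range enters its last group at first_block = 100
	 * with len = 50 blocks left to trim.
	 *
	 *   before:  last_block = len;                => examines [100, 50), i.e. nothing
	 *   after:   last_block = first_block + len;  => examines [100, 150), as intended
	 */
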
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 25f3a974b725..b0a126f23c20 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -496,7 +496,7 @@ int ext4_ext_migrate(struct inode *inode)
 	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
 	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-				   S_IFREG, 0, goal);
+				   S_IFREG, NULL, goal);
 	if (IS_ERR(tmp_inode)) {
 		retval = -ENOMEM;
 		ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index dc40e75cba88..5485390d32c5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -581,9 +581,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 					   dir->i_sb->s_blocksize -
 					   EXT4_DIR_REC_LEN(0));
 	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
-		if (!ext4_check_dir_entry(dir, de, bh,
-				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
-					+((char *)de - bh->b_data))) {
+		if (ext4_check_dir_entry(dir, NULL, de, bh,
+				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+					+ ((char *)de - bh->b_data))) {
 			/* On error, skip the f_pos to the next block. */
 			dir_file->f_pos = (dir_file->f_pos |
 					(dir->i_sb->s_blocksize - 1)) + 1;
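
Every ext4_check_dir_entry() call in this file changes polarity: the helper used to return nonzero for a valid entry and now returns nonzero for a corrupted one, which is why the leading ! disappears at each call site. It also gains a struct file argument (NULL here) so error reports can name the offending file. A sketch of the likely new wrapper; the exact macro lives in fs/ext4/ext4.h, and __ext4_check_dir_entry() is assumed to report the corruption before returning 1:

	#define ext4_check_dir_entry(dir, filp, de, bh, offset)		\
		unlikely(__ext4_check_dir_entry(__func__, __LINE__,	\
				(dir), (filp), (de), (bh), (offset)))
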
@@ -820,7 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
 		if ((char *) de + namelen <= dlimit &&
 		    ext4_match (namelen, name, de)) {
 			/* found a match - just to be sure, do a full check */
-			if (!ext4_check_dir_entry(dir, de, bh, offset))
+			if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
 				return -1;
 			*res_dir = de;
 			return 1;
@@ -1036,7 +1036,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
 		return ERR_PTR(-EIO);
 	}
 	inode = ext4_iget(dir->i_sb, ino);
-	if (unlikely(IS_ERR(inode))) {
+	if (IS_ERR(inode)) {
 		if (PTR_ERR(inode) == -ESTALE) {
 			EXT4_ERROR_INODE(dir,
 					 "deleted inode referenced: %u",
@@ -1269,7 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 	de = (struct ext4_dir_entry_2 *)bh->b_data;
 	top = bh->b_data + blocksize - reclen;
 	while ((char *) de <= top) {
-		if (!ext4_check_dir_entry(dir, de, bh, offset))
+		if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
 			return -EIO;
 		if (ext4_match(namelen, name, de))
 			return -EEXIST;
@@ -1602,7 +1602,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			if (err)
 				goto journal_error;
 		}
-		ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+		err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+		if (err) {
+			ext4_std_error(inode->i_sb, err);
+			goto cleanup;
+		}
 	}
 	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
 	if (!de)
@@ -1630,17 +1634,21 @@ static int ext4_delete_entry(handle_t *handle,
 {
 	struct ext4_dir_entry_2 *de, *pde;
 	unsigned int blocksize = dir->i_sb->s_blocksize;
-	int i;
+	int i, err;
 
 	i = 0;
 	pde = NULL;
 	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	while (i < bh->b_size) {
-		if (!ext4_check_dir_entry(dir, de, bh, i))
+		if (ext4_check_dir_entry(dir, NULL, de, bh, i))
 			return -EIO;
 		if (de == de_del) {
 			BUFFER_TRACE(bh, "get_write_access");
-			ext4_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err)) {
+				ext4_std_error(dir->i_sb, err);
+				return err;
+			}
 			if (pde)
 				pde->rec_len = ext4_rec_len_to_disk(
 					ext4_rec_len_from_disk(pde->rec_len,
@@ -1652,7 +1660,11 @@ static int ext4_delete_entry(handle_t *handle,
 			de->inode = 0;
 			dir->i_version++;
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			ext4_handle_dirty_metadata(handle, dir, bh);
+			err = ext4_handle_dirty_metadata(handle, dir, bh);
+			if (unlikely(err)) {
+				ext4_std_error(dir->i_sb, err);
+				return err;
+			}
 			return 0;
 		}
 		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -1789,7 +1801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	handle_t *handle;
 	struct inode *inode;
-	struct buffer_head *dir_block;
+	struct buffer_head *dir_block = NULL;
 	struct ext4_dir_entry_2 *de;
 	unsigned int blocksize = dir->i_sb->s_blocksize;
 	int err, retries = 0;
@@ -1822,7 +1834,9 @@ retry:
 	if (!dir_block)
 		goto out_clear_inode;
 	BUFFER_TRACE(dir_block, "get_write_access");
-	ext4_journal_get_write_access(handle, dir_block);
+	err = ext4_journal_get_write_access(handle, dir_block);
+	if (err)
+		goto out_clear_inode;
 	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
@@ -1839,10 +1853,12 @@ retry:
 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
 	inode->i_nlink = 2;
 	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-	ext4_handle_dirty_metadata(handle, dir, dir_block);
-	brelse(dir_block);
-	ext4_mark_inode_dirty(handle, inode);
-	err = ext4_add_entry(handle, dentry, inode);
+	err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+	if (err)
+		goto out_clear_inode;
+	err = ext4_mark_inode_dirty(handle, inode);
+	if (!err)
+		err = ext4_add_entry(handle, dentry, inode);
 	if (err) {
 out_clear_inode:
 		clear_nlink(inode);
@@ -1853,10 +1869,13 @@ out_clear_inode:
 	}
 	ext4_inc_count(handle, dir);
 	ext4_update_dx_flag(dir);
-	ext4_mark_inode_dirty(handle, dir);
+	err = ext4_mark_inode_dirty(handle, dir);
+	if (err)
+		goto out_clear_inode;
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
out_stop:
+	brelse(dir_block);
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
@@ -1919,7 +1938,7 @@ static int empty_dir(struct inode *inode)
 		}
 		de = (struct ext4_dir_entry_2 *) bh->b_data;
 	}
-	if (!ext4_check_dir_entry(inode, de, bh, offset)) {
+	if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
 		de = (struct ext4_dir_entry_2 *)(bh->b_data +
 						 sb->s_blocksize);
 		offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2407,7 +2426,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 					ext4_current_time(new_dir);
 		ext4_mark_inode_dirty(handle, new_dir);
 		BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
-		ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+		retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+		if (unlikely(retval)) {
+			ext4_std_error(new_dir->i_sb, retval);
+			goto end_rename;
+		}
 		brelse(new_bh);
 		new_bh = NULL;
 	}
@@ -2459,7 +2482,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
 						cpu_to_le32(new_dir->i_ino);
 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-		ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+		retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+		if (retval) {
+			ext4_std_error(old_dir->i_sb, retval);
+			goto end_rename;
+		}
 		ext4_dec_count(handle, old_dir);
 		if (new_inode) {
 			/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index beacce11ac50..7270dcfca92a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -44,7 +44,7 @@ int __init ext4_init_pageio(void)
 	if (io_page_cachep == NULL)
 		return -ENOMEM;
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
-	if (io_page_cachep == NULL) {
+	if (io_end_cachep == NULL) {
 		kmem_cache_destroy(io_page_cachep);
 		return -ENOMEM;
 	}
@@ -158,11 +158,8 @@ static void ext4_end_io_work(struct work_struct *work)
 
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
-	ext4_io_end_t *io = NULL;
-
-	io = kmem_cache_alloc(io_end_cachep, flags);
+	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
 	if (io) {
-		memset(io, 0, sizeof(*io));
 		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_WORK(&io->work, ext4_end_io_work);
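
The first hunk above fixes a copy-and-paste bug: after creating io_end_cachep, the NULL check was testing io_page_cachep, so a failed cache creation went unnoticed. The second hunk replaces an open-coded allocate-then-memset with kmem_cache_zalloc(), which is the same allocation with __GFP_ZERO folded in, roughly this inline from <linux/slab.h> (a sketch, modulo kernel version):

	static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
	{
		return kmem_cache_alloc(k, flags | __GFP_ZERO);	/* zeroed object */
	}
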
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 981c8477adab..3ecc6e45d2f9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -220,7 +220,11 @@ static int setup_new_group_blocks(struct super_block *sb,
 			memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
 			set_buffer_uptodate(gdb);
 			unlock_buffer(gdb);
-			ext4_handle_dirty_metadata(handle, NULL, gdb);
+			err = ext4_handle_dirty_metadata(handle, NULL, gdb);
+			if (unlikely(err)) {
+				brelse(gdb);
+				goto exit_bh;
+			}
 			ext4_set_bit(bit, bh->b_data);
 			brelse(gdb);
 		}
@@ -258,7 +262,11 @@ static int setup_new_group_blocks(struct super_block *sb,
 
 	ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
 			     bh->b_data);
-	ext4_handle_dirty_metadata(handle, NULL, bh);
+	err = ext4_handle_dirty_metadata(handle, NULL, bh);
+	if (unlikely(err)) {
+		ext4_std_error(sb, err);
+		goto exit_bh;
+	}
 	brelse(bh);
 	/* Mark unused entries in inode bitmap used */
 	ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
@@ -270,7 +278,9 @@ static int setup_new_group_blocks(struct super_block *sb,
 
 	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
 			     bh->b_data);
-	ext4_handle_dirty_metadata(handle, NULL, bh);
+	err = ext4_handle_dirty_metadata(handle, NULL, bh);
+	if (unlikely(err))
+		ext4_std_error(sb, err);
 exit_bh:
 	brelse(bh);
 
@@ -422,17 +432,21 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		goto exit_dind;
 	}
 
-	if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh)))
+	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+	if (unlikely(err))
 		goto exit_dind;
 
-	if ((err = ext4_journal_get_write_access(handle, *primary)))
+	err = ext4_journal_get_write_access(handle, *primary);
+	if (unlikely(err))
 		goto exit_sbh;
 
-	if ((err = ext4_journal_get_write_access(handle, dind)))
-		goto exit_primary;
+	err = ext4_journal_get_write_access(handle, dind);
+	if (unlikely(err))
+		ext4_std_error(sb, err);
 
 	/* ext4_reserve_inode_write() gets a reference on the iloc */
-	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (unlikely(err))
 		goto exit_dindj;
 
 	n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
@@ -454,12 +468,20 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	 * reserved inode, and will become GDT blocks (primary and backup).
 	 */
 	data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
-	ext4_handle_dirty_metadata(handle, NULL, dind);
-	brelse(dind);
+	err = ext4_handle_dirty_metadata(handle, NULL, dind);
+	if (unlikely(err)) {
+		ext4_std_error(sb, err);
+		goto exit_inode;
+	}
 	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
 	ext4_mark_iloc_dirty(handle, inode, &iloc);
 	memset((*primary)->b_data, 0, sb->s_blocksize);
-	ext4_handle_dirty_metadata(handle, NULL, *primary);
+	err = ext4_handle_dirty_metadata(handle, NULL, *primary);
+	if (unlikely(err)) {
+		ext4_std_error(sb, err);
+		goto exit_inode;
+	}
+	brelse(dind);
 
 	o_group_desc = EXT4_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
@@ -470,19 +492,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	kfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+	if (err)
+		ext4_std_error(sb, err);
 
-	return 0;
+	return err;
 
 exit_inode:
 	/* ext4_journal_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
 exit_dindj:
 	/* ext4_journal_release_buffer(handle, dind); */
-exit_primary:
-	/* ext4_journal_release_buffer(handle, *primary); */
 exit_sbh:
-	/* ext4_journal_release_buffer(handle, *primary); */
+	/* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
exit_dind:
 	brelse(dind);
 exit_bh:
@@ -665,7 +687,9 @@ static void update_backups(struct super_block *sb,
 		memset(bh->b_data + size, 0, rest);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
-		ext4_handle_dirty_metadata(handle, NULL, bh);
+		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		if (unlikely(err))
+			ext4_std_error(sb, err);
 		brelse(bh);
 	}
 	if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -883,7 +907,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	/* Update the global fs size fields */
 	sbi->s_groups_count++;
 
-	ext4_handle_dirty_metadata(handle, NULL, primary);
+	err = ext4_handle_dirty_metadata(handle, NULL, primary);
+	if (unlikely(err)) {
+		ext4_std_error(sb, err);
+		goto exit_journal;
+	}
 
 	/* Update the reserved block counts only once the new group is
 	 * active. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cd37f9d5e447..29c80f6d8b27 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -388,13 +388,14 @@ static void ext4_handle_error(struct super_block *sb)
 void __ext4_error(struct super_block *sb, const char *function,
 		  unsigned int line, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
-	       sb->s_id, function, line, current->comm);
-	vprintk(fmt, args);
-	printk("\n");
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+	       sb->s_id, function, line, current->comm, &vaf);
 	va_end(args);
 
 	ext4_handle_error(sb);
@@ -405,28 +406,31 @@ void ext4_error_inode(struct inode *inode, const char *function,
 		      const char *fmt, ...)
 {
 	va_list args;
+	struct va_format vaf;
 	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
 	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 	es->s_last_error_block = cpu_to_le64(block);
 	save_error_info(inode->i_sb, function, line);
 	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
 	       inode->i_sb->s_id, function, line, inode->i_ino);
 	if (block)
-		printk("block %llu: ", block);
-	printk("comm %s: ", current->comm);
-	vprintk(fmt, args);
-	printk("\n");
+		printk(KERN_CONT "block %llu: ", block);
+	printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
 	va_end(args);
 
 	ext4_handle_error(inode->i_sb);
 }
 
 void ext4_error_file(struct file *file, const char *function,
-		     unsigned int line, const char *fmt, ...)
+		     unsigned int line, ext4_fsblk_t block,
+		     const char *fmt, ...)
 {
 	va_list args;
+	struct va_format vaf;
 	struct ext4_super_block *es;
 	struct inode *inode = file->f_dentry->d_inode;
 	char pathname[80], *path;
@@ -434,17 +438,18 @@ void ext4_error_file(struct file *file, const char *function,
 	es = EXT4_SB(inode->i_sb)->s_es;
 	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 	save_error_info(inode->i_sb, function, line);
-	va_start(args, fmt);
 	path = d_path(&(file->f_path), pathname, sizeof(pathname));
-	if (!path)
+	if (IS_ERR(path))
 		path = "(unknown)";
 	printk(KERN_CRIT
-	       "EXT4-fs error (device %s): %s:%d: inode #%lu "
-	       "(comm %s path %s): ",
-	       inode->i_sb->s_id, function, line, inode->i_ino,
-	       current->comm, path);
-	vprintk(fmt, args);
-	printk("\n");
+	       "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+	       inode->i_sb->s_id, function, line, inode->i_ino);
+	if (block)
+		printk(KERN_CONT "block %llu: ", block);
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
 	va_end(args);
 
 	ext4_handle_error(inode->i_sb);
@@ -543,28 +548,29 @@ void __ext4_abort(struct super_block *sb, const char *function,
 		panic("EXT4-fs panic from previous error\n");
 }
 
-void ext4_msg (struct super_block * sb, const char *prefix,
-		   const char *fmt, ...)
+void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
-	vprintk(fmt, args);
-	printk("\n");
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 	va_end(args);
 }
 
 void __ext4_warning(struct super_block *sb, const char *function,
 		    unsigned int line, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
-	       sb->s_id, function, line);
-	vprintk(fmt, args);
-	printk("\n");
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+	       sb->s_id, function, line, &vaf);
 	va_end(args);
 }
 
@@ -575,21 +581,25 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,
 __releases(bitlock)
 __acquires(bitlock)
 {
+	struct va_format vaf;
 	va_list args;
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
 	es->s_last_error_ino = cpu_to_le32(ino);
 	es->s_last_error_block = cpu_to_le64(block);
 	__save_error_info(sb, function, line);
+
 	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
 	       sb->s_id, function, line, grp);
 	if (ino)
-		printk("inode %lu: ", ino);
+		printk(KERN_CONT "inode %lu: ", ino);
 	if (block)
-		printk("block %llu:", (unsigned long long) block);
-	vprintk(fmt, args);
-	printk("\n");
+		printk(KERN_CONT "block %llu:", (unsigned long long) block);
+	printk(KERN_CONT "%pV\n", &vaf);
 	va_end(args);
 
 	if (test_opt(sb, ERRORS_CONT)) {
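
All the fs/ext4/super.c hunks above converge on one idiom: instead of a printk()/vprintk()/printk("\n") triple, whose pieces can interleave with messages from other CPUs, the format string and va_list are wrapped in a struct va_format and expanded through the %pV extension inside a single printk(). The struct, from <linux/kernel.h>, is just:

	struct va_format {
		const char *fmt;
		va_list *va;
	};

When vsnprintf() meets %pV it recurses into vaf->fmt with vaf->va, so the caller's varargs are rendered inline and the message goes out as one printk() call rather than three.
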
@@ -808,21 +818,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
-	/*
-	 * Note: We can be called before EXT4_SB(sb)->s_journal is set,
-	 * therefore it can be null here. Don't check it, just initialize
-	 * jinode.
-	 */
-	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_da_metadata_calc_len = 0;
-	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
 #endif
+	ei->jinode = NULL;
 	INIT_LIST_HEAD(&ei->i_completed_io_list);
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->cur_aio_dio = NULL;
@@ -898,9 +902,12 @@ void ext4_clear_inode(struct inode *inode)
 	end_writeback(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
-	if (EXT4_JOURNAL(inode))
-		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
-				       &EXT4_I(inode)->jinode);
+	if (EXT4_I(inode)->jinode) {
+		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+					       EXT4_I(inode)->jinode);
+		jbd2_free_inode(EXT4_I(inode)->jinode);
+		EXT4_I(inode)->jinode = NULL;
+	}
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq,
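
The two hunks above stop embedding a struct jbd2_inode in every ext4_inode_info: the pointer starts out NULL and the object is freed, after jbd2 releases it, when the inode is cleared. This is also why ext4_block_truncate_page() earlier now checks EXT4_I(inode)->jinode before calling ext4_jbd2_file_inode(). The allocation side is not shown in this diff; a sketch of the lazy-initialization pattern it implies, where ext4_setup_jinode() is a hypothetical name and jbd2_alloc_inode() is assumed to be the counterpart of the jbd2_free_inode() call visible above:

	/* Hypothetical helper; locking around the publish is omitted. */
	static int ext4_setup_jinode(struct inode *inode)
	{
		struct ext4_inode_info *ei = EXT4_I(inode);
		struct jbd2_inode *jinode;

		if (ei->jinode)				/* already set up */
			return 0;
		jinode = jbd2_alloc_inode(GFP_KERNEL);	/* assumed jbd2 slab helper */
		if (!jinode)
			return -ENOMEM;
		ei->jinode = jinode;
		jbd2_journal_init_jbd_inode(ei->jinode, inode);
		return 0;
	}
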
@@ -1393,7 +1400,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 		sbi->s_qf_names[qtype] = NULL;
 		return 0;
 	}
-	set_opt(sbi->s_mount_opt, QUOTA);
+	set_opt(sb, QUOTA);
 	return 1;
 }
 
@@ -1448,21 +1455,21 @@ static int parse_options(char *options, struct super_block *sb,
 		switch (token) {
 		case Opt_bsd_df:
 			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			clear_opt(sbi->s_mount_opt, MINIX_DF);
+			clear_opt(sb, MINIX_DF);
 			break;
 		case Opt_minix_df:
 			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			set_opt(sbi->s_mount_opt, MINIX_DF);
+			set_opt(sb, MINIX_DF);
 
 			break;
 		case Opt_grpid:
 			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			set_opt(sbi->s_mount_opt, GRPID);
+			set_opt(sb, GRPID);
 
 			break;
 		case Opt_nogrpid:
 			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-			clear_opt(sbi->s_mount_opt, GRPID);
+			clear_opt(sb, GRPID);
 
 			break;
 		case Opt_resuid:
@@ -1480,38 +1487,38 @@ static int parse_options(char *options, struct super_block *sb,
 			/* *sb_block = match_int(&args[0]); */
 			break;
 		case Opt_err_panic:
-			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
-			clear_opt(sbi->s_mount_opt, ERRORS_RO);
-			set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+			clear_opt(sb, ERRORS_CONT);
+			clear_opt(sb, ERRORS_RO);
+			set_opt(sb, ERRORS_PANIC);
 			break;
 		case Opt_err_ro:
-			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
-			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
-			set_opt(sbi->s_mount_opt, ERRORS_RO);
+			clear_opt(sb, ERRORS_CONT);
+			clear_opt(sb, ERRORS_PANIC);
+			set_opt(sb, ERRORS_RO);
 			break;
 		case Opt_err_cont:
-			clear_opt(sbi->s_mount_opt, ERRORS_RO);
-			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
-			set_opt(sbi->s_mount_opt, ERRORS_CONT);
+			clear_opt(sb, ERRORS_RO);
+			clear_opt(sb, ERRORS_PANIC);
+			set_opt(sb, ERRORS_CONT);
 			break;
 		case Opt_nouid32:
-			set_opt(sbi->s_mount_opt, NO_UID32);
+			set_opt(sb, NO_UID32);
 			break;
 		case Opt_debug:
-			set_opt(sbi->s_mount_opt, DEBUG);
+			set_opt(sb, DEBUG);
 			break;
 		case Opt_oldalloc:
-			set_opt(sbi->s_mount_opt, OLDALLOC);
+			set_opt(sb, OLDALLOC);
 			break;
 		case Opt_orlov:
-			clear_opt(sbi->s_mount_opt, OLDALLOC);
+			clear_opt(sb, OLDALLOC);
 			break;
 #ifdef CONFIG_EXT4_FS_XATTR
 		case Opt_user_xattr:
-			set_opt(sbi->s_mount_opt, XATTR_USER);
+			set_opt(sb, XATTR_USER);
 			break;
 		case Opt_nouser_xattr:
-			clear_opt(sbi->s_mount_opt, XATTR_USER);
+			clear_opt(sb, XATTR_USER);
 			break;
 #else
 		case Opt_user_xattr:
@@ -1521,10 +1528,10 @@ static int parse_options(char *options, struct super_block *sb,
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 		case Opt_acl:
-			set_opt(sbi->s_mount_opt, POSIX_ACL);
+			set_opt(sb, POSIX_ACL);
 			break;
 		case Opt_noacl:
-			clear_opt(sbi->s_mount_opt, POSIX_ACL);
+			clear_opt(sb, POSIX_ACL);
 			break;
 #else
 		case Opt_acl:
@@ -1543,7 +1550,7 @@ static int parse_options(char *options, struct super_block *sb,
 					 "Cannot specify journal on remount");
 				return 0;
 			}
-			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
+			set_opt(sb, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
@@ -1556,14 +1563,14 @@ static int parse_options(char *options, struct super_block *sb,
 			*journal_devnum = option;
 			break;
 		case Opt_journal_checksum:
-			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+			set_opt(sb, JOURNAL_CHECKSUM);
 			break;
 		case Opt_journal_async_commit:
-			set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
-			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+			set_opt(sb, JOURNAL_ASYNC_COMMIT);
+			set_opt(sb, JOURNAL_CHECKSUM);
 			break;
 		case Opt_noload:
-			set_opt(sbi->s_mount_opt, NOLOAD);
+			set_opt(sb, NOLOAD);
 			break;
 		case Opt_commit:
 			if (match_int(&args[0], &option))
@@ -1606,15 +1613,15 @@ static int parse_options(char *options, struct super_block *sb,
 				return 0;
 			}
 		} else {
-			clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+			clear_opt(sb, DATA_FLAGS);
 			sbi->s_mount_opt |= data_opt;
 		}
 		break;
 	case Opt_data_err_abort:
-		set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+		set_opt(sb, DATA_ERR_ABORT);
 		break;
 	case Opt_data_err_ignore:
-		clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+		clear_opt(sb, DATA_ERR_ABORT);
 		break;
 #ifdef CONFIG_QUOTA
 	case Opt_usrjquota:
@@ -1654,12 +1661,12 @@ set_qf_format:
 		break;
 	case Opt_quota:
 	case Opt_usrquota:
-		set_opt(sbi->s_mount_opt, QUOTA);
-		set_opt(sbi->s_mount_opt, USRQUOTA);
+		set_opt(sb, QUOTA);
+		set_opt(sb, USRQUOTA);
 		break;
 	case Opt_grpquota:
-		set_opt(sbi->s_mount_opt, QUOTA);
-		set_opt(sbi->s_mount_opt, GRPQUOTA);
+		set_opt(sb, QUOTA);
+		set_opt(sb, GRPQUOTA);
 		break;
 	case Opt_noquota:
 		if (sb_any_quota_loaded(sb)) {
@@ -1667,9 +1674,9 @@ set_qf_format:
 			 "options when quota turned on");
 			return 0;
 		}
-		clear_opt(sbi->s_mount_opt, QUOTA);
-		clear_opt(sbi->s_mount_opt, USRQUOTA);
-		clear_opt(sbi->s_mount_opt, GRPQUOTA);
+		clear_opt(sb, QUOTA);
+		clear_opt(sb, USRQUOTA);
+		clear_opt(sb, GRPQUOTA);
 		break;
 #else
 	case Opt_quota:
@@ -1695,7 +1702,7 @@ set_qf_format:
 		sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
 		break;
 	case Opt_nobarrier:
-		clear_opt(sbi->s_mount_opt, BARRIER);
+		clear_opt(sb, BARRIER);
 		break;
 	case Opt_barrier:
 		if (args[0].from) {
@@ -1704,9 +1711,9 @@ set_qf_format:
 		} else
 			option = 1;	/* No argument, default to 1 */
 		if (option)
-			set_opt(sbi->s_mount_opt, BARRIER);
+			set_opt(sb, BARRIER);
 		else
-			clear_opt(sbi->s_mount_opt, BARRIER);
+			clear_opt(sb, BARRIER);
 		break;
 	case Opt_ignore:
 		break;
@@ -1730,17 +1737,17 @@ set_qf_format:
 			 "Ignoring deprecated bh option");
 		break;
 	case Opt_i_version:
-		set_opt(sbi->s_mount_opt, I_VERSION);
+		set_opt(sb, I_VERSION);
 		sb->s_flags |= MS_I_VERSION;
 		break;
 	case Opt_nodelalloc:
-		clear_opt(sbi->s_mount_opt, DELALLOC);
+		clear_opt(sb, DELALLOC);
 		break;
 	case Opt_mblk_io_submit:
-		set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+		set_opt(sb, MBLK_IO_SUBMIT);
 		break;
 	case Opt_nomblk_io_submit:
-		clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+		clear_opt(sb, MBLK_IO_SUBMIT);
 		break;
 	case Opt_stripe:
 		if (match_int(&args[0], &option))
@@ -1750,13 +1757,13 @@ set_qf_format:
 		sbi->s_stripe = option;
 		break;
 	case Opt_delalloc:
-		set_opt(sbi->s_mount_opt, DELALLOC);
+		set_opt(sb, DELALLOC);
 		break;
 	case Opt_block_validity:
-		set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+		set_opt(sb, BLOCK_VALIDITY);
 		break;
 	case Opt_noblock_validity:
-		clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+		clear_opt(sb, BLOCK_VALIDITY);
 		break;
 	case Opt_inode_readahead_blks:
 		if (match_int(&args[0], &option))
@@ -1780,7 +1787,7 @@ set_qf_format:
 			 option);
 		break;
 	case Opt_noauto_da_alloc:
-		set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+		set_opt(sb, NO_AUTO_DA_ALLOC);
 		break;
 	case Opt_auto_da_alloc:
 		if (args[0].from) {
@@ -1789,24 +1796,24 @@ set_qf_format:
 		} else
 			option = 1;	/* No argument, default to 1 */
 		if (option)
-			clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+			clear_opt(sb, NO_AUTO_DA_ALLOC);
 		else
-			set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+			set_opt(sb, NO_AUTO_DA_ALLOC);
 		break;
 	case Opt_discard:
-		set_opt(sbi->s_mount_opt, DISCARD);
+		set_opt(sb, DISCARD);
 		break;
 	case Opt_nodiscard:
-		clear_opt(sbi->s_mount_opt, DISCARD);
+		clear_opt(sb, DISCARD);
 		break;
 	case Opt_dioread_nolock:
-		set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		set_opt(sb, DIOREAD_NOLOCK);
 		break;
 	case Opt_dioread_lock:
-		clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		clear_opt(sb, DIOREAD_NOLOCK);
 		break;
 	case Opt_init_inode_table:
-		set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+		set_opt(sb, INIT_INODE_TABLE);
 		if (args[0].from) {
 			if (match_int(&args[0], &option))
 				return 0;
@@ -1817,7 +1824,7 @@ set_qf_format:
 		sbi->s_li_wait_mult = option;
 		break;
 	case Opt_noinit_inode_table:
-		clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+		clear_opt(sb, INIT_INODE_TABLE);
 		break;
 	default:
 		ext4_msg(sb, KERN_ERR,
@@ -1829,10 +1836,10 @@ set_qf_format:
#ifdef CONFIG_QUOTA
 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
 		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
-			clear_opt(sbi->s_mount_opt, USRQUOTA);
+			clear_opt(sb, USRQUOTA);
 
 		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
-			clear_opt(sbi->s_mount_opt, GRPQUOTA);
+			clear_opt(sb, GRPQUOTA);
 
 		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
 			ext4_msg(sb, KERN_ERR, "old and new quota "
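
The mechanical set_opt()/clear_opt() churn through set_qf_name() and parse_options() reflects a signature change: the macros now take the super_block rather than the flags word, so call sites stop reaching into sbi->s_mount_opt directly and the macros can route options to a second flags word later (s_mount_opt2, which shows up in the ext4_setup_super() debug output below). Plausibly the before and after in fs/ext4/ext4.h look like this sketch:

	/* before */
	#define set_opt(o, opt)		o |= EXT4_MOUNT_##opt
	#define clear_opt(o, opt)	o &= ~EXT4_MOUNT_##opt

	/* after (sketch) */
	#define set_opt(sb, opt)	(EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_##opt)
	#define clear_opt(sb, opt)	(EXT4_SB(sb)->s_mount_opt &= ~EXT4_MOUNT_##opt)
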
@@ -1902,12 +1909,12 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	ext4_commit_super(sb, 1);
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
-				"bpg=%lu, ipg=%lu, mo=%04x]\n",
+				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
 			sb->s_blocksize,
 			sbi->s_groups_count,
 			EXT4_BLOCKS_PER_GROUP(sb),
 			EXT4_INODES_PER_GROUP(sb),
-			sbi->s_mount_opt);
+			sbi->s_mount_opt, sbi->s_mount_opt2);
 
 	return res;
 }
@@ -1937,14 +1944,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
 	size = flex_group_count * sizeof(struct flex_groups);
 	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
-		sbi->s_flex_groups = vmalloc(size);
-		if (sbi->s_flex_groups)
-			memset(sbi->s_flex_groups, 0, size);
-	}
-	if (sbi->s_flex_groups == NULL) {
-		ext4_msg(sb, KERN_ERR, "not enough memory for "
-				"%u flex groups", flex_group_count);
-		goto failed;
+		sbi->s_flex_groups = vzalloc(size);
+		if (sbi->s_flex_groups == NULL) {
+			ext4_msg(sb, KERN_ERR,
+				 "not enough memory for %u flex groups",
+				 flex_group_count);
+			goto failed;
+		}
 	}
 
 	for (i = 0; i < sbi->s_groups_count; i++) {
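
ext4_fill_flex_info() keeps the kmalloc-first, vmalloc-fallback strategy for the flex-group array (flex_group_count can be large on big filesystems) but switches the fallback to vzalloc(), dropping the manual memset. The pattern, including the matching free side that dual allocation requires, in sketch form:

	/* Sketch of the dual-allocation pattern; error handling trimmed. */
	void *p = kzalloc(size, GFP_KERNEL);	/* fast path: physically contiguous */
	if (!p)
		p = vzalloc(size);		/* fallback: virtually contiguous, pre-zeroed */

	/* The free side must match whichever allocator succeeded: */
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
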
@@ -2923,7 +2929,7 @@ static int ext4_register_li_request(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_li_request *elr;
 	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
-	int ret;
+	int ret = 0;
 
 	if (sbi->s_li_request != NULL)
 		return 0;
@@ -3078,41 +3084,41 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Set defaults before we parse the mount options */
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
-	set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+	set_opt(sb, INIT_INODE_TABLE);
 	if (def_mount_opts & EXT4_DEFM_DEBUG)
-		set_opt(sbi->s_mount_opt, DEBUG);
+		set_opt(sb, DEBUG);
 	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
 		ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
 			 "2.6.38");
-		set_opt(sbi->s_mount_opt, GRPID);
+		set_opt(sb, GRPID);
 	}
 	if (def_mount_opts & EXT4_DEFM_UID16)
-		set_opt(sbi->s_mount_opt, NO_UID32);
+		set_opt(sb, NO_UID32);
#ifdef CONFIG_EXT4_FS_XATTR
 	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-		set_opt(sbi->s_mount_opt, XATTR_USER);
+		set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
 	if (def_mount_opts & EXT4_DEFM_ACL)
-		set_opt(sbi->s_mount_opt, POSIX_ACL);
+		set_opt(sb, POSIX_ACL);
#endif
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
-		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+		set_opt(sb, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
-		set_opt(sbi->s_mount_opt, ORDERED_DATA);
+		set_opt(sb, ORDERED_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
-		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+		set_opt(sb, WRITEBACK_DATA);
 
 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
-		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+		set_opt(sb, ERRORS_PANIC);
 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
-		set_opt(sbi->s_mount_opt, ERRORS_CONT);
+		set_opt(sb, ERRORS_CONT);
 	else
-		set_opt(sbi->s_mount_opt, ERRORS_RO);
+		set_opt(sb, ERRORS_RO);
 	if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
-		set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+		set_opt(sb, BLOCK_VALIDITY);
 	if (def_mount_opts & EXT4_DEFM_DISCARD)
-		set_opt(sbi->s_mount_opt, DISCARD);
+		set_opt(sb, DISCARD);
 
 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -3121,7 +3127,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
 
 	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
-		set_opt(sbi->s_mount_opt, BARRIER);
+		set_opt(sb, BARRIER);
 
 	/*
 	 * enable delayed allocation by default
@@ -3129,7 +3135,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 */
 	if (!IS_EXT3_SB(sb) &&
 	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
-		set_opt(sbi->s_mount_opt, DELALLOC);
+		set_opt(sb, DELALLOC);
 
 	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
 			   &journal_devnum, &journal_ioprio, NULL, 0)) {
@@ -3432,8 +3438,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				"suppressed and not mounted read-only");
 			goto failed_mount_wq;
 		} else {
-			clear_opt(sbi->s_mount_opt, DATA_FLAGS);
-			set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+			clear_opt(sb, DATA_FLAGS);
+			set_opt(sb, WRITEBACK_DATA);
 			sbi->s_journal = NULL;
 			needs_recovery = 0;
 			goto no_journal;
@@ -3471,9 +3477,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 */
 	if (jbd2_journal_check_available_features
 	    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
-		set_opt(sbi->s_mount_opt, ORDERED_DATA);
+		set_opt(sb, ORDERED_DATA);
 	else
-		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+		set_opt(sb, JOURNAL_DATA);
 		break;
 
 	case EXT4_MOUNT_ORDERED_DATA:
@@ -3563,18 +3569,18 @@ no_journal:
 	    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
 		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
3565 "requested data journaling mode"); 3571 "requested data journaling mode");
3566 clear_opt(sbi->s_mount_opt, DELALLOC); 3572 clear_opt(sb, DELALLOC);
3567 } 3573 }
3568 if (test_opt(sb, DIOREAD_NOLOCK)) { 3574 if (test_opt(sb, DIOREAD_NOLOCK)) {
3569 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3575 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3570 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3576 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3571 "option - requested data journaling mode"); 3577 "option - requested data journaling mode");
3572 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 3578 clear_opt(sb, DIOREAD_NOLOCK);
3573 } 3579 }
3574 if (sb->s_blocksize < PAGE_SIZE) { 3580 if (sb->s_blocksize < PAGE_SIZE) {
3575 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3581 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3576 "option - block size is too small"); 3582 "option - block size is too small");
3577 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 3583 clear_opt(sb, DIOREAD_NOLOCK);
3578 } 3584 }
3579 } 3585 }
3580 3586
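Every conversion in this hunk hinges on set_opt() and clear_opt() now taking the superblock instead of the raw s_mount_opt word. The new definitions live in fs/ext4/ext4.h, outside this excerpt; the shape implied by the call sites is roughly:

#define set_opt(sb, opt)	(EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_##opt)
#define clear_opt(sb, opt)	(EXT4_SB(sb)->s_mount_opt &= ~EXT4_MOUNT_##opt)

Hiding the EXT4_SB() dereference inside the macro is what later lets a second option word (s_mount_opt2, used below in the remount code) be added without touching every call site again.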
@@ -4173,6 +4179,22 @@ static int ext4_unfreeze(struct super_block *sb)
4173 return 0; 4179 return 0;
4174} 4180}
4175 4181
4182/*
4183 * Structure to save mount options for ext4_remount's benefit
4184 */
4185struct ext4_mount_options {
4186 unsigned long s_mount_opt;
4187 unsigned long s_mount_opt2;
4188 uid_t s_resuid;
4189 gid_t s_resgid;
4190 unsigned long s_commit_interval;
4191 u32 s_min_batch_time, s_max_batch_time;
4192#ifdef CONFIG_QUOTA
4193 int s_jquota_fmt;
4194 char *s_qf_names[MAXQUOTAS];
4195#endif
4196};
4197
4176static int ext4_remount(struct super_block *sb, int *flags, char *data) 4198static int ext4_remount(struct super_block *sb, int *flags, char *data)
4177{ 4199{
4178 struct ext4_super_block *es; 4200 struct ext4_super_block *es;
@@ -4193,6 +4215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4193 lock_super(sb); 4215 lock_super(sb);
4194 old_sb_flags = sb->s_flags; 4216 old_sb_flags = sb->s_flags;
4195 old_opts.s_mount_opt = sbi->s_mount_opt; 4217 old_opts.s_mount_opt = sbi->s_mount_opt;
4218 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4196 old_opts.s_resuid = sbi->s_resuid; 4219 old_opts.s_resuid = sbi->s_resuid;
4197 old_opts.s_resgid = sbi->s_resgid; 4220 old_opts.s_resgid = sbi->s_resgid;
4198 old_opts.s_commit_interval = sbi->s_commit_interval; 4221 old_opts.s_commit_interval = sbi->s_commit_interval;
@@ -4346,6 +4369,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4346restore_opts: 4369restore_opts:
4347 sb->s_flags = old_sb_flags; 4370 sb->s_flags = old_sb_flags;
4348 sbi->s_mount_opt = old_opts.s_mount_opt; 4371 sbi->s_mount_opt = old_opts.s_mount_opt;
4372 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4349 sbi->s_resuid = old_opts.s_resuid; 4373 sbi->s_resuid = old_opts.s_resuid;
4350 sbi->s_resgid = old_opts.s_resgid; 4374 sbi->s_resgid = old_opts.s_resgid;
4351 sbi->s_commit_interval = old_opts.s_commit_interval; 4375 sbi->s_commit_interval = old_opts.s_commit_interval;
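Note that restore_opts copies back both option words wholesale rather than undoing individual bits: parse_options() may have flipped any subset of them before failing, so snapshot-and-restore is the only safe rollback. A compressed sketch of that discipline (hypothetical wrapper; the real parse_options() takes more arguments than shown):

static int remount_sketch(struct super_block *sb, char *data)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_mount_options old;

	old.s_mount_opt  = sbi->s_mount_opt;	/* snapshot first */
	old.s_mount_opt2 = sbi->s_mount_opt2;

	if (parse_options(data, sb, NULL, NULL, NULL, 1))
		return 0;

	/* Roll back every bit parse_options() may have touched. */
	sbi->s_mount_opt  = old.s_mount_opt;
	sbi->s_mount_opt2 = old.s_mount_opt2;
	return -EINVAL;
}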
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fa4b899da4b3..fc32176eee39 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -427,23 +427,23 @@ cleanup:
427static int 427static int
428ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) 428ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
429{ 429{
430 int i_error, b_error; 430 int ret, ret2;
431 431
432 down_read(&EXT4_I(dentry->d_inode)->xattr_sem); 432 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
433 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); 433 ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
434 if (i_error < 0) { 434 if (ret < 0)
435 b_error = 0; 435 goto errout;
436 } else { 436 if (buffer) {
437 if (buffer) { 437 buffer += ret;
438 buffer += i_error; 438 buffer_size -= ret;
439 buffer_size -= i_error;
440 }
441 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
442 if (b_error < 0)
443 i_error = 0;
444 } 439 }
440 ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
441 if (ret < 0)
442 goto errout;
443 ret += ret2;
444errout:
445 up_read(&EXT4_I(dentry->d_inode)->xattr_sem); 445 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
446 return i_error + b_error; 446 return ret;
447} 447}
448 448
449/* 449/*
@@ -947,7 +947,7 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
947/* 947/*
948 * ext4_xattr_set_handle() 948 * ext4_xattr_set_handle()
949 * 949 *
950 * Create, replace or remove an extended attribute for this inode. Buffer 950 * Create, replace or remove an extended attribute for this inode. Value
951 * is NULL to remove an existing extended attribute, and non-NULL to 951 * is NULL to remove an existing extended attribute, and non-NULL to
952 * either replace an existing extended attribute, or create a new extended 952 * either replace an existing extended attribute, or create a new extended
953 * attribute. The flags XATTR_REPLACE and XATTR_CREATE 953 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
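The ext4_xattr_list() rewrite above replaces the i_error/b_error juggling with one running total: ret2 parks the in-inode byte count, ret is reused for the block pass, and any negative value short-circuits to errout so the semaphore is dropped on exactly one path. The underlying contract is plain listxattr concatenation, which a self-contained sketch makes explicit (producer signatures here are hypothetical):

#include <stddef.h>

/* Each producer writes NUL-terminated names into buf and returns the
 * byte count (or the count it *would* write when buf is NULL, which is
 * how callers size their buffers), or a negative errno. */
static int list_two(int (*a)(char *, size_t), int (*b)(char *, size_t),
		    char *buf, size_t size)
{
	int ret, ret2;

	ret = ret2 = a(buf, size);
	if (ret < 0)
		return ret;
	if (buf) {
		buf += ret;	/* append second list after the first */
		size -= ret;
	}
	ret = b(buf, size);
	if (ret < 0)
		return ret;
	return ret + ret2;	/* total for both lists */
}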
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6e07696308dc..cf8d28d1fbad 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
251 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 251 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
252} 252}
253 253
254void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
255 u64 nodeid, u64 nlookup)
256{
257 forget->forget_one.nodeid = nodeid;
258 forget->forget_one.nlookup = nlookup;
259
260 spin_lock(&fc->lock);
261 fc->forget_list_tail->next = forget;
262 fc->forget_list_tail = forget;
263 wake_up(&fc->waitq);
264 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
265 spin_unlock(&fc->lock);
266}
267
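fuse_queue_forget() above is the entire producer side of the new forget path: a singly linked list rooted at a dummy node in fuse_conn plus a tail pointer makes enqueue O(1) under fc->lock, with no fuse_req allocation on the (memory-tight) eviction path. A stand-alone user-space model of the queue shape (names mirror the kernel ones, but nothing below is kernel API):

#include <stddef.h>

struct forget_link {
	struct forget_link *next;
};

struct conn {
	struct forget_link head;	/* dummy node, never dequeued */
	struct forget_link *tail;	/* == &head while queue is empty */
};

static void conn_init(struct conn *c)
{
	c->head.next = NULL;
	c->tail = &c->head;
}

static void enqueue(struct conn *c, struct forget_link *f)
{
	f->next = NULL;		/* the kernel gets this from kzalloc */
	c->tail->next = f;	/* append after the current tail */
	c->tail = f;
}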
254static void flush_bg_queue(struct fuse_conn *fc) 268static void flush_bg_queue(struct fuse_conn *fc)
255{ 269{
256 while (fc->active_background < fc->max_background && 270 while (fc->active_background < fc->max_background &&
@@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
438 } 452 }
439} 453}
440 454
441void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
442{
443 req->isreply = 0;
444 fuse_request_send_nowait(fc, req);
445}
446
447void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) 455void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
448{ 456{
449 req->isreply = 1; 457 req->isreply = 1;
@@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
896 return err; 904 return err;
897} 905}
898 906
907static int forget_pending(struct fuse_conn *fc)
908{
909 return fc->forget_list_head.next != NULL;
910}
911
899static int request_pending(struct fuse_conn *fc) 912static int request_pending(struct fuse_conn *fc)
900{ 913{
901 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); 914 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
915 forget_pending(fc);
902} 916}
903 917
904/* Wait until a request is available on the pending list */ 918/* Wait until a request is available on the pending list */
@@ -960,6 +974,120 @@ __releases(fc->lock)
960 return err ? err : reqsize; 974 return err ? err : reqsize;
961} 975}
962 976
977static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
978 unsigned max,
979 unsigned *countp)
980{
981 struct fuse_forget_link *head = fc->forget_list_head.next;
982 struct fuse_forget_link **newhead = &head;
983 unsigned count;
984
985 for (count = 0; *newhead != NULL && count < max; count++)
986 newhead = &(*newhead)->next;
987
988 fc->forget_list_head.next = *newhead;
989 *newhead = NULL;
990 if (fc->forget_list_head.next == NULL)
991 fc->forget_list_tail = &fc->forget_list_head;
992
993 if (countp != NULL)
994 *countp = count;
995
996 return head;
997}
998
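dequeue_forget() detaches up to max entries with a pointer-to-pointer walk: after the loop, newhead addresses the next field of the last node being taken, so splitting the chain is two assignments, and the tail is re-armed whenever the queue drains. Continuing the user-space model from above:

static struct forget_link *dequeue(struct conn *c, unsigned int max,
				   unsigned int *countp)
{
	struct forget_link *head = c->head.next;
	struct forget_link **newhead = &head;
	unsigned int count;

	for (count = 0; *newhead != NULL && count < max; count++)
		newhead = &(*newhead)->next;

	c->head.next = *newhead;	/* remainder stays queued */
	*newhead = NULL;		/* detach the first `count` nodes */
	if (c->head.next == NULL)
		c->tail = &c->head;	/* drained: reset the tail */

	if (countp != NULL)
		*countp = count;
	return head;
}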
999static int fuse_read_single_forget(struct fuse_conn *fc,
1000 struct fuse_copy_state *cs,
1001 size_t nbytes)
1002__releases(fc->lock)
1003{
1004 int err;
1005 struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1006 struct fuse_forget_in arg = {
1007 .nlookup = forget->forget_one.nlookup,
1008 };
1009 struct fuse_in_header ih = {
1010 .opcode = FUSE_FORGET,
1011 .nodeid = forget->forget_one.nodeid,
1012 .unique = fuse_get_unique(fc),
1013 .len = sizeof(ih) + sizeof(arg),
1014 };
1015
1016 spin_unlock(&fc->lock);
1017 kfree(forget);
1018 if (nbytes < ih.len)
1019 return -EINVAL;
1020
1021 err = fuse_copy_one(cs, &ih, sizeof(ih));
1022 if (!err)
1023 err = fuse_copy_one(cs, &arg, sizeof(arg));
1024 fuse_copy_finish(cs);
1025
1026 if (err)
1027 return err;
1028
1029 return ih.len;
1030}
1031
1032static int fuse_read_batch_forget(struct fuse_conn *fc,
1033 struct fuse_copy_state *cs, size_t nbytes)
1034__releases(fc->lock)
1035{
1036 int err;
1037 unsigned max_forgets;
1038 unsigned count;
1039 struct fuse_forget_link *head;
1040 struct fuse_batch_forget_in arg = { .count = 0 };
1041 struct fuse_in_header ih = {
1042 .opcode = FUSE_BATCH_FORGET,
1043 .unique = fuse_get_unique(fc),
1044 .len = sizeof(ih) + sizeof(arg),
1045 };
1046
1047 if (nbytes < ih.len) {
1048 spin_unlock(&fc->lock);
1049 return -EINVAL;
1050 }
1051
1052 max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1053 head = dequeue_forget(fc, max_forgets, &count);
1054 spin_unlock(&fc->lock);
1055
1056 arg.count = count;
1057 ih.len += count * sizeof(struct fuse_forget_one);
1058 err = fuse_copy_one(cs, &ih, sizeof(ih));
1059 if (!err)
1060 err = fuse_copy_one(cs, &arg, sizeof(arg));
1061
1062 while (head) {
1063 struct fuse_forget_link *forget = head;
1064
1065 if (!err) {
1066 err = fuse_copy_one(cs, &forget->forget_one,
1067 sizeof(forget->forget_one));
1068 }
1069 head = forget->next;
1070 kfree(forget);
1071 }
1072
1073 fuse_copy_finish(cs);
1074
1075 if (err)
1076 return err;
1077
1078 return ih.len;
1079}
1080
1081static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1082 size_t nbytes)
1083__releases(fc->lock)
1084{
1085 if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1086 return fuse_read_single_forget(fc, cs, nbytes);
1087 else
1088 return fuse_read_batch_forget(fc, cs, nbytes);
1089}
1090
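fuse_read_forget() picks the wire format by negotiated minor version: pre-7.16 servers only understand one FUSE_FORGET per message, while newer ones take a FUSE_BATCH_FORGET whose header length is patched up once the entries are counted. Assuming the upstream struct sizes (fuse_in_header 40 bytes, fuse_batch_forget_in 8, fuse_forget_one 16 -- an assumption about the ABI, not shown in this diff), a 4096-byte read buffer batches:

	max_forgets = (4096 - (40 + 8)) / 16 = 253 entries

and ih.len is then fixed up to 48 + count * 16 before the copy-out.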
963/* 1091/*
964 * Read a single request into the userspace filesystem's buffer. This 1092 * Read a single request into the userspace filesystem's buffer. This
965 * function waits until a request is available, then removes it from 1093 * function waits until a request is available, then removes it from
@@ -998,6 +1126,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
998 return fuse_read_interrupt(fc, cs, nbytes, req); 1126 return fuse_read_interrupt(fc, cs, nbytes, req);
999 } 1127 }
1000 1128
1129 if (forget_pending(fc)) {
1130 if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1131 return fuse_read_forget(fc, cs, nbytes);
1132
1133 if (fc->forget_batch <= -8)
1134 fc->forget_batch = 16;
1135 }
1136
1001 req = list_entry(fc->pending.next, struct fuse_req, list); 1137 req = list_entry(fc->pending.next, struct fuse_req, list);
1002 req->state = FUSE_REQ_READING; 1138 req->state = FUSE_REQ_READING;
1003 list_move(&req->list, &fc->io); 1139 list_move(&req->list, &fc->io);
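The counter juggling in the hunk above rations the forget fast path when regular requests are also waiting: every miss decrements forget_batch, and once it sinks to -8 it is re-armed to 16, settling into roughly sixteen forgets for every eight requests under sustained load. A user-space rendition of just the counter, with both queues assumed permanently non-empty:

#include <stdio.h>

int main(void)
{
	int forget_batch = 0;	/* fuse_conn is kzalloc'd, so starts at 0 */
	int i;

	for (i = 0; i < 48; i++) {
		if (forget_batch-- > 0) {
			putchar('F');	/* serve a queued forget */
			continue;
		}
		if (forget_batch <= -8)
			forget_batch = 16;	/* re-arm after 8 misses */
		putchar('R');		/* serve a regular request */
	}
	putchar('\n');	/* prints RRRRRRRR, then FFFF...F (16), repeating */
	return 0;
}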
@@ -1090,7 +1226,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1090 if (!fc) 1226 if (!fc)
1091 return -EPERM; 1227 return -EPERM;
1092 1228
1093 bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); 1229 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1094 if (!bufs) 1230 if (!bufs)
1095 return -ENOMEM; 1231 return -ENOMEM;
1096 1232
@@ -1626,7 +1762,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1626 if (!fc) 1762 if (!fc)
1627 return -EPERM; 1763 return -EPERM;
1628 1764
1629 bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); 1765 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1630 if (!bufs) 1766 if (!bufs)
1631 return -ENOMEM; 1767 return -ENOMEM;
1632 1768
@@ -1770,6 +1906,8 @@ __acquires(fc->lock)
1770 flush_bg_queue(fc); 1906 flush_bg_queue(fc);
1771 end_requests(fc, &fc->pending); 1907 end_requests(fc, &fc->pending);
1772 end_requests(fc, &fc->processing); 1908 end_requests(fc, &fc->processing);
1909 while (forget_pending(fc))
1910 kfree(dequeue_forget(fc, 1, NULL));
1773} 1911}
1774 1912
1775/* 1913/*
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f738599fd8cd..042af7346ec1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,9 +10,9 @@
10 10
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <linux/gfp.h>
14#include <linux/sched.h> 13#include <linux/sched.h>
15#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h>
16 16
17#if BITS_PER_LONG >= 64 17#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 18static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
@@ -169,7 +169,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
169 struct fuse_entry_out outarg; 169 struct fuse_entry_out outarg;
170 struct fuse_conn *fc; 170 struct fuse_conn *fc;
171 struct fuse_req *req; 171 struct fuse_req *req;
172 struct fuse_req *forget_req; 172 struct fuse_forget_link *forget;
173 struct dentry *parent; 173 struct dentry *parent;
174 u64 attr_version; 174 u64 attr_version;
175 175
@@ -182,8 +182,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
182 if (IS_ERR(req)) 182 if (IS_ERR(req))
183 return 0; 183 return 0;
184 184
185 forget_req = fuse_get_req(fc); 185 forget = fuse_alloc_forget();
186 if (IS_ERR(forget_req)) { 186 if (!forget) {
187 fuse_put_request(fc, req); 187 fuse_put_request(fc, req);
188 return 0; 188 return 0;
189 } 189 }
@@ -203,15 +203,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
203 if (!err) { 203 if (!err) {
204 struct fuse_inode *fi = get_fuse_inode(inode); 204 struct fuse_inode *fi = get_fuse_inode(inode);
205 if (outarg.nodeid != get_node_id(inode)) { 205 if (outarg.nodeid != get_node_id(inode)) {
206 fuse_send_forget(fc, forget_req, 206 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
207 outarg.nodeid, 1);
208 return 0; 207 return 0;
209 } 208 }
210 spin_lock(&fc->lock); 209 spin_lock(&fc->lock);
211 fi->nlookup++; 210 fi->nlookup++;
212 spin_unlock(&fc->lock); 211 spin_unlock(&fc->lock);
213 } 212 }
214 fuse_put_request(fc, forget_req); 213 kfree(forget);
215 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) 214 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
216 return 0; 215 return 0;
217 216
@@ -263,7 +262,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
263{ 262{
264 struct fuse_conn *fc = get_fuse_conn_super(sb); 263 struct fuse_conn *fc = get_fuse_conn_super(sb);
265 struct fuse_req *req; 264 struct fuse_req *req;
266 struct fuse_req *forget_req; 265 struct fuse_forget_link *forget;
267 u64 attr_version; 266 u64 attr_version;
268 int err; 267 int err;
269 268
@@ -277,9 +276,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
277 if (IS_ERR(req)) 276 if (IS_ERR(req))
278 goto out; 277 goto out;
279 278
280 forget_req = fuse_get_req(fc); 279 forget = fuse_alloc_forget();
281 err = PTR_ERR(forget_req); 280 err = -ENOMEM;
282 if (IS_ERR(forget_req)) { 281 if (!forget) {
283 fuse_put_request(fc, req); 282 fuse_put_request(fc, req);
284 goto out; 283 goto out;
285 } 284 }
@@ -305,13 +304,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
305 attr_version); 304 attr_version);
306 err = -ENOMEM; 305 err = -ENOMEM;
307 if (!*inode) { 306 if (!*inode) {
308 fuse_send_forget(fc, forget_req, outarg->nodeid, 1); 307 fuse_queue_forget(fc, forget, outarg->nodeid, 1);
309 goto out; 308 goto out;
310 } 309 }
311 err = 0; 310 err = 0;
312 311
313 out_put_forget: 312 out_put_forget:
314 fuse_put_request(fc, forget_req); 313 kfree(forget);
315 out: 314 out:
316 return err; 315 return err;
317} 316}
@@ -378,7 +377,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
378 struct inode *inode; 377 struct inode *inode;
379 struct fuse_conn *fc = get_fuse_conn(dir); 378 struct fuse_conn *fc = get_fuse_conn(dir);
380 struct fuse_req *req; 379 struct fuse_req *req;
381 struct fuse_req *forget_req; 380 struct fuse_forget_link *forget;
382 struct fuse_create_in inarg; 381 struct fuse_create_in inarg;
383 struct fuse_open_out outopen; 382 struct fuse_open_out outopen;
384 struct fuse_entry_out outentry; 383 struct fuse_entry_out outentry;
@@ -392,9 +391,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
392 if (flags & O_DIRECT) 391 if (flags & O_DIRECT)
393 return -EINVAL; 392 return -EINVAL;
394 393
395 forget_req = fuse_get_req(fc); 394 forget = fuse_alloc_forget();
396 if (IS_ERR(forget_req)) 395 if (!forget)
397 return PTR_ERR(forget_req); 396 return -ENOMEM;
398 397
399 req = fuse_get_req(fc); 398 req = fuse_get_req(fc);
400 err = PTR_ERR(req); 399 err = PTR_ERR(req);
@@ -452,10 +451,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
452 if (!inode) { 451 if (!inode) {
453 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 452 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
454 fuse_sync_release(ff, flags); 453 fuse_sync_release(ff, flags);
455 fuse_send_forget(fc, forget_req, outentry.nodeid, 1); 454 fuse_queue_forget(fc, forget, outentry.nodeid, 1);
456 return -ENOMEM; 455 return -ENOMEM;
457 } 456 }
458 fuse_put_request(fc, forget_req); 457 kfree(forget);
459 d_instantiate(entry, inode); 458 d_instantiate(entry, inode);
460 fuse_change_entry_timeout(entry, &outentry); 459 fuse_change_entry_timeout(entry, &outentry);
461 fuse_invalidate_attr(dir); 460 fuse_invalidate_attr(dir);
@@ -473,7 +472,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
473 out_put_request: 472 out_put_request:
474 fuse_put_request(fc, req); 473 fuse_put_request(fc, req);
475 out_put_forget_req: 474 out_put_forget_req:
476 fuse_put_request(fc, forget_req); 475 kfree(forget);
477 return err; 476 return err;
478} 477}
479 478
@@ -487,12 +486,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
487 struct fuse_entry_out outarg; 486 struct fuse_entry_out outarg;
488 struct inode *inode; 487 struct inode *inode;
489 int err; 488 int err;
490 struct fuse_req *forget_req; 489 struct fuse_forget_link *forget;
491 490
492 forget_req = fuse_get_req(fc); 491 forget = fuse_alloc_forget();
493 if (IS_ERR(forget_req)) { 492 if (!forget) {
494 fuse_put_request(fc, req); 493 fuse_put_request(fc, req);
495 return PTR_ERR(forget_req); 494 return -ENOMEM;
496 } 495 }
497 496
498 memset(&outarg, 0, sizeof(outarg)); 497 memset(&outarg, 0, sizeof(outarg));
@@ -519,10 +518,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
519 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 518 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
520 &outarg.attr, entry_attr_timeout(&outarg), 0); 519 &outarg.attr, entry_attr_timeout(&outarg), 0);
521 if (!inode) { 520 if (!inode) {
522 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 521 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
523 return -ENOMEM; 522 return -ENOMEM;
524 } 523 }
525 fuse_put_request(fc, forget_req); 524 kfree(forget);
526 525
527 if (S_ISDIR(inode->i_mode)) { 526 if (S_ISDIR(inode->i_mode)) {
528 struct dentry *alias; 527 struct dentry *alias;
@@ -545,7 +544,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
545 return 0; 544 return 0;
546 545
547 out_put_forget_req: 546 out_put_forget_req:
548 fuse_put_request(fc, forget_req); 547 kfree(forget);
549 return err; 548 return err;
550} 549}
551 550
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8b984a2cebbd..95da1bc1c826 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1634,9 +1634,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1634 * and 64bit. Fortunately we can determine which structure the server 1634 * and 64bit. Fortunately we can determine which structure the server
1635 * used from the size of the reply. 1635 * used from the size of the reply.
1636 */ 1636 */
1637static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src, 1637static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
1638 size_t transferred, unsigned count, 1638 size_t transferred, unsigned count,
1639 bool is_compat) 1639 bool is_compat)
1640{ 1640{
1641#ifdef CONFIG_COMPAT 1641#ifdef CONFIG_COMPAT
1642 if (count * sizeof(struct compat_iovec) == transferred) { 1642 if (count * sizeof(struct compat_iovec) == transferred) {
@@ -1680,6 +1680,42 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1680 return 0; 1680 return 0;
1681} 1681}
1682 1682
1683static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
1684 void *src, size_t transferred, unsigned count,
1685 bool is_compat)
1686{
1687 unsigned i;
1688 struct fuse_ioctl_iovec *fiov = src;
1689
1690 if (fc->minor < 16) {
1691 return fuse_copy_ioctl_iovec_old(dst, src, transferred,
1692 count, is_compat);
1693 }
1694
1695 if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
1696 return -EIO;
1697
1698 for (i = 0; i < count; i++) {
1699 /* Did the server supply an inappropriate value? */
1700 if (fiov[i].base != (unsigned long) fiov[i].base ||
1701 fiov[i].len != (unsigned long) fiov[i].len)
1702 return -EIO;
1703
1704 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
1705 dst[i].iov_len = (size_t) fiov[i].len;
1706
1707#ifdef CONFIG_COMPAT
1708 if (is_compat &&
1709 (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
1710 (compat_size_t) dst[i].iov_len != fiov[i].len))
1711 return -EIO;
1712#endif
1713 }
1714
1715 return 0;
1716}
1717
1718
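fuse_copy_ioctl_iovec() trusts nothing from the server: on 7.16+ the iovecs arrive as fixed-width 64-bit fuse_ioctl_iovec entries, and each base and len is round-tripped through the native width to reject values a 32-bit kernel (or a compat caller) cannot represent. The whole check reduces to a narrowing comparison:

#include <stdint.h>
#include <stdbool.h>

/* True iff v survives a round trip through unsigned long, i.e. fits the
 * native word.  On a 32-bit build, 0x100000000 fails; on 64-bit,
 * everything passes and the test compiles away. */
static bool fits_native(uint64_t v)
{
	return v == (unsigned long)v;
}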
1683/* 1719/*
1684 * For ioctls, there is no generic way to determine how much memory 1720 * For ioctls, there is no generic way to determine how much memory
1685 * needs to be read and/or written. Furthermore, ioctls are allowed 1721 * needs to be read and/or written. Furthermore, ioctls are allowed
@@ -1740,18 +1776,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1740 struct fuse_ioctl_out outarg; 1776 struct fuse_ioctl_out outarg;
1741 struct fuse_req *req = NULL; 1777 struct fuse_req *req = NULL;
1742 struct page **pages = NULL; 1778 struct page **pages = NULL;
1743 struct page *iov_page = NULL; 1779 struct iovec *iov_page = NULL;
1744 struct iovec *in_iov = NULL, *out_iov = NULL; 1780 struct iovec *in_iov = NULL, *out_iov = NULL;
1745 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; 1781 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
1746 size_t in_size, out_size, transferred; 1782 size_t in_size, out_size, transferred;
1747 int err; 1783 int err;
1748 1784
1785#if BITS_PER_LONG == 32
1786 inarg.flags |= FUSE_IOCTL_32BIT;
1787#else
1788 if (flags & FUSE_IOCTL_COMPAT)
1789 inarg.flags |= FUSE_IOCTL_32BIT;
1790#endif
1791
1749 /* assume all the iovs returned by client always fit in a page */ 1792 /* assume all the iovs returned by client always fit in a page */
1750 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 1793 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1751 1794
1752 err = -ENOMEM; 1795 err = -ENOMEM;
1753 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); 1796 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
1754 iov_page = alloc_page(GFP_KERNEL); 1797 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
1755 if (!pages || !iov_page) 1798 if (!pages || !iov_page)
1756 goto out; 1799 goto out;
1757 1800
@@ -1760,7 +1803,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1760 * RETRY from server is not allowed. 1803 * RETRY from server is not allowed.
1761 */ 1804 */
1762 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { 1805 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
1763 struct iovec *iov = page_address(iov_page); 1806 struct iovec *iov = iov_page;
1764 1807
1765 iov->iov_base = (void __user *)arg; 1808 iov->iov_base = (void __user *)arg;
1766 iov->iov_len = _IOC_SIZE(cmd); 1809 iov->iov_len = _IOC_SIZE(cmd);
@@ -1841,7 +1884,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1841 1884
1842 /* did it ask for retry? */ 1885 /* did it ask for retry? */
1843 if (outarg.flags & FUSE_IOCTL_RETRY) { 1886 if (outarg.flags & FUSE_IOCTL_RETRY) {
1844 char *vaddr; 1887 void *vaddr;
1845 1888
1846 /* no retry if in restricted mode */ 1889 /* no retry if in restricted mode */
1847 err = -EIO; 1890 err = -EIO;
@@ -1862,14 +1905,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1862 goto out; 1905 goto out;
1863 1906
1864 vaddr = kmap_atomic(pages[0], KM_USER0); 1907 vaddr = kmap_atomic(pages[0], KM_USER0);
1865 err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr, 1908 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1866 transferred, in_iovs + out_iovs, 1909 transferred, in_iovs + out_iovs,
1867 (flags & FUSE_IOCTL_COMPAT) != 0); 1910 (flags & FUSE_IOCTL_COMPAT) != 0);
1868 kunmap_atomic(vaddr, KM_USER0); 1911 kunmap_atomic(vaddr, KM_USER0);
1869 if (err) 1912 if (err)
1870 goto out; 1913 goto out;
1871 1914
1872 in_iov = page_address(iov_page); 1915 in_iov = iov_page;
1873 out_iov = in_iov + in_iovs; 1916 out_iov = in_iov + in_iovs;
1874 1917
1875 err = fuse_verify_ioctl_iov(in_iov, in_iovs); 1918 err = fuse_verify_ioctl_iov(in_iov, in_iovs);
@@ -1891,8 +1934,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1891 out: 1934 out:
1892 if (req) 1935 if (req)
1893 fuse_put_request(fc, req); 1936 fuse_put_request(fc, req);
1894 if (iov_page) 1937 free_page((unsigned long) iov_page);
1895 __free_page(iov_page);
1896 while (num_pages) 1938 while (num_pages)
1897 __free_page(pages[--num_pages]); 1939 __free_page(pages[--num_pages]);
1898 kfree(pages); 1940 kfree(pages);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 57d4a3a0f102..ae5744a2f9e9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -53,6 +53,12 @@ extern struct mutex fuse_mutex;
53extern unsigned max_user_bgreq; 53extern unsigned max_user_bgreq;
54extern unsigned max_user_congthresh; 54extern unsigned max_user_congthresh;
55 55
56/* One forget request */
57struct fuse_forget_link {
58 struct fuse_forget_one forget_one;
59 struct fuse_forget_link *next;
60};
61
56/** FUSE inode */ 62/** FUSE inode */
57struct fuse_inode { 63struct fuse_inode {
58 /** Inode data */ 64 /** Inode data */
@@ -66,7 +72,7 @@ struct fuse_inode {
66 u64 nlookup; 72 u64 nlookup;
67 73
68 /** The request used for sending the FORGET message */ 74 /** The request used for sending the FORGET message */
69 struct fuse_req *forget_req; 75 struct fuse_forget_link *forget;
70 76
71 /** Time in jiffies until the file attributes are valid */ 77 /** Time in jiffies until the file attributes are valid */
72 u64 i_time; 78 u64 i_time;
@@ -255,7 +261,6 @@ struct fuse_req {
255 261
256 /** Data for asynchronous requests */ 262 /** Data for asynchronous requests */
257 union { 263 union {
258 struct fuse_forget_in forget_in;
259 struct { 264 struct {
260 struct fuse_release_in in; 265 struct fuse_release_in in;
261 struct path path; 266 struct path path;
@@ -369,6 +374,13 @@ struct fuse_conn {
369 /** Pending interrupts */ 374 /** Pending interrupts */
370 struct list_head interrupts; 375 struct list_head interrupts;
371 376
377 /** Queue of pending forgets */
378 struct fuse_forget_link forget_list_head;
379 struct fuse_forget_link *forget_list_tail;
380
381 /** Batching of FORGET requests (positive indicates FORGET batch) */
382 int forget_batch;
383
372 /** Flag indicating if connection is blocked. This will be 384 /** Flag indicating if connection is blocked. This will be
373 the case before the INIT reply is received, and if there 385 the case before the INIT reply is received, and if there
374 are too many outstanding background requests */ 386 are too many outstanding background requests */
@@ -543,8 +555,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
543/** 555/**
544 * Send FORGET command 556 * Send FORGET command
545 */ 557 */
546void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 558void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
547 u64 nodeid, u64 nlookup); 559 u64 nodeid, u64 nlookup);
560
561struct fuse_forget_link *fuse_alloc_forget(void);
548 562
549/** 563/**
550 * Initialize READ or READDIR request 564 * Initialize READ or READDIR request
@@ -656,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
656void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); 670void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
657 671
658/** 672/**
659 * Send a request with no reply
660 */
661void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
662
663/**
664 * Send a request in the background 673 * Send a request in the background
665 */ 674 */
666void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); 675void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a8b31da19b93..f62b32cffea9 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,6 +71,11 @@ struct fuse_mount_data {
71 unsigned blksize; 71 unsigned blksize;
72}; 72};
73 73
74struct fuse_forget_link *fuse_alloc_forget(void)
75{
76 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
77}
78
74static struct inode *fuse_alloc_inode(struct super_block *sb) 79static struct inode *fuse_alloc_inode(struct super_block *sb)
75{ 80{
76 struct inode *inode; 81 struct inode *inode;
@@ -90,8 +95,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
90 INIT_LIST_HEAD(&fi->queued_writes); 95 INIT_LIST_HEAD(&fi->queued_writes);
91 INIT_LIST_HEAD(&fi->writepages); 96 INIT_LIST_HEAD(&fi->writepages);
92 init_waitqueue_head(&fi->page_waitq); 97 init_waitqueue_head(&fi->page_waitq);
93 fi->forget_req = fuse_request_alloc(); 98 fi->forget = fuse_alloc_forget();
94 if (!fi->forget_req) { 99 if (!fi->forget) {
95 kmem_cache_free(fuse_inode_cachep, inode); 100 kmem_cache_free(fuse_inode_cachep, inode);
96 return NULL; 101 return NULL;
97 } 102 }
@@ -111,24 +116,10 @@ static void fuse_destroy_inode(struct inode *inode)
111 struct fuse_inode *fi = get_fuse_inode(inode); 116 struct fuse_inode *fi = get_fuse_inode(inode);
112 BUG_ON(!list_empty(&fi->write_files)); 117 BUG_ON(!list_empty(&fi->write_files));
113 BUG_ON(!list_empty(&fi->queued_writes)); 118 BUG_ON(!list_empty(&fi->queued_writes));
114 if (fi->forget_req) 119 kfree(fi->forget);
115 fuse_request_free(fi->forget_req);
116 call_rcu(&inode->i_rcu, fuse_i_callback); 120 call_rcu(&inode->i_rcu, fuse_i_callback);
117} 121}
118 122
119void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
120 u64 nodeid, u64 nlookup)
121{
122 struct fuse_forget_in *inarg = &req->misc.forget_in;
123 inarg->nlookup = nlookup;
124 req->in.h.opcode = FUSE_FORGET;
125 req->in.h.nodeid = nodeid;
126 req->in.numargs = 1;
127 req->in.args[0].size = sizeof(struct fuse_forget_in);
128 req->in.args[0].value = inarg;
129 fuse_request_send_noreply(fc, req);
130}
131
132static void fuse_evict_inode(struct inode *inode) 123static void fuse_evict_inode(struct inode *inode)
133{ 124{
134 truncate_inode_pages(&inode->i_data, 0); 125 truncate_inode_pages(&inode->i_data, 0);
@@ -136,8 +127,8 @@ static void fuse_evict_inode(struct inode *inode)
136 if (inode->i_sb->s_flags & MS_ACTIVE) { 127 if (inode->i_sb->s_flags & MS_ACTIVE) {
137 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
138 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
139 fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); 130 fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
140 fi->forget_req = NULL; 131 fi->forget = NULL;
141 } 132 }
142} 133}
143 134
@@ -541,6 +532,7 @@ void fuse_conn_init(struct fuse_conn *fc)
541 INIT_LIST_HEAD(&fc->interrupts); 532 INIT_LIST_HEAD(&fc->interrupts);
542 INIT_LIST_HEAD(&fc->bg_queue); 533 INIT_LIST_HEAD(&fc->bg_queue);
543 INIT_LIST_HEAD(&fc->entry); 534 INIT_LIST_HEAD(&fc->entry);
535 fc->forget_list_tail = &fc->forget_list_head;
544 atomic_set(&fc->num_waiting, 0); 536 atomic_set(&fc->num_waiting, 0);
545 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 537 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
546 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 538 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8d3d2b4a0a7d..a79790c06275 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
11#define __INCORE_DOT_H__ 11#define __INCORE_DOT_H__
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/kobject.h>
14#include <linux/workqueue.h> 15#include <linux/workqueue.h>
15#include <linux/dlm.h> 16#include <linux/dlm.h>
16#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f837ba953529..9e4686900f18 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -43,6 +43,7 @@
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h> 45#include <linux/bitops.h>
46#include <linux/ratelimit.h>
46 47
47#define CREATE_TRACE_POINTS 48#define CREATE_TRACE_POINTS
48#include <trace/events/jbd2.h> 49#include <trace/events/jbd2.h>
@@ -93,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
93EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 94EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
94EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 95EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
95EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 96EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
97EXPORT_SYMBOL(jbd2_inode_cache);
96 98
97static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 99static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
98static void __journal_abort_soft (journal_t *journal, int errno); 100static void __journal_abort_soft (journal_t *journal, int errno);
@@ -827,7 +829,7 @@ static journal_t * journal_init_common (void)
827 829
828 journal = kzalloc(sizeof(*journal), GFP_KERNEL); 830 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
829 if (!journal) 831 if (!journal)
830 goto fail; 832 return NULL;
831 833
832 init_waitqueue_head(&journal->j_wait_transaction_locked); 834 init_waitqueue_head(&journal->j_wait_transaction_locked);
833 init_waitqueue_head(&journal->j_wait_logspace); 835 init_waitqueue_head(&journal->j_wait_logspace);
@@ -852,14 +854,12 @@ static journal_t * journal_init_common (void)
852 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 854 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
853 if (err) { 855 if (err) {
854 kfree(journal); 856 kfree(journal);
855 goto fail; 857 return NULL;
856 } 858 }
857 859
858 spin_lock_init(&journal->j_history_lock); 860 spin_lock_init(&journal->j_history_lock);
859 861
860 return journal; 862 return journal;
861fail:
862 return NULL;
863} 863}
864 864
865/* jbd2_journal_init_dev and jbd2_journal_init_inode: 865/* jbd2_journal_init_dev and jbd2_journal_init_inode:
@@ -1982,7 +1982,6 @@ static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1982static struct journal_head *journal_alloc_journal_head(void) 1982static struct journal_head *journal_alloc_journal_head(void)
1983{ 1983{
1984 struct journal_head *ret; 1984 struct journal_head *ret;
1985 static unsigned long last_warning;
1986 1985
1987#ifdef CONFIG_JBD2_DEBUG 1986#ifdef CONFIG_JBD2_DEBUG
1988 atomic_inc(&nr_journal_heads); 1987 atomic_inc(&nr_journal_heads);
@@ -1990,11 +1989,7 @@ static struct journal_head *journal_alloc_journal_head(void)
1990 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1989 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
1991 if (!ret) { 1990 if (!ret) {
1992 jbd_debug(1, "out of memory for journal_head\n"); 1991 jbd_debug(1, "out of memory for journal_head\n");
1993 if (time_after(jiffies, last_warning + 5*HZ)) { 1992 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
1994 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1995 __func__);
1996 last_warning = jiffies;
1997 }
1998 while (!ret) { 1993 while (!ret) {
1999 yield(); 1994 yield();
2000 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1995 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
@@ -2292,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
2292 2287
2293#endif 2288#endif
2294 2289
2295struct kmem_cache *jbd2_handle_cache; 2290struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
2296 2291
2297static int __init journal_init_handle_cache(void) 2292static int __init journal_init_handle_cache(void)
2298{ 2293{
2299 jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", 2294 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
2300 sizeof(handle_t),
2301 0, /* offset */
2302 SLAB_TEMPORARY, /* flags */
2303 NULL); /* ctor */
2304 if (jbd2_handle_cache == NULL) { 2295 if (jbd2_handle_cache == NULL) {
2305 printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2296 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
2297 return -ENOMEM;
2298 }
2299 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
2300 if (jbd2_inode_cache == NULL) {
2301 printk(KERN_EMERG "JBD2: failed to create inode cache\n");
2302 kmem_cache_destroy(jbd2_handle_cache);
2306 return -ENOMEM; 2303 return -ENOMEM;
2307 } 2304 }
2308 return 0; 2305 return 0;
@@ -2312,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void)
2312{ 2309{
2313 if (jbd2_handle_cache) 2310 if (jbd2_handle_cache)
2314 kmem_cache_destroy(jbd2_handle_cache); 2311 kmem_cache_destroy(jbd2_handle_cache);
2312 if (jbd2_inode_cache)
2313 kmem_cache_destroy(jbd2_inode_cache);
2314
2315} 2315}
2316 2316
2317/* 2317/*
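The five-argument kmem_cache_create() call collapses because KMEM_CACHE() (from linux/slab.h) derives the cache name, object size, and alignment from the struct type itself:

#define KMEM_CACHE(__struct, __flags) \
	kmem_cache_create(#__struct, sizeof(struct __struct), \
			  __alignof__(struct __struct), (__flags), NULL)

so jbd2_handle_cache keeps its slab name ("jbd2_journal_handle") while the new jbd2_inode cache comes correctly sized and aligned for free.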
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2bc4d5f116f1..1cad869494f0 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -299,10 +299,10 @@ int jbd2_journal_skip_recovery(journal_t *journal)
299#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence); 301 be32_to_cpu(journal->j_superblock->s_sequence);
302#endif
303 jbd_debug(1, 302 jbd_debug(1,
304 "JBD: ignoring %d transaction%s from the journal.\n", 303 "JBD: ignoring %d transaction%s from the journal.\n",
305 dropped, (dropped == 1) ? "" : "s"); 304 dropped, (dropped == 1) ? "" : "s");
305#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 306 journal->j_transaction_sequence = ++info.end_transaction;
307 } 307 }
308 308
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6bf0a242613e..394893242ae3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -340,9 +340,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
340 jbd2_free_handle(handle); 340 jbd2_free_handle(handle);
341 current->journal_info = NULL; 341 current->journal_info = NULL;
342 handle = ERR_PTR(err); 342 handle = ERR_PTR(err);
343 goto out;
344 } 343 }
345out:
346 return handle; 344 return handle;
347} 345}
348EXPORT_SYMBOL(jbd2__journal_start); 346EXPORT_SYMBOL(jbd2__journal_start);
@@ -589,7 +587,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
589 transaction = handle->h_transaction; 587 transaction = handle->h_transaction;
590 journal = transaction->t_journal; 588 journal = transaction->t_journal;
591 589
592 jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); 590 jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
593 591
594 JBUFFER_TRACE(jh, "entry"); 592 JBUFFER_TRACE(jh, "entry");
595repeat: 593repeat:
@@ -774,7 +772,7 @@ done:
774 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), 772 J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
775 "Possible IO failure.\n"); 773 "Possible IO failure.\n");
776 page = jh2bh(jh)->b_page; 774 page = jh2bh(jh)->b_page;
777 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 775 offset = offset_in_page(jh2bh(jh)->b_data);
778 source = kmap_atomic(page, KM_USER0); 776 source = kmap_atomic(page, KM_USER0);
779 /* Fire data frozen trigger just before we copy the data */ 777 /* Fire data frozen trigger just before we copy the data */
780 jbd2_buffer_frozen_trigger(jh, source + offset, 778 jbd2_buffer_frozen_trigger(jh, source + offset,
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 97f6073ab339..ca58d64374ca 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-$(CONFIG_LOCKD) += lockd.o 5obj-$(CONFIG_LOCKD) += lockd.o
6 6
7lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ 7lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
8 svcproc.o svcsubs.o mon.o xdr.o grace.o 8 svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
9lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o 9lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
10lockd-objs := $(lockd-objs-y) 10lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
new file mode 100644
index 000000000000..f848b52c67b1
--- /dev/null
+++ b/fs/lockd/clnt4xdr.c
@@ -0,0 +1,605 @@
1/*
2 * linux/fs/lockd/clnt4xdr.c
3 *
4 * XDR functions to encode/decode NLM version 4 RPC arguments and results.
5 *
6 * NLM client-side only.
7 *
8 * Copyright (C) 2010, Oracle. All rights reserved.
9 */
10
11#include <linux/types.h>
12#include <linux/sunrpc/xdr.h>
13#include <linux/sunrpc/clnt.h>
14#include <linux/sunrpc/stats.h>
15#include <linux/lockd/lockd.h>
16
17#define NLMDBG_FACILITY NLMDBG_XDR
18
19#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
20# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
21#endif
22
23#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
24# error "NLM host name cannot be larger than NLM's maximum string length!"
25#endif
26
27/*
28 * Declare the space requirements for NLM arguments and replies as
29 * number of 32bit-words
30 */
31#define NLM4_void_sz (0)
32#define NLM4_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
33#define NLM4_caller_sz (1+(NLMCLNT_OHSIZE>>2))
34#define NLM4_owner_sz (1+(NLMCLNT_OHSIZE>>2))
35#define NLM4_fhandle_sz (1+(NFS3_FHSIZE>>2))
36#define NLM4_lock_sz (5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz)
37#define NLM4_holder_sz (6+NLM4_owner_sz)
38
39#define NLM4_testargs_sz (NLM4_cookie_sz+1+NLM4_lock_sz)
40#define NLM4_lockargs_sz (NLM4_cookie_sz+4+NLM4_lock_sz)
41#define NLM4_cancargs_sz (NLM4_cookie_sz+2+NLM4_lock_sz)
42#define NLM4_unlockargs_sz (NLM4_cookie_sz+NLM4_lock_sz)
43
44#define NLM4_testres_sz (NLM4_cookie_sz+1+NLM4_holder_sz)
45#define NLM4_res_sz (NLM4_cookie_sz+1)
46#define NLM4_norep_sz (0)
47
48
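These _sz constants budget RPC buffers in 32-bit XDR words: a variable-length opaque costs one word for its length plus its payload rounded up to whole words, hence the recurring 1+(len>>2) shape. Taking the customary NLM_MAXCOOKIELEN of 32 bytes (an assumption here; the constant is defined elsewhere in lockd's headers):

/*
 *	NLM4_cookie_sz = 1 + (32 >> 2)      = 9 words  = 36 bytes max
 *	NLM4_res_sz    = NLM4_cookie_sz + 1 = 10 words (cookie + status)
 */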
49static s64 loff_t_to_s64(loff_t offset)
50{
51 s64 res;
52
53 if (offset >= NLM4_OFFSET_MAX)
54 res = NLM4_OFFSET_MAX;
55 else if (offset <= -NLM4_OFFSET_MAX)
56 res = -NLM4_OFFSET_MAX;
57 else
58 res = offset;
59 return res;
60}
61
62static void nlm4_compute_offsets(const struct nlm_lock *lock,
63 u64 *l_offset, u64 *l_len)
64{
65 const struct file_lock *fl = &lock->fl;
66
67 BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
68 BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
69 fl->fl_end != OFFSET_MAX);
70
71 *l_offset = loff_t_to_s64(fl->fl_start);
72 if (fl->fl_end == OFFSET_MAX)
73 *l_len = 0;
74 else
75 *l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
76}
77
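nlm4_compute_offsets() translates the VFS's inclusive [fl_start, fl_end] byte range into the protocol's offset-plus-length pair, with a lock to end-of-file (fl_end == OFFSET_MAX) sent as l_len = 0. Two worked cases:

/*
 *	bytes 100..199:  l_offset = 100, l_len = 199 - 100 + 1 = 100
 *	bytes 100..EOF:  l_offset = 100, l_len = 0  ("until the end")
 */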
78/*
79 * Handle decode buffer overflows out-of-line.
80 */
81static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
82{
83 dprintk("lockd: %s prematurely hit the end of our receive buffer. "
84 "Remaining buffer length is %tu words.\n",
85 func, xdr->end - xdr->p);
86}
87
88
89/*
90 * Encode/decode NLMv4 basic data types
91 *
92 * Basic NLMv4 data types are defined in Appendix II, section 6.1.4
93 * of RFC 1813: "NFS Version 3 Protocol Specification" and in Chapter
94 * 10 of X/Open's "Protocols for Interworking: XNFS, Version 3W".
95 *
96 * Not all basic data types have their own encoding and decoding
97 * functions. For run-time efficiency, some data types are encoded
98 * or decoded inline.
99 */
100
101static void encode_bool(struct xdr_stream *xdr, const int value)
102{
103 __be32 *p;
104
105 p = xdr_reserve_space(xdr, 4);
106 *p = value ? xdr_one : xdr_zero;
107}
108
109static void encode_int32(struct xdr_stream *xdr, const s32 value)
110{
111 __be32 *p;
112
113 p = xdr_reserve_space(xdr, 4);
114 *p = cpu_to_be32(value);
115}
116
117/*
118 * typedef opaque netobj<MAXNETOBJ_SZ>
119 */
120static void encode_netobj(struct xdr_stream *xdr,
121 const u8 *data, const unsigned int length)
122{
123 __be32 *p;
124
125 BUG_ON(length > XDR_MAX_NETOBJ);
126 p = xdr_reserve_space(xdr, 4 + length);
127 xdr_encode_opaque(p, data, length);
128}
129
130static int decode_netobj(struct xdr_stream *xdr,
131 struct xdr_netobj *obj)
132{
133 u32 length;
134 __be32 *p;
135
136 p = xdr_inline_decode(xdr, 4);
137 if (unlikely(p == NULL))
138 goto out_overflow;
139 length = be32_to_cpup(p++);
140 if (unlikely(length > XDR_MAX_NETOBJ))
141 goto out_size;
142 obj->len = length;
143 obj->data = (u8 *)p;
144 return 0;
145out_size:
146 dprintk("NFS: returned netobj was too long: %u\n", length);
147 return -EIO;
148out_overflow:
149 print_overflow_msg(__func__, xdr);
150 return -EIO;
151}
152
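encode_netobj() and decode_netobj() handle the XDR opaque<> type: a big-endian length word, then the data, padded to a four-byte boundary. A five-byte object therefore takes twelve bytes on the wire:

/*
 *	00 00 00 05   de ad be ef 01   00 00 00
 *	`-- length --'`----- data ----'`- pad -'
 */

Note the decode side is zero-copy: obj->data points straight into the RPC receive buffer, so the result is only valid as long as that buffer is.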
153/*
154 * netobj cookie;
155 */
156static void encode_cookie(struct xdr_stream *xdr,
157 const struct nlm_cookie *cookie)
158{
159 BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
160 encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
161}
162
163static int decode_cookie(struct xdr_stream *xdr,
164 struct nlm_cookie *cookie)
165{
166 u32 length;
167 __be32 *p;
168
169 p = xdr_inline_decode(xdr, 4);
170 if (unlikely(p == NULL))
171 goto out_overflow;
172 length = be32_to_cpup(p++);
173 /* apparently HPUX can return empty cookies */
174 if (length == 0)
175 goto out_hpux;
176 if (length > NLM_MAXCOOKIELEN)
177 goto out_size;
178 p = xdr_inline_decode(xdr, length);
179 if (unlikely(p == NULL))
180 goto out_overflow;
181 cookie->len = length;
182 memcpy(cookie->data, p, length);
183 return 0;
184out_hpux:
185 cookie->len = 4;
186 memset(cookie->data, 0, 4);
187 return 0;
188out_size:
189 dprintk("NFS: returned cookie was too long: %u\n", length);
190 return -EIO;
191out_overflow:
192 print_overflow_msg(__func__, xdr);
193 return -EIO;
194}
195
196/*
197 * netobj fh;
198 */
199static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
200{
201 BUG_ON(fh->size > NFS3_FHSIZE);
202 encode_netobj(xdr, (u8 *)&fh->data, fh->size);
203}
204
205/*
206 * enum nlm4_stats {
207 * NLM4_GRANTED = 0,
208 * NLM4_DENIED = 1,
209 * NLM4_DENIED_NOLOCKS = 2,
210 * NLM4_BLOCKED = 3,
211 * NLM4_DENIED_GRACE_PERIOD = 4,
212 * NLM4_DEADLCK = 5,
213 * NLM4_ROFS = 6,
214 * NLM4_STALE_FH = 7,
215 * NLM4_FBIG = 8,
216 * NLM4_FAILED = 9
217 * };
218 *
219 * struct nlm4_stat {
220 * nlm4_stats stat;
221 * };
222 *
223 * NB: we don't swap bytes for the NLM status values. The upper
224 * layers deal directly with the status value in network byte
225 * order.
226 */
227static void encode_nlm4_stat(struct xdr_stream *xdr,
228 const __be32 stat)
229{
230 __be32 *p;
231
232 BUG_ON(be32_to_cpu(stat) > NLM_FAILED);
233 p = xdr_reserve_space(xdr, 4);
234 *p = stat;
235}
236
237static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
238{
239 __be32 *p;
240
241 p = xdr_inline_decode(xdr, 4);
242 if (unlikely(p == NULL))
243 goto out_overflow;
244 if (unlikely(*p > nlm4_failed))
245 goto out_bad_xdr;
246 *stat = *p;
247 return 0;
248out_bad_xdr:
249 dprintk("%s: server returned invalid nlm4_stats value: %u\n",
250 __func__, be32_to_cpup(p));
251 return -EIO;
252out_overflow:
253 print_overflow_msg(__func__, xdr);
254 return -EIO;
255}
256
257/*
258 * struct nlm4_holder {
259 * bool exclusive;
260 * int32 svid;
261 * netobj oh;
262 * uint64 l_offset;
263 * uint64 l_len;
264 * };
265 */
266static void encode_nlm4_holder(struct xdr_stream *xdr,
267 const struct nlm_res *result)
268{
269 const struct nlm_lock *lock = &result->lock;
270 u64 l_offset, l_len;
271 __be32 *p;
272
273 encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
274 encode_int32(xdr, lock->svid);
275 encode_netobj(xdr, lock->oh.data, lock->oh.len);
276
277 p = xdr_reserve_space(xdr, 4 + 4);
278 nlm4_compute_offsets(lock, &l_offset, &l_len);
279 p = xdr_encode_hyper(p, l_offset);
280 xdr_encode_hyper(p, l_len);
281}
282
283static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
284{
285 struct nlm_lock *lock = &result->lock;
286 struct file_lock *fl = &lock->fl;
287 u64 l_offset, l_len;
288 u32 exclusive;
289 int error;
290 __be32 *p;
291 s32 end;
292
293 memset(lock, 0, sizeof(*lock));
294 locks_init_lock(fl);
295
296 p = xdr_inline_decode(xdr, 4 + 4);
297 if (unlikely(p == NULL))
298 goto out_overflow;
299 exclusive = be32_to_cpup(p++);
300 lock->svid = be32_to_cpup(p);
301 fl->fl_pid = (pid_t)lock->svid;
302
303 error = decode_netobj(xdr, &lock->oh);
304 if (unlikely(error))
305 goto out;
306
307 p = xdr_inline_decode(xdr, 8 + 8);
308 if (unlikely(p == NULL))
309 goto out_overflow;
310
311 fl->fl_flags = FL_POSIX;
312 fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
313 p = xdr_decode_hyper(p, &l_offset);
314 xdr_decode_hyper(p, &l_len);
315 end = l_offset + l_len - 1;
316
317 fl->fl_start = (loff_t)l_offset;
318 if (l_len == 0 || end < 0)
319 fl->fl_end = OFFSET_MAX;
320 else
321 fl->fl_end = (loff_t)end;
322 error = 0;
323out:
324 return error;
325out_overflow:
326 print_overflow_msg(__func__, xdr);
327 return -EIO;
328}
329
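Going the other way, decode_nlm4_holder() rebuilds fl_end as l_offset + l_len - 1 through a signed 32-bit intermediate, so both l_len == 0 and an arithmetic wrap read as a lock to end-of-file:

/*
 *	l_offset = 100,        l_len = 100  ->  fl_start = 100, fl_end = 199
 *	l_offset = 100,        l_len = 0    ->  fl_end = OFFSET_MAX
 *	l_offset = 0x7fffffff, l_len = 2    ->  end wraps negative,
 *						fl_end = OFFSET_MAX
 */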
330/*
331 * string caller_name<LM_MAXSTRLEN>;
332 */
333static void encode_caller_name(struct xdr_stream *xdr, const char *name)
334{
335 /* NB: client-side does not set lock->len */
336 u32 length = strlen(name);
337 __be32 *p;
338
339 BUG_ON(length > NLM_MAXSTRLEN);
340 p = xdr_reserve_space(xdr, 4 + length);
341 xdr_encode_opaque(p, name, length);
342}
343
344/*
345 * struct nlm4_lock {
346 * string caller_name<LM_MAXSTRLEN>;
347 * netobj fh;
348 * netobj oh;
349 * int32 svid;
350 * uint64 l_offset;
351 * uint64 l_len;
352 * };
353 */
354static void encode_nlm4_lock(struct xdr_stream *xdr,
355 const struct nlm_lock *lock)
356{
357 u64 l_offset, l_len;
358 __be32 *p;
359
360 encode_caller_name(xdr, lock->caller);
361 encode_fh(xdr, &lock->fh);
362 encode_netobj(xdr, lock->oh.data, lock->oh.len);
363
364 p = xdr_reserve_space(xdr, 4 + 8 + 8);
365 *p++ = cpu_to_be32(lock->svid);
366
367 nlm4_compute_offsets(lock, &l_offset, &l_len);
368 p = xdr_encode_hyper(p, l_offset);
369 xdr_encode_hyper(p, l_len);
370}
371
372
373/*
374 * NLMv4 XDR encode functions
375 *
376 * NLMv4 argument types are defined in Appendix II of RFC 1813:
377 * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
378 * "Protocols for Interworking: XNFS, Version 3W".
379 */
380
381/*
382 * struct nlm4_testargs {
383 * netobj cookie;
384 * bool exclusive;
385 * struct nlm4_lock alock;
386 * };
387 */
388static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
389 struct xdr_stream *xdr,
390 const struct nlm_args *args)
391{
392 const struct nlm_lock *lock = &args->lock;
393
394 encode_cookie(xdr, &args->cookie);
395 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
396 encode_nlm4_lock(xdr, lock);
397}
398
399/*
400 * struct nlm4_lockargs {
401 * netobj cookie;
402 * bool block;
403 * bool exclusive;
404 * struct nlm4_lock alock;
405 * bool reclaim;
406 * int state;
407 * };
408 */
409static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
410 struct xdr_stream *xdr,
411 const struct nlm_args *args)
412{
413 const struct nlm_lock *lock = &args->lock;
414
415 encode_cookie(xdr, &args->cookie);
416 encode_bool(xdr, args->block);
417 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
418 encode_nlm4_lock(xdr, lock);
419 encode_bool(xdr, args->reclaim);
420 encode_int32(xdr, args->state);
421}
422
423/*
424 * struct nlm4_cancargs {
425 * netobj cookie;
426 * bool block;
427 * bool exclusive;
428 * struct nlm4_lock alock;
429 * };
430 */
431static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
432 struct xdr_stream *xdr,
433 const struct nlm_args *args)
434{
435 const struct nlm_lock *lock = &args->lock;
436
437 encode_cookie(xdr, &args->cookie);
438 encode_bool(xdr, args->block);
439 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
440 encode_nlm4_lock(xdr, lock);
441}
442
443/*
444 * struct nlm4_unlockargs {
445 * netobj cookie;
446 * struct nlm4_lock alock;
447 * };
448 */
449static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
450 struct xdr_stream *xdr,
451 const struct nlm_args *args)
452{
453 const struct nlm_lock *lock = &args->lock;
454
455 encode_cookie(xdr, &args->cookie);
456 encode_nlm4_lock(xdr, lock);
457}
458
459/*
460 * struct nlm4_res {
461 * netobj cookie;
462 * nlm4_stat stat;
463 * };
464 */
465static void nlm4_xdr_enc_res(struct rpc_rqst *req,
466 struct xdr_stream *xdr,
467 const struct nlm_res *result)
468{
469 encode_cookie(xdr, &result->cookie);
470 encode_nlm4_stat(xdr, result->status);
471}
472
473/*
474 * union nlm4_testrply switch (nlm4_stats stat) {
475 * case NLM4_DENIED:
476 * struct nlm4_holder holder;
477 * default:
478 * void;
479 * };
480 *
481 * struct nlm4_testres {
482 * netobj cookie;
483 * nlm4_testrply test_stat;
484 * };
485 */
486static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
487 struct xdr_stream *xdr,
488 const struct nlm_res *result)
489{
490 encode_cookie(xdr, &result->cookie);
491 encode_nlm4_stat(xdr, result->status);
492 if (result->status == nlm_lck_denied)
493 encode_nlm4_holder(xdr, result);
494}
495
496
497/*
498 * NLMv4 XDR decode functions
499 *
500 * NLMv4 result types are defined in Appendix II of RFC 1813:
501 * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
502 * "Protocols for Interworking: XNFS, Version 3W".
503 */
504
505/*
506 * union nlm4_testrply switch (nlm4_stats stat) {
507 * case NLM4_DENIED:
508 * struct nlm4_holder holder;
509 * default:
510 * void;
511 * };
512 *
513 * struct nlm4_testres {
514 * netobj cookie;
515 * nlm4_testrply test_stat;
516 * };
517 */
518static int decode_nlm4_testrply(struct xdr_stream *xdr,
519 struct nlm_res *result)
520{
521 int error;
522
523 error = decode_nlm4_stat(xdr, &result->status);
524 if (unlikely(error))
525 goto out;
526 if (result->status == nlm_lck_denied)
527 error = decode_nlm4_holder(xdr, result);
528out:
529 return error;
530}
531
532static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
533 struct xdr_stream *xdr,
534 struct nlm_res *result)
535{
536 int error;
537
538 error = decode_cookie(xdr, &result->cookie);
539 if (unlikely(error))
540 goto out;
541 error = decode_nlm4_testrply(xdr, result);
542out:
543 return error;
544}
545
546/*
547 * struct nlm4_res {
548 * netobj cookie;
549 * nlm4_stat stat;
550 * };
551 */
552static int nlm4_xdr_dec_res(struct rpc_rqst *req,
553 struct xdr_stream *xdr,
554 struct nlm_res *result)
555{
556 int error;
557
558 error = decode_cookie(xdr, &result->cookie);
559 if (unlikely(error))
560 goto out;
561 error = decode_nlm4_stat(xdr, &result->status);
562out:
563 return error;
564}
565
566
567/*
568 * For NLM, a void procedure really returns nothing
569 */
570#define nlm4_xdr_dec_norep NULL
571
572#define PROC(proc, argtype, restype) \
573[NLMPROC_##proc] = { \
574 .p_proc = NLMPROC_##proc, \
575 .p_encode = (kxdreproc_t)nlm4_xdr_enc_##argtype, \
576 .p_decode = (kxdrdproc_t)nlm4_xdr_dec_##restype, \
577 .p_arglen = NLM4_##argtype##_sz, \
578 .p_replen = NLM4_##restype##_sz, \
579 .p_statidx = NLMPROC_##proc, \
580 .p_name = #proc, \
581 }
582
583static struct rpc_procinfo nlm4_procedures[] = {
584 PROC(TEST, testargs, testres),
585 PROC(LOCK, lockargs, res),
586 PROC(CANCEL, cancargs, res),
587 PROC(UNLOCK, unlockargs, res),
588 PROC(GRANTED, testargs, res),
589 PROC(TEST_MSG, testargs, norep),
590 PROC(LOCK_MSG, lockargs, norep),
591 PROC(CANCEL_MSG, cancargs, norep),
592 PROC(UNLOCK_MSG, unlockargs, norep),
593 PROC(GRANTED_MSG, testargs, norep),
594 PROC(TEST_RES, testres, norep),
595 PROC(LOCK_RES, res, norep),
596 PROC(CANCEL_RES, res, norep),
597 PROC(UNLOCK_RES, res, norep),
598 PROC(GRANTED_RES, res, norep),
599};
600
601struct rpc_version nlm_version4 = {
602 .number = 4,
603 .nrprocs = ARRAY_SIZE(nlm4_procedures),
604 .procs = nlm4_procedures,
605};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 25509eb28fd7..8d4ea8351e3d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
79 */ 79 */
80void nlmclnt_done(struct nlm_host *host) 80void nlmclnt_done(struct nlm_host *host)
81{ 81{
82 nlm_release_host(host); 82 nlmclnt_release_host(host);
83 lockd_down(); 83 lockd_down();
84} 84}
85EXPORT_SYMBOL_GPL(nlmclnt_done); 85EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -273,7 +273,7 @@ restart:
273 spin_unlock(&nlm_blocked_lock); 273 spin_unlock(&nlm_blocked_lock);
274 274
275 /* Release host handle after use */ 275 /* Release host handle after use */
276 nlm_release_host(host); 276 nlmclnt_release_host(host);
277 lockd_down(); 277 lockd_down();
278 return 0; 278 return 0;
279} 279}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 332c54cf75e0..adb45ec9038c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -58,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
58 return; 58 return;
59 list_del(&lockowner->list); 59 list_del(&lockowner->list);
60 spin_unlock(&lockowner->host->h_lock); 60 spin_unlock(&lockowner->host->h_lock);
61 nlm_release_host(lockowner->host); 61 nlmclnt_release_host(lockowner->host);
62 kfree(lockowner); 62 kfree(lockowner);
63} 63}
64 64
@@ -207,22 +207,22 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
207 printk("nlm_alloc_call: failed, waiting for memory\n"); 207 printk("nlm_alloc_call: failed, waiting for memory\n");
208 schedule_timeout_interruptible(5*HZ); 208 schedule_timeout_interruptible(5*HZ);
209 } 209 }
210 nlm_release_host(host); 210 nlmclnt_release_host(host);
211 return NULL; 211 return NULL;
212} 212}
213 213
214void nlm_release_call(struct nlm_rqst *call) 214void nlmclnt_release_call(struct nlm_rqst *call)
215{ 215{
216 if (!atomic_dec_and_test(&call->a_count)) 216 if (!atomic_dec_and_test(&call->a_count))
217 return; 217 return;
218 nlm_release_host(call->a_host); 218 nlmclnt_release_host(call->a_host);
219 nlmclnt_release_lockargs(call); 219 nlmclnt_release_lockargs(call);
220 kfree(call); 220 kfree(call);
221} 221}
222 222
223static void nlmclnt_rpc_release(void *data) 223static void nlmclnt_rpc_release(void *data)
224{ 224{
225 nlm_release_call(data); 225 nlmclnt_release_call(data);
226} 226}
227 227
228static int nlm_wait_on_grace(wait_queue_head_t *queue) 228static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -436,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
436 status = nlm_stat_to_errno(req->a_res.status); 436 status = nlm_stat_to_errno(req->a_res.status);
437 } 437 }
438out: 438out:
439 nlm_release_call(req); 439 nlmclnt_release_call(req);
440 return status; 440 return status;
441} 441}
442 442
@@ -593,7 +593,7 @@ again:
593out_unblock: 593out_unblock:
594 nlmclnt_finish_block(block); 594 nlmclnt_finish_block(block);
595out: 595out:
596 nlm_release_call(req); 596 nlmclnt_release_call(req);
597 return status; 597 return status;
598out_unlock: 598out_unlock:
599 /* Fatal error: ensure that we remove the lock altogether */ 599 /* Fatal error: ensure that we remove the lock altogether */
@@ -694,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
694 /* What to do now? I'm out of my depth... */ 694 /* What to do now? I'm out of my depth... */
695 status = -ENOLCK; 695 status = -ENOLCK;
696out: 696out:
697 nlm_release_call(req); 697 nlmclnt_release_call(req);
698 return status; 698 return status;
699} 699}
700 700
@@ -755,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
755 NLMPROC_CANCEL, &nlmclnt_cancel_ops); 755 NLMPROC_CANCEL, &nlmclnt_cancel_ops);
756 if (status == 0 && req->a_res.status == nlm_lck_denied) 756 if (status == 0 && req->a_res.status == nlm_lck_denied)
757 status = -ENOLCK; 757 status = -ENOLCK;
758 nlm_release_call(req); 758 nlmclnt_release_call(req);
759 return status; 759 return status;
760} 760}
761 761
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
new file mode 100644
index 000000000000..180ac34feb9a
--- /dev/null
+++ b/fs/lockd/clntxdr.c
@@ -0,0 +1,627 @@
1/*
2 * linux/fs/lockd/clntxdr.c
3 *
4 * XDR functions to encode/decode NLM version 3 RPC arguments and results.
5 * NLM version 3 is backwards compatible with NLM versions 1 and 2.
6 *
7 * NLM client-side only.
8 *
9 * Copyright (C) 2010, Oracle. All rights reserved.
10 */
11
12#include <linux/types.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/stats.h>
16#include <linux/lockd/lockd.h>
17
18#define NLMDBG_FACILITY NLMDBG_XDR
19
20#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
21# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
22#endif
23
24/*
25 * Declare the space requirements for NLM arguments and replies as
26 * number of 32bit-words
27 */
28#define NLM_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
29#define NLM_caller_sz (1+(NLMCLNT_OHSIZE>>2))
30#define NLM_owner_sz (1+(NLMCLNT_OHSIZE>>2))
31#define NLM_fhandle_sz (1+(NFS2_FHSIZE>>2))
32#define NLM_lock_sz (3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz)
33#define NLM_holder_sz (4+NLM_owner_sz)
34
35#define NLM_testargs_sz (NLM_cookie_sz+1+NLM_lock_sz)
36#define NLM_lockargs_sz (NLM_cookie_sz+4+NLM_lock_sz)
37#define NLM_cancargs_sz (NLM_cookie_sz+2+NLM_lock_sz)
38#define NLM_unlockargs_sz (NLM_cookie_sz+NLM_lock_sz)
39
40#define NLM_testres_sz (NLM_cookie_sz+1+NLM_holder_sz)
41#define NLM_res_sz (NLM_cookie_sz+1)
42#define NLM_norep_sz (0)
43
44
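/*
 * Worked example of the word-count arithmetic above: each _sz macro
 * counts 32-bit XDR words, one word for a variable-length item's
 * length plus its payload rounded up.  Assuming, purely for
 * illustration, NLM_MAXCOOKIELEN = 32 and NLMCLNT_OHSIZE = 64:
 *
 *   NLM_cookie_sz   = 1 +  8 =  9 words
 *   NLM_caller_sz   = 1 + 16 = 17 words
 *   NLM_owner_sz    = 1 + 16 = 17 words
 *   NLM_fhandle_sz  = 1 +  8 =  9 words
 *   NLM_lock_sz     = 3 + 17 + 17 + 9 = 46 words
 *   NLM_testargs_sz = 9 + 1 + 46 = 56 words = 224 bytes on the wire
 */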
45static s32 loff_t_to_s32(loff_t offset)
46{
47 s32 res;
48
49 if (offset >= NLM_OFFSET_MAX)
50 res = NLM_OFFSET_MAX;
51 else if (offset <= -NLM_OFFSET_MAX)
52 res = -NLM_OFFSET_MAX;
53 else
54 res = offset;
55 return res;
56}
57
58static void nlm_compute_offsets(const struct nlm_lock *lock,
59 u32 *l_offset, u32 *l_len)
60{
61 const struct file_lock *fl = &lock->fl;
62
63 BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
64 BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
65 fl->fl_end != OFFSET_MAX);
66
67 *l_offset = loff_t_to_s32(fl->fl_start);
68 if (fl->fl_end == OFFSET_MAX)
69 *l_len = 0;
70 else
71 *l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
72}
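
/*
 * A hedged user-space sketch (names illustrative) of the encode-side
 * mapping used here: a POSIX byte range [fl_start, fl_end] becomes an
 * NLMv3 (offset, length) pair of unsigned 32-bit values, with
 * fl_end == OFFSET_MAX encoded as length zero ("whole file") and
 * out-of-range values clamped as in loff_t_to_s32() above.
 */
#include <stdint.h>
#include <stdio.h>

#define OFFSET_MAX     INT64_MAX
#define NLM_OFFSET_MAX INT32_MAX

static int32_t clamp_s32(int64_t offset)
{
        if (offset >= NLM_OFFSET_MAX)
                return NLM_OFFSET_MAX;
        if (offset <= -NLM_OFFSET_MAX)
                return -NLM_OFFSET_MAX;
        return (int32_t)offset;
}

static void posix_range_to_nlm(int64_t fl_start, int64_t fl_end,
                               uint32_t *l_offset, uint32_t *l_len)
{
        *l_offset = (uint32_t)clamp_s32(fl_start);
        if (fl_end == OFFSET_MAX)
                *l_len = 0;                     /* zero length: whole file */
        else
                *l_len = (uint32_t)clamp_s32(fl_end - fl_start + 1);
}

int main(void)
{
        uint32_t off, len;

        posix_range_to_nlm(0, OFFSET_MAX, &off, &len);
        printf("off=%u len=%u\n", off, len);    /* off=0 len=0 */
        return 0;
}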
73
74/*
75 * Handle decode buffer overflows out-of-line.
76 */
77static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
78{
79 dprintk("lockd: %s prematurely hit the end of our receive buffer. "
80 "Remaining buffer length is %tu words.\n",
81 func, xdr->end - xdr->p);
82}
83
84
85/*
86 * Encode/decode NLMv3 basic data types
87 *
88 * Basic NLMv3 data types are not defined in an IETF standards
89 * document. X/Open has a description of these data types that
90 * is useful. See Chapter 10 of "Protocols for Interworking:
91 * XNFS, Version 3W".
92 *
93 * Not all basic data types have their own encoding and decoding
94 * functions. For run-time efficiency, some data types are encoded
95 * or decoded inline.
96 */
97
98static void encode_bool(struct xdr_stream *xdr, const int value)
99{
100 __be32 *p;
101
102 p = xdr_reserve_space(xdr, 4);
103 *p = value ? xdr_one : xdr_zero;
104}
105
106static void encode_int32(struct xdr_stream *xdr, const s32 value)
107{
108 __be32 *p;
109
110 p = xdr_reserve_space(xdr, 4);
111 *p = cpu_to_be32(value);
112}
113
114/*
115 * typedef opaque netobj<MAXNETOBJ_SZ>
116 */
117static void encode_netobj(struct xdr_stream *xdr,
118 const u8 *data, const unsigned int length)
119{
120 __be32 *p;
121
122 BUG_ON(length > XDR_MAX_NETOBJ);
123 p = xdr_reserve_space(xdr, 4 + length);
124 xdr_encode_opaque(p, data, length);
125}
126
127static int decode_netobj(struct xdr_stream *xdr,
128 struct xdr_netobj *obj)
129{
130 u32 length;
131 __be32 *p;
132
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(p == NULL))
135 goto out_overflow;
136 length = be32_to_cpup(p++);
137 if (unlikely(length > XDR_MAX_NETOBJ))
138 goto out_size;
139 obj->len = length;
140 obj->data = (u8 *)p;
141 return 0;
142out_size:
143 dprintk("NFS: returned netobj was too long: %u\n", length);
144 return -EIO;
145out_overflow:
146 print_overflow_msg(__func__, xdr);
147 return -EIO;
148}
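
/*
 * The wire format decode_netobj() walks is the standard XDR
 * variable-length opaque: a 4-byte big-endian length, the bytes
 * themselves, then zero padding to the next 4-byte boundary.  A
 * self-contained user-space decoder (illustrative only; note that,
 * like the function above, it points at the data in place rather
 * than copying it):
 */
#include <stdint.h>
#include <stdio.h>

struct netobj {
        uint32_t       len;
        const uint8_t *data;
};

static const uint8_t *decode_opaque(const uint8_t *p, const uint8_t *end,
                                    struct netobj *obj)
{
        uint32_t len, padded;

        if (end - p < 4)
                return NULL;            /* short buffer */
        len = (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
              (uint32_t)p[2] << 8  | (uint32_t)p[3];
        p += 4;
        padded = (len + 3) & ~3u;
        if (padded < len)
                return NULL;            /* length overflow */
        if ((size_t)(end - p) < padded)
                return NULL;
        obj->len  = len;
        obj->data = p;                  /* points into the buffer */
        return p + padded;              /* skip data plus padding */
}

int main(void)
{
        /* "abc" as an XDR opaque: length 3, data, one pad byte */
        static const uint8_t wire[] = { 0, 0, 0, 3, 'a', 'b', 'c', 0 };
        struct netobj obj;

        if (decode_opaque(wire, wire + sizeof(wire), &obj))
                printf("%.*s\n", (int)obj.len, (const char *)obj.data);
        return 0;
}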
149
150/*
151 * netobj cookie;
152 */
153static void encode_cookie(struct xdr_stream *xdr,
154 const struct nlm_cookie *cookie)
155{
156 BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
157 encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
158}
159
160static int decode_cookie(struct xdr_stream *xdr,
161 struct nlm_cookie *cookie)
162{
163 u32 length;
164 __be32 *p;
165
166 p = xdr_inline_decode(xdr, 4);
167 if (unlikely(p == NULL))
168 goto out_overflow;
169 length = be32_to_cpup(p++);
170 /* apparently HPUX can return empty cookies */
171 if (length == 0)
172 goto out_hpux;
173 if (length > NLM_MAXCOOKIELEN)
174 goto out_size;
175 p = xdr_inline_decode(xdr, length);
176 if (unlikely(p == NULL))
177 goto out_overflow;
178 cookie->len = length;
179 memcpy(cookie->data, p, length);
180 return 0;
181out_hpux:
182 cookie->len = 4;
183 memset(cookie->data, 0, 4);
184 return 0;
185out_size:
186 dprintk("NFS: returned cookie was too long: %u\n", length);
187 return -EIO;
188out_overflow:
189 print_overflow_msg(__func__, xdr);
190 return -EIO;
191}
192
193/*
194 * netobj fh;
195 */
196static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
197{
198 BUG_ON(fh->size != NFS2_FHSIZE);
199 encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
200}
201
202/*
203 * enum nlm_stats {
204 * LCK_GRANTED = 0,
205 * LCK_DENIED = 1,
206 * LCK_DENIED_NOLOCKS = 2,
207 * LCK_BLOCKED = 3,
208 * LCK_DENIED_GRACE_PERIOD = 4
209 * };
210 *
211 *
212 * struct nlm_stat {
213 * nlm_stats stat;
214 * };
215 *
216 * NB: we don't swap bytes for the NLM status values. The upper
217 * layers deal directly with the status value in network byte
218 * order.
219 */
220
221static void encode_nlm_stat(struct xdr_stream *xdr,
222 const __be32 stat)
223{
224 __be32 *p;
225
226 BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
227 p = xdr_reserve_space(xdr, 4);
228 *p = stat;
229}
230
231static int decode_nlm_stat(struct xdr_stream *xdr,
232 __be32 *stat)
233{
234 __be32 *p;
235
236 p = xdr_inline_decode(xdr, 4);
237 if (unlikely(p == NULL))
238 goto out_overflow;
239 if (unlikely(*p > nlm_lck_denied_grace_period))
240 goto out_enum;
241 *stat = *p;
242 return 0;
243out_enum:
244 dprintk("%s: server returned invalid nlm_stats value: %u\n",
245 __func__, be32_to_cpup(p));
246 return -EIO;
247out_overflow:
248 print_overflow_msg(__func__, xdr);
249 return -EIO;
250}
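
/*
 * Toy demonstration of the convention described above: the status is
 * left in network byte order end to end, so the upper layers compare
 * it against constants that are themselves stored pre-swapped (as the
 * nlm_lck_* values are).  No ntohl() is ever needed.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t wire_status = htonl(1);        /* LCK_DENIED, as received */

        if (wire_status == htonl(1))            /* swapped-to-swapped compare */
                printf("status: LCK_DENIED\n");
        return 0;
}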
251
252/*
253 * struct nlm_holder {
254 * bool exclusive;
255 * int uppid;
256 * netobj oh;
257 * unsigned l_offset;
258 * unsigned l_len;
259 * };
260 */
261static void encode_nlm_holder(struct xdr_stream *xdr,
262 const struct nlm_res *result)
263{
264 const struct nlm_lock *lock = &result->lock;
265 u32 l_offset, l_len;
266 __be32 *p;
267
268 encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
269 encode_int32(xdr, lock->svid);
270 encode_netobj(xdr, lock->oh.data, lock->oh.len);
271
272 p = xdr_reserve_space(xdr, 4 + 4);
273 nlm_compute_offsets(lock, &l_offset, &l_len);
274 *p++ = cpu_to_be32(l_offset);
275 *p = cpu_to_be32(l_len);
276}
277
278static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
279{
280 struct nlm_lock *lock = &result->lock;
281 struct file_lock *fl = &lock->fl;
282 u32 exclusive, l_offset, l_len;
283 int error;
284 __be32 *p;
285 s32 end;
286
287 memset(lock, 0, sizeof(*lock));
288 locks_init_lock(fl);
289
290 p = xdr_inline_decode(xdr, 4 + 4);
291 if (unlikely(p == NULL))
292 goto out_overflow;
293 exclusive = be32_to_cpup(p++);
294 lock->svid = be32_to_cpup(p);
295 fl->fl_pid = (pid_t)lock->svid;
296
297 error = decode_netobj(xdr, &lock->oh);
298 if (unlikely(error))
299 goto out;
300
301 p = xdr_inline_decode(xdr, 4 + 4);
302 if (unlikely(p == NULL))
303 goto out_overflow;
304
305 fl->fl_flags = FL_POSIX;
306 fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
307 l_offset = be32_to_cpup(p++);
308 l_len = be32_to_cpup(p);
309 end = l_offset + l_len - 1;
310
311 fl->fl_start = (loff_t)l_offset;
312 if (l_len == 0 || end < 0)
313 fl->fl_end = OFFSET_MAX;
314 else
315 fl->fl_end = (loff_t)end;
316 error = 0;
317out:
318 return error;
319out_overflow:
320 print_overflow_msg(__func__, xdr);
321 return -EIO;
322}
323
324/*
325 * string caller_name<LM_MAXSTRLEN>;
326 */
327static void encode_caller_name(struct xdr_stream *xdr, const char *name)
328{
329 /* NB: client-side does not set lock->len */
330 u32 length = strlen(name);
331 __be32 *p;
332
333 BUG_ON(length > NLM_MAXSTRLEN);
334 p = xdr_reserve_space(xdr, 4 + length);
335 xdr_encode_opaque(p, name, length);
336}
337
338/*
339 * struct nlm_lock {
340 * string caller_name<LM_MAXSTRLEN>;
341 * netobj fh;
342 * netobj oh;
343 * int uppid;
344 * unsigned l_offset;
345 * unsigned l_len;
346 * };
347 */
348static void encode_nlm_lock(struct xdr_stream *xdr,
349 const struct nlm_lock *lock)
350{
351 u32 l_offset, l_len;
352 __be32 *p;
353
354 encode_caller_name(xdr, lock->caller);
355 encode_fh(xdr, &lock->fh);
356 encode_netobj(xdr, lock->oh.data, lock->oh.len);
357
358 p = xdr_reserve_space(xdr, 4 + 4 + 4);
359 *p++ = cpu_to_be32(lock->svid);
360
361 nlm_compute_offsets(lock, &l_offset, &l_len);
362 *p++ = cpu_to_be32(l_offset);
363 *p = cpu_to_be32(l_len);
364}
365
366
367/*
368 * NLMv3 XDR encode functions
369 *
370 * NLMv3 argument types are defined in Chapter 10 of The Open Group's
371 * "Protocols for Interworking: XNFS, Version 3W".
372 */
373
374/*
375 * struct nlm_testargs {
376 * netobj cookie;
377 * bool exclusive;
378 * struct nlm_lock alock;
379 * };
380 */
381static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
382 struct xdr_stream *xdr,
383 const struct nlm_args *args)
384{
385 const struct nlm_lock *lock = &args->lock;
386
387 encode_cookie(xdr, &args->cookie);
388 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
389 encode_nlm_lock(xdr, lock);
390}
391
392/*
393 * struct nlm_lockargs {
394 * netobj cookie;
395 * bool block;
396 * bool exclusive;
397 * struct nlm_lock alock;
398 * bool reclaim;
399 * int state;
400 * };
401 */
402static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
403 struct xdr_stream *xdr,
404 const struct nlm_args *args)
405{
406 const struct nlm_lock *lock = &args->lock;
407
408 encode_cookie(xdr, &args->cookie);
409 encode_bool(xdr, args->block);
410 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
411 encode_nlm_lock(xdr, lock);
412 encode_bool(xdr, args->reclaim);
413 encode_int32(xdr, args->state);
414}
415
416/*
417 * struct nlm_cancargs {
418 * netobj cookie;
419 * bool block;
420 * bool exclusive;
421 * struct nlm_lock alock;
422 * };
423 */
424static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
425 struct xdr_stream *xdr,
426 const struct nlm_args *args)
427{
428 const struct nlm_lock *lock = &args->lock;
429
430 encode_cookie(xdr, &args->cookie);
431 encode_bool(xdr, args->block);
432 encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
433 encode_nlm_lock(xdr, lock);
434}
435
436/*
437 * struct nlm_unlockargs {
438 * netobj cookie;
439 * struct nlm_lock alock;
440 * };
441 */
442static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
443 struct xdr_stream *xdr,
444 const struct nlm_args *args)
445{
446 const struct nlm_lock *lock = &args->lock;
447
448 encode_cookie(xdr, &args->cookie);
449 encode_nlm_lock(xdr, lock);
450}
451
452/*
453 * struct nlm_res {
454 * netobj cookie;
455 * nlm_stat stat;
456 * };
457 */
458static void nlm_xdr_enc_res(struct rpc_rqst *req,
459 struct xdr_stream *xdr,
460 const struct nlm_res *result)
461{
462 encode_cookie(xdr, &result->cookie);
463 encode_nlm_stat(xdr, result->status);
464}
465
466/*
467 * union nlm_testrply switch (nlm_stats stat) {
468 * case LCK_DENIED:
469 * struct nlm_holder holder;
470 * default:
471 * void;
472 * };
473 *
474 * struct nlm_testres {
475 * netobj cookie;
476 * nlm_testrply test_stat;
477 * };
478 */
479static void encode_nlm_testrply(struct xdr_stream *xdr,
480 const struct nlm_res *result)
481{
482 if (result->status == nlm_lck_denied)
483 encode_nlm_holder(xdr, result);
484}
485
486static void nlm_xdr_enc_testres(struct rpc_rqst *req,
487 struct xdr_stream *xdr,
488 const struct nlm_res *result)
489{
490 encode_cookie(xdr, &result->cookie);
491 encode_nlm_stat(xdr, result->status);
492 encode_nlm_testrply(xdr, result);
493}
494
495
496/*
497 * NLMv3 XDR decode functions
498 *
499 * NLMv3 result types are defined in Chapter 10 of The Open Group's
500 * "Protocols for Interworking: XNFS, Version 3W".
501 */
502
503/*
504 * union nlm_testrply switch (nlm_stats stat) {
505 * case LCK_DENIED:
506 * struct nlm_holder holder;
507 * default:
508 * void;
509 * };
510 *
511 * struct nlm_testres {
512 * netobj cookie;
513 * nlm_testrply test_stat;
514 * };
515 */
516static int decode_nlm_testrply(struct xdr_stream *xdr,
517 struct nlm_res *result)
518{
519 int error;
520
521 error = decode_nlm_stat(xdr, &result->status);
522 if (unlikely(error))
523 goto out;
524 if (result->status == nlm_lck_denied)
525 error = decode_nlm_holder(xdr, result);
526out:
527 return error;
528}
529
530static int nlm_xdr_dec_testres(struct rpc_rqst *req,
531 struct xdr_stream *xdr,
532 struct nlm_res *result)
533{
534 int error;
535
536 error = decode_cookie(xdr, &result->cookie);
537 if (unlikely(error))
538 goto out;
539 error = decode_nlm_testrply(xdr, result);
540out:
541 return error;
542}
543
544/*
545 * struct nlm_res {
546 * netobj cookie;
547 * nlm_stat stat;
548 * };
549 */
550static int nlm_xdr_dec_res(struct rpc_rqst *req,
551 struct xdr_stream *xdr,
552 struct nlm_res *result)
553{
554 int error;
555
556 error = decode_cookie(xdr, &result->cookie);
557 if (unlikely(error))
558 goto out;
559 error = decode_nlm_stat(xdr, &result->status);
560out:
561 return error;
562}
563
564
565/*
566 * For NLM, a void procedure really returns nothing
567 */
568#define nlm_xdr_dec_norep NULL
569
570#define PROC(proc, argtype, restype) \
571[NLMPROC_##proc] = { \
572 .p_proc = NLMPROC_##proc, \
573 .p_encode = (kxdreproc_t)nlm_xdr_enc_##argtype, \
574 .p_decode = (kxdrdproc_t)nlm_xdr_dec_##restype, \
575 .p_arglen = NLM_##argtype##_sz, \
576 .p_replen = NLM_##restype##_sz, \
577 .p_statidx = NLMPROC_##proc, \
578 .p_name = #proc, \
579 }
580
581static struct rpc_procinfo nlm_procedures[] = {
582 PROC(TEST, testargs, testres),
583 PROC(LOCK, lockargs, res),
584 PROC(CANCEL, cancargs, res),
585 PROC(UNLOCK, unlockargs, res),
586 PROC(GRANTED, testargs, res),
587 PROC(TEST_MSG, testargs, norep),
588 PROC(LOCK_MSG, lockargs, norep),
589 PROC(CANCEL_MSG, cancargs, norep),
590 PROC(UNLOCK_MSG, unlockargs, norep),
591 PROC(GRANTED_MSG, testargs, norep),
592 PROC(TEST_RES, testres, norep),
593 PROC(LOCK_RES, res, norep),
594 PROC(CANCEL_RES, res, norep),
595 PROC(UNLOCK_RES, res, norep),
596 PROC(GRANTED_RES, res, norep),
597};
598
599static struct rpc_version nlm_version1 = {
600 .number = 1,
601 .nrprocs = ARRAY_SIZE(nlm_procedures),
602 .procs = nlm_procedures,
603};
604
605static struct rpc_version nlm_version3 = {
606 .number = 3,
607 .nrprocs = ARRAY_SIZE(nlm_procedures),
608 .procs = nlm_procedures,
609};
610
611static struct rpc_version *nlm_versions[] = {
612 [1] = &nlm_version1,
613 [3] = &nlm_version3,
614#ifdef CONFIG_LOCKD_V4
615 [4] = &nlm_version4,
616#endif
617};
618
619static struct rpc_stat nlm_rpc_stats;
620
621struct rpc_program nlm_program = {
622 .name = "lockd",
623 .number = NLM_PROGRAM,
624 .nrvers = ARRAY_SIZE(nlm_versions),
625 .version = nlm_versions,
626 .stats = &nlm_rpc_stats,
627};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ed0c59fe23ce..5f1bcb2f06f3 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -25,9 +25,22 @@
25#define NLM_HOST_EXPIRE (300 * HZ) 25#define NLM_HOST_EXPIRE (300 * HZ)
26#define NLM_HOST_COLLECT (120 * HZ) 26#define NLM_HOST_COLLECT (120 * HZ)
27 27
28static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; 28static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
29static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
30
31#define for_each_host(host, pos, chain, table) \
32 for ((chain) = (table); \
33 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
34 hlist_for_each_entry((host), (pos), (chain), h_hash)
35
36#define for_each_host_safe(host, pos, next, chain, table) \
37 for ((chain) = (table); \
38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
39 hlist_for_each_entry_safe((host), (pos), (next), \
40 (chain), h_hash)
41
29static unsigned long next_gc; 42static unsigned long next_gc;
30static int nrhosts; 43static unsigned long nrhosts;
31static DEFINE_MUTEX(nlm_host_mutex); 44static DEFINE_MUTEX(nlm_host_mutex);
32 45
33static void nlm_gc_hosts(void); 46static void nlm_gc_hosts(void);
@@ -40,8 +53,6 @@ struct nlm_lookup_host_info {
40 const u32 version; /* NLM version to search for */ 53 const u32 version; /* NLM version to search for */
41 const char *hostname; /* remote's hostname */ 54 const char *hostname; /* remote's hostname */
42 const size_t hostname_len; /* its length */ 55 const size_t hostname_len; /* its length */
43 const struct sockaddr *src_sap; /* our address (optional) */
44 const size_t src_len; /* its length */
45 const int noresvport; /* use non-priv port */ 56 const int noresvport; /* use non-priv port */
46}; 57};
47 58
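
/*
 * A compact user-space sketch of the chained-hash cache the lookup
 * routines below implement: hash the peer, walk one chain, and move a
 * hit to the head so recently used hosts stay cheap to find.  Types
 * and names here are illustrative, not the kernel hlist API.
 */
#include <stdio.h>
#include <string.h>

#define NRHASH 32

struct host {
        char         name[32];
        struct host *next;
};

static struct host *table[NRHASH];

static unsigned int hash_name(const char *name)
{
        unsigned int h = 0;

        while (*name)
                h = h * 31 + (unsigned char)*name++;
        return h % NRHASH;
}

static struct host *lookup(const char *name)
{
        unsigned int  b = hash_name(name);
        struct host **pp, *h;

        for (pp = &table[b]; (h = *pp) != NULL; pp = &h->next) {
                if (strcmp(h->name, name) != 0)
                        continue;
                *pp = h->next;          /* unlink the hit ... */
                h->next = table[b];     /* ... and move it to the head */
                table[b] = h;
                return h;
        }
        return NULL;
}

int main(void)
{
        static struct host a = { .name = "alpha" };

        a.next = table[hash_name("alpha")];
        table[hash_name("alpha")] = &a;
        printf("%s\n", lookup("alpha") ? "found" : "missing");
        return 0;
}
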
@@ -88,127 +99,83 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
88} 99}
89 100
90/* 101/*
91 * Common host lookup routine for server & client 102 * Allocate and initialize an nlm_host. Common to both client and server.
92 */ 103 */
93static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) 104static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
105 struct nsm_handle *nsm)
94{ 106{
95 struct hlist_head *chain; 107 struct nlm_host *host = NULL;
96 struct hlist_node *pos; 108 unsigned long now = jiffies;
97 struct nlm_host *host;
98 struct nsm_handle *nsm = NULL;
99
100 mutex_lock(&nlm_host_mutex);
101 109
102 if (time_after_eq(jiffies, next_gc)) 110 if (nsm != NULL)
103 nlm_gc_hosts();
104
105 /* We may keep several nlm_host objects for a peer, because each
106 * nlm_host is identified by
107 * (address, protocol, version, server/client)
108 * We could probably simplify this a little by putting all those
109 * different NLM rpc_clients into one single nlm_host object.
110 * This would allow us to have one nlm_host per address.
111 */
112 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
113 hlist_for_each_entry(host, pos, chain, h_hash) {
114 if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
115 continue;
116
117 /* See if we have an NSM handle for this client */
118 if (!nsm)
119 nsm = host->h_nsmhandle;
120
121 if (host->h_proto != ni->protocol)
122 continue;
123 if (host->h_version != ni->version)
124 continue;
125 if (host->h_server != ni->server)
126 continue;
127 if (ni->server && ni->src_len != 0 &&
128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue;
130
131 /* Move to head of hash chain. */
132 hlist_del(&host->h_hash);
133 hlist_add_head(&host->h_hash, chain);
134
135 nlm_get_host(host);
136 dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
137 host->h_name, host->h_addrbuf);
138 goto out;
139 }
140
141 /*
142 * The host wasn't in our hash table. If we don't
143 * have an NSM handle for it yet, create one.
144 */
145 if (nsm)
146 atomic_inc(&nsm->sm_count); 111 atomic_inc(&nsm->sm_count);
147 else { 112 else {
148 host = NULL; 113 host = NULL;
149 nsm = nsm_get_handle(ni->sap, ni->salen, 114 nsm = nsm_get_handle(ni->sap, ni->salen,
150 ni->hostname, ni->hostname_len); 115 ni->hostname, ni->hostname_len);
151 if (!nsm) { 116 if (unlikely(nsm == NULL)) {
152 dprintk("lockd: nlm_lookup_host failed; " 117 dprintk("lockd: %s failed; no nsm handle\n",
153 "no nsm handle\n"); 118 __func__);
154 goto out; 119 goto out;
155 } 120 }
156 } 121 }
157 122
158 host = kzalloc(sizeof(*host), GFP_KERNEL); 123 host = kmalloc(sizeof(*host), GFP_KERNEL);
159 if (!host) { 124 if (unlikely(host == NULL)) {
125 dprintk("lockd: %s failed; no memory\n", __func__);
160 nsm_release(nsm); 126 nsm_release(nsm);
161 dprintk("lockd: nlm_lookup_host failed; no memory\n");
162 goto out; 127 goto out;
163 } 128 }
164 host->h_name = nsm->sm_name; 129
165 host->h_addrbuf = nsm->sm_addrbuf;
166 memcpy(nlm_addr(host), ni->sap, ni->salen); 130 memcpy(nlm_addr(host), ni->sap, ni->salen);
167 host->h_addrlen = ni->salen; 131 host->h_addrlen = ni->salen;
168 rpc_set_port(nlm_addr(host), 0); 132 rpc_set_port(nlm_addr(host), 0);
169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 133 host->h_srcaddrlen = 0;
170 host->h_srcaddrlen = ni->src_len; 134
135 host->h_rpcclnt = NULL;
136 host->h_name = nsm->sm_name;
171 host->h_version = ni->version; 137 host->h_version = ni->version;
172 host->h_proto = ni->protocol; 138 host->h_proto = ni->protocol;
173 host->h_rpcclnt = NULL; 139 host->h_reclaiming = 0;
174 mutex_init(&host->h_mutex); 140 host->h_server = ni->server;
175 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 141 host->h_noresvport = ni->noresvport;
176 host->h_expires = jiffies + NLM_HOST_EXPIRE; 142 host->h_inuse = 0;
177 atomic_set(&host->h_count, 1);
178 init_waitqueue_head(&host->h_gracewait); 143 init_waitqueue_head(&host->h_gracewait);
179 init_rwsem(&host->h_rwsem); 144 init_rwsem(&host->h_rwsem);
180 host->h_state = 0; /* pseudo NSM state */ 145 host->h_state = 0;
181 host->h_nsmstate = 0; /* real NSM state */ 146 host->h_nsmstate = 0;
182 host->h_nsmhandle = nsm; 147 host->h_pidcount = 0;
183 host->h_server = ni->server; 148 atomic_set(&host->h_count, 1);
184 host->h_noresvport = ni->noresvport; 149 mutex_init(&host->h_mutex);
185 hlist_add_head(&host->h_hash, chain); 150 host->h_nextrebind = now + NLM_HOST_REBIND;
151 host->h_expires = now + NLM_HOST_EXPIRE;
186 INIT_LIST_HEAD(&host->h_lockowners); 152 INIT_LIST_HEAD(&host->h_lockowners);
187 spin_lock_init(&host->h_lock); 153 spin_lock_init(&host->h_lock);
188 INIT_LIST_HEAD(&host->h_granted); 154 INIT_LIST_HEAD(&host->h_granted);
189 INIT_LIST_HEAD(&host->h_reclaim); 155 INIT_LIST_HEAD(&host->h_reclaim);
190 156 host->h_nsmhandle = nsm;
191 nrhosts++; 157 host->h_addrbuf = nsm->sm_addrbuf;
192
193 dprintk("lockd: nlm_lookup_host created host %s\n",
194 host->h_name);
195 158
196out: 159out:
197 mutex_unlock(&nlm_host_mutex);
198 return host; 160 return host;
199} 161}
200 162
201/* 163/*
202 * Destroy a host 164 * Destroy an nlm_host and free associated resources
165 *
166 * Caller must hold nlm_host_mutex.
203 */ 167 */
204static void 168static void nlm_destroy_host_locked(struct nlm_host *host)
205nlm_destroy_host(struct nlm_host *host)
206{ 169{
207 struct rpc_clnt *clnt; 170 struct rpc_clnt *clnt;
208 171
172 dprintk("lockd: destroy host %s\n", host->h_name);
173
209 BUG_ON(!list_empty(&host->h_lockowners)); 174 BUG_ON(!list_empty(&host->h_lockowners));
210 BUG_ON(atomic_read(&host->h_count)); 175 BUG_ON(atomic_read(&host->h_count));
211 176
177 hlist_del_init(&host->h_hash);
178
212 nsm_unmonitor(host); 179 nsm_unmonitor(host);
213 nsm_release(host->h_nsmhandle); 180 nsm_release(host->h_nsmhandle);
214 181
@@ -216,6 +183,8 @@ nlm_destroy_host(struct nlm_host *host)
216 if (clnt != NULL) 183 if (clnt != NULL)
217 rpc_shutdown_client(clnt); 184 rpc_shutdown_client(clnt);
218 kfree(host); 185 kfree(host);
186
187 nrhosts--;
219} 188}
220 189
221/** 190/**
@@ -249,12 +218,76 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
249 .hostname_len = strlen(hostname), 218 .hostname_len = strlen(hostname),
250 .noresvport = noresvport, 219 .noresvport = noresvport,
251 }; 220 };
221 struct hlist_head *chain;
222 struct hlist_node *pos;
223 struct nlm_host *host;
224 struct nsm_handle *nsm = NULL;
252 225
253 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, 226 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
254 (hostname ? hostname : "<none>"), version, 227 (hostname ? hostname : "<none>"), version,
255 (protocol == IPPROTO_UDP ? "udp" : "tcp")); 228 (protocol == IPPROTO_UDP ? "udp" : "tcp"));
256 229
257 return nlm_lookup_host(&ni); 230 mutex_lock(&nlm_host_mutex);
231
232 chain = &nlm_client_hosts[nlm_hash_address(sap)];
233 hlist_for_each_entry(host, pos, chain, h_hash) {
234 if (!rpc_cmp_addr(nlm_addr(host), sap))
235 continue;
236
237 /* Same address. Share an NSM handle if we already have one */
238 if (nsm == NULL)
239 nsm = host->h_nsmhandle;
240
241 if (host->h_proto != protocol)
242 continue;
243 if (host->h_version != version)
244 continue;
245
246 nlm_get_host(host);
247 dprintk("lockd: %s found host %s (%s)\n", __func__,
248 host->h_name, host->h_addrbuf);
249 goto out;
250 }
251
252 host = nlm_alloc_host(&ni, nsm);
253 if (unlikely(host == NULL))
254 goto out;
255
256 hlist_add_head(&host->h_hash, chain);
257 nrhosts++;
258
259 dprintk("lockd: %s created host %s (%s)\n", __func__,
260 host->h_name, host->h_addrbuf);
261
262out:
263 mutex_unlock(&nlm_host_mutex);
264 return host;
265}
266
267/**
268 * nlmclnt_release_host - release client nlm_host
269 * @host: nlm_host to release
270 *
271 */
272void nlmclnt_release_host(struct nlm_host *host)
273{
274 if (host == NULL)
275 return;
276
277 dprintk("lockd: release client host %s\n", host->h_name);
278
279 BUG_ON(atomic_read(&host->h_count) < 0);
280 BUG_ON(host->h_server);
281
282 if (atomic_dec_and_test(&host->h_count)) {
283 BUG_ON(!list_empty(&host->h_lockowners));
284 BUG_ON(!list_empty(&host->h_granted));
285 BUG_ON(!list_empty(&host->h_reclaim));
286
287 mutex_lock(&nlm_host_mutex);
288 nlm_destroy_host_locked(host);
289 mutex_unlock(&nlm_host_mutex);
290 }
258} 291}
259 292
260/** 293/**
@@ -279,12 +312,18 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
279 const char *hostname, 312 const char *hostname,
280 const size_t hostname_len) 313 const size_t hostname_len)
281{ 314{
315 struct hlist_head *chain;
316 struct hlist_node *pos;
317 struct nlm_host *host = NULL;
318 struct nsm_handle *nsm = NULL;
282 struct sockaddr_in sin = { 319 struct sockaddr_in sin = {
283 .sin_family = AF_INET, 320 .sin_family = AF_INET,
284 }; 321 };
285 struct sockaddr_in6 sin6 = { 322 struct sockaddr_in6 sin6 = {
286 .sin6_family = AF_INET6, 323 .sin6_family = AF_INET6,
287 }; 324 };
325 struct sockaddr *src_sap;
326 size_t src_len = rqstp->rq_addrlen;
288 struct nlm_lookup_host_info ni = { 327 struct nlm_lookup_host_info ni = {
289 .server = 1, 328 .server = 1,
290 .sap = svc_addr(rqstp), 329 .sap = svc_addr(rqstp),
@@ -293,27 +332,91 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
293 .version = rqstp->rq_vers, 332 .version = rqstp->rq_vers,
294 .hostname = hostname, 333 .hostname = hostname,
295 .hostname_len = hostname_len, 334 .hostname_len = hostname_len,
296 .src_len = rqstp->rq_addrlen,
297 }; 335 };
298 336
299 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, 337 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
300 (int)hostname_len, hostname, rqstp->rq_vers, 338 (int)hostname_len, hostname, rqstp->rq_vers,
301 (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); 339 (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
302 340
341 mutex_lock(&nlm_host_mutex);
342
303 switch (ni.sap->sa_family) { 343 switch (ni.sap->sa_family) {
304 case AF_INET: 344 case AF_INET:
305 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; 345 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
306 ni.src_sap = (struct sockaddr *)&sin; 346 src_sap = (struct sockaddr *)&sin;
307 break; 347 break;
308 case AF_INET6: 348 case AF_INET6:
309 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); 349 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
310 ni.src_sap = (struct sockaddr *)&sin6; 350 src_sap = (struct sockaddr *)&sin6;
311 break; 351 break;
312 default: 352 default:
313 return NULL; 353 dprintk("lockd: %s failed; unrecognized address family\n",
354 __func__);
355 goto out;
356 }
357
358 if (time_after_eq(jiffies, next_gc))
359 nlm_gc_hosts();
360
361 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
362 hlist_for_each_entry(host, pos, chain, h_hash) {
363 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
364 continue;
365
366 /* Same address. Share an NSM handle if we already have one */
367 if (nsm == NULL)
368 nsm = host->h_nsmhandle;
369
370 if (host->h_proto != ni.protocol)
371 continue;
372 if (host->h_version != ni.version)
373 continue;
374 if (!rpc_cmp_addr(nlm_srcaddr(host), src_sap))
375 continue;
376
377 /* Move to head of hash chain. */
378 hlist_del(&host->h_hash);
379 hlist_add_head(&host->h_hash, chain);
380
381 nlm_get_host(host);
382 dprintk("lockd: %s found host %s (%s)\n",
383 __func__, host->h_name, host->h_addrbuf);
384 goto out;
314 } 385 }
315 386
316 return nlm_lookup_host(&ni); 387 host = nlm_alloc_host(&ni, nsm);
388 if (unlikely(host == NULL))
389 goto out;
390
391 memcpy(nlm_srcaddr(host), src_sap, src_len);
392 host->h_srcaddrlen = src_len;
393 hlist_add_head(&host->h_hash, chain);
394 nrhosts++;
395
396 dprintk("lockd: %s created host %s (%s)\n",
397 __func__, host->h_name, host->h_addrbuf);
398
399out:
400 mutex_unlock(&nlm_host_mutex);
401 return host;
402}
403
404/**
405 * nlmsvc_release_host - release server nlm_host
406 * @host: nlm_host to release
407 *
408 * Host is destroyed later in nlm_gc_hosts().
409 */
410void nlmsvc_release_host(struct nlm_host *host)
411{
412 if (host == NULL)
413 return;
414
415 dprintk("lockd: release server host %s\n", host->h_name);
416
417 BUG_ON(atomic_read(&host->h_count) < 0);
418 BUG_ON(!host->h_server);
419 atomic_dec(&host->h_count);
317} 420}
318 421
319/* 422/*
@@ -413,20 +516,28 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
413 return host; 516 return host;
414} 517}
415 518
416/* 519static struct nlm_host *next_host_state(struct hlist_head *cache,
417 * Release NLM host after use 520 struct nsm_handle *nsm,
418 */ 521 const struct nlm_reboot *info)
419void nlm_release_host(struct nlm_host *host)
420{ 522{
421 if (host != NULL) { 523 struct nlm_host *host = NULL;
422 dprintk("lockd: release host %s\n", host->h_name); 524 struct hlist_head *chain;
423 BUG_ON(atomic_read(&host->h_count) < 0); 525 struct hlist_node *pos;
424 if (atomic_dec_and_test(&host->h_count)) { 526
425 BUG_ON(!list_empty(&host->h_lockowners)); 527 mutex_lock(&nlm_host_mutex);
426 BUG_ON(!list_empty(&host->h_granted)); 528 for_each_host(host, pos, chain, cache) {
427 BUG_ON(!list_empty(&host->h_reclaim)); 529 if (host->h_nsmhandle == nsm
530 && host->h_nsmstate != info->state) {
531 host->h_nsmstate = info->state;
532 host->h_state++;
533
534 nlm_get_host(host);
535 goto out;
428 } 536 }
429 } 537 }
538out:
539 mutex_unlock(&nlm_host_mutex);
540 return host;
430} 541}
431 542
432/** 543/**
@@ -438,8 +549,6 @@ void nlm_release_host(struct nlm_host *host)
438 */ 549 */
439void nlm_host_rebooted(const struct nlm_reboot *info) 550void nlm_host_rebooted(const struct nlm_reboot *info)
440{ 551{
441 struct hlist_head *chain;
442 struct hlist_node *pos;
443 struct nsm_handle *nsm; 552 struct nsm_handle *nsm;
444 struct nlm_host *host; 553 struct nlm_host *host;
445 554
@@ -452,32 +561,15 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
452 * lock for this. 561 * lock for this.
453 * To avoid processing a host several times, we match the nsmstate. 562 * To avoid processing a host several times, we match the nsmstate.
454 */ 563 */
455again: mutex_lock(&nlm_host_mutex); 564 while ((host = next_host_state(nlm_server_hosts, nsm, info)) != NULL) {
456 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 565 nlmsvc_free_host_resources(host);
457 hlist_for_each_entry(host, pos, chain, h_hash) { 566 nlmsvc_release_host(host);
458 if (host->h_nsmhandle == nsm
459 && host->h_nsmstate != info->state) {
460 host->h_nsmstate = info->state;
461 host->h_state++;
462
463 nlm_get_host(host);
464 mutex_unlock(&nlm_host_mutex);
465
466 if (host->h_server) {
467 /* We're server for this guy, just ditch
468 * all the locks he held. */
469 nlmsvc_free_host_resources(host);
470 } else {
471 /* He's the server, initiate lock recovery. */
472 nlmclnt_recovery(host);
473 }
474
475 nlm_release_host(host);
476 goto again;
477 }
478 }
479 } 567 }
480 mutex_unlock(&nlm_host_mutex); 568 while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
569 nlmclnt_recovery(host);
570 nlmclnt_release_host(host);
571 }
572
481 nsm_release(nsm); 573 nsm_release(nsm);
482} 574}
483 575
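
/*
 * A toy, single-threaded reduction of the pattern next_host_state()
 * enables above: each scan finds one host that has not yet seen the
 * new NSM state, updates it (so it cannot match again), and hands it
 * back; the caller then does the slow work with no lock held and
 * simply loops until the scan comes back empty.  Names and types are
 * illustrative only.
 */
#include <stddef.h>
#include <stdio.h>

struct host {
        const char *name;
        int         nsmstate;
};

static struct host hosts[] = {
        { "alpha", 1 }, { "beta", 1 }, { "gamma", 2 },
};

static struct host *next_host_state(int new_state)
{
        for (size_t i = 0; i < sizeof(hosts) / sizeof(hosts[0]); i++) {
                if (hosts[i].nsmstate != new_state) {
                        hosts[i].nsmstate = new_state;  /* won't match again */
                        return &hosts[i];
                }
        }
        return NULL;
}

int main(void)
{
        struct host *h;

        while ((h = next_host_state(2)) != NULL)
                printf("processing reboot of %s\n", h->name);  /* alpha, beta */
        return 0;
}
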
@@ -497,13 +589,11 @@ nlm_shutdown_hosts(void)
497 589
498 /* First, make all hosts eligible for gc */ 590 /* First, make all hosts eligible for gc */
499 dprintk("lockd: nuking all hosts...\n"); 591 dprintk("lockd: nuking all hosts...\n");
500 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 592 for_each_host(host, pos, chain, nlm_server_hosts) {
501 hlist_for_each_entry(host, pos, chain, h_hash) { 593 host->h_expires = jiffies - 1;
502 host->h_expires = jiffies - 1; 594 if (host->h_rpcclnt) {
503 if (host->h_rpcclnt) { 595 rpc_shutdown_client(host->h_rpcclnt);
504 rpc_shutdown_client(host->h_rpcclnt); 596 host->h_rpcclnt = NULL;
505 host->h_rpcclnt = NULL;
506 }
507 } 597 }
508 } 598 }
509 599
@@ -512,15 +602,13 @@ nlm_shutdown_hosts(void)
512 mutex_unlock(&nlm_host_mutex); 602 mutex_unlock(&nlm_host_mutex);
513 603
514 /* complain if any hosts are left */ 604 /* complain if any hosts are left */
515 if (nrhosts) { 605 if (nrhosts != 0) {
516 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); 606 printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
517 dprintk("lockd: %d hosts left:\n", nrhosts); 607 dprintk("lockd: %lu hosts left:\n", nrhosts);
518 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 608 for_each_host(host, pos, chain, nlm_server_hosts) {
519 hlist_for_each_entry(host, pos, chain, h_hash) { 609 dprintk(" %s (cnt %d use %d exp %ld)\n",
520 dprintk(" %s (cnt %d use %d exp %ld)\n", 610 host->h_name, atomic_read(&host->h_count),
521 host->h_name, atomic_read(&host->h_count), 611 host->h_inuse, host->h_expires);
522 host->h_inuse, host->h_expires);
523 }
524 } 612 }
525 } 613 }
526} 614}
@@ -538,29 +626,22 @@ nlm_gc_hosts(void)
538 struct nlm_host *host; 626 struct nlm_host *host;
539 627
540 dprintk("lockd: host garbage collection\n"); 628 dprintk("lockd: host garbage collection\n");
541 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 629 for_each_host(host, pos, chain, nlm_server_hosts)
542 hlist_for_each_entry(host, pos, chain, h_hash) 630 host->h_inuse = 0;
543 host->h_inuse = 0;
544 }
545 631
546 /* Mark all hosts that hold locks, blocks or shares */ 632 /* Mark all hosts that hold locks, blocks or shares */
547 nlmsvc_mark_resources(); 633 nlmsvc_mark_resources();
548 634
549 for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { 635 for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
550 hlist_for_each_entry_safe(host, pos, next, chain, h_hash) { 636 if (atomic_read(&host->h_count) || host->h_inuse
551 if (atomic_read(&host->h_count) || host->h_inuse 637 || time_before(jiffies, host->h_expires)) {
552 || time_before(jiffies, host->h_expires)) { 638 dprintk("nlm_gc_hosts skipping %s "
553 dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", 639 "(cnt %d use %d exp %ld)\n",
554 host->h_name, atomic_read(&host->h_count), 640 host->h_name, atomic_read(&host->h_count),
555 host->h_inuse, host->h_expires); 641 host->h_inuse, host->h_expires);
556 continue; 642 continue;
557 }
558 dprintk("lockd: delete host %s\n", host->h_name);
559 hlist_del_init(&host->h_hash);
560
561 nlm_destroy_host(host);
562 nrhosts--;
563 } 643 }
644 nlm_destroy_host_locked(host);
564 } 645 }
565 646
566 next_gc = jiffies + NLM_HOST_COLLECT; 647 next_gc = jiffies + NLM_HOST_COLLECT;
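
/*
 * The garbage collector above is a small mark-and-sweep: clear every
 * in-use mark, let the lock subsystem re-mark live hosts, then free
 * whatever is left unmarked, unreferenced, and expired.  A toy
 * reduction (illustrative names; no locking or expiry times):
 */
#include <stddef.h>
#include <stdio.h>

struct host {
        const char *name;
        int         refcount;
        int         inuse;
};

static struct host hosts[] = {
        { "alpha", 0, 0 }, { "beta", 1, 0 }, { "gamma", 0, 0 },
};

#define NHOSTS (sizeof(hosts) / sizeof(hosts[0]))

static void mark_resources(void)
{
        hosts[2].inuse = 1;     /* stands in for nlmsvc_mark_resources() */
}

int main(void)
{
        size_t i;

        for (i = 0; i < NHOSTS; i++)
                hosts[i].inuse = 0;             /* clear all marks */
        mark_resources();                       /* re-mark live hosts */
        for (i = 0; i < NHOSTS; i++) {
                if (hosts[i].refcount || hosts[i].inuse)
                        continue;               /* referenced or still live */
                printf("destroying %s\n", hosts[i].name);
        }
        return 0;
}
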
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e0c918949644..23d7451b2938 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -401,26 +401,22 @@ void nsm_release(struct nsm_handle *nsm)
401 * Status Monitor wire protocol. 401 * Status Monitor wire protocol.
402 */ 402 */
403 403
404static int encode_nsm_string(struct xdr_stream *xdr, const char *string) 404static void encode_nsm_string(struct xdr_stream *xdr, const char *string)
405{ 405{
406 const u32 len = strlen(string); 406 const u32 len = strlen(string);
407 __be32 *p; 407 __be32 *p;
408 408
409 if (unlikely(len > SM_MAXSTRLEN)) 409 BUG_ON(len > SM_MAXSTRLEN);
410 return -EIO; 410 p = xdr_reserve_space(xdr, 4 + len);
411 p = xdr_reserve_space(xdr, sizeof(u32) + len);
412 if (unlikely(p == NULL))
413 return -EIO;
414 xdr_encode_opaque(p, string, len); 411 xdr_encode_opaque(p, string, len);
415 return 0;
416} 412}
417 413
418/* 414/*
419 * "mon_name" specifies the host to be monitored. 415 * "mon_name" specifies the host to be monitored.
420 */ 416 */
421static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp) 417static void encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
422{ 418{
423 return encode_nsm_string(xdr, argp->mon_name); 419 encode_nsm_string(xdr, argp->mon_name);
424} 420}
425 421
426/* 422/*
@@ -429,35 +425,25 @@ static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
429 * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name" 425 * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
430 * has changed. 426 * has changed.
431 */ 427 */
432static int encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) 428static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
433{ 429{
434 int status;
435 __be32 *p; 430 __be32 *p;
436 431
437 status = encode_nsm_string(xdr, utsname()->nodename); 432 encode_nsm_string(xdr, utsname()->nodename);
438 if (unlikely(status != 0)) 433 p = xdr_reserve_space(xdr, 4 + 4 + 4);
439 return status; 434 *p++ = cpu_to_be32(argp->prog);
440 p = xdr_reserve_space(xdr, 3 * sizeof(u32)); 435 *p++ = cpu_to_be32(argp->vers);
441 if (unlikely(p == NULL)) 436 *p = cpu_to_be32(argp->proc);
442 return -EIO;
443 *p++ = htonl(argp->prog);
444 *p++ = htonl(argp->vers);
445 *p++ = htonl(argp->proc);
446 return 0;
447} 437}
448 438
449/* 439/*
450 * The "mon_id" argument specifies the non-private arguments 440 * The "mon_id" argument specifies the non-private arguments
451 * of an NSMPROC_MON or NSMPROC_UNMON call. 441 * of an NSMPROC_MON or NSMPROC_UNMON call.
452 */ 442 */
453static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp) 443static void encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
454{ 444{
455 int status; 445 encode_mon_name(xdr, argp);
456 446 encode_my_id(xdr, argp);
457 status = encode_mon_name(xdr, argp);
458 if (unlikely(status != 0))
459 return status;
460 return encode_my_id(xdr, argp);
461} 447}
462 448
463/* 449/*
@@ -465,68 +451,56 @@ static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
465 * by the NSMPROC_MON call. This information will be supplied in the 451 * by the NSMPROC_MON call. This information will be supplied in the
466 * NLMPROC_SM_NOTIFY call. 452 * NLMPROC_SM_NOTIFY call.
467 */ 453 */
468static int encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp) 454static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
469{ 455{
470 __be32 *p; 456 __be32 *p;
471 457
472 p = xdr_reserve_space(xdr, SM_PRIV_SIZE); 458 p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
473 if (unlikely(p == NULL))
474 return -EIO;
475 xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE); 459 xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
476 return 0;
477} 460}
478 461
479static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p, 462static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
480 const struct nsm_args *argp) 463 const struct nsm_args *argp)
481{ 464{
482 struct xdr_stream xdr; 465 encode_mon_id(xdr, argp);
483 int status; 466 encode_priv(xdr, argp);
484
485 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
486 status = encode_mon_id(&xdr, argp);
487 if (unlikely(status))
488 return status;
489 return encode_priv(&xdr, argp);
490} 467}
491 468
492static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p, 469static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
493 const struct nsm_args *argp) 470 const struct nsm_args *argp)
494{ 471{
495 struct xdr_stream xdr; 472 encode_mon_id(xdr, argp);
496
497 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
498 return encode_mon_id(&xdr, argp);
499} 473}
500 474
501static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p, 475static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
502 struct nsm_res *resp) 476 struct xdr_stream *xdr,
477 struct nsm_res *resp)
503{ 478{
504 struct xdr_stream xdr; 479 __be32 *p;
505 480
506 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 481 p = xdr_inline_decode(xdr, 4 + 4);
507 p = xdr_inline_decode(&xdr, 2 * sizeof(u32));
508 if (unlikely(p == NULL)) 482 if (unlikely(p == NULL))
509 return -EIO; 483 return -EIO;
510 resp->status = ntohl(*p++); 484 resp->status = be32_to_cpup(p++);
511 resp->state = ntohl(*p); 485 resp->state = be32_to_cpup(p);
512 486
513 dprintk("lockd: xdr_dec_stat_res status %d state %d\n", 487 dprintk("lockd: %s status %d state %d\n",
514 resp->status, resp->state); 488 __func__, resp->status, resp->state);
515 return 0; 489 return 0;
516} 490}
517 491
518static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p, 492static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
519 struct nsm_res *resp) 493 struct xdr_stream *xdr,
494 struct nsm_res *resp)
520{ 495{
521 struct xdr_stream xdr; 496 __be32 *p;
522 497
523 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 498 p = xdr_inline_decode(xdr, 4);
524 p = xdr_inline_decode(&xdr, sizeof(u32));
525 if (unlikely(p == NULL)) 499 if (unlikely(p == NULL))
526 return -EIO; 500 return -EIO;
527 resp->state = ntohl(*p); 501 resp->state = be32_to_cpup(p);
528 502
529 dprintk("lockd: xdr_dec_stat state %d\n", resp->state); 503 dprintk("lockd: %s state %d\n", __func__, resp->state);
530 return 0; 504 return 0;
531} 505}
532 506
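
/*
 * The encoders above all reduce to the XDR opaque/string layout:
 * reserve a length word plus payload, write a big-endian length, copy
 * the bytes, and zero the pad out to the next 4-byte boundary.  A
 * self-contained user-space sketch of that encode step (names are
 * illustrative only):
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t *encode_opaque(uint8_t *p, const void *data, uint32_t len)
{
        uint32_t padded = (len + 3) & ~3u;

        p[0] = (uint8_t)(len >> 24);
        p[1] = (uint8_t)(len >> 16);
        p[2] = (uint8_t)(len >> 8);
        p[3] = (uint8_t)len;
        p += 4;
        memcpy(p, data, len);
        memset(p + len, 0, padded - len);       /* XDR zero padding */
        return p + padded;
}

int main(void)
{
        uint8_t  buf[16];
        uint8_t *end = encode_opaque(buf, "abc", 3);

        printf("encoded %d bytes\n", (int)(end - buf)); /* 8: 4 + 3 + 1 pad */
        return 0;
}
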
@@ -542,8 +516,8 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
542static struct rpc_procinfo nsm_procedures[] = { 516static struct rpc_procinfo nsm_procedures[] = {
543[NSMPROC_MON] = { 517[NSMPROC_MON] = {
544 .p_proc = NSMPROC_MON, 518 .p_proc = NSMPROC_MON,
545 .p_encode = (kxdrproc_t)xdr_enc_mon, 519 .p_encode = (kxdreproc_t)nsm_xdr_enc_mon,
546 .p_decode = (kxdrproc_t)xdr_dec_stat_res, 520 .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat_res,
547 .p_arglen = SM_mon_sz, 521 .p_arglen = SM_mon_sz,
548 .p_replen = SM_monres_sz, 522 .p_replen = SM_monres_sz,
549 .p_statidx = NSMPROC_MON, 523 .p_statidx = NSMPROC_MON,
@@ -551,8 +525,8 @@ static struct rpc_procinfo nsm_procedures[] = {
551 }, 525 },
552[NSMPROC_UNMON] = { 526[NSMPROC_UNMON] = {
553 .p_proc = NSMPROC_UNMON, 527 .p_proc = NSMPROC_UNMON,
554 .p_encode = (kxdrproc_t)xdr_enc_unmon, 528 .p_encode = (kxdreproc_t)nsm_xdr_enc_unmon,
555 .p_decode = (kxdrproc_t)xdr_dec_stat, 529 .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat,
556 .p_arglen = SM_mon_id_sz, 530 .p_arglen = SM_mon_id_sz,
557 .p_replen = SM_unmonres_sz, 531 .p_replen = SM_unmonres_sz,
558 .p_statidx = NSMPROC_UNMON, 532 .p_statidx = NSMPROC_UNMON,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 38d261192453..9a41fdc19511 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -51,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
51 return 0; 51 return 0;
52 52
53no_locks: 53no_locks:
54 nlm_release_host(host); 54 nlmsvc_release_host(host);
55 if (error) 55 if (error)
56 return error; 56 return error;
57 return nlm_lck_denied_nolocks; 57 return nlm_lck_denied_nolocks;
@@ -92,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
92 else 92 else
93 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); 93 dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
94 94
95 nlm_release_host(host); 95 nlmsvc_release_host(host);
96 nlm_release_file(file); 96 nlm_release_file(file);
97 return rc; 97 return rc;
98} 98}
@@ -134,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
134 else 134 else
135 dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); 135 dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
136 136
137 nlm_release_host(host); 137 nlmsvc_release_host(host);
138 nlm_release_file(file); 138 nlm_release_file(file);
139 return rc; 139 return rc;
140} 140}
@@ -164,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
164 resp->status = nlmsvc_cancel_blocked(file, &argp->lock); 164 resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
165 165
166 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); 166 dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
167 nlm_release_host(host); 167 nlmsvc_release_host(host);
168 nlm_release_file(file); 168 nlm_release_file(file);
169 return rpc_success; 169 return rpc_success;
170} 170}
@@ -197,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
197 resp->status = nlmsvc_unlock(file, &argp->lock); 197 resp->status = nlmsvc_unlock(file, &argp->lock);
198 198
199 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); 199 dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
200 nlm_release_host(host); 200 nlmsvc_release_host(host);
201 nlm_release_file(file); 201 nlm_release_file(file);
202 return rpc_success; 202 return rpc_success;
203} 203}
@@ -229,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
229 229
230static void nlm4svc_callback_release(void *data) 230static void nlm4svc_callback_release(void *data)
231{ 231{
232 nlm_release_call(data); 232 nlmsvc_release_call(data);
233} 233}
234 234
235static const struct rpc_call_ops nlm4svc_callback_ops = { 235static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -261,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
261 261
262 stat = func(rqstp, argp, &call->a_res); 262 stat = func(rqstp, argp, &call->a_res);
263 if (stat != 0) { 263 if (stat != 0) {
264 nlm_release_call(call); 264 nlmsvc_release_call(call);
265 return stat; 265 return stat;
266 } 266 }
267 267
@@ -334,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
334 resp->status = nlmsvc_share_file(host, file, argp); 334 resp->status = nlmsvc_share_file(host, file, argp);
335 335
336 dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); 336 dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
337 nlm_release_host(host); 337 nlmsvc_release_host(host);
338 nlm_release_file(file); 338 nlm_release_file(file);
339 return rpc_success; 339 return rpc_success;
340} 340}
@@ -367,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
367 resp->status = nlmsvc_unshare_file(host, file, argp); 367 resp->status = nlmsvc_unshare_file(host, file, argp);
368 368
369 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); 369 dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
370 nlm_release_host(host); 370 nlmsvc_release_host(host);
371 nlm_release_file(file); 371 nlm_release_file(file);
372 return rpc_success; 372 return rpc_success;
373} 373}
@@ -399,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
399 return rpc_success; 399 return rpc_success;
400 400
401 nlmsvc_free_host_resources(host); 401 nlmsvc_free_host_resources(host);
402 nlm_release_host(host); 402 nlmsvc_release_host(host);
403 return rpc_success; 403 return rpc_success;
404} 404}
405 405
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ef5659b211e9..6e31695d046f 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,6 +46,7 @@ static void nlmsvc_remove_block(struct nlm_block *block);
 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
 
 /*
  * The list of blocked locks to retry
@@ -233,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 failed_free:
 	kfree(block);
 failed:
-	nlm_release_call(call);
+	nlmsvc_release_call(call);
 	return NULL;
 }
 
@@ -266,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
 	mutex_unlock(&file->f_mutex);
 
 	nlmsvc_freegrantargs(block->b_call);
-	nlm_release_call(block->b_call);
+	nlmsvc_release_call(block->b_call);
 	nlm_release_file(block->b_file);
 	kfree(block->b_fl);
 	kfree(block);
@@ -934,3 +935,32 @@ nlmsvc_retry_blocked(void)
 
 	return timeout;
 }
+
+#ifdef RPC_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+	/*
+	 * We can get away with a static buffer because we're only
+	 * called with BKL held.
+	 */
+	static char buf[2*NLM_MAXCOOKIELEN+1];
+	unsigned int i, len = sizeof(buf);
+	char *p = buf;
+
+	len--;	/* allow for trailing \0 */
+	if (len < 3)
+		return "???";
+	for (i = 0 ; i < cookie->len ; i++) {
+		if (len < 2) {
+			strcpy(p-3, "...");
+			break;
+		}
+		sprintf(p, "%02x", cookie->data[i]);
+		p += 2;
+		len -= 2;
+	}
+	*p = '\0';
+
+	return buf;
+}
+#endif
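
For reference, the nlmdbg_cookie2a() helper moved above renders an opaque NLM cookie as a hex string for dprintk(), truncating with "..." once the output would overflow its buffer. A stand-alone user-space sketch of the same formatting logic (NLM_MAXCOOKIELEN is assumed to be 32, matching the kernel's definition in include/linux/lockd/xdr.h):

	#include <stdio.h>
	#include <string.h>

	#define NLM_MAXCOOKIELEN 32	/* assumed; mirrors the kernel header */

	struct nlm_cookie {
		unsigned char data[NLM_MAXCOOKIELEN];
		unsigned int len;
	};

	/* Render a cookie as hex; truncate with "..." if it cannot fit. */
	static const char *cookie2a(const struct nlm_cookie *cookie)
	{
		static char buf[2 * NLM_MAXCOOKIELEN + 1];
		unsigned int i, len = sizeof(buf) - 1;	/* reserve the '\0' */
		char *p = buf;

		for (i = 0; i < cookie->len; i++) {
			if (len < 2) {			/* no room for another byte */
				strcpy(p - 3, "...");
				break;
			}
			sprintf(p, "%02x", cookie->data[i]);
			p += 2;
			len -= 2;
		}
		*p = '\0';
		return buf;
	}

	int main(void)
	{
		struct nlm_cookie c = { .data = { 0xde, 0xad, 0xbe, 0xef }, .len = 4 };
		printf("%s\n", cookie2a(&c));	/* prints "deadbeef" */
		return 0;
	}

The static buffer is only safe because, as the comment in the patch notes, callers are serialized; the sketch inherits that assumption.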
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0caea5310ac3..d27aab11f324 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -80,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 
 no_locks:
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	if (error)
 		return error;
 	return nlm_lck_denied_nolocks;
@@ -122,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 	dprintk("lockd: TEST status %d vers %d\n",
 		ntohl(resp->status), rqstp->rq_vers);
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -164,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 		dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -194,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
 
 	dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -227,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
 
 	dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -257,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 			-task->tk_status);
 }
 
+void nlmsvc_release_call(struct nlm_rqst *call)
+{
+	if (!atomic_dec_and_test(&call->a_count))
+		return;
+	nlmsvc_release_host(call->a_host);
+	kfree(call);
+}
+
 static void nlmsvc_callback_release(void *data)
 {
-	nlm_release_call(data);
+	nlmsvc_release_call(data);
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -291,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 
 	stat = func(rqstp, argp, &call->a_res);
 	if (stat != 0) {
-		nlm_release_call(call);
+		nlmsvc_release_call(call);
 		return stat;
 	}
 
@@ -366,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
 
 	dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -399,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
 
 	dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -431,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return rpc_success;
 
 	nlmsvc_free_host_resources(host);
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	return rpc_success;
 }
 
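
The nlmsvc_release_call() added above is the usual atomic reference-count release idiom: only the caller that drops the final reference frees the request and releases the host it pins. A minimal sketch of the pattern in portable C11 (hypothetical names; the kernel path uses atomic_dec_and_test() on call->a_count):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct call {
		atomic_int refcount;	/* plays the role of call->a_count */
		void *host;		/* resource pinned for the call's lifetime */
	};

	static void release_host(void *host)
	{
		(void)host;		/* drop the host reference here */
	}

	/* Drop one reference; whoever drops the last one frees everything. */
	static void call_put(struct call *c)
	{
		/* atomic_fetch_sub returns the old value: old == 1 means last ref */
		if (atomic_fetch_sub(&c->refcount, 1) != 1)
			return;
		release_host(c->host);
		free(c);
	}

The point of splitting nlm_release_call() into client and server variants is visible here: the server variant must release its host through nlmsvc_release_host(), which this patch pairs with the server-side host reference counting.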
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b583ab0a4cbb..964666c68a86 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -149,37 +149,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
 }
 
 /*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
-	struct file_lock	*fl = &lock->fl;
-	__s32			start, len;
-
-	if (!(p = xdr_encode_string(p, lock->caller))
-	 || !(p = nlm_encode_fh(p, &lock->fh))
-	 || !(p = nlm_encode_oh(p, &lock->oh)))
-		return NULL;
-
-	if (fl->fl_start > NLM_OFFSET_MAX
-	 || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
-		return NULL;
-
-	start = loff_t_to_s32(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
-	else
-		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
-
-	*p++ = htonl(lock->svid);
-	*p++ = htonl(start);
-	*p++ = htonl(len);
-
-	return p;
-}
-
-/*
  * Encode result of a TEST/TEST_MSG call
  */
 static __be32 *
@@ -372,259 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
-{
-	return 0;
-}
-#endif
-
-static int
-nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	if (resp->status == nlm_lck_denied) {
-		struct file_lock	*fl = &resp->lock.fl;
-		u32			excl;
-		s32			start, len, end;
-
-		memset(&resp->lock, 0, sizeof(resp->lock));
-		locks_init_lock(fl);
-		excl = ntohl(*p++);
-		resp->lock.svid = ntohl(*p++);
-		fl->fl_pid = (pid_t)resp->lock.svid;
-		if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
-			return -EIO;
-
-		fl->fl_flags = FL_POSIX;
-		fl->fl_type  = excl? F_WRLCK : F_RDLCK;
-		start = ntohl(*p++);
-		len = ntohl(*p++);
-		end = start + len - 1;
-
-		fl->fl_start = s32_to_loff_t(start);
-		if (len == 0 || end < 0)
-			fl->fl_end = OFFSET_MAX;
-		else
-			fl->fl_end = s32_to_loff_t(end);
-	}
-	return 0;
-}
-
-
-static int
-nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	*p++ = argp->reclaim? xdr_one : xdr_zero;
-	*p++ = htonl(argp->state);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
-		return -EIO;
-	*p++ = resp->status;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_encode_testres(p, resp)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM_void_sz		0
-#define NLM_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_owner_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_fhandle_sz		1+XDR_QUADLEN(NFS2_FHSIZE)
-#define NLM_lock_sz		3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
-#define NLM_holder_sz		4+NLM_owner_sz
-
-#define NLM_testargs_sz		NLM_cookie_sz+1+NLM_lock_sz
-#define NLM_lockargs_sz		NLM_cookie_sz+4+NLM_lock_sz
-#define NLM_cancargs_sz		NLM_cookie_sz+2+NLM_lock_sz
-#define NLM_unlockargs_sz	NLM_cookie_sz+NLM_lock_sz
-
-#define NLM_testres_sz		NLM_cookie_sz+1+NLM_holder_sz
-#define NLM_res_sz		NLM_cookie_sz+1
-#define NLM_norep_sz		0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlmclt_decode_norep	NULL
-
-#define PROC(proc, argtype, restype)				\
-[NLMPROC_##proc] = {						\
-	.p_proc      = NLMPROC_##proc,				\
-	.p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,	\
-	.p_decode    = (kxdrproc_t) nlmclt_decode_##restype,	\
-	.p_arglen    = NLM_##argtype##_sz,			\
-	.p_replen    = NLM_##restype##_sz,			\
-	.p_statidx   = NLMPROC_##proc,				\
-	.p_name      = #proc,					\
-	}
-
-static struct rpc_procinfo	nlm_procedures[] = {
-	PROC(TEST,		testargs,	testres),
-	PROC(LOCK,		lockargs,	res),
-	PROC(CANCEL,		cancargs,	res),
-	PROC(UNLOCK,		unlockargs,	res),
-	PROC(GRANTED,		testargs,	res),
-	PROC(TEST_MSG,		testargs,	norep),
-	PROC(LOCK_MSG,		lockargs,	norep),
-	PROC(CANCEL_MSG,	cancargs,	norep),
-	PROC(UNLOCK_MSG,	unlockargs,	norep),
-	PROC(GRANTED_MSG,	testargs,	norep),
-	PROC(TEST_RES,		testres,	norep),
-	PROC(LOCK_RES,		res,		norep),
-	PROC(CANCEL_RES,	res,		norep),
-	PROC(UNLOCK_RES,	res,		norep),
-	PROC(GRANTED_RES,	res,		norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
-	PROC(SHARE,		shareargs,	shareres),
-	PROC(UNSHARE,		shareargs,	shareres),
-	PROC(NM_LOCK,		lockargs,	res),
-	PROC(FREE_ALL,		notify,		void),
-#endif
-};
-
-static struct rpc_version	nlm_version1 = {
-	.number		= 1,
-	.nrprocs	= 16,
-	.procs		= nlm_procedures,
-};
-
-static struct rpc_version	nlm_version3 = {
-	.number		= 3,
-	.nrprocs	= 24,
-	.procs		= nlm_procedures,
-};
-
-static struct rpc_version	*nlm_versions[] = {
-	[1] = &nlm_version1,
-	[3] = &nlm_version3,
-#ifdef CONFIG_LOCKD_V4
-	[4] = &nlm_version4,
-#endif
-};
-
-static struct rpc_stat		nlm_stats;
-
-struct rpc_program		nlm_program = {
-	.name		= "lockd",
-	.number		= NLM_PROGRAM,
-	.nrvers		= ARRAY_SIZE(nlm_versions),
-	.version	= nlm_versions,
-	.stats		= &nlm_stats,
-};
-
-#ifdef RPC_DEBUG
-const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
-	/*
-	 * We can get away with a static buffer because we're only
-	 * called with BKL held.
-	 */
-	static char buf[2*NLM_MAXCOOKIELEN+1];
-	unsigned int i, len = sizeof(buf);
-	char *p = buf;
-
-	len--;	/* allow for trailing \0 */
-	if (len < 3)
-		return "???";
-	for (i = 0 ; i < cookie->len ; i++) {
-		if (len < 2) {
-			strcpy(p-3, "...");
-			break;
-		}
-		sprintf(p, "%02x", cookie->data[i]);
-		p += 2;
-		len -= 2;
-	}
-	*p = '\0';
-
-	return buf;
-}
-#endif
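
The client-side XDR removed here moves to the new fs/lockd/clntxdr.c (see the diffstat at the top of this patch). The PROC() macro in the removed table builds one struct rpc_procinfo entry per NLM procedure; a compilable hand-expansion of what PROC(TEST, testargs, testres) produced, for orientation (field values marked illustrative are assumptions, not the kernel's exact numbers):

	/* Illustrative expansion of PROC(TEST, testargs, testres). */
	struct rpc_procinfo_sketch {
		unsigned int	p_proc;
		int		(*p_encode)(void);
		int		(*p_decode)(void);
		unsigned int	p_arglen;	/* in 32-bit XDR words */
		unsigned int	p_replen;	/* in 32-bit XDR words */
		unsigned int	p_statidx;
		const char	*p_name;
	};

	enum { NLMPROC_TEST = 1 };

	static int encode_testargs_stub(void) { return 0; }
	static int decode_testres_stub(void)  { return 0; }

	static struct rpc_procinfo_sketch nlm_procedures_sketch[] = {
		[NLMPROC_TEST] = {
			.p_proc    = NLMPROC_TEST,
			.p_encode  = encode_testargs_stub,
			.p_decode  = decode_testres_stub,
			.p_arglen  = 29,	/* cookie + excl flag + lock; illustrative */
			.p_replen  = 13,	/* cookie + status + holder; illustrative */
			.p_statidx = NLMPROC_TEST,
			.p_name    = "TEST",
		},
	};

The array-designator form means each procedure lands at the slot matching its NLM procedure number, which is why nrprocs can simply be the table's logical length.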
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ad9dbbc9145d..dfa4789cd460 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -93,15 +93,6 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
 	return p + XDR_QUADLEN(f->size);
 }
 
-static __be32 *
-nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
-{
-	*p++ = htonl(f->size);
-	if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
-	memcpy(p, f->data, f->size);
-	return p + XDR_QUADLEN(f->size);
-}
-
 /*
  * Encode and decode owner handle
  */
@@ -112,12 +103,6 @@ nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
 }
 
 static __be32 *
-nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
-{
-	return xdr_encode_netobj(p, oh);
-}
-
-static __be32 *
 nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 {
 	struct file_lock	*fl = &lock->fl;
@@ -150,38 +135,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 }
 
 /*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
-	struct file_lock	*fl = &lock->fl;
-	__s64			start, len;
-
-	if (!(p = xdr_encode_string(p, lock->caller))
-	 || !(p = nlm4_encode_fh(p, &lock->fh))
-	 || !(p = nlm4_encode_oh(p, &lock->oh)))
-		return NULL;
-
-	if (fl->fl_start > NLM4_OFFSET_MAX
-	 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
-		return NULL;
-
-	*p++ = htonl(lock->svid);
-
-	start = loff_t_to_s64(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
-	else
-		len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
-
-	p = xdr_encode_hyper(p, start);
-	p = xdr_encode_hyper(p, len);
-
-	return p;
-}
-
-/*
  * Encode result of a TEST/TEST_MSG call
  */
 static __be32 *
@@ -379,211 +332,3 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
-{
-	return 0;
-}
-#endif
-
-static int
-nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	if (resp->status == nlm_lck_denied) {
-		struct file_lock	*fl = &resp->lock.fl;
-		u32			excl;
-		__u64			start, len;
-		__s64			end;
-
-		memset(&resp->lock, 0, sizeof(resp->lock));
-		locks_init_lock(fl);
-		excl = ntohl(*p++);
-		resp->lock.svid = ntohl(*p++);
-		fl->fl_pid = (pid_t)resp->lock.svid;
-		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
-			return -EIO;
-
-		fl->fl_flags = FL_POSIX;
-		fl->fl_type  = excl? F_WRLCK : F_RDLCK;
-		p = xdr_decode_hyper(p, &start);
-		p = xdr_decode_hyper(p, &len);
-		end = start + len - 1;
-
-		fl->fl_start = s64_to_loff_t(start);
-		if (len == 0 || end < 0)
-			fl->fl_end = OFFSET_MAX;
-		else
-			fl->fl_end = s64_to_loff_t(end);
-	}
-	return 0;
-}
-
-
-static int
-nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	*p++ = argp->reclaim? xdr_one : xdr_zero;
-	*p++ = htonl(argp->state);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
-		return -EIO;
-	*p++ = resp->status;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_encode_testres(p, resp)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
-#  error "NLM host name cannot be larger than NLM's maximum string length!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM4_void_sz		0
-#define NLM4_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM4_caller_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_owner_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_fhandle_sz		1+XDR_QUADLEN(NFS3_FHSIZE)
-#define NLM4_lock_sz		5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
-#define NLM4_holder_sz		6+NLM4_owner_sz
-
-#define NLM4_testargs_sz	NLM4_cookie_sz+1+NLM4_lock_sz
-#define NLM4_lockargs_sz	NLM4_cookie_sz+4+NLM4_lock_sz
-#define NLM4_cancargs_sz	NLM4_cookie_sz+2+NLM4_lock_sz
-#define NLM4_unlockargs_sz	NLM4_cookie_sz+NLM4_lock_sz
-
-#define NLM4_testres_sz		NLM4_cookie_sz+1+NLM4_holder_sz
-#define NLM4_res_sz		NLM4_cookie_sz+1
-#define NLM4_norep_sz		0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlm4clt_decode_norep	NULL
-
-#define PROC(proc, argtype, restype)				\
-[NLMPROC_##proc] = {						\
-	.p_proc      = NLMPROC_##proc,				\
-	.p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,	\
-	.p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,	\
-	.p_arglen    = NLM4_##argtype##_sz,			\
-	.p_replen    = NLM4_##restype##_sz,			\
-	.p_statidx   = NLMPROC_##proc,				\
-	.p_name      = #proc,					\
-	}
-
-static struct rpc_procinfo	nlm4_procedures[] = {
-	PROC(TEST,		testargs,	testres),
-	PROC(LOCK,		lockargs,	res),
-	PROC(CANCEL,		cancargs,	res),
-	PROC(UNLOCK,		unlockargs,	res),
-	PROC(GRANTED,		testargs,	res),
-	PROC(TEST_MSG,		testargs,	norep),
-	PROC(LOCK_MSG,		lockargs,	norep),
-	PROC(CANCEL_MSG,	cancargs,	norep),
-	PROC(UNLOCK_MSG,	unlockargs,	norep),
-	PROC(GRANTED_MSG,	testargs,	norep),
-	PROC(TEST_RES,		testres,	norep),
-	PROC(LOCK_RES,		res,		norep),
-	PROC(CANCEL_RES,	res,		norep),
-	PROC(UNLOCK_RES,	res,		norep),
-	PROC(GRANTED_RES,	res,		norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
-	PROC(SHARE,		shareargs,	shareres),
-	PROC(UNSHARE,		shareargs,	shareres),
-	PROC(NM_LOCK,		lockargs,	res),
-	PROC(FREE_ALL,		notify,		void),
-#endif
-};
-
-struct rpc_version	nlm_version4 = {
-	.number		= 4,
-	.nrprocs	= 24,
-	.procs		= nlm4_procedures,
-};
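
The *_sz macros in the removed block count 32-bit XDR words, not bytes. A short self-contained sketch of the arithmetic behind NLM4_lock_sz, assuming a hypothetical 64-byte owner buffer (the real NLMCLNT_OHSIZE is derived from host-name limits elsewhere in lockd; NFS3_FHSIZE is 64 bytes):

	#include <stdio.h>

	#define XDR_QUADLEN(n)	(((n) + 3) >> 2)	/* bytes -> 4-byte words */
	#define NLMCLNT_OHSIZE	64			/* hypothetical owner size */
	#define NFS3_FHSIZE	64

	#define NLM4_caller_sz	(1 + XDR_QUADLEN(NLMCLNT_OHSIZE))	/* length word + data */
	#define NLM4_owner_sz	(1 + XDR_QUADLEN(NLMCLNT_OHSIZE))
	#define NLM4_fhandle_sz	(1 + XDR_QUADLEN(NFS3_FHSIZE))

	/* svid (1 word) + offset hyper (2 words) + length hyper (2 words) = 5 fixed */
	#define NLM4_lock_sz	(5 + NLM4_caller_sz + NLM4_owner_sz + NLM4_fhandle_sz)

	int main(void)
	{
		printf("NLM4_lock_sz = %d words (%d bytes)\n",
		       NLM4_lock_sz, NLM4_lock_sz * 4);	/* 56 words, 224 bytes */
		return 0;
	}

The NLMv4 lock needs 5 fixed words where NLMv3 needed 3, because offset and length grow from 32-bit to 64-bit hyper values; that is exactly the difference between the removed NLM_lock_sz and NLM4_lock_sz definitions.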
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 93444747237b..a25444ab2baf 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -76,18 +76,6 @@ EXPORT_SYMBOL(mb_cache_entry_find_first);
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 #endif
 
-struct mb_cache {
-	struct list_head		c_cache_list;
-	const char			*c_name;
-	atomic_t			c_entry_count;
-	int				c_max_entries;
-	int				c_bucket_bits;
-	struct kmem_cache		*c_entry_cache;
-	struct list_head		*c_block_hash;
-	struct list_head		*c_index_hash;
-};
-
-
 /*
  * Global data: list of all mbcache's, lru list, and a spinlock for
  * accessing cache data structures on SMP machines. The lru list is
diff --git a/fs/namei.c b/fs/namei.c
index 19433cdba011..24ece10470b6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -202,7 +202,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
  * @inode:	inode to check access rights for
  * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
  * @check_acl:	optional callback to check for Posix ACLs
- * @flags	IPERM_FLAG_ flags.
+ * @flags:	IPERM_FLAG_ flags.
  *
  * Used to check for read/write/execute permissions on a file.
  * We use "fsuid" for this, letting us set arbitrary permissions
@@ -407,7 +407,7 @@ void path_put_long(struct path *path)
 /**
  * nameidata_drop_rcu - drop this nameidata out of rcu-walk
  * @nd: nameidata pathwalk data to drop
- * @Returns: 0 on success, -ECHLID on failure
+ * Returns: 0 on success, -ECHILD on failure
  *
  * Path walking has 2 modes, rcu-walk and ref-walk (see
  * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
@@ -468,7 +468,7 @@ static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
  * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
  * @nd: nameidata pathwalk data to drop
  * @dentry: dentry to drop
- * @Returns: 0 on success, -ECHLID on failure
+ * Returns: 0 on success, -ECHILD on failure
  *
  * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
  * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
@@ -530,7 +530,7 @@ static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct d
 /**
  * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
  * @nd: nameidata pathwalk data to drop
- * @Returns: 0 on success, -ECHLID on failure
+ * Returns: 0 on success, -ECHILD on failure
  *
  * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
  * nd->path should be the final element of the lookup, so nd->root is discarded.
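
These namei.c hunks are kernel-doc corrections: parameter lines take the "@name:" form, while a return-value note is a plain "Returns:" line, not a bogus "@Returns:" parameter (the old form also hid the -ECHLID typo). A minimal sketch of the expected shape:

	/**
	 * example_drop_rcu - drop a pathwalk out of rcu-walk (illustrative)
	 * @nd: nameidata pathwalk data to drop
	 * Returns: 0 on success, -ECHILD on failure
	 *
	 * Longer description of the function goes here.
	 */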
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 93a8b3bd69e3..199016528fcb 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,9 +16,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
-#if defined(CONFIG_NFS_V4_1)
 #include <linux/sunrpc/bc_xprt.h>
-#endif
 
 #include <net/inet_sock.h>
 
@@ -137,6 +135,33 @@ out_err:
 
 #if defined(CONFIG_NFS_V4_1)
 /*
+ * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+	struct nfs4_sessionid *bc_sid;
+
+	if (!serv->sv_bc_xprt)
+		return -EINVAL;
+
+	/* on success freed in xprt_free */
+	bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+	if (!bc_sid)
+		return -ENOMEM;
+	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+	       NFS4_MAX_SESSIONID_LEN);
+	spin_lock_bh(&serv->sv_cb_lock);
+	serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
+	spin_unlock_bh(&serv->sv_cb_lock);
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
+		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+		serv->sv_bc_xprt);
+	return 0;
+}
+
+/*
  * The callback service for NFSv4.1 callbacks
  */
 static int
@@ -177,30 +202,38 @@ nfs41_callback_svc(void *vrqstp)
 struct svc_rqst *
 nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 {
-	struct svc_xprt *bc_xprt;
-	struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
+	struct svc_rqst *rqstp;
+	int ret;
 
-	dprintk("--> %s\n", __func__);
-	/* Create a svc_sock for the service */
-	bc_xprt = svc_sock_create(serv, xprt->prot);
-	if (!bc_xprt)
+	/*
+	 * Create an svc_sock for the back channel service that shares the
+	 * fore channel connection.
+	 * Returns the input port (0) and sets the svc_serv bc_xprt on success
+	 */
+	ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
+			      SVC_SOCK_ANONYMOUS);
+	if (ret < 0) {
+		rqstp = ERR_PTR(ret);
 		goto out;
+	}
 
 	/*
 	 * Save the svc_serv in the transport so that it can
 	 * be referenced when the session backchannel is initialized
 	 */
-	serv->bc_xprt = bc_xprt;
 	xprt->bc_serv = serv;
 
 	INIT_LIST_HEAD(&serv->sv_cb_list);
 	spin_lock_init(&serv->sv_cb_lock);
 	init_waitqueue_head(&serv->sv_cb_waitq);
 	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
-	if (IS_ERR(rqstp))
-		svc_sock_destroy(bc_xprt);
+	if (IS_ERR(rqstp)) {
+		svc_xprt_put(serv->sv_bc_xprt);
+		serv->sv_bc_xprt = NULL;
+	}
 out:
-	dprintk("--> %s return %p\n", __func__, rqstp);
+	dprintk("--> %s return %ld\n", __func__,
+		IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
 	return rqstp;
 }
 
@@ -233,6 +266,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
 				    struct nfs_callback_data *cb_info)
 {
 }
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -328,6 +365,9 @@ static int check_gss_callback_principal(struct nfs_client *clp,
 	struct rpc_clnt *r = clp->cl_rpcclient;
 	char *p = svc_gss_principal(rqstp);
 
+	/* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
+	if (clp->cl_minorversion != 0)
+		return SVC_DROP;
 	/*
 	 * It might just be a normal user principal, in which case
 	 * userspace won't bother to tell us the name at all.
@@ -345,6 +385,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
 	return SVC_OK;
 }
 
+/* pg_authenticate method helper */
+static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+{
+	struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+	int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+
+	dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+	if (svc_is_backchannel(rqstp))
+		/* Sessionid (usually) set after CB_NULL ping */
+		return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
+						  is_cb_compound);
+	else
+		/* No callback identifier in pg_authenticate */
+		return nfs4_find_client_no_ident(svc_addr(rqstp));
+}
+
+/* pg_authenticate method for nfsv4 callback threads. */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct nfs_client *clp;
@@ -352,7 +409,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 	int ret = SVC_OK;
 
 	/* Don't talk to strangers */
-	clp = nfs_find_client(svc_addr(rqstp), 4);
+	clp = nfs_cb_find_client(rqstp);
 	if (clp == NULL)
 		return SVC_DROP;
 
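
nfs_cb_find_client() above switches the lookup strategy when the request arrives on the shared back channel, where only the sessionid identifies the client. A self-contained sketch of the 16-byte sessionid comparison that now gates v4.1 callbacks (NFS4_MAX_SESSIONID_LEN is assumed to match the kernel's definition):

	#include <string.h>

	#define NFS4_MAX_SESSIONID_LEN 16	/* assumed; mirrors the kernel header */

	struct nfs4_sessionid {
		unsigned char data[NFS4_MAX_SESSIONID_LEN];
	};

	/* CB_SEQUENCE is only valid on the connection whose session it names. */
	static int cb_session_matches(const struct nfs4_sessionid *incoming,
				      const struct nfs4_sessionid *svc_sid)
	{
		return memcmp(incoming->data, svc_sid->data,
			      NFS4_MAX_SESSIONID_LEN) == 0;
	}

This is why the v4.0-era nfs_find_client(addr, 4) lookup no longer fits: on a shared TCP connection the source address alone cannot distinguish clients, but the sessionid stored on the transport can.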
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd1b8dd..d3b44f9bd747 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
 	OP_CB_ILLEGAL = 10044,
 };
 
+struct cb_process_state {
+	__be32			drc_status;
+	struct nfs_client	*clp;
+	struct nfs4_sessionid	*svc_sid; /* v4.1 callback service sessionid */
+};
+
 struct cb_compound_hdr_arg {
 	unsigned int taglen;
 	const char *tag;
 	unsigned int minorversion;
+	unsigned int cb_ident; /* v4.0 callback identifier */
 	unsigned nops;
 };
 
@@ -103,14 +110,23 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 };
 
-extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res);
+extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+				     struct cb_sequenceres *res,
+				     struct cb_process_state *cps);
 
 extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
 					     const nfs4_stateid *stateid);
 
 #define RCA4_TYPE_MASK_RDATA_DLG	0
 #define RCA4_TYPE_MASK_WDATA_DLG	1
+#define RCA4_TYPE_MASK_DIR_DLG		2
+#define RCA4_TYPE_MASK_FILE_LAYOUT	3
+#define RCA4_TYPE_MASK_BLK_LAYOUT	4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN	8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX	9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN	12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX	15
+#define RCA4_TYPE_MASK_ALL		0xf31f
 
 struct cb_recallanyargs {
 	struct sockaddr	*craa_addr;
@@ -118,25 +134,52 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 };
 
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
+				      void *dummy,
+				      struct cb_process_state *cps);
 
 struct cb_recallslotargs {
 	struct sockaddr	*crsa_addr;
 	uint32_t	crsa_target_max_slots;
 };
-extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					 void *dummy);
+extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
+				       void *dummy,
+				       struct cb_process_state *cps);
+
+struct cb_layoutrecallargs {
+	struct sockaddr		*cbl_addr;
+	uint32_t		cbl_recall_type;
+	uint32_t		cbl_layout_type;
+	uint32_t		cbl_layoutchanged;
+	union {
+		struct {
+			struct nfs_fh		cbl_fh;
+			struct pnfs_layout_range cbl_range;
+			nfs4_stateid		cbl_stateid;
+		};
+		struct nfs_fsid		cbl_fsid;
+	};
+};
 
-#endif /* CONFIG_NFS_V4_1 */
+extern unsigned nfs4_callback_layoutrecall(
+	struct cb_layoutrecallargs *args,
+	void *dummy, struct cb_process_state *cps);
 
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+extern void nfs4_cb_take_slot(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
 
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+				    struct cb_getattrres *res,
+				    struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+				   struct cb_process_state *cps);
 #ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern void nfs_callback_down(int minorversion);
 extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
 					    const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4 */
 /*
  * nfs41: Callbacks are expected to not cause substantial latency,
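
The new struct cb_process_state threads per-compound state from CB_SEQUENCE to the remaining operations: v4.1 ops no longer look up the client themselves but rely on cps->clp, which only CB_SEQUENCE fills in. A toy stand-alone illustration of that contract (hypothetical names, not the kernel API):

	#include <stdio.h>

	struct nfs_client { const char *name; };

	/* Mirrors struct cb_process_state from callback.h (svc_sid omitted). */
	struct cb_process_state {
		int drc_status;
		struct nfs_client *clp;	/* NULL until CB_SEQUENCE succeeds */
	};

	#define NFS4ERR_OP_NOT_IN_SESSION 10071

	/* Any v4.1 op: refuse to run until cb_sequence has set cps->clp. */
	static int cb_recall_any(struct cb_process_state *cps)
	{
		if (!cps->clp)
			return NFS4ERR_OP_NOT_IN_SESSION;
		printf("recall_any for client %s\n", cps->clp->name);
		return 0;
	}

	int main(void)
	{
		struct nfs_client clnt = { "client-a" };
		struct cb_process_state cps = { 0, NULL };

		printf("before sequence: %d\n", cb_recall_any(&cps)); /* 10071 */
		cps.clp = &clnt;	/* what CB_SEQUENCE does on success */
		printf("after sequence: %d\n", cb_recall_any(&cps));  /* 0 */
		return 0;
	}

This also explains the removed nfs_put_client() calls throughout callback_proc.c: the client reference taken in cb_sequence is now dropped once, when the whole compound finishes.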
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2950fca0c61b..4bb91cb2620d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,30 +12,33 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "pnfs.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 #endif
 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+			     struct cb_getattrres *res,
+			     struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
 
+	res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
+		goto out;
+
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
-		goto out;
 
 	dprintk("NFS: GETATTR callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
-	inode = nfs_delegation_find_inode(clp, &args->fh);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
 	if (inode == NULL)
-		goto out_putclient;
+		goto out;
 	nfsi = NFS_I(inode);
 	rcu_read_lock();
 	delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 out_iput:
 	rcu_read_unlock();
 	iput(inode);
-out_putclient:
-	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
 	return res->status;
 }
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+			    struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct inode *inode;
 	__be32 res;
 
-	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
+	res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
		goto out;
 
 	dprintk("NFS: RECALL callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-
-	do {
-		struct nfs_client *prev = clp;
-
-		inode = nfs_delegation_find_inode(clp, &args->fh);
-		if (inode != NULL) {
-			/* Set up a helper thread to actually return the delegation */
-			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
-			case 0:
-				res = 0;
-				break;
-			case -ENOENT:
-				if (res != 0)
-					res = htonl(NFS4ERR_BAD_STATEID);
-				break;
-			default:
-				res = htonl(NFS4ERR_RESOURCE);
-			}
-			iput(inode);
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+	res = htonl(NFS4ERR_BADHANDLE);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
+	if (inode == NULL)
+		goto out;
+	/* Set up a helper thread to actually return the delegation */
+	switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+	case 0:
+		res = 0;
+		break;
+	case -ENOENT:
+		if (res != 0)
+			res = htonl(NFS4ERR_BAD_STATEID);
+		break;
+	default:
+		res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	return res;
@@ -113,6 +108,139 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 #if defined(CONFIG_NFS_V4_1)
 
+static u32 initiate_file_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	bool found = false;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	LIST_HEAD(free_me_list);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		if (nfs_compare_fh(&args->cbl_fh,
+				   &NFS_I(lo->plh_inode)->fh))
+			continue;
+		ino = igrab(lo->plh_inode);
+		if (!ino)
+			continue;
+		found = true;
+		/* Without this, layout can be freed as soon
+		 * as we release cl_lock.
+		 */
+		get_layout_hdr(lo);
+		break;
+	}
+	spin_unlock(&clp->cl_lock);
+	if (!found)
+		return NFS4ERR_NOMATCHING_LAYOUT;
+
+	spin_lock(&ino->i_lock);
+	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+	    mark_matching_lsegs_invalid(lo, &free_me_list,
+					args->cbl_range.iomode))
+		rv = NFS4ERR_DELAY;
+	else
+		rv = NFS4ERR_NOMATCHING_LAYOUT;
+	pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&free_me_list);
+	put_layout_hdr(lo);
+	iput(ino);
+	return rv;
+}
+
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	struct pnfs_layout_hdr *tmp;
+	LIST_HEAD(recall_list);
+	LIST_HEAD(free_me_list);
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		if ((args->cbl_recall_type == RETURN_FSID) &&
+		    memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+			   &args->cbl_fsid, sizeof(struct nfs_fsid)))
+			continue;
+		if (!igrab(lo->plh_inode))
+			continue;
+		get_layout_hdr(lo);
+		BUG_ON(!list_empty(&lo->plh_bulk_recall));
+		list_add(&lo->plh_bulk_recall, &recall_list);
+	}
+	spin_unlock(&clp->cl_lock);
+	list_for_each_entry_safe(lo, tmp,
+				 &recall_list, plh_bulk_recall) {
+		ino = lo->plh_inode;
+		spin_lock(&ino->i_lock);
+		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+			rv = NFS4ERR_DELAY;
+		list_del_init(&lo->plh_bulk_recall);
+		spin_unlock(&ino->i_lock);
+		put_layout_hdr(lo);
+		iput(ino);
+	}
+	pnfs_free_lseg_list(&free_me_list);
+	return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
+{
+	u32 res = NFS4ERR_DELAY;
+
+	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+	if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
+		goto out;
+	if (args->cbl_recall_type == RETURN_FILE)
+		res = initiate_file_draining(clp, args);
+	else
+		res = initiate_bulk_draining(clp, args);
+	clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+out:
+	dprintk("%s returning %i\n", __func__, res);
+	return res;
+
+}
+
+__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
+				  void *dummy, struct cb_process_state *cps)
+{
+	u32 res;
+
+	dprintk("%s: -->\n", __func__);
+
+	if (cps->clp)
+		res = do_callback_layoutrecall(cps->clp, args);
+	else
+		res = NFS4ERR_OP_NOT_IN_SESSION;
+
+	dprintk("%s: exit with status = %d\n", __func__, res);
+	return cpu_to_be32(res);
+}
+
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+	struct cb_layoutrecallargs args;
+
+	/* Pretend we got a CB_LAYOUTRECALL(ALL) */
+	memset(&args, 0, sizeof(args));
+	args.cbl_recall_type = RETURN_ALL;
+	/* FIXME we ignore errors, what should we do? */
+	do_callback_layoutrecall(clp, &args);
+}
+
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
 {
 	if (delegation == NULL)
@@ -185,42 +313,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
 }
 
 /*
- * Returns a pointer to a held 'struct nfs_client' that matches the server's
- * address, major version number, and session ID.  It is the caller's
- * responsibility to release the returned reference.
- *
- * Returns NULL if there are no connections with sessions, or if no session
- * matches the one of interest.
- */
- static struct nfs_client *find_client_with_session(
-	const struct sockaddr *addr, u32 nfsversion,
-	struct nfs4_sessionid *sessionid)
-{
-	struct nfs_client *clp;
-
-	clp = nfs_find_client(addr, 4);
-	if (clp == NULL)
-		return NULL;
-
-	do {
-		struct nfs_client *prev = clp;
-
-		if (clp->cl_session != NULL) {
-			if (memcmp(clp->cl_session->sess_id.data,
-					sessionid->data,
-					NFS4_MAX_SESSIONID_LEN) == 0) {
-				/* Returns a held reference to clp */
-				return clp;
-			}
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
-
-	return NULL;
-}
-
-/*
  * For each referring call triple, check the session's slot table for
  * a match.  If the slot is in use and the sequence numbers match, the
  * client is still waiting for a response to the original request.
@@ -276,20 +368,34 @@ out:
 }
 
 __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-			      struct cb_sequenceres *res)
+			      struct cb_sequenceres *res,
+			      struct cb_process_state *cps)
 {
 	struct nfs_client *clp;
 	int i;
 	__be32 status;
 
+	cps->clp = NULL;
+
 	status = htonl(NFS4ERR_BADSESSION);
-	clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+	/* Incoming session must match the callback session */
+	if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
+		goto out;
+
+	clp = nfs4_find_client_sessionid(args->csa_addr,
+					 &args->csa_sessionid, 1);
 	if (clp == NULL)
 		goto out;
 
+	/* state manager is resetting the session */
+	if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+		status = NFS4ERR_DELAY;
+		goto out;
+	}
+
 	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	if (status)
-		goto out_putclient;
+		goto out;
 
 	/*
 	 * Check for pending referring calls.  If a match is found, a
@@ -298,7 +404,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	 */
 	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
 		status = htonl(NFS4ERR_DELAY);
-		goto out_putclient;
+		goto out;
 	}
 
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,83 +413,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	res->csr_slotid = args->csa_slotid;
 	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	nfs4_cb_take_slot(clp);
+	cps->clp = clp; /* put in nfs4_callback_compound */
 
-out_putclient:
-	nfs_put_client(clp);
 out:
 	for (i = 0; i < args->csa_nrclists; i++)
 		kfree(args->csa_rclists[i].rcl_refcalls);
 	kfree(args->csa_rclists);
 
-	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
-		res->csr_status = 0;
-	else
+	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+		cps->drc_status = status;
+		status = 0;
+	} else
 		res->csr_status = status;
+
 	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
 		ntohl(status), ntohl(res->csr_status));
 	return status;
 }
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+static bool
+validate_bitmap_values(unsigned long mask)
+{
+	return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
+}
+
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+			       struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	__be32 status;
 	fmode_t flags = 0;
 
-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->craa_addr, 4);
-	if (clp == NULL)
+	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
 	dprintk("NFS: RECALL_ANY callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+	status = cpu_to_be32(NFS4ERR_INVAL);
+	if (!validate_bitmap_values(args->craa_type_mask))
+		goto out;
 
+	status = cpu_to_be32(NFS4_OK);
 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
 		     &args->craa_type_mask))
 		flags = FMODE_READ;
 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
 		     &args->craa_type_mask))
 		flags |= FMODE_WRITE;
-
+	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+		     &args->craa_type_mask))
+		pnfs_recall_all_layouts(cps->clp);
 	if (flags)
-		nfs_expire_all_delegation_types(clp, flags);
-	status = htonl(NFS4_OK);
+		nfs_expire_all_delegation_types(cps->clp, flags);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 }
 
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+				struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->crsa_addr, 4);
-	if (clp == NULL)
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
 	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
 		args->crsa_target_max_slots);
 
-	fc_tbl = &clp->cl_session->fc_slot_table;
+	fc_tbl = &cps->clp->cl_session->fc_slot_table;
 
 	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
 	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
 	    args->crsa_target_max_slots < 1)
-		goto out_putclient;
+		goto out;
 
 	status = htonl(NFS4_OK);
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
-		goto out_putclient;
+		goto out;
 
 	fc_tbl->target_max_slots = args->crsa_target_max_slots;
-	nfs41_handle_recall_slot(clp);
-out_putclient:
-	nfs_put_client(clp); /* balance nfs_find_client */
+	nfs41_handle_recall_slot(cps->clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
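
validate_bitmap_values() above rejects any CB_RECALL_ANY bit outside the set defined in callback.h. The RCA4_TYPE_MASK_ALL constant 0xf31f is just those bits OR-ed together: bits 0 through 4, bits 8 and 9, and bits 12 through 15. A quick self-checking sketch of the arithmetic:

	#include <assert.h>

	#define RCA4_TYPE_MASK_RDATA_DLG	0
	#define RCA4_TYPE_MASK_BLK_LAYOUT	4
	#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN	8
	#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX	9
	#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN	12
	#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX	15

	int main(void)
	{
		unsigned long mask = 0;
		int bit;

		for (bit = RCA4_TYPE_MASK_RDATA_DLG; bit <= RCA4_TYPE_MASK_BLK_LAYOUT; bit++)
			mask |= 1UL << bit;	/* 0x0000001f */
		for (bit = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN; bit <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; bit++)
			mask |= 1UL << bit;	/* adds 0x300 */
		for (bit = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN; bit <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; bit++)
			mask |= 1UL << bit;	/* adds 0xf000 */
		assert(mask == 0xf31f);		/* RCA4_TYPE_MASK_ALL */
		return 0;
	}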
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05af212f0edf..23112c263f81 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,8 +10,10 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/slab.h>
+#include <linux/sunrpc/bc_xprt.h>
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
@@ -22,6 +24,7 @@
 #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 
 #if defined(CONFIG_NFS_V4_1)
+#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 					 4 + 1 + 3)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
@@ -33,7 +36,8 @@
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+					struct cb_process_state *);
 typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
 
@@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 	hdr->minorversion = ntohl(*p++);
 	/* Check minor version is zero or one. */
 	if (hdr->minorversion <= 1) {
-		p++;	/* skip callback_ident */
+		hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
 	} else {
 		printk(KERN_WARNING "%s: NFSv4 server callback with "
 			"illegal minor version %u!\n",
@@ -220,6 +224,66 @@ out:
220 224
221#if defined(CONFIG_NFS_V4_1) 225#if defined(CONFIG_NFS_V4_1)
222 226
227static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
228 struct xdr_stream *xdr,
229 struct cb_layoutrecallargs *args)
230{
231 __be32 *p;
232 __be32 status = 0;
233 uint32_t iomode;
234
235 args->cbl_addr = svc_addr(rqstp);
236 p = read_buf(xdr, 4 * sizeof(uint32_t));
237 if (unlikely(p == NULL)) {
238 status = htonl(NFS4ERR_BADXDR);
239 goto out;
240 }
241
242 args->cbl_layout_type = ntohl(*p++);
243 /* Depite the spec's xdr, iomode really belongs in the FILE switch,
244 * as it is unuseable and ignored with the other types.
245 */
246 iomode = ntohl(*p++);
247 args->cbl_layoutchanged = ntohl(*p++);
248 args->cbl_recall_type = ntohl(*p++);
249
250 if (args->cbl_recall_type == RETURN_FILE) {
251 args->cbl_range.iomode = iomode;
252 status = decode_fh(xdr, &args->cbl_fh);
253 if (unlikely(status != 0))
254 goto out;
255
256 p = read_buf(xdr, 2 * sizeof(uint64_t));
257 if (unlikely(p == NULL)) {
258 status = htonl(NFS4ERR_BADXDR);
259 goto out;
260 }
261 p = xdr_decode_hyper(p, &args->cbl_range.offset);
262 p = xdr_decode_hyper(p, &args->cbl_range.length);
263 status = decode_stateid(xdr, &args->cbl_stateid);
264 if (unlikely(status != 0))
265 goto out;
266 } else if (args->cbl_recall_type == RETURN_FSID) {
267 p = read_buf(xdr, 2 * sizeof(uint64_t));
268 if (unlikely(p == NULL)) {
269 status = htonl(NFS4ERR_BADXDR);
270 goto out;
271 }
272 p = xdr_decode_hyper(p, &args->cbl_fsid.major);
273 p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
274 } else if (args->cbl_recall_type != RETURN_ALL) {
275 status = htonl(NFS4ERR_BADXDR);
276 goto out;
277 }
278 dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
279 __func__,
280 args->cbl_layout_type, iomode,
281 args->cbl_layoutchanged, args->cbl_recall_type);
282out:
283 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
284 return status;
285}
286
223static __be32 decode_sessionid(struct xdr_stream *xdr, 287static __be32 decode_sessionid(struct xdr_stream *xdr,
224 struct nfs4_sessionid *sid) 288 struct nfs4_sessionid *sid)
225{ 289{
@@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
574 case OP_CB_SEQUENCE: 638 case OP_CB_SEQUENCE:
575 case OP_CB_RECALL_ANY: 639 case OP_CB_RECALL_ANY:
576 case OP_CB_RECALL_SLOT: 640 case OP_CB_RECALL_SLOT:
641 case OP_CB_LAYOUTRECALL:
577 *op = &callback_ops[op_nr]; 642 *op = &callback_ops[op_nr];
578 break; 643 break;
579 644
580 case OP_CB_LAYOUTRECALL:
581 case OP_CB_NOTIFY_DEVICEID: 645 case OP_CB_NOTIFY_DEVICEID:
582 case OP_CB_NOTIFY: 646 case OP_CB_NOTIFY:
583 case OP_CB_PUSH_DELEG: 647 case OP_CB_PUSH_DELEG:
@@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
593 return htonl(NFS_OK); 657 return htonl(NFS_OK);
594} 658}
595 659
660static void nfs4_callback_free_slot(struct nfs4_session *session)
661{
662 struct nfs4_slot_table *tbl = &session->bc_slot_table;
663
664 spin_lock(&tbl->slot_tbl_lock);
665 /*
 666 * Let the state manager know that callback processing is done.
667 * A single slot, so highest used slotid is either 0 or -1
668 */
669 tbl->highest_used_slotid--;
670 nfs4_check_drain_bc_complete(session);
671 spin_unlock(&tbl->slot_tbl_lock);
672}
673
674static void nfs4_cb_free_slot(struct nfs_client *clp)
675{
676 if (clp && clp->cl_session)
677 nfs4_callback_free_slot(clp->cl_session);
678}
679
680/* A single slot, so highest used slotid is either 0 or -1 */
681void nfs4_cb_take_slot(struct nfs_client *clp)
682{
683 struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
684
685 spin_lock(&tbl->slot_tbl_lock);
686 tbl->highest_used_slotid++;
687 BUG_ON(tbl->highest_used_slotid != 0);
688 spin_unlock(&tbl->slot_tbl_lock);
689}
690
596#else /* CONFIG_NFS_V4_1 */ 691#else /* CONFIG_NFS_V4_1 */
597 692
598static __be32 693static __be32
@@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
601 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 696 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
602} 697}
603 698
699static void nfs4_cb_free_slot(struct nfs_client *clp)
700{
701}
604#endif /* CONFIG_NFS_V4_1 */ 702#endif /* CONFIG_NFS_V4_1 */
605 703
606static __be32 704static __be32
@@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
621static __be32 process_op(uint32_t minorversion, int nop, 719static __be32 process_op(uint32_t minorversion, int nop,
622 struct svc_rqst *rqstp, 720 struct svc_rqst *rqstp,
623 struct xdr_stream *xdr_in, void *argp, 721 struct xdr_stream *xdr_in, void *argp,
624 struct xdr_stream *xdr_out, void *resp, int* drc_status) 722 struct xdr_stream *xdr_out, void *resp,
723 struct cb_process_state *cps)
625{ 724{
626 struct callback_op *op = &callback_ops[0]; 725 struct callback_op *op = &callback_ops[0];
627 unsigned int op_nr; 726 unsigned int op_nr;
@@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
644 if (status) 743 if (status)
645 goto encode_hdr; 744 goto encode_hdr;
646 745
647 if (*drc_status) { 746 if (cps->drc_status) {
648 status = *drc_status; 747 status = cps->drc_status;
649 goto encode_hdr; 748 goto encode_hdr;
650 } 749 }
651 750
@@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
653 if (maxlen > 0 && maxlen < PAGE_SIZE) { 752 if (maxlen > 0 && maxlen < PAGE_SIZE) {
654 status = op->decode_args(rqstp, xdr_in, argp); 753 status = op->decode_args(rqstp, xdr_in, argp);
655 if (likely(status == 0)) 754 if (likely(status == 0))
656 status = op->process_op(argp, resp); 755 status = op->process_op(argp, resp, cps);
657 } else 756 } else
658 status = htonl(NFS4ERR_RESOURCE); 757 status = htonl(NFS4ERR_RESOURCE);
659 758
660 /* Only set by OP_CB_SEQUENCE processing */
661 if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
662 *drc_status = status;
663 status = 0;
664 }
665
666encode_hdr: 759encode_hdr:
667 res = encode_op_hdr(xdr_out, op_nr, status); 760 res = encode_op_hdr(xdr_out, op_nr, status);
668 if (unlikely(res)) 761 if (unlikely(res))
@@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
681 struct cb_compound_hdr_arg hdr_arg = { 0 }; 774 struct cb_compound_hdr_arg hdr_arg = { 0 };
682 struct cb_compound_hdr_res hdr_res = { NULL }; 775 struct cb_compound_hdr_res hdr_res = { NULL };
683 struct xdr_stream xdr_in, xdr_out; 776 struct xdr_stream xdr_in, xdr_out;
684 __be32 *p; 777 __be32 *p, status;
685 __be32 status, drc_status = 0; 778 struct cb_process_state cps = {
779 .drc_status = 0,
780 .clp = NULL,
781 };
686 unsigned int nops = 0; 782 unsigned int nops = 0;
687 783
688 dprintk("%s: start\n", __func__); 784 dprintk("%s: start\n", __func__);
@@ -696,6 +792,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
696 if (status == __constant_htonl(NFS4ERR_RESOURCE)) 792 if (status == __constant_htonl(NFS4ERR_RESOURCE))
697 return rpc_garbage_args; 793 return rpc_garbage_args;
698 794
795 if (hdr_arg.minorversion == 0) {
796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
797 if (!cps.clp)
798 return rpc_drop_reply;
799 } else
800 cps.svc_sid = bc_xprt_sid(rqstp);
801
699 hdr_res.taglen = hdr_arg.taglen; 802 hdr_res.taglen = hdr_arg.taglen;
700 hdr_res.tag = hdr_arg.tag; 803 hdr_res.tag = hdr_arg.tag;
701 if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) 804 if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +806,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
703 806
704 while (status == 0 && nops != hdr_arg.nops) { 807 while (status == 0 && nops != hdr_arg.nops) {
705 status = process_op(hdr_arg.minorversion, nops, rqstp, 808 status = process_op(hdr_arg.minorversion, nops, rqstp,
706 &xdr_in, argp, &xdr_out, resp, &drc_status); 809 &xdr_in, argp, &xdr_out, resp, &cps);
707 nops++; 810 nops++;
708 } 811 }
709 812
@@ -716,6 +819,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
716 819
717 *hdr_res.status = status; 820 *hdr_res.status = status;
718 *hdr_res.nops = htonl(nops); 821 *hdr_res.nops = htonl(nops);
822 nfs4_cb_free_slot(cps.clp);
823 nfs_put_client(cps.clp);
719 dprintk("%s: done, status = %u\n", __func__, ntohl(status)); 824 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
720 return rpc_success; 825 return rpc_success;
721} 826}
@@ -739,6 +844,12 @@ static struct callback_op callback_ops[] = {
739 .res_maxsize = CB_OP_RECALL_RES_MAXSZ, 844 .res_maxsize = CB_OP_RECALL_RES_MAXSZ,
740 }, 845 },
741#if defined(CONFIG_NFS_V4_1) 846#if defined(CONFIG_NFS_V4_1)
847 [OP_CB_LAYOUTRECALL] = {
848 .process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
849 .decode_args =
850 (callback_decode_arg_t)decode_layoutrecall_args,
851 .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
852 },
742 [OP_CB_SEQUENCE] = { 853 [OP_CB_SEQUENCE] = {
743 .process_op = (callback_process_op_t)nfs4_callback_sequence, 854 .process_op = (callback_process_op_t)nfs4_callback_sequence,
744 .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, 855 .decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
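
The callback_xdr.c changes above thread one struct cb_process_state through the whole compound: the v4.0 client is looked up once from the decoded cb_ident, the v4.1 session ID is stashed for CB_SEQUENCE to resolve, drc_status replaces the old drc_status pointer argument, and the back-channel slot plus client reference are released once at the end. A schematic userspace model of that threading (types and names are illustrative, not the kernel's):

#include <stdio.h>

struct cb_process_state {
	int drc_status;	/* sticky status from a CB_SEQUENCE replay */
	void *clp;	/* client reference, established once per compound */
};

typedef int (*process_op_t)(void *argp, void *resp,
			    struct cb_process_state *cps);

static int demo_op(void *argp, void *resp, struct cb_process_state *cps)
{
	(void)argp;
	(void)resp;
	return cps->clp != NULL ? 0 : -1; /* every op sees the same client */
}

int main(void)
{
	struct cb_process_state cps = { .drc_status = 0, .clp = (void *)1 };
	process_op_t ops[] = { demo_op, demo_op };
	unsigned int i;

	for (i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
		if (cps.drc_status) /* a cached reply short-circuits later ops */
			break;
		printf("op %u -> %d\n", i, ops[i](NULL, NULL, &cps));
	}
	/* the kernel frees the back-channel slot and puts the client here */
	return 0;
}
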
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0870d0d4efc0..192f2f860265 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
56static LIST_HEAD(nfs_client_list); 56static LIST_HEAD(nfs_client_list);
57static LIST_HEAD(nfs_volume_list); 57static LIST_HEAD(nfs_volume_list);
58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); 58static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
59#ifdef CONFIG_NFS_V4
60static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
61
62/*
63 * Get a unique NFSv4.0 callback identifier which will be used
64 * by the V4.0 callback service to lookup the nfs_client struct
65 */
66static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
67{
68 int ret = 0;
69
70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret;
72retry:
73 if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
74 return -ENOMEM;
75 spin_lock(&nfs_client_lock);
76 ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
77 spin_unlock(&nfs_client_lock);
78 if (ret == -EAGAIN)
79 goto retry;
80 return ret;
81}
82#endif /* CONFIG_NFS_V4 */
59 83
60/* 84/*
61 * RPC cruft for NFS 85 * RPC cruft for NFS
@@ -144,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
144 clp->cl_proto = cl_init->proto; 168 clp->cl_proto = cl_init->proto;
145 169
146#ifdef CONFIG_NFS_V4 170#ifdef CONFIG_NFS_V4
147 INIT_LIST_HEAD(&clp->cl_delegations); 171 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
172 if (err)
173 goto error_cleanup;
174
148 spin_lock_init(&clp->cl_lock); 175 spin_lock_init(&clp->cl_lock);
149 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 176 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
150 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 177 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -170,21 +197,17 @@ error_0:
170} 197}
171 198
172#ifdef CONFIG_NFS_V4 199#ifdef CONFIG_NFS_V4
173/*
174 * Clears/puts all minor version specific parts from an nfs_client struct
175 * reverting it to minorversion 0.
176 */
177static void nfs4_clear_client_minor_version(struct nfs_client *clp)
178{
179#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
180 if (nfs4_has_session(clp)) { 201static void nfs4_shutdown_session(struct nfs_client *clp)
202{
203 if (nfs4_has_session(clp))
181 nfs4_destroy_session(clp->cl_session); 204 nfs4_destroy_session(clp->cl_session);
182 clp->cl_session = NULL;
183 }
184
185 clp->cl_mvops = nfs_v4_minor_ops[0];
186#endif /* CONFIG_NFS_V4_1 */
187} 205}
206#else /* CONFIG_NFS_V4_1 */
207static void nfs4_shutdown_session(struct nfs_client *clp)
208{
209}
210#endif /* CONFIG_NFS_V4_1 */
188 211
189/* 212/*
190 * Destroy the NFS4 callback service 213 * Destroy the NFS4 callback service
@@ -199,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
199{ 222{
200 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) 223 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
201 nfs4_kill_renewd(clp); 224 nfs4_kill_renewd(clp);
202 nfs4_clear_client_minor_version(clp); 225 nfs4_shutdown_session(clp);
203 nfs4_destroy_callback(clp); 226 nfs4_destroy_callback(clp);
204 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 227 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
205 nfs_idmap_delete(clp); 228 nfs_idmap_delete(clp);
206 229
207 rpc_destroy_wait_queue(&clp->cl_rpcwaitq); 230 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
208} 231}
232
233/* idr_remove_all is not needed as all ids are removed by nfs_put_client */
234void nfs_cleanup_cb_ident_idr(void)
235{
236 idr_destroy(&cb_ident_idr);
237}
238
239/* nfs_client_lock held */
240static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
241{
242 if (clp->cl_cb_ident)
243 idr_remove(&cb_ident_idr, clp->cl_cb_ident);
244}
245
246static void pnfs_init_server(struct nfs_server *server)
247{
248 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
249}
250
209#else 251#else
210static void nfs4_shutdown_client(struct nfs_client *clp) 252static void nfs4_shutdown_client(struct nfs_client *clp)
211{ 253{
212} 254}
255
256void nfs_cleanup_cb_ident_idr(void)
257{
258}
259
260static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
261{
262}
263
264static void pnfs_init_server(struct nfs_server *server)
265{
266}
267
213#endif /* CONFIG_NFS_V4 */ 268#endif /* CONFIG_NFS_V4 */
214 269
215/* 270/*
@@ -248,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp)
248 303
249 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { 304 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
250 list_del(&clp->cl_share_link); 305 list_del(&clp->cl_share_link);
306 nfs_cb_idr_remove_locked(clp);
251 spin_unlock(&nfs_client_lock); 307 spin_unlock(&nfs_client_lock);
252 308
253 BUG_ON(!list_empty(&clp->cl_superblocks)); 309 BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -363,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
363 return 0; 419 return 0;
364} 420}
365 421
366/* 422/* Common match routine for v4.0 and v4.1 callback services */
367 * Find a client by IP address and protocol version 423bool
368 * - returns NULL if no such client 424nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
369 */ 425 u32 minorversion)
370struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
371{
372 struct nfs_client *clp;
373
374 spin_lock(&nfs_client_lock);
375 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
376 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
377
378 /* Don't match clients that failed to initialise properly */
379 if (!(clp->cl_cons_state == NFS_CS_READY ||
380 clp->cl_cons_state == NFS_CS_SESSION_INITING))
381 continue;
382
383 /* Different NFS versions cannot share the same nfs_client */
384 if (clp->rpc_ops->version != nfsversion)
385 continue;
386
387 /* Match only the IP address, not the port number */
388 if (!nfs_sockaddr_match_ipaddr(addr, clap))
389 continue;
390
391 atomic_inc(&clp->cl_count);
392 spin_unlock(&nfs_client_lock);
393 return clp;
394 }
395 spin_unlock(&nfs_client_lock);
396 return NULL;
397}
398
399/*
400 * Find a client by IP address and protocol version
401 * - returns NULL if no such client
402 */
403struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
404{ 426{
405 struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; 427 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
406 u32 nfsvers = clp->rpc_ops->version;
407 428
408 spin_lock(&nfs_client_lock); 429 /* Don't match clients that failed to initialise */
409 list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { 430 if (!(clp->cl_cons_state == NFS_CS_READY ||
410 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 431 clp->cl_cons_state == NFS_CS_SESSION_INITING))
432 return false;
411 433
412 /* Don't match clients that failed to initialise properly */ 434 /* Match the version and minorversion */
413 if (clp->cl_cons_state != NFS_CS_READY) 435 if (clp->rpc_ops->version != 4 ||
414 continue; 436 clp->cl_minorversion != minorversion)
437 return false;
415 438
416 /* Different NFS versions cannot share the same nfs_client */ 439 /* Match only the IP address, not the port number */
417 if (clp->rpc_ops->version != nfsvers) 440 if (!nfs_sockaddr_match_ipaddr(addr, clap))
418 continue; 441 return false;
419 442
420 /* Match only the IP address, not the port number */ 443 return true;
421 if (!nfs_sockaddr_match_ipaddr(sap, clap))
422 continue;
423
424 atomic_inc(&clp->cl_count);
425 spin_unlock(&nfs_client_lock);
426 return clp;
427 }
428 spin_unlock(&nfs_client_lock);
429 return NULL;
430} 444}
431 445
432/* 446/*
@@ -988,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
988 target->options = source->options; 1002 target->options = source->options;
989} 1003}
990 1004
1005static void nfs_server_insert_lists(struct nfs_server *server)
1006{
1007 struct nfs_client *clp = server->nfs_client;
1008
1009 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list);
1012 spin_unlock(&nfs_client_lock);
1013
1014}
1015
1016static void nfs_server_remove_lists(struct nfs_server *server)
1017{
1018 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link);
1020 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock);
1022
1023 synchronize_rcu();
1024}
1025
991/* 1026/*
992 * Allocate and initialise a server record 1027 * Allocate and initialise a server record
993 */ 1028 */
@@ -1004,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
1004 /* Zero out the NFS state stuff */ 1039 /* Zero out the NFS state stuff */
1005 INIT_LIST_HEAD(&server->client_link); 1040 INIT_LIST_HEAD(&server->client_link);
1006 INIT_LIST_HEAD(&server->master_link); 1041 INIT_LIST_HEAD(&server->master_link);
1042 INIT_LIST_HEAD(&server->delegations);
1007 1043
1008 atomic_set(&server->active, 0); 1044 atomic_set(&server->active, 0);
1009 1045
@@ -1019,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void)
1019 return NULL; 1055 return NULL;
1020 } 1056 }
1021 1057
1058 pnfs_init_server(server);
1059
1022 return server; 1060 return server;
1023} 1061}
1024 1062
@@ -1029,11 +1067,8 @@ void nfs_free_server(struct nfs_server *server)
1029{ 1067{
1030 dprintk("--> nfs_free_server()\n"); 1068 dprintk("--> nfs_free_server()\n");
1031 1069
1070 nfs_server_remove_lists(server);
1032 unset_pnfs_layoutdriver(server); 1071 unset_pnfs_layoutdriver(server);
1033 spin_lock(&nfs_client_lock);
1034 list_del(&server->client_link);
1035 list_del(&server->master_link);
1036 spin_unlock(&nfs_client_lock);
1037 1072
1038 if (server->destroy != NULL) 1073 if (server->destroy != NULL)
1039 server->destroy(server); 1074 server->destroy(server);
@@ -1108,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1108 (unsigned long long) server->fsid.major, 1143 (unsigned long long) server->fsid.major,
1109 (unsigned long long) server->fsid.minor); 1144 (unsigned long long) server->fsid.minor);
1110 1145
1111 spin_lock(&nfs_client_lock); 1146 nfs_server_insert_lists(server);
1112 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1113 list_add_tail(&server->master_link, &nfs_volume_list);
1114 spin_unlock(&nfs_client_lock);
1115
1116 server->mount_time = jiffies; 1147 server->mount_time = jiffies;
1117 nfs_free_fattr(fattr); 1148 nfs_free_fattr(fattr);
1118 return server; 1149 return server;
@@ -1125,6 +1156,101 @@ error:
1125 1156
1126#ifdef CONFIG_NFS_V4 1157#ifdef CONFIG_NFS_V4
1127/* 1158/*
1159 * NFSv4.0 callback thread helper
1160 *
1161 * Find a client by IP address, protocol version, and minorversion
1162 *
1163 * Called from the pg_authenticate method. The callback identifier
1164 * is not used as it has not been decoded.
1165 *
1166 * Returns NULL if no such client
1167 */
1168struct nfs_client *
1169nfs4_find_client_no_ident(const struct sockaddr *addr)
1170{
1171 struct nfs_client *clp;
1172
1173 spin_lock(&nfs_client_lock);
1174 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1175 if (nfs4_cb_match_client(addr, clp, 0) == false)
1176 continue;
1177 atomic_inc(&clp->cl_count);
1178 spin_unlock(&nfs_client_lock);
1179 return clp;
1180 }
1181 spin_unlock(&nfs_client_lock);
1182 return NULL;
1183}
1184
1185/*
1186 * NFSv4.0 callback thread helper
1187 *
1188 * Find a client by callback identifier
1189 */
1190struct nfs_client *
1191nfs4_find_client_ident(int cb_ident)
1192{
1193 struct nfs_client *clp;
1194
1195 spin_lock(&nfs_client_lock);
1196 clp = idr_find(&cb_ident_idr, cb_ident);
1197 if (clp)
1198 atomic_inc(&clp->cl_count);
1199 spin_unlock(&nfs_client_lock);
1200 return clp;
1201}
1202
1203#if defined(CONFIG_NFS_V4_1)
1204/*
1205 * NFSv4.1 callback thread helper
1206 * For CB_COMPOUND calls, find a client by IP address, protocol version,
1207 * minorversion, and sessionID
1208 *
1209 * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
1210 * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
1211 * can arrive before the callback sessionid is set. For CB_NULL calls,
 1212 * find a client by IP address, protocol version, and minorversion.
1213 *
1214 * Returns NULL if no such client
1215 */
1216struct nfs_client *
1217nfs4_find_client_sessionid(const struct sockaddr *addr,
1218 struct nfs4_sessionid *sid, int is_cb_compound)
1219{
1220 struct nfs_client *clp;
1221
1222 spin_lock(&nfs_client_lock);
1223 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
1224 if (nfs4_cb_match_client(addr, clp, 1) == false)
1225 continue;
1226
1227 if (!nfs4_has_session(clp))
1228 continue;
1229
 1230 /* Match sessionid unless cb_null call */
1231 if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
1232 sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
1233 continue;
1234
1235 atomic_inc(&clp->cl_count);
1236 spin_unlock(&nfs_client_lock);
1237 return clp;
1238 }
1239 spin_unlock(&nfs_client_lock);
1240 return NULL;
1241}
1242
1243#else /* CONFIG_NFS_V4_1 */
1244
1245struct nfs_client *
1246nfs4_find_client_sessionid(const struct sockaddr *addr,
1247 struct nfs4_sessionid *sid, int is_cb_compound)
1248{
1249 return NULL;
1250}
1251#endif /* CONFIG_NFS_V4_1 */
1252
1253/*
1128 * Initialize the NFS4 callback service 1254 * Initialize the NFS4 callback service
1129 */ 1255 */
1130static int nfs4_init_callback(struct nfs_client *clp) 1256static int nfs4_init_callback(struct nfs_client *clp)
@@ -1342,11 +1468,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1342 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1468 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1343 server->namelen = NFS4_MAXNAMLEN; 1469 server->namelen = NFS4_MAXNAMLEN;
1344 1470
1345 spin_lock(&nfs_client_lock); 1471 nfs_server_insert_lists(server);
1346 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1347 list_add_tail(&server->master_link, &nfs_volume_list);
1348 spin_unlock(&nfs_client_lock);
1349
1350 server->mount_time = jiffies; 1472 server->mount_time = jiffies;
1351out: 1473out:
1352 nfs_free_fattr(fattr); 1474 nfs_free_fattr(fattr);
@@ -1551,11 +1673,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1551 if (error < 0) 1673 if (error < 0)
1552 goto out_free_server; 1674 goto out_free_server;
1553 1675
1554 spin_lock(&nfs_client_lock); 1676 nfs_server_insert_lists(server);
1555 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1556 list_add_tail(&server->master_link, &nfs_volume_list);
1557 spin_unlock(&nfs_client_lock);
1558
1559 server->mount_time = jiffies; 1677 server->mount_time = jiffies;
1560 1678
1561 nfs_free_fattr(fattr_fsinfo); 1679 nfs_free_fattr(fattr_fsinfo);
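
The cb_ident allocation above uses the classic two-step idr protocol of this era: idr_pre_get() reserves memory outside the spinlock, idr_get_new() inserts under it, and -EAGAIN (the reservation was consumed by another caller) loops back to preallocate again. A toy userspace model of that retry discipline; the fixed table and helpers are stand-ins, not the kernel idr:

#include <errno.h>
#include <stdio.h>

#define TABLE_SIZE 8

static void *table[TABLE_SIZE];	/* toy stand-in for the idr tree */
static int preallocated;	/* models idr_pre_get()'s reserved node */

static int toy_idr_pre_get(void)
{
	preallocated = 1;	/* pretend the allocation succeeded */
	return 1;
}

static int toy_idr_get_new(void *ptr, int *id)
{
	int i;

	if (!preallocated)
		return -EAGAIN;	/* reservation gone: caller must retry */
	preallocated = 0;	/* consume the reservation */
	for (i = 1; i < TABLE_SIZE; i++) {
		if (table[i] == NULL) {
			table[i] = ptr;
			*id = i;
			return 0;
		}
	}
	return -ENOSPC;
}

static int get_cb_ident(void *clp, int *cb_ident)
{
	int ret;
retry:
	if (!toy_idr_pre_get())	/* preallocate outside the lock */
		return -ENOMEM;
	/* the kernel takes nfs_client_lock around this insert */
	ret = toy_idr_get_new(clp, cb_ident);
	if (ret == -EAGAIN)
		goto retry;
	return ret;
}

int main(void)
{
	int id;

	if (get_cb_ident((void *)0x1, &id) == 0)
		printf("cb_ident = %d\n", id);
	return 0;
}
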
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1fd62fc49be3..364e4328f392 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -40,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
40 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 40 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
41} 41}
42 42
43/**
44 * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
45 * @delegation: delegation to process
46 *
47 */
43void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
44{ 49{
45 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); 50 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
46} 51}
47 52
53/**
54 * nfs_have_delegation - check if inode has a delegation
55 * @inode: inode to check
56 * @flags: delegation types to check for
57 *
58 * Returns one if inode has the indicated delegation, otherwise zero.
59 */
48int nfs_have_delegation(struct inode *inode, fmode_t flags) 60int nfs_have_delegation(struct inode *inode, fmode_t flags)
49{ 61{
50 struct nfs_delegation *delegation; 62 struct nfs_delegation *delegation;
@@ -119,10 +131,15 @@ again:
119 return 0; 131 return 0;
120} 132}
121 133
122/* 134/**
123 * Set up a delegation on an inode 135 * nfs_inode_reclaim_delegation - process a delegation reclaim request
136 * @inode: inode to process
137 * @cred: credential to use for request
138 * @res: new delegation state from server
139 *
124 */ 140 */
125void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 141void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
142 struct nfs_openres *res)
126{ 143{
127 struct nfs_delegation *delegation; 144 struct nfs_delegation *delegation;
128 struct rpc_cred *oldcred = NULL; 145 struct rpc_cred *oldcred = NULL;
@@ -175,38 +192,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
175 return inode; 192 return inode;
176} 193}
177 194
178static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, 195static struct nfs_delegation *
179 const nfs4_stateid *stateid, 196nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 struct nfs_client *clp) 197 struct nfs_server *server)
181{ 198{
182 struct nfs_delegation *delegation = 199 struct nfs_delegation *delegation =
183 rcu_dereference_protected(nfsi->delegation, 200 rcu_dereference_protected(nfsi->delegation,
184 lockdep_is_held(&clp->cl_lock)); 201 lockdep_is_held(&server->nfs_client->cl_lock));
185 202
186 if (delegation == NULL) 203 if (delegation == NULL)
187 goto nomatch; 204 goto nomatch;
205
188 spin_lock(&delegation->lock); 206 spin_lock(&delegation->lock);
189 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
190 sizeof(delegation->stateid.data)) != 0)
191 goto nomatch_unlock;
192 list_del_rcu(&delegation->super_list); 207 list_del_rcu(&delegation->super_list);
193 delegation->inode = NULL; 208 delegation->inode = NULL;
194 nfsi->delegation_state = 0; 209 nfsi->delegation_state = 0;
195 rcu_assign_pointer(nfsi->delegation, NULL); 210 rcu_assign_pointer(nfsi->delegation, NULL);
196 spin_unlock(&delegation->lock); 211 spin_unlock(&delegation->lock);
197 return delegation; 212 return delegation;
198nomatch_unlock:
199 spin_unlock(&delegation->lock);
200nomatch: 213nomatch:
201 return NULL; 214 return NULL;
202} 215}
203 216
204/* 217static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
205 * Set up a delegation on an inode 218 struct nfs_server *server)
219{
220 struct nfs_client *clp = server->nfs_client;
221 struct nfs_delegation *delegation;
222
223 spin_lock(&clp->cl_lock);
224 delegation = nfs_detach_delegation_locked(nfsi, server);
225 spin_unlock(&clp->cl_lock);
226 return delegation;
227}
228
229/**
230 * nfs_inode_set_delegation - set up a delegation on an inode
231 * @inode: inode to which delegation applies
232 * @cred: cred to use for subsequent delegation processing
233 * @res: new delegation state from server
234 *
235 * Returns zero on success, or a negative errno value.
206 */ 236 */
207int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 237int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
208{ 238{
209 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 239 struct nfs_server *server = NFS_SERVER(inode);
240 struct nfs_client *clp = server->nfs_client;
210 struct nfs_inode *nfsi = NFS_I(inode); 241 struct nfs_inode *nfsi = NFS_I(inode);
211 struct nfs_delegation *delegation, *old_delegation; 242 struct nfs_delegation *delegation, *old_delegation;
212 struct nfs_delegation *freeme = NULL; 243 struct nfs_delegation *freeme = NULL;
@@ -227,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
227 258
228 spin_lock(&clp->cl_lock); 259 spin_lock(&clp->cl_lock);
229 old_delegation = rcu_dereference_protected(nfsi->delegation, 260 old_delegation = rcu_dereference_protected(nfsi->delegation,
230 lockdep_is_held(&clp->cl_lock)); 261 lockdep_is_held(&clp->cl_lock));
231 if (old_delegation != NULL) { 262 if (old_delegation != NULL) {
232 if (memcmp(&delegation->stateid, &old_delegation->stateid, 263 if (memcmp(&delegation->stateid, &old_delegation->stateid,
233 sizeof(old_delegation->stateid)) == 0 && 264 sizeof(old_delegation->stateid)) == 0 &&
@@ -246,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
246 delegation = NULL; 277 delegation = NULL;
247 goto out; 278 goto out;
248 } 279 }
249 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); 280 freeme = nfs_detach_delegation_locked(nfsi, server);
250 } 281 }
251 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 282 list_add_rcu(&delegation->super_list, &server->delegations);
252 nfsi->delegation_state = delegation->type; 283 nfsi->delegation_state = delegation->type;
253 rcu_assign_pointer(nfsi->delegation, delegation); 284 rcu_assign_pointer(nfsi->delegation, delegation);
254 delegation = NULL; 285 delegation = NULL;
@@ -290,73 +321,85 @@ out:
290 return err; 321 return err;
291} 322}
292 323
293/* 324/**
294 * Return all delegations that have been marked for return 325 * nfs_client_return_marked_delegations - return previously marked delegations
326 * @clp: nfs_client to process
327 *
328 * Returns zero on success, or a negative errno value.
295 */ 329 */
296int nfs_client_return_marked_delegations(struct nfs_client *clp) 330int nfs_client_return_marked_delegations(struct nfs_client *clp)
297{ 331{
298 struct nfs_delegation *delegation; 332 struct nfs_delegation *delegation;
333 struct nfs_server *server;
299 struct inode *inode; 334 struct inode *inode;
300 int err = 0; 335 int err = 0;
301 336
302restart: 337restart:
303 rcu_read_lock(); 338 rcu_read_lock();
304 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 339 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
305 if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) 340 list_for_each_entry_rcu(delegation, &server->delegations,
306 continue; 341 super_list) {
307 inode = nfs_delegation_grab_inode(delegation); 342 if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
308 if (inode == NULL) 343 &delegation->flags))
309 continue; 344 continue;
310 spin_lock(&clp->cl_lock); 345 inode = nfs_delegation_grab_inode(delegation);
311 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); 346 if (inode == NULL)
312 spin_unlock(&clp->cl_lock); 347 continue;
313 rcu_read_unlock(); 348 delegation = nfs_detach_delegation(NFS_I(inode),
314 if (delegation != NULL) { 349 server);
315 filemap_flush(inode->i_mapping); 350 rcu_read_unlock();
316 err = __nfs_inode_return_delegation(inode, delegation, 0); 351
352 if (delegation != NULL) {
353 filemap_flush(inode->i_mapping);
354 err = __nfs_inode_return_delegation(inode,
355 delegation, 0);
356 }
357 iput(inode);
358 if (!err)
359 goto restart;
360 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
361 return err;
317 } 362 }
318 iput(inode);
319 if (!err)
320 goto restart;
321 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
322 return err;
323 } 363 }
324 rcu_read_unlock(); 364 rcu_read_unlock();
325 return 0; 365 return 0;
326} 366}
327 367
328/* 368/**
329 * This function returns the delegation without reclaiming opens 369 * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
330 * or protecting against delegation reclaims. 370 * @inode: inode to process
331 * It is therefore really only safe to be called from 371 *
332 * nfs4_clear_inode() 372 * Does not protect against delegation reclaims, therefore really only safe
373 * to be called from nfs4_clear_inode().
333 */ 374 */
334void nfs_inode_return_delegation_noreclaim(struct inode *inode) 375void nfs_inode_return_delegation_noreclaim(struct inode *inode)
335{ 376{
336 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 377 struct nfs_server *server = NFS_SERVER(inode);
337 struct nfs_inode *nfsi = NFS_I(inode); 378 struct nfs_inode *nfsi = NFS_I(inode);
338 struct nfs_delegation *delegation; 379 struct nfs_delegation *delegation;
339 380
340 if (rcu_access_pointer(nfsi->delegation) != NULL) { 381 if (rcu_access_pointer(nfsi->delegation) != NULL) {
341 spin_lock(&clp->cl_lock); 382 delegation = nfs_detach_delegation(nfsi, server);
342 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
343 spin_unlock(&clp->cl_lock);
344 if (delegation != NULL) 383 if (delegation != NULL)
345 nfs_do_return_delegation(inode, delegation, 0); 384 nfs_do_return_delegation(inode, delegation, 0);
346 } 385 }
347} 386}
348 387
388/**
389 * nfs_inode_return_delegation - synchronously return a delegation
390 * @inode: inode to process
391 *
392 * Returns zero on success, or a negative errno value.
393 */
349int nfs_inode_return_delegation(struct inode *inode) 394int nfs_inode_return_delegation(struct inode *inode)
350{ 395{
351 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 396 struct nfs_server *server = NFS_SERVER(inode);
352 struct nfs_inode *nfsi = NFS_I(inode); 397 struct nfs_inode *nfsi = NFS_I(inode);
353 struct nfs_delegation *delegation; 398 struct nfs_delegation *delegation;
354 int err = 0; 399 int err = 0;
355 400
356 if (rcu_access_pointer(nfsi->delegation) != NULL) { 401 if (rcu_access_pointer(nfsi->delegation) != NULL) {
357 spin_lock(&clp->cl_lock); 402 delegation = nfs_detach_delegation(nfsi, server);
358 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
359 spin_unlock(&clp->cl_lock);
360 if (delegation != NULL) { 403 if (delegation != NULL) {
361 nfs_wb_all(inode); 404 nfs_wb_all(inode);
362 err = __nfs_inode_return_delegation(inode, delegation, 1); 405 err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -365,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
365 return err; 408 return err;
366} 409}
367 410
368static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation) 411static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
369{ 412{
413 struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
414
370 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 415 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
371 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 416 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
372} 417}
373 418
374/* 419/**
375 * Return all delegations associated to a super block 420 * nfs_super_return_all_delegations - return delegations for one superblock
421 * @sb: sb to process
422 *
376 */ 423 */
377void nfs_super_return_all_delegations(struct super_block *sb) 424void nfs_super_return_all_delegations(struct super_block *sb)
378{ 425{
379 struct nfs_client *clp = NFS_SB(sb)->nfs_client; 426 struct nfs_server *server = NFS_SB(sb);
427 struct nfs_client *clp = server->nfs_client;
380 struct nfs_delegation *delegation; 428 struct nfs_delegation *delegation;
381 429
382 if (clp == NULL) 430 if (clp == NULL)
383 return; 431 return;
432
384 rcu_read_lock(); 433 rcu_read_lock();
385 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 434 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
386 spin_lock(&delegation->lock); 435 spin_lock(&delegation->lock);
387 if (delegation->inode != NULL && delegation->inode->i_sb == sb) 436 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
388 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
389 spin_unlock(&delegation->lock); 437 spin_unlock(&delegation->lock);
390 } 438 }
391 rcu_read_unlock(); 439 rcu_read_unlock();
440
392 if (nfs_client_return_marked_delegations(clp) != 0) 441 if (nfs_client_return_marked_delegations(clp) != 0)
393 nfs4_schedule_state_manager(clp); 442 nfs4_schedule_state_manager(clp);
394} 443}
395 444
396static 445static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
397void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags) 446 fmode_t flags)
398{ 447{
399 struct nfs_delegation *delegation; 448 struct nfs_delegation *delegation;
400 449
401 rcu_read_lock(); 450 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
402 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
403 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 451 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
404 continue; 452 continue;
405 if (delegation->type & flags) 453 if (delegation->type & flags)
406 nfs_mark_return_delegation(clp, delegation); 454 nfs_mark_return_delegation(delegation);
407 } 455 }
456}
457
458static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
459 fmode_t flags)
460{
461 struct nfs_server *server;
462
463 rcu_read_lock();
464 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
465 nfs_mark_return_all_delegation_types(server, flags);
408 rcu_read_unlock(); 466 rcu_read_unlock();
409} 467}
410 468
@@ -419,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
419 nfs4_schedule_state_manager(clp); 477 nfs4_schedule_state_manager(clp);
420} 478}
421 479
480/**
 481 * nfs_expire_all_delegation_types - return all delegations of the given types
482 * @clp: client to process
483 * @flags: delegation types to expire
484 *
485 */
422void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) 486void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
423{ 487{
424 nfs_client_mark_return_all_delegation_types(clp, flags); 488 nfs_client_mark_return_all_delegation_types(clp, flags);
425 nfs_delegation_run_state_manager(clp); 489 nfs_delegation_run_state_manager(clp);
426} 490}
427 491
492/**
 493 * nfs_expire_all_delegations - return all of a client's delegations
494 * @clp: client to process
495 *
496 */
428void nfs_expire_all_delegations(struct nfs_client *clp) 497void nfs_expire_all_delegations(struct nfs_client *clp)
429{ 498{
430 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); 499 nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
431} 500}
432 501
433/* 502/**
434 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. 503 * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
504 * @clp: client to process
505 *
435 */ 506 */
436void nfs_handle_cb_pathdown(struct nfs_client *clp) 507void nfs_handle_cb_pathdown(struct nfs_client *clp)
437{ 508{
@@ -440,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
440 nfs_client_mark_return_all_delegations(clp); 511 nfs_client_mark_return_all_delegations(clp);
441} 512}
442 513
443static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp) 514static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
444{ 515{
445 struct nfs_delegation *delegation; 516 struct nfs_delegation *delegation;
446 517
447 rcu_read_lock(); 518 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
448 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
449 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 519 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
450 continue; 520 continue;
451 nfs_mark_return_delegation(clp, delegation); 521 nfs_mark_return_delegation(delegation);
452 } 522 }
453 rcu_read_unlock();
454} 523}
455 524
525/**
526 * nfs_expire_unreferenced_delegations - Eliminate unused delegations
527 * @clp: nfs_client to process
528 *
529 */
456void nfs_expire_unreferenced_delegations(struct nfs_client *clp) 530void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
457{ 531{
458 nfs_client_mark_return_unreferenced_delegations(clp); 532 struct nfs_server *server;
533
534 rcu_read_lock();
535 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
536 nfs_mark_return_unreferenced_delegations(server);
537 rcu_read_unlock();
538
459 nfs_delegation_run_state_manager(clp); 539 nfs_delegation_run_state_manager(clp);
460} 540}
461 541
462/* 542/**
463 * Asynchronous delegation recall! 543 * nfs_async_inode_return_delegation - asynchronously return a delegation
544 * @inode: inode to process
545 * @stateid: state ID information from CB_RECALL arguments
546 *
547 * Returns zero on success, or a negative errno value.
464 */ 548 */
465int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) 549int nfs_async_inode_return_delegation(struct inode *inode,
550 const nfs4_stateid *stateid)
466{ 551{
467 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 552 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
468 struct nfs_delegation *delegation; 553 struct nfs_delegation *delegation;
@@ -474,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
474 rcu_read_unlock(); 559 rcu_read_unlock();
475 return -ENOENT; 560 return -ENOENT;
476 } 561 }
477 562 nfs_mark_return_delegation(delegation);
478 nfs_mark_return_delegation(clp, delegation);
479 rcu_read_unlock(); 563 rcu_read_unlock();
564
480 nfs_delegation_run_state_manager(clp); 565 nfs_delegation_run_state_manager(clp);
481 return 0; 566 return 0;
482} 567}
483 568
484/* 569static struct inode *
485 * Retrieve the inode associated with a delegation 570nfs_delegation_find_inode_server(struct nfs_server *server,
486 */ 571 const struct nfs_fh *fhandle)
487struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
488{ 572{
489 struct nfs_delegation *delegation; 573 struct nfs_delegation *delegation;
490 struct inode *res = NULL; 574 struct inode *res = NULL;
491 rcu_read_lock(); 575
492 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 576 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
493 spin_lock(&delegation->lock); 577 spin_lock(&delegation->lock);
494 if (delegation->inode != NULL && 578 if (delegation->inode != NULL &&
495 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 579 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -499,49 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
499 if (res != NULL) 583 if (res != NULL)
500 break; 584 break;
501 } 585 }
586 return res;
587}
588
589/**
590 * nfs_delegation_find_inode - retrieve the inode associated with a delegation
591 * @clp: client state handle
592 * @fhandle: filehandle from a delegation recall
593 *
594 * Returns pointer to inode matching "fhandle," or NULL if a matching inode
595 * cannot be found.
596 */
597struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
598 const struct nfs_fh *fhandle)
599{
600 struct nfs_server *server;
601 struct inode *res = NULL;
602
603 rcu_read_lock();
604 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
605 res = nfs_delegation_find_inode_server(server, fhandle);
606 if (res != NULL)
607 break;
608 }
502 rcu_read_unlock(); 609 rcu_read_unlock();
503 return res; 610 return res;
504} 611}
505 612
506/* 613static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
507 * Mark all delegations as needing to be reclaimed 614{
615 struct nfs_delegation *delegation;
616
617 list_for_each_entry_rcu(delegation, &server->delegations, super_list)
618 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
619}
620
621/**
622 * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
623 * @clp: nfs_client to process
624 *
508 */ 625 */
509void nfs_delegation_mark_reclaim(struct nfs_client *clp) 626void nfs_delegation_mark_reclaim(struct nfs_client *clp)
510{ 627{
511 struct nfs_delegation *delegation; 628 struct nfs_server *server;
629
512 rcu_read_lock(); 630 rcu_read_lock();
513 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) 631 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
514 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 632 nfs_delegation_mark_reclaim_server(server);
515 rcu_read_unlock(); 633 rcu_read_unlock();
516} 634}
517 635
518/* 636/**
519 * Reap all unclaimed delegations after reboot recovery is done 637 * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
638 * @clp: nfs_client to process
639 *
520 */ 640 */
521void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 641void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
522{ 642{
523 struct nfs_delegation *delegation; 643 struct nfs_delegation *delegation;
644 struct nfs_server *server;
524 struct inode *inode; 645 struct inode *inode;
646
525restart: 647restart:
526 rcu_read_lock(); 648 rcu_read_lock();
527 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 649 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
528 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) 650 list_for_each_entry_rcu(delegation, &server->delegations,
529 continue; 651 super_list) {
530 inode = nfs_delegation_grab_inode(delegation); 652 if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
531 if (inode == NULL) 653 &delegation->flags) == 0)
532 continue; 654 continue;
533 spin_lock(&clp->cl_lock); 655 inode = nfs_delegation_grab_inode(delegation);
534 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); 656 if (inode == NULL)
535 spin_unlock(&clp->cl_lock); 657 continue;
536 rcu_read_unlock(); 658 delegation = nfs_detach_delegation(NFS_I(inode),
537 if (delegation != NULL) 659 server);
538 nfs_free_delegation(delegation); 660 rcu_read_unlock();
539 iput(inode); 661
540 goto restart; 662 if (delegation != NULL)
663 nfs_free_delegation(delegation);
664 iput(inode);
665 goto restart;
666 }
541 } 667 }
542 rcu_read_unlock(); 668 rcu_read_unlock();
543} 669}
544 670
671/**
672 * nfs_delegations_present - check for existence of delegations
673 * @clp: client state handle
674 *
675 * Returns one if there are any nfs_delegation structures attached
676 * to this nfs_client.
677 */
678int nfs_delegations_present(struct nfs_client *clp)
679{
680 struct nfs_server *server;
681 int ret = 0;
682
683 rcu_read_lock();
684 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
685 if (!list_empty(&server->delegations)) {
686 ret = 1;
687 break;
688 }
689 rcu_read_unlock();
690 return ret;
691}
692
693/**
694 * nfs4_copy_delegation_stateid - Copy inode's state ID information
695 * @dst: stateid data structure to fill in
696 * @inode: inode to check
697 *
 698 * Returns one and fills in "dst->data" if inode had a delegation,
699 * otherwise zero is returned.
700 */
545int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 701int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
546{ 702{
547 struct nfs_inode *nfsi = NFS_I(inode); 703 struct nfs_inode *nfsi = NFS_I(inode);
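
The structural change running through delegation.c above: delegations move from the single cl_delegations list on the nfs_client to a per-superblock server->delegations list, so every client-wide operation becomes a nested RCU walk over cl_superblocks and then each server's list, with the detach helpers split into locked and locking variants. A toy model of the new walk shape, using plain arrays where the kernel uses RCU-protected lists:

#include <stdio.h>

struct toy_server {
	int ndelegations;	/* stands in for server->delegations */
};

/* Walk every per-server list, as nfs_delegations_present() now does. */
static int delegations_present(const struct toy_server *servers, int n)
{
	int i;

	/* rcu_read_lock() in the kernel version */
	for (i = 0; i < n; i++)
		if (servers[i].ndelegations > 0)
			return 1;	/* first non-empty list wins */
	/* rcu_read_unlock() */
	return 0;
}

int main(void)
{
	const struct toy_server servers[] = { { 0 }, { 3 }, { 0 } };

	printf("%d\n", delegations_present(servers, 3)); /* prints 1 */
	return 0;
}
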
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2026304bda19..d9322e490c56 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
44void nfs_expire_unreferenced_delegations(struct nfs_client *clp); 44void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
45void nfs_handle_cb_pathdown(struct nfs_client *clp); 45void nfs_handle_cb_pathdown(struct nfs_client *clp);
46int nfs_client_return_marked_delegations(struct nfs_client *clp); 46int nfs_client_return_marked_delegations(struct nfs_client *clp);
47int nfs_delegations_present(struct nfs_client *clp);
47 48
48void nfs_delegation_mark_reclaim(struct nfs_client *clp); 49void nfs_delegation_mark_reclaim(struct nfs_client *clp);
49void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 50void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d33da530097a..abe4f0c8dc5f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,8 +33,8 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
37#include <linux/kmemleak.h> 36#include <linux/kmemleak.h>
37#include <linux/xattr.h>
38 38
39#include "delegation.h" 39#include "delegation.h"
40#include "iostat.h" 40#include "iostat.h"
@@ -125,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = {
125 .permission = nfs_permission, 125 .permission = nfs_permission,
126 .getattr = nfs_getattr, 126 .getattr = nfs_getattr,
127 .setattr = nfs_setattr, 127 .setattr = nfs_setattr,
128 .getxattr = nfs4_getxattr, 128 .getxattr = generic_getxattr,
129 .setxattr = nfs4_setxattr, 129 .setxattr = generic_setxattr,
130 .listxattr = nfs4_listxattr, 130 .listxattr = generic_listxattr,
131 .removexattr = generic_removexattr,
131}; 132};
132 133
133#endif /* CONFIG_NFS_V4 */ 134#endif /* CONFIG_NFS_V4 */
@@ -172,7 +173,7 @@ struct nfs_cache_array {
172 struct nfs_cache_array_entry array[0]; 173 struct nfs_cache_array_entry array[0];
173}; 174};
174 175
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 176typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
176typedef struct { 177typedef struct {
177 struct file *file; 178 struct file *file;
178 struct page *page; 179 struct page *page;
@@ -378,14 +379,14 @@ error:
378 return error; 379 return error;
379} 380}
380 381
381/* Fill in an entry based on the xdr code stored in desc->page */ 382static int xdr_decode(nfs_readdir_descriptor_t *desc,
382static 383 struct nfs_entry *entry, struct xdr_stream *xdr)
383int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
384{ 384{
385 __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus); 385 int error;
386 if (IS_ERR(p))
387 return PTR_ERR(p);
388 386
387 error = desc->decode(xdr, entry, desc->plus);
388 if (error)
389 return error;
389 entry->fattr->time_start = desc->timestamp; 390 entry->fattr->time_start = desc->timestamp;
390 entry->fattr->gencount = desc->gencount; 391 entry->fattr->gencount = desc->gencount;
391 return 0; 392 return 0;
@@ -459,25 +460,26 @@ out:
459/* Perform conversion from xdr to cache array */ 460/* Perform conversion from xdr to cache array */
460static 461static
461int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, 462int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
462 void *xdr_page, struct page *page, unsigned int buflen) 463 struct page **xdr_pages, struct page *page, unsigned int buflen)
463{ 464{
464 struct xdr_stream stream; 465 struct xdr_stream stream;
465 struct xdr_buf buf; 466 struct xdr_buf buf = {
466 __be32 *ptr = xdr_page; 467 .pages = xdr_pages,
468 .page_len = buflen,
469 .buflen = buflen,
470 .len = buflen,
471 };
472 struct page *scratch;
467 struct nfs_cache_array *array; 473 struct nfs_cache_array *array;
468 unsigned int count = 0; 474 unsigned int count = 0;
469 int status; 475 int status;
470 476
471 buf.head->iov_base = xdr_page; 477 scratch = alloc_page(GFP_KERNEL);
472 buf.head->iov_len = buflen; 478 if (scratch == NULL)
473 buf.tail->iov_len = 0; 479 return -ENOMEM;
474 buf.page_base = 0;
475 buf.page_len = 0;
476 buf.buflen = buf.head->iov_len;
477 buf.len = buf.head->iov_len;
478
479 xdr_init_decode(&stream, &buf, ptr);
480 480
481 xdr_init_decode(&stream, &buf, NULL);
482 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
481 483
482 do { 484 do {
483 status = xdr_decode(desc, entry, &stream); 485 status = xdr_decode(desc, entry, &stream);
@@ -506,6 +508,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
506 } else 508 } else
507 status = PTR_ERR(array); 509 status = PTR_ERR(array);
508 } 510 }
511
512 put_page(scratch);
509 return status; 513 return status;
510} 514}
511 515
@@ -521,7 +525,6 @@ static
 void nfs_readdir_free_large_page(void *ptr, struct page **pages,
 		unsigned int npages)
 {
-	vm_unmap_ram(ptr, npages);
 	nfs_readdir_free_pagearray(pages, npages);
 }
 
@@ -530,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
  * to nfs_readdir_free_large_page
  */
 static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-	void *ptr;
 	unsigned int i;
 
 	for (i = 0; i < npages; i++) {
@@ -541,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 			goto out_freepages;
 		pages[i] = page;
 	}
+	return 0;
 
-	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
-	if (!IS_ERR_OR_NULL(ptr))
-		return ptr;
 out_freepages:
 	nfs_readdir_free_pagearray(pages, i);
-	return NULL;
+	return -ENOMEM;
 }
 
 static
@@ -566,6 +566,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
+	entry.server = NFS_SERVER(inode);
 	if (entry.fh == NULL || entry.fattr == NULL)
 		goto out;
 
@@ -577,8 +578,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
-	pages_ptr = nfs_readdir_large_page(pages, array_size);
-	if (!pages_ptr)
+	status = nfs_readdir_large_page(pages, array_size);
+	if (status < 0)
 		goto out_release_array;
 	do {
 		unsigned int pglen;
@@ -587,7 +588,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		if (status < 0)
 			break;
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
 		if (status < 0) {
 			if (status == -ENOSPC)
 				status = 0;
@@ -1221,7 +1222,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		goto out_unblock_sillyrename;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
-	res = (struct dentry *)inode;
+	res = ERR_CAST(inode);
 	if (IS_ERR(res))
 		goto out_unblock_sillyrename;
 
@@ -1355,8 +1356,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
 		attr.ia_valid = ATTR_MODE;
-		if (!IS_POSIXACL(dir))
-			attr.ia_mode &= ~current_umask();
+		attr.ia_mode &= ~current_umask();
 	} else {
 		open_flags &= ~(O_EXCL | O_CREAT);
 		attr.ia_valid = 0;
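
The fs/nfs/dir.c hunks above drop the vm_map_ram() mapping of the readdir page array: nfs_readdir_large_page() now reports plain success or failure (0 or -ENOMEM) instead of returning a mapped pointer, and the page filler consumes the page array directly. A minimal userspace sketch of the same allocate-or-unwind idiom, under the assumption that partial allocations must be released on failure (all names here are hypothetical, not kernel API):

    /* Sketch: fill bufs[0..n-1]; on failure free what was allocated. */
    #include <errno.h>
    #include <stdlib.h>

    static void free_buffer_array(void *bufs[], unsigned int n)
    {
    	while (n-- > 0)
    		free(bufs[n]);
    }

    static int alloc_buffer_array(void *bufs[], unsigned int n, size_t size)
    {
    	unsigned int i;

    	for (i = 0; i < n; i++) {
    		bufs[i] = malloc(size);
    		if (bufs[i] == NULL)
    			goto out_free;
    	}
    	return 0;		/* caller checks status < 0, as above */
    out_free:
    	free_buffer_array(bufs, i);	/* only the i already allocated */
    	return -ENOMEM;
    }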
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 4e2d9b6b1380..18696882f1c6 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -238,7 +238,7 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
 	return nfs_idmap_lookup_name(gid, "group", buf, buflen);
 }
 
-#else  /* CONFIG_NFS_USE_IDMAPPER not defined */
+#else  /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
 
 #include <linux/module.h>
 #include <linux/mutex.h>
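
This one-line fix keeps the #else annotation in sync with the actual guard symbol, CONFIG_NFS_USE_NEW_IDMAPPER. A small sketch of that labeling convention (the config symbol below is hypothetical):

    #ifdef CONFIG_EXAMPLE_FEATURE
    static int feature_init(void) { return 0; }
    #else /* CONFIG_EXAMPLE_FEATURE not defined */
    static int feature_init(void) { return -1; }
    #endif /* CONFIG_EXAMPLE_FEATURE */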
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 017daa3bed38..ce00b704452c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1410,9 +1410,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
 void nfs4_evict_inode(struct inode *inode)
 {
+	pnfs_destroy_layout(NFS_I(inode));
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
-	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
@@ -1619,6 +1619,7 @@ static void __exit exit_nfs_fs(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 #endif
+	nfs_cleanup_cb_ident_idr();
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
 	nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e6356b750b77..bfa3a34af801 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -128,9 +128,13 @@ extern void nfs_umount(const struct nfs_mount_request *info);
 /* client.c */
 extern struct rpc_program nfs_program;
 
+extern void nfs_cleanup_cb_ident_idr(void);
 extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
-extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
+extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
+extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+			   int);
 extern struct nfs_server *nfs_create_server(
 					const struct nfs_parsed_mount_data *,
 					struct nfs_fh *);
@@ -185,17 +189,20 @@ extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
 
 /* nfs2xdr.c */
-extern int nfs_stat_to_errno(int);
+extern int nfs_stat_to_errno(enum nfs_stat);
 extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs2_decode_dirent(struct xdr_stream *,
+			      struct nfs_entry *, int);
 
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs3_decode_dirent(struct xdr_stream *,
+			      struct nfs_entry *, int);
 
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs4_decode_dirent(struct xdr_stream *,
+			      struct nfs_entry *, int);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
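
Note the calling-convention change in these prototypes: the per-version dirent decoders now return an int errno instead of a `__be32 *` (pointer-or-ERR_PTR), and the nfs_server argument is gone. A sketch of why the int convention is simpler for callers, using hypothetical userspace types:

    /* Sketch: an int return lets callers test `< 0` directly instead of
     * unwrapping ERR_PTR-style pointers. Types here are illustrative. */
    #include <errno.h>
    #include <stdint.h>

    struct entry  { uint32_t ino; };
    struct stream { const uint32_t *pos, *end; };

    static int decode_entry(struct stream *s, struct entry *e)
    {
    	if (s->pos >= s->end)
    		return -EAGAIN;	/* ran out of buffered data */
    	e->ino = *s->pos++;
    	return 0;		/* success */
    }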
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 4f981f1f6689..d4c2d6b7507e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -236,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info)
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= RPC_CLNT_CREATE_NOPING,
 	};
-	struct mountres	result;
 	struct rpc_message msg	= {
 		.rpc_argp	= info->dirpath,
-		.rpc_resp	= &result,
 	};
 	struct rpc_clnt *clnt;
 	int status;
@@ -248,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 	clnt = rpc_create(&args);
-	if (unlikely(IS_ERR(clnt)))
+	if (IS_ERR(clnt))
 		goto out_clnt_err;
 
 	dprintk("NFS: sending UMNT request for %s:%s\n",
@@ -280,29 +278,20 @@ out_call_err:
  * XDR encode/decode functions for MOUNT
  */
 
-static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
+static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 {
 	const u32 pathname_len = strlen(pathname);
 	__be32 *p;
 
-	if (unlikely(pathname_len > MNTPATHLEN))
-		return -EIO;
-
-	p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
-	if (unlikely(p == NULL))
-		return -EIO;
+	BUG_ON(pathname_len > MNTPATHLEN);
+	p = xdr_reserve_space(xdr, 4 + pathname_len);
 	xdr_encode_opaque(p, pathname, pathname_len);
-
-	return 0;
 }
 
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
-			   const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const char *dirpath)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	return encode_mntdirpath(&xdr, dirpath);
+	encode_mntdirpath(xdr, dirpath);
 }
 
 /*
@@ -320,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
 	u32 status;
 	__be32 *p;
 
-	p = xdr_inline_decode(xdr, sizeof(status));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
-	status = ntohl(*p);
+	status = be32_to_cpup(p);
 
 	for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
 		if (mnt_errtbl[i].status == status) {
@@ -351,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
-			    struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_status(&xdr, res);
+	status = decode_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	return decode_fhandle(&xdr, res);
+	return decode_fhandle(xdr, res);
 }
 
 static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -371,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
 	u32 status;
 	__be32 *p;
 
-	p = xdr_inline_decode(xdr, sizeof(status));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
-	status = ntohl(*p);
+	status = be32_to_cpup(p);
 
 	for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
 		if (mnt3_errtbl[i].status == status) {
@@ -394,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
 	u32 size;
 	__be32 *p;
 
-	p = xdr_inline_decode(xdr, sizeof(size));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
-	size = ntohl(*p++);
+	size = be32_to_cpup(p);
 	if (size > NFS3_FHSIZE || size == 0)
 		return -EIO;
 
@@ -421,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	if (*count == 0)
 		return 0;
 
-	p = xdr_inline_decode(xdr, sizeof(entries));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
-	entries = ntohl(*p);
+	entries = be32_to_cpup(p);
 	dprintk("NFS: received %u auth flavors\n", entries);
 	if (entries > NFS_MAX_SECFLAVORS)
 		entries = NFS_MAX_SECFLAVORS;
 
-	p = xdr_inline_decode(xdr, sizeof(u32) * entries);
+	p = xdr_inline_decode(xdr, 4 * entries);
 	if (unlikely(p == NULL))
 		return -EIO;
 
@@ -437,7 +424,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 		entries = *count;
 
 	for (i = 0; i < entries; i++) {
-		flavors[i] = ntohl(*p++);
+		flavors[i] = be32_to_cpup(p++);
 		dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
 	}
 	*count = i;
@@ -445,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
-			     struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_fhs_status(&xdr, res);
+	status = decode_fhs_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	status = decode_fhandle3(&xdr, res);
+	status = decode_fhandle3(xdr, res);
 	if (unlikely(status != 0)) {
 		res->errno = -EBADHANDLE;
 		return 0;
 	}
-	return decode_auth_flavors(&xdr, res);
+	return decode_auth_flavors(xdr, res);
 }
 
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -476,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
@@ -486,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
@@ -495,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
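
The mount_clnt.c conversion illustrates the new-style XDR entry points: old kxdrproc_t routines received a raw `__be32 *` and had to set up their own xdr_stream, while the new kxdreproc_t/kxdrdproc_t routines receive a stream already initialized by the RPC layer. A userspace analogue of such a procedure table, with hypothetical names (this is a sketch, not the kernel's types):

    #include <stddef.h>

    struct stream { unsigned char *p, *end; };

    typedef void (*encode_fn)(struct stream *, const void *arg);
    typedef int  (*decode_fn)(struct stream *, void *res);

    struct proc_info {
    	const char *name;
    	encode_fn   encode;	/* stream already initialized by caller */
    	decode_fn   decode;	/* returns 0 or a negative errno */
    };

    static void enc_dirpath(struct stream *s, const void *arg)
    {
    	(void)s; (void)arg;	/* would reserve space and copy the path */
    }

    static int dec_mountres(struct stream *s, void *res)
    {
    	(void)s; (void)res;	/* would decode status, then file handle */
    	return 0;
    }

    static const struct proc_info mnt_procs[] = {
    	{ "MOUNT",  enc_dirpath, dec_mountres },
    	{ "UMOUNT", enc_dirpath, NULL },	/* one-way: no reply body */
    };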
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5914a1911c95..792cb13a4304 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,584 +61,1008 @@
 #define NFS_readdirres_sz	(1)
 #define NFS_statfsres_sz	(1+NFS_info_sz)
 
+
 /*
- * Common NFS XDR functions as inlines
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
  */
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle)
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+				 unsigned int base, unsigned int len,
+				 unsigned int bufsize)
 {
-	memcpy(p, fhandle->data, NFS2_FHSIZE);
-	return p + XDR_QUADLEN(NFS2_FHSIZE);
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
+	unsigned int replen;
+
+	replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
 }
 
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle)
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
 {
-	/* NFSv2 handles have a fixed length */
-	fhandle->size = NFS2_FHSIZE;
-	memcpy(fhandle->data, p, NFS2_FHSIZE);
-	return p + XDR_QUADLEN(NFS2_FHSIZE);
+	dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+		"Remaining buffer length is %tu words.\n",
+		func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NFSv2 basic data types
+ *
+ * Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions.  For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+/*
+ *	typedef opaque nfsdata<>;
+ */
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+{
+	u32 recvd, count;
+	size_t hdrlen;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(count > recvd))
+		goto out_cheating;
+out:
+	xdr_read_pages(xdr, count);
+	result->eof = 0;	/* NFSv2 does not pass EOF flag on the wire. */
+	result->count = count;
+	return count;
+out_cheating:
+	dprintk("NFS: server cheating in read result: "
+		"count %u > recvd %u\n", count, recvd);
+	count = recvd;
+	goto out;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	enum stat {
+ *		NFS_OK = 0,
+ *		NFSERR_PERM = 1,
+ *		NFSERR_NOENT = 2,
+ *		NFSERR_IO = 5,
+ *		NFSERR_NXIO = 6,
+ *		NFSERR_ACCES = 13,
+ *		NFSERR_EXIST = 17,
+ *		NFSERR_NODEV = 19,
+ *		NFSERR_NOTDIR = 20,
+ *		NFSERR_ISDIR = 21,
+ *		NFSERR_FBIG = 27,
+ *		NFSERR_NOSPC = 28,
+ *		NFSERR_ROFS = 30,
+ *		NFSERR_NAMETOOLONG = 63,
+ *		NFSERR_NOTEMPTY = 66,
+ *		NFSERR_DQUOT = 69,
+ *		NFSERR_STALE = 70,
+ *		NFSERR_WFLUSH = 99
+ *	};
+ */
+static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*status = be32_to_cpup(p);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 
-static inline __be32*
-xdr_encode_time(__be32 *p, struct timespec *timep)
+/*
+ * 2.3.2.  ftype
+ *
+ *	enum ftype {
+ *		NFNON = 0,
+ *		NFREG = 1,
+ *		NFDIR = 2,
+ *		NFBLK = 3,
+ *		NFCHR = 4,
+ *		NFLNK = 5
+ *	};
+ *
+ */
+static __be32 *xdr_decode_ftype(__be32 *p, u32 *type)
 {
-	*p++ = htonl(timep->tv_sec);
-	/* Convert nanoseconds into microseconds */
-	*p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
+	*type = be32_to_cpup(p++);
+	if (unlikely(*type > NF2FIFO))
+		*type = NFBAD;
 	return p;
 }
 
-static inline __be32*
-xdr_encode_current_server_time(__be32 *p, struct timespec *timep)
+/*
+ * 2.3.3.  fhandle
+ *
+ *	typedef opaque fhandle[FHSIZE];
+ */
+static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh)
 {
-	/*
-	 * Passing the invalid value useconds=1000000 is a
-	 * Sun convention for "set to current server time".
-	 * It's needed to make permissions checks for the
-	 * "touch" program across v2 mounts to Solaris and
-	 * Irix boxes work correctly. See description of
-	 * sattr in section 6.1 of "NFS Illustrated" by
-	 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
-	 */
-	*p++ = htonl(timep->tv_sec);
-	*p++ = htonl(1000000);
+	__be32 *p;
+
+	BUG_ON(fh->size != NFS2_FHSIZE);
+	p = xdr_reserve_space(xdr, NFS2_FHSIZE);
+	memcpy(p, fh->data, NFS2_FHSIZE);
+}
+
+static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS2_FHSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	fh->size = NFS2_FHSIZE;
+	memcpy(fh->data, p, NFS2_FHSIZE);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * 2.3.4.  timeval
+ *
+ *	struct timeval {
+ *		unsigned int seconds;
+ *		unsigned int useconds;
+ *	};
+ */
+static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
+{
+	*p++ = cpu_to_be32(timep->tv_sec);
+	if (timep->tv_nsec != 0)
+		*p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
+	else
+		*p++ = cpu_to_be32(0);
 	return p;
 }
 
-static inline __be32*
-xdr_decode_time(__be32 *p, struct timespec *timep)
+/*
+ * Passing the invalid value useconds=1000000 is a Sun convention for
+ * "set to current server time".  It's needed to make permissions checks
+ * for the "touch" program across v2 mounts to Solaris and Irix servers
+ * work correctly.  See description of sattr in section 6.1 of "NFS
+ * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
+ */
+static __be32 *xdr_encode_current_server_time(__be32 *p,
+					      const struct timespec *timep)
 {
-	timep->tv_sec = ntohl(*p++);
-	/* Convert microseconds into nanoseconds */
-	timep->tv_nsec = ntohl(*p++) * 1000;
+	*p++ = cpu_to_be32(timep->tv_sec);
+	*p++ = cpu_to_be32(1000000);
 	return p;
 }
 
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
+{
+	timep->tv_sec = be32_to_cpup(p++);
+	timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
+	return p;
+}
+
+/*
+ * 2.3.5.  fattr
+ *
+ *	struct fattr {
+ *		ftype		type;
+ *		unsigned int	mode;
+ *		unsigned int	nlink;
+ *		unsigned int	uid;
+ *		unsigned int	gid;
+ *		unsigned int	size;
+ *		unsigned int	blocksize;
+ *		unsigned int	rdev;
+ *		unsigned int	blocks;
+ *		unsigned int	fsid;
+ *		unsigned int	fileid;
+ *		timeval		atime;
+ *		timeval		mtime;
+ *		timeval		ctime;
+ *	};
+ *
+ */
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 {
 	u32 rdev, type;
-	type = ntohl(*p++);
-	fattr->mode = ntohl(*p++);
-	fattr->nlink = ntohl(*p++);
-	fattr->uid = ntohl(*p++);
-	fattr->gid = ntohl(*p++);
-	fattr->size = ntohl(*p++);
-	fattr->du.nfs2.blocksize = ntohl(*p++);
-	rdev = ntohl(*p++);
-	fattr->du.nfs2.blocks = ntohl(*p++);
-	fattr->fsid.major = ntohl(*p++);
-	fattr->fsid.minor = 0;
-	fattr->fileid = ntohl(*p++);
-	p = xdr_decode_time(p, &fattr->atime);
-	p = xdr_decode_time(p, &fattr->mtime);
-	p = xdr_decode_time(p, &fattr->ctime);
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+
 	fattr->valid |= NFS_ATTR_FATTR_V2;
+
+	p = xdr_decode_ftype(p, &type);
+
+	fattr->mode = be32_to_cpup(p++);
+	fattr->nlink = be32_to_cpup(p++);
+	fattr->uid = be32_to_cpup(p++);
+	fattr->gid = be32_to_cpup(p++);
+	fattr->size = be32_to_cpup(p++);
+	fattr->du.nfs2.blocksize = be32_to_cpup(p++);
+
+	rdev = be32_to_cpup(p++);
 	fattr->rdev = new_decode_dev(rdev);
-	if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
+	if (type == (u32)NFCHR && rdev == (u32)NFS2_FIFO_DEV) {
 		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
 		fattr->rdev = 0;
 	}
+
+	fattr->du.nfs2.blocks = be32_to_cpup(p++);
+	fattr->fsid.major = be32_to_cpup(p++);
+	fattr->fsid.minor = 0;
+	fattr->fileid = be32_to_cpup(p++);
+
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	xdr_decode_time(p, &fattr->ctime);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * 2.3.6.  sattr
+ *
+ *	struct sattr {
+ *		unsigned int	mode;
+ *		unsigned int	uid;
+ *		unsigned int	gid;
+ *		unsigned int	size;
+ *		timeval		atime;
+ *		timeval		mtime;
+ *	};
+ */
+
+#define NFS2_SATTR_NOT_SET	(0xffffffff)
+
+static __be32 *xdr_time_not_set(__be32 *p)
+{
+	*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+	*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	return p;
 }
 
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
 {
-	const __be32 not_set = __constant_htonl(0xFFFFFFFF);
+	__be32 *p;
 
-	*p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set;
-	*p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
-	*p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set;
-	*p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set;
+	p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
 
-	if (attr->ia_valid & ATTR_ATIME_SET) {
+	if (attr->ia_valid & ATTR_MODE)
+		*p++ = cpu_to_be32(attr->ia_mode);
+	else
+		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+	if (attr->ia_valid & ATTR_UID)
+		*p++ = cpu_to_be32(attr->ia_uid);
+	else
+		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+	if (attr->ia_valid & ATTR_GID)
+		*p++ = cpu_to_be32(attr->ia_gid);
+	else
+		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+	if (attr->ia_valid & ATTR_SIZE)
+		*p++ = cpu_to_be32((u32)attr->ia_size);
+	else
+		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+
+	if (attr->ia_valid & ATTR_ATIME_SET)
 		p = xdr_encode_time(p, &attr->ia_atime);
-	} else if (attr->ia_valid & ATTR_ATIME) {
+	else if (attr->ia_valid & ATTR_ATIME)
 		p = xdr_encode_current_server_time(p, &attr->ia_atime);
-	} else {
-		*p++ = not_set;
-		*p++ = not_set;
-	}
-
-	if (attr->ia_valid & ATTR_MTIME_SET) {
-		p = xdr_encode_time(p, &attr->ia_mtime);
-	} else if (attr->ia_valid & ATTR_MTIME) {
-		p = xdr_encode_current_server_time(p, &attr->ia_mtime);
-	} else {
-		*p++ = not_set;
-		*p++ = not_set;
-	}
-	return p;
+	else
+		p = xdr_time_not_set(p);
+	if (attr->ia_valid & ATTR_MTIME_SET)
+		xdr_encode_time(p, &attr->ia_mtime);
+	else if (attr->ia_valid & ATTR_MTIME)
+		xdr_encode_current_server_time(p, &attr->ia_mtime);
+	else
+		xdr_time_not_set(p);
 }
 
 /*
- * NFS encode functions
+ * 2.3.7.  filename
+ *
+ *	typedef string filename<MAXNAMLEN>;
  */
+static void encode_filename(struct xdr_stream *xdr,
+			    const char *name, u32 length)
+{
+	__be32 *p;
+
+	BUG_ON(length > NFS2_MAXNAMLEN);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, name, length);
+}
+
+static int decode_filename_inline(struct xdr_stream *xdr,
+				  const char **name, u32 *length)
+{
+	__be32 *p;
+	u32 count;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	if (count > NFS3_MAXNAMLEN)
+		goto out_nametoolong;
+	p = xdr_inline_decode(xdr, count);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*name = (const char *)p;
+	*length = count;
+	return 0;
+out_nametoolong:
+	dprintk("NFS: returned filename too long: %u\n", count);
+	return -ENAMETOOLONG;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
- * Encode file handle argument
- * GETATTR, READLINK, STATFS
+ * 2.3.8.  path
+ *
+ *	typedef string path<MAXPATHLEN>;
  */
-static int
-nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length)
 {
-	p = xdr_encode_fhandle(p, fh);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	__be32 *p;
+
+	BUG_ON(length > NFS2_MAXPATHLEN);
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(length);
+	xdr_write_pages(xdr, pages, 0, length);
+}
+
+static int decode_path(struct xdr_stream *xdr)
+{
+	u32 length, recvd;
+	size_t hdrlen;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p);
+	if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
+		goto out_size;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(length > recvd))
+		goto out_cheating;
+
+	xdr_read_pages(xdr, length);
+	xdr_terminate_string(xdr->buf, length);
 	return 0;
+out_size:
+	dprintk("NFS: returned pathname too long: %u\n", length);
+	return -ENAMETOOLONG;
+out_cheating:
+	dprintk("NFS: server cheating in pathname result: "
+		"length %u > received %u\n", length, recvd);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 
 /*
- * Encode SETATTR arguments
+ * 2.3.9.  attrstat
+ *
+ *	union attrstat switch (stat status) {
+ *	case NFS_OK:
+ *		fattr attributes;
+ *	default:
+ *		void;
+ *	};
  */
-static int
-nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args)
+static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_fattr(xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
 }
 
 /*
- * Encode directory ops argument
- * LOOKUP, RMDIR
+ * 2.3.10.  diropargs
+ *
+ *	struct diropargs {
+ *		fhandle  dir;
+ *		filename name;
+ *	};
  */
-static int
-nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
+static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
+			     const char *name, u32 length)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_fhandle(xdr, fh);
+	encode_filename(xdr, name, length);
 }
 
 /*
- * Encode REMOVE argument
+ * 2.3.11.  diropres
+ *
+ *	union diropres switch (stat status) {
+ *	case NFS_OK:
+ *		struct {
+ *			fhandle file;
+ *			fattr   attributes;
+ *		} diropok;
+ *	default:
+ *		void;
+ *	};
  */
-static int
-nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name.name, args->name.len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	int error;
+
+	error = decode_fhandle(xdr, result->fh);
+	if (unlikely(error))
+		goto out;
+	error = decode_fattr(xdr, result->fattr);
+out:
+	return error;
+}
+
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+{
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_diropok(xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
 }
 
+
 /*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * NFSv2 XDR encode functions
+ *
+ * NFSv2 argument types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
  */
-static int
-nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+
+static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nfs_fh *fh)
 {
-	struct rpc_auth *auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-	u32 offset = (u32)args->offset;
+	encode_fhandle(xdr, fh);
+}
+
+/*
+ * 2.2.3.  sattrargs
+ *
+ *	struct sattrargs {
+ *		fhandle file;
+ *		sattr attributes;
+ *	};
+ */
+static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_sattrargs *args)
+{
+	encode_fhandle(xdr, args->fh);
+	encode_sattr(xdr, args->sattr);
+}
+
+static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_diropargs *args)
+{
+	encode_diropargs(xdr, args->fh, args->name, args->len);
+}
+
+static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_readlinkargs *args)
+{
+	encode_fhandle(xdr, args->fh);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+			     args->pglen, NFS_readlinkres_sz);
+}
+
+/*
+ * 2.2.7.  readargs
+ *
+ *	struct readargs {
+ *		fhandle file;
+ *		unsigned offset;
+ *		unsigned count;
+ *		unsigned totalcount;
+ *	};
+ */
+static void encode_readargs(struct xdr_stream *xdr,
+			    const struct nfs_readargs *args)
+{
+	u32 offset = args->offset;
 	u32 count = args->count;
+	__be32 *p;
 
-	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(offset);
-	*p++ = htonl(count);
-	*p++ = htonl(count);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	encode_fhandle(xdr, args->fh);
 
-	/* Inline the page array */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen,
-			 args->pages, args->pgbase, count);
+	p = xdr_reserve_space(xdr, 4 + 4 + 4);
+	*p++ = cpu_to_be32(offset);
+	*p++ = cpu_to_be32(count);
+	*p = cpu_to_be32(count);
+}
+
+static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_readargs *args)
+{
+	encode_readargs(xdr, args);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+			     args->count, NFS_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
- * Decode READ reply
+ * 2.2.9.  writeargs
+ *
+ *	struct writeargs {
+ *		fhandle file;
+ *		unsigned beginoffset;
+ *		unsigned offset;
+ *		unsigned totalcount;
+ *		nfsdata data;
+ *	};
  */
-static int
-nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
+static void encode_writeargs(struct xdr_stream *xdr,
+			     const struct nfs_writeargs *args)
 {
-	struct kvec *iov = req->rq_rcv_buf.head;
-	size_t hdrlen;
-	u32 count, recvd;
-	int status;
-
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-	p = xdr_decode_fattr(p, res->fattr);
-
-	count = ntohl(*p++);
-	res->eof = 0;
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
-	if (iov->iov_len < hdrlen) {
-		dprintk("NFS: READ reply header overflowed:"
-				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
-		return -errno_NFSERR_IO;
-	} else if (iov->iov_len != hdrlen) {
-		dprintk("NFS: READ header is short. iovec will be shifted.\n");
-		xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
-	}
+	u32 offset = args->offset;
+	u32 count = args->count;
+	__be32 *p;
 
-	recvd = req->rq_rcv_buf.len - hdrlen;
-	if (count > recvd) {
-		dprintk("NFS: server cheating in read reply: "
-				"count %u > recvd %u\n", count, recvd);
-		count = recvd;
-	}
+	encode_fhandle(xdr, args->fh);
 
-	dprintk("RPC:      readres OK count %u\n", count);
-	if (count < res->count)
-		res->count = count;
+	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+	*p++ = cpu_to_be32(offset);
+	*p++ = cpu_to_be32(offset);
+	*p++ = cpu_to_be32(count);
 
-	return count;
+	/* nfsdata */
+	*p = cpu_to_be32(count);
+	xdr_write_pages(xdr, args->pages, args->pgbase, count);
 }
 
+static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_writeargs *args)
+{
+	encode_writeargs(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
+}
 
 /*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 2.2.10.  createargs
+ *
+ *	struct createargs {
+ *		diropargs where;
+ *		sattr attributes;
+ *	};
  */
-static int
-nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_createargs *args)
 {
-	struct xdr_buf *sndbuf = &req->rq_snd_buf;
-	u32 offset = (u32)args->offset;
-	u32 count = args->count;
-
-	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(offset);
-	*p++ = htonl(offset);
-	*p++ = htonl(count);
-	*p++ = htonl(count);
-	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+	encode_diropargs(xdr, args->fh, args->name, args->len);
+	encode_sattr(xdr, args->sattr);
+}
 
-	/* Copy the page array */
-	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
-	sndbuf->flags |= XDRBUF_WRITE;
-	return 0;
+static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_removeargs *args)
+{
+	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
- * Encode create arguments
- * CREATE, MKDIR
+ * 2.2.12.  renameargs
+ *
+ *	struct renameargs {
+ *		diropargs from;
+ *		diropargs to;
+ *	};
  */
-static int
-nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
+static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_renameargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	const struct qstr *old = args->old_name;
+	const struct qstr *new = args->new_name;
+
+	encode_diropargs(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
- * Encode RENAME arguments
+ * 2.2.13.  linkargs
+ *
+ *	struct linkargs {
+ *		fhandle from;
+ *		diropargs to;
+ *	};
  */
-static int
-nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
+static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_linkargs *args)
 {
-	p = xdr_encode_fhandle(p, args->old_dir);
-	p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
-	p = xdr_encode_fhandle(p, args->new_dir);
-	p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_fhandle(xdr, args->fromfh);
+	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
- * Encode LINK arguments
+ * 2.2.14.  symlinkargs
+ *
+ *	struct symlinkargs {
+ *		diropargs from;
+ *		path to;
+ *		sattr attributes;
+ *	};
  */
-static int
-nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args)
+static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_symlinkargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_array(p, args->toname, args->tolen);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_path(xdr, args->pages, args->pathlen);
+	encode_sattr(xdr, args->sattr);
 }
 
 /*
- * Encode SYMLINK arguments
+ * 2.2.17.  readdirargs
+ *
+ *	struct readdirargs {
+ *		fhandle dir;
+ *		nfscookie cookie;
+ *		unsigned count;
+ *	};
  */
-static int
-nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args)
+static void encode_readdirargs(struct xdr_stream *xdr,
+			       const struct nfs_readdirargs *args)
 {
-	struct xdr_buf *sndbuf = &req->rq_snd_buf;
-	size_t pad;
+	__be32 *p;
 
-	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	*p++ = htonl(args->pathlen);
-	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+	encode_fhandle(xdr, args->fh);
 
-	xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+	p = xdr_reserve_space(xdr, 4 + 4);
+	*p++ = cpu_to_be32(args->cookie);
+	*p = cpu_to_be32(args->count);
+}
 
-	/*
-	 * xdr_encode_pages may have added a few bytes to ensure the
-	 * pathname ends on a 4-byte boundary.  Start encoding the
-	 * attributes after the pad bytes.
-	 */
-	pad = sndbuf->tail->iov_len;
-	if (pad > 0)
-		p++;
-	p = xdr_encode_sattr(p, args->sattr);
-	sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
-	return 0;
+static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_readdirargs *args)
+{
+	encode_readdirargs(xdr, args);
+	prepare_reply_buffer(req, args->pages, 0,
+			     args->count, NFS_readdirres_sz);
 }
 
 /*
- * Encode arguments to readdir call
+ * NFSv2 XDR decode functions
+ *
+ * NFSv2 result types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
  */
-static int
-nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
+
+static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     void *__unused)
 {
-	struct rpc_auth *auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-	u32 count = args->count;
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
 
-	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->cookie);
-	*p++ = htonl(count); /* see above */
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_fattr *result)
+{
+	return decode_attrstat(xdr, result);
+}
 
-	/* Inline the page array */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
-	return 0;
+static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_diropok *result)
+{
+	return decode_diropres(xdr, result);
 }
 
 /*
- * Decode the result of a readdir call.
- * We're not really decoding anymore, we just leave the buffer untouched
- * and only check that it is syntactically correct.
- * The real decoding happens in nfs_decode_entry below, called directly
- * from nfs_readdir for each entry.
+ * 2.2.6.  readlinkres
+ *
+ *	union readlinkres switch (stat status) {
+ *	case NFS_OK:
+ *		path data;
+ *	default:
+ *		void;
+ *	};
  */
-static int
-nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
+				    struct xdr_stream *xdr, void *__unused)
 {
-	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
-	struct kvec *iov = rcvbuf->head;
-	struct page **page;
-	size_t hdrlen;
-	unsigned int pglen, recvd;
-	int status;
-
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
-	if (iov->iov_len < hdrlen) {
-		dprintk("NFS: READDIR reply header overflowed:"
-				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
-		return -errno_NFSERR_IO;
-	} else if (iov->iov_len != hdrlen) {
-		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
-		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
-	}
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_path(xdr);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
 
-	pglen = rcvbuf->page_len;
-	recvd = rcvbuf->len - hdrlen;
-	if (pglen > recvd)
-		pglen = recvd;
-	page = rcvbuf->pages;
-	return pglen;
+/*
+ * 2.2.7.  readres
+ *
+ *	union readres switch (stat status) {
+ *	case NFS_OK:
+ *		fattr attributes;
+ *		nfsdata data;
+ *	default:
+ *		void;
+ *	};
+ */
+static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_readres *result)
+{
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_fattr(xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_nfsdata(xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
 }
 
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_writeres *result)
 {
-	dprintk("nfs: %s: prematurely hit end of receive buffer. "
-		"Remaining buffer length is %tu words.\n",
-		func, xdr->end - xdr->p);
+	/* All NFSv2 writes are "file sync" writes */
+	result->verf->committed = NFS_FILE_SYNC;
+	return decode_attrstat(xdr, result->fattr);
 }
 
-__be32 *
-nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/**
+ * nfs2_decode_dirent - Decode a single NFSv2 directory entry stored in
+ *			the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 2.2.17.  entry
+ *
+ *	struct entry {
+ *		unsigned	fileid;
+ *		filename	name;
+ *		nfscookie	cookie;
+ *		entry		*nextentry;
+ *	};
+ */
+int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+		       int plus)
 {
 	__be32 *p;
+	int error;
+
 	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
+	if (unlikely(p == NULL))
 		goto out_overflow;
-	if (!ntohl(*p++)) {
+	if (*p++ == xdr_zero) {
 		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
+		if (unlikely(p == NULL))
 			goto out_overflow;
-		if (!ntohl(*p++))
-			return ERR_PTR(-EAGAIN);
+		if (*p++ == xdr_zero)
+			return -EAGAIN;
 		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
+		return -EBADCOOKIE;
 	}
 
-	p = xdr_inline_decode(xdr, 8);
-	if (unlikely(!p))
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
 		goto out_overflow;
+	entry->ino = be32_to_cpup(p);
 
-	entry->ino = ntohl(*p++);
-	entry->len = ntohl(*p++);
+	error = decode_filename_inline(xdr, &entry->name, &entry->len);
+	if (unlikely(error))
+		return error;
 
-	p = xdr_inline_decode(xdr, entry->len + 4);
-	if (unlikely(!p))
+	/*
+	 * The type (size and byte order) of nfscookie isn't defined in
+	 * RFC 1094.  This implementation assumes that it's an XDR uint32.
+	 */
+	entry->prev_cookie = entry->cookie;
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
 		goto out_overflow;
-	entry->name = (const char *) p;
-	p += XDR_QUADLEN(entry->len);
-	entry->prev_cookie = entry->cookie;
-	entry->cookie = ntohl(*p++);
+	entry->cookie = be32_to_cpup(p);
 
 	entry->d_type = DT_UNKNOWN;
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
-	return p;
+	return 0;
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EAGAIN);
-}
-
-/*
- * NFS XDR decode functions
- */
-/*
- * Decode simple status reply
- */
-static int
-nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
-{
-	int status;
-
-	if ((status = ntohl(*p++)) != 0)
-		status = nfs_stat_to_errno(status);
-	return status;
+	return -EAGAIN;
 }
 
 /*
- * Decode attrstat reply
- * GETATTR, SETATTR, WRITE
- */
-static int
-nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
-{
-	int status;
-
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-	xdr_decode_fattr(p, fattr);
-	return 0;
-}
-
-/*
- * Decode diropres reply
- * LOOKUP, CREATE, MKDIR
+ * 2.2.17.  readdirres
+ *
+ *	union readdirres switch (stat status) {
+ *	case NFS_OK:
+ *		struct {
+ *			entry *entries;
+ *			bool eof;
+ *		} readdirok;
+ *	default:
+ *		void;
+ *	};
+ *
+ * Read the directory contents into the page cache, but don't
+ * touch them.  The actual decoding is done by nfs2_decode_dirent()
+ * during subsequent nfs_readdir() calls.
  */
-static int
-nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
+static int decode_readdirok(struct xdr_stream *xdr)
 {
-	int status;
+	u32 recvd, pglen;
+	size_t hdrlen;
 
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-	p = xdr_decode_fhandle(p, res->fh);
-	xdr_decode_fattr(p, res->fattr);
-	return 0;
+	pglen = xdr->buf->page_len;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(pglen > recvd))
+		goto out_cheating;
+out:
+	xdr_read_pages(xdr, pglen);
+	return pglen;
+out_cheating:
+	dprintk("NFS: server cheating in readdir result: "
+		"pglen %u > recvd %u\n", pglen, recvd);
+	pglen = recvd;
+	goto out;
 }
 
-/*
- * Encode READLINK args
- */
-static int
-nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
+static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
+				   struct xdr_stream *xdr, void *__unused)
 {
-	struct rpc_auth *auth = req->rq_cred->cr_auth;
-	unsigned int replen;
+	enum nfs_stat status;
+	int error;
 
-	p = xdr_encode_fhandle(p, args->fh);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
-	/* Inline the page array */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
-	return 0;
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_readdirok(xdr);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
 }
 
 /*
- * Decode READLINK reply
+ * 2.2.18.  statfsres
+ *
+ *	union statfsres (stat status) {
+ *	case NFS_OK:
+ *		struct {
+ *			unsigned tsize;
+ *			unsigned bsize;
+ *			unsigned blocks;
+ *			unsigned bfree;
+ *			unsigned bavail;
+ *		} info;
+ *	default:
+ *		void;
+ *	};
  */
-static int
-nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
 {
-	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
-	struct kvec *iov = rcvbuf->head;
-	size_t hdrlen;
-	u32 len, recvd;
-	int status;
-
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-	/* Convert length of symlink */
-	len = ntohl(*p++);
-	if (len >= rcvbuf->page_len) {
-		dprintk("nfs: server returned giant symlink!\n");
-		return -ENAMETOOLONG;
-	}
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
-	if (iov->iov_len < hdrlen) {
-		dprintk("NFS: READLINK reply header overflowed:"
-				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
-		return -errno_NFSERR_IO;
-	} else if (iov->iov_len != hdrlen) {
-		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
-		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
-	}
-	recvd = req->rq_rcv_buf.len - hdrlen;
-	if (recvd < len) {
-		dprintk("NFS: server cheating in readlink reply: "
-				"count %u > recvd %u\n", len, recvd);
-		return -EIO;
-	}
+	__be32 *p;
 
-	xdr_terminate_string(rcvbuf, len);
+	p = xdr_inline_decode(xdr, NFS_info_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	result->tsize = be32_to_cpup(p++);
+	result->bsize = be32_to_cpup(p++);
+	result->blocks = be32_to_cpup(p++);
+	result->bfree = be32_to_cpup(p++);
+	result->bavail = be32_to_cpup(p);
 	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 
-/*
- * Decode WRITE reply
- */
-static int
-nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
+static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  struct nfs2_fsstat *result)
 {
-	res->verf->committed = NFS_FILE_SYNC;
-	return nfs_xdr_attrstat(req, p, res->fattr);
+	enum nfs_stat status;
+	int error;
+
+	error = decode_stat(xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS_OK)
+		goto out_default;
+	error = decode_info(xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
 }
 
-/*
- * Decode STATFS reply
- */
-static int
-nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
-{
-	int status;
-
-	if ((status = ntohl(*p++)))
-		return nfs_stat_to_errno(status);
-
-	res->tsize = ntohl(*p++);
-	res->bsize = ntohl(*p++);
-	res->blocks = ntohl(*p++);
-	res->bfree = ntohl(*p++);
-	res->bavail = ntohl(*p++);
-	return 0;
-}
 
 /*
  * We need to translate between nfs status return values and
  * the local errno values which may not be the same.
  */
-static struct {
+static const struct {
 	int stat;
 	int errno;
 } nfs_errtbl[] = {
@@ -678,28 +1102,30 @@ static struct {
678 { -1, -EIO } 1102 { -1, -EIO }
679}; 1103};
680 1104
681/* 1105/**
682 * Convert an NFS error code to a local one. 1106 * nfs_stat_to_errno - convert an NFS status code to a local errno
683 * This one is used jointly by NFSv2 and NFSv3. 1107 * @status: NFS status code to convert
1108 *
1109 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3.
684 */ 1111 */
685int 1112int nfs_stat_to_errno(enum nfs_stat status)
686nfs_stat_to_errno(int stat)
687{ 1113{
688 int i; 1114 int i;
689 1115
690 for (i = 0; nfs_errtbl[i].stat != -1; i++) { 1116 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
691 if (nfs_errtbl[i].stat == stat) 1117 if (nfs_errtbl[i].stat == (int)status)
692 return nfs_errtbl[i].errno; 1118 return nfs_errtbl[i].errno;
693 } 1119 }
694 dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); 1120 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
695 return nfs_errtbl[i].errno; 1121 return nfs_errtbl[i].errno;
696} 1122}
697 1123
698#define PROC(proc, argtype, restype, timer) \ 1124#define PROC(proc, argtype, restype, timer) \
699[NFSPROC_##proc] = { \ 1125[NFSPROC_##proc] = { \
700 .p_proc = NFSPROC_##proc, \ 1126 .p_proc = NFSPROC_##proc, \
701 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ 1127 .p_encode = (kxdreproc_t)nfs2_xdr_enc_##argtype, \
702 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ 1128 .p_decode = (kxdrdproc_t)nfs2_xdr_dec_##restype, \
703 .p_arglen = NFS_##argtype##_sz, \ 1129 .p_arglen = NFS_##argtype##_sz, \
704 .p_replen = NFS_##restype##_sz, \ 1130 .p_replen = NFS_##restype##_sz, \
705 .p_timer = timer, \ 1131 .p_timer = timer, \
@@ -707,21 +1133,21 @@ nfs_stat_to_errno(int stat)
 	.p_name     =  #proc,						\
 	}
 struct rpc_procinfo	nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
 	PROC(READLINK,	readlinkargs,	readlinkres,	3),
 	PROC(READ,	readargs,	readres,	3),
 	PROC(WRITE,	writeargs,	writeres,	4),
 	PROC(CREATE,	createargs,	diropres,	0),
 	PROC(REMOVE,	removeargs,	stat,		0),
 	PROC(RENAME,	renameargs,	stat,		0),
 	PROC(LINK,	linkargs,	stat,		0),
 	PROC(SYMLINK,	symlinkargs,	stat,		0),
 	PROC(MKDIR,	createargs,	diropres,	0),
 	PROC(RMDIR,	diropargs,	stat,		0),
 	PROC(READDIR,	readdirargs,	readdirres,	3),
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
 struct rpc_version	nfs_version2 = {
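Each PROC() entry above binds one encoder/decoder pair plus fixed argument and reply sizes to an NFSv2 procedure number. For orientation, the fragment below shows roughly how a caller drives such an entry through the SUNRPC layer; it is a sketch following the usual rpc_call_sync() pattern from fs/nfs/proc.c, with error handling and surrounding context trimmed, and the local variable names are illustrative:

	struct rpc_message msg = {
		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
		.rpc_argp	= fhandle,	/* handed to nfs2_xdr_enc_fhandle()   */
		.rpc_resp	= fattr,	/* filled in by nfs2_xdr_dec_attrstat() */
	};
	int status = rpc_call_sync(server->client, &msg, 0);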
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f06dac..01c5e8b1941d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -37,18 +37,16 @@
 #define NFS3_filename_sz	(1+(NFS3_MAXNAMLEN>>2))
 #define NFS3_path_sz		(1+(NFS3_MAXPATHLEN>>2))
 #define NFS3_fattr_sz		(21)
+#define NFS3_cookieverf_sz	(NFS3_COOKIEVERFSIZE>>2)
 #define NFS3_wcc_attr_sz	(6)
 #define NFS3_pre_op_attr_sz	(1+NFS3_wcc_attr_sz)
 #define NFS3_post_op_attr_sz	(1+NFS3_fattr_sz)
 #define NFS3_wcc_data_sz	(NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
-#define NFS3_fsstat_sz
-#define NFS3_fsinfo_sz
-#define NFS3_pathconf_sz
-#define NFS3_entry_sz		(NFS3_filename_sz+3)
-
-#define NFS3_sattrargs_sz	(NFS3_fh_sz+NFS3_sattr_sz+3)
 #define NFS3_diropargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
-#define NFS3_removeargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
+
+#define NFS3_getattrargs_sz	(NFS3_fh_sz)
+#define NFS3_setattrargs_sz	(NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_lookupargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
 #define NFS3_accessargs_sz	(NFS3_fh_sz+1)
 #define NFS3_readlinkargs_sz	(NFS3_fh_sz)
 #define NFS3_readargs_sz	(NFS3_fh_sz+3)
@@ -57,14 +55,16 @@
 #define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
 #define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+1+NFS3_sattr_sz)
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
+#define NFS3_removeargs_sz	(NFS3_fh_sz+NFS3_filename_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz	(NFS3_fh_sz+NFS3_diropargs_sz)
-#define NFS3_readdirargs_sz	(NFS3_fh_sz+2)
+#define NFS3_readdirargs_sz	(NFS3_fh_sz+NFS3_cookieverf_sz+3)
+#define NFS3_readdirplusargs_sz	(NFS3_fh_sz+NFS3_cookieverf_sz+4)
 #define NFS3_commitargs_sz	(NFS3_fh_sz+3)
 
-#define NFS3_attrstat_sz	(1+NFS3_fattr_sz)
-#define NFS3_wccstat_sz		(1+NFS3_wcc_data_sz)
-#define NFS3_removeres_sz	(NFS3_wccstat_sz)
+#define NFS3_getattrres_sz	(1+NFS3_fattr_sz)
+#define NFS3_setattrres_sz	(1+NFS3_wcc_data_sz)
+#define NFS3_removeres_sz	(NFS3_setattrres_sz)
 #define NFS3_lookupres_sz	(1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
 #define NFS3_accessres_sz	(1+NFS3_post_op_attr_sz+1)
 #define NFS3_readlinkres_sz	(1+NFS3_post_op_attr_sz+1)
@@ -100,1079 +100,2362 @@ static const umode_t nfs_type2fmt[] = {
 	[NF3FIFO] = S_IFIFO,
 };
 
+/*
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
+ */
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+				 unsigned int base, unsigned int len,
+				 unsigned int bufsize)
+{
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
+	unsigned int replen;
+
+	replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
 {
-	dprintk("nfs: %s: prematurely hit end of receive buffer. "
+	dprintk("NFS: %s prematurely hit the end of our receive buffer. "
 		"Remaining buffer length is %tu words.\n",
 		func, xdr->end - xdr->p);
 }
 
+
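prepare_reply_buffer() counts in 32-bit XDR words — the RPC reply header, the per-flavor verifier slack, and the fixed part of the reply — then converts to a byte offset (replen << 2) at which xdr_inline_pages() splices the page array into the receive buffer. A standalone sketch of that arithmetic; the header and slack word counts below are assumptions for illustration, and only the NFS3_* macros mirror the definitions shown earlier:

	#include <stdio.h>

	#define NFS3_fattr_sz		(21)
	#define NFS3_post_op_attr_sz	(1 + NFS3_fattr_sz)
	#define NFS3_readlinkres_sz	(1 + NFS3_post_op_attr_sz + 1)	/* 24 words */

	int main(void)
	{
		unsigned int rephdr = 3;	/* assumed RPC reply header words  */
		unsigned int rslack = 2;	/* assumed per-flavor verifier slack */
		unsigned int replen = rephdr + rslack + NFS3_readlinkres_sz;

		/* XDR units are 32-bit words, so bytes = words << 2 */
		printf("replen = %u words = %u bytes\n", replen, replen << 2);
		return 0;
	}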
 /*
- * Common NFS XDR functions as inlines
+ * Encode/decode NFSv3 basic data types
+ *
+ * Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions.  For run-time efficiency, some data types are encoded
+ * or decoded inline.
  */
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh)
+
+static void encode_uint32(struct xdr_stream *xdr, u32 value)
 {
-	return xdr_encode_array(p, fh->data, fh->size);
+	__be32 *p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(value);
 }
 
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
+static int decode_uint32(struct xdr_stream *xdr, u32 *value)
 {
-	if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
-		memcpy(fh->data, p, fh->size);
-		return p + XDR_QUADLEN(fh->size);
-	}
-	return NULL;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*value = be32_to_cpup(p);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_uint64(struct xdr_stream *xdr, u64 *value)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	xdr_decode_hyper(p, value);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * fileid3
+ *
+ * typedef uint64 fileid3;
+ */
+static __be32 *xdr_decode_fileid3(__be32 *p, u64 *fileid)
+{
+	return xdr_decode_hyper(p, fileid);
+}
+
+static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid)
+{
+	return decode_uint64(xdr, fileid);
+}
+
+/*
+ * filename3
+ *
+ * typedef string filename3<>;
+ */
+static void encode_filename3(struct xdr_stream *xdr,
+			     const char *name, u32 length)
+{
+	__be32 *p;
+
+	BUG_ON(length > NFS3_MAXNAMLEN);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, name, length);
 }
 
-static inline __be32 *
-xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
+static int decode_inline_filename3(struct xdr_stream *xdr,
+				   const char **name, u32 *length)
 {
 	__be32 *p;
+	u32 count;
+
 	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	if (count > NFS3_MAXNAMLEN)
+		goto out_nametoolong;
+	p = xdr_inline_decode(xdr, count);
+	if (unlikely(p == NULL))
 		goto out_overflow;
-	fh->size = ntohl(*p++);
+	*name = (const char *)p;
+	*length = count;
+	return 0;
 
-	if (fh->size <= NFS3_FHSIZE) {
-		p = xdr_inline_decode(xdr, fh->size);
-		if (unlikely(!p))
-			goto out_overflow;
-		memcpy(fh->data, p, fh->size);
-		return p + XDR_QUADLEN(fh->size);
-	}
-	return NULL;
+out_nametoolong:
+	dprintk("NFS: returned filename too long: %u\n", count);
+	return -ENAMETOOLONG;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * nfspath3
+ *
+ * typedef string nfspath3<>;
+ */
+static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
+			    const u32 length)
+{
+	BUG_ON(length > NFS3_MAXPATHLEN);
+	encode_uint32(xdr, length);
+	xdr_write_pages(xdr, pages, 0, length);
+}
 
+static int decode_nfspath3(struct xdr_stream *xdr)
+{
+	u32 recvd, count;
+	size_t hdrlen;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
+		goto out_nametoolong;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(count > recvd))
+		goto out_cheating;
+
+	xdr_read_pages(xdr, count);
+	xdr_terminate_string(xdr->buf, count);
+	return 0;
+
+out_nametoolong:
+	dprintk("NFS: returned pathname too long: %u\n", count);
+	return -ENAMETOOLONG;
+out_cheating:
+	dprintk("NFS: server cheating in pathname result: "
+		"count %u > recvd %u\n", count, recvd);
+	return -EIO;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EIO);
+	return -EIO;
 }
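filename3 and nfspath3 are XDR variable-length opaques: a 4-byte big-endian length word followed by the bytes, padded out to a 4-byte boundary on the wire. A self-contained userspace sketch of that layout (not kernel code):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <arpa/inet.h>

	/* Encode a name as XDR represents a variable-length string:
	 * 4-byte big-endian length, the bytes, then zero padding up to
	 * the next 4-byte boundary. Returns the number of bytes used.
	 */
	static size_t xdr_encode_string(uint8_t *buf, const char *name)
	{
		uint32_t len = strlen(name);
		uint32_t be = htonl(len);
		size_t padded = (len + 3) & ~3u;	/* round up to multiple of 4 */

		memcpy(buf, &be, 4);
		memcpy(buf + 4, name, len);
		memset(buf + 4 + len, 0, padded - len);
		return 4 + padded;
	}

	int main(void)
	{
		uint8_t buf[64];
		size_t n = xdr_encode_string(buf, "hello");	/* 4 + 8 = 12 bytes */
		printf("%zu\n", n);
		return 0;
	}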
 
 /*
- * Encode/decode time.
+ * cookie3
+ *
+ * typedef uint64 cookie3
  */
-static inline __be32 *
-xdr_encode_time3(__be32 *p, struct timespec *timep)
+static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
 {
-	*p++ = htonl(timep->tv_sec);
-	*p++ = htonl(timep->tv_nsec);
-	return p;
+	return xdr_encode_hyper(p, cookie);
 }
 
-static inline __be32 *
-xdr_decode_time3(__be32 *p, struct timespec *timep)
+static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie)
 {
-	timep->tv_sec = ntohl(*p++);
-	timep->tv_nsec = ntohl(*p++);
-	return p;
+	return decode_uint64(xdr, cookie);
+}
+
+/*
+ * cookieverf3
+ *
+ * typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE];
+ */
+static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
+{
+	memcpy(p, verifier, NFS3_COOKIEVERFSIZE);
+	return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
+}
+
+static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * createverf3
+ *
+ * typedef opaque createverf3[NFS3_CREATEVERFSIZE];
+ */
+static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE);
+	memcpy(p, verifier, NFS3_CREATEVERFSIZE);
+}
+
+static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	memcpy(verifier, p, NFS3_WRITEVERFSIZE);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * size3
+ *
+ * typedef uint64 size3;
+ */
+static __be32 *xdr_decode_size3(__be32 *p, u64 *size)
+{
+	return xdr_decode_hyper(p, size);
+}
+
+/*
+ * nfsstat3
+ *
+ * enum nfsstat3 {
+ *	NFS3_OK = 0,
+ *	...
+ * }
+ */
+#define NFS3_OK		NFS_OK
+
+static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*status = be32_to_cpup(p);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * ftype3
+ *
+ * enum ftype3 {
+ *	NF3REG	= 1,
+ *	NF3DIR	= 2,
+ *	NF3BLK	= 3,
+ *	NF3CHR	= 4,
+ *	NF3LNK	= 5,
+ *	NF3SOCK	= 6,
+ *	NF3FIFO	= 7
+ * };
+ */
+static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
+{
+	BUG_ON(type > NF3FIFO);
+	encode_uint32(xdr, type);
 }
 
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+static __be32 *xdr_decode_ftype3(__be32 *p, umode_t *mode)
 {
-	unsigned int type, major, minor;
-	umode_t fmode;
+	u32 type;
 
-	type = ntohl(*p++);
+	type = be32_to_cpup(p++);
 	if (type > NF3FIFO)
 		type = NF3NON;
-	fmode = nfs_type2fmt[type];
-	fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
-	fattr->nlink = ntohl(*p++);
-	fattr->uid = ntohl(*p++);
-	fattr->gid = ntohl(*p++);
-	p = xdr_decode_hyper(p, &fattr->size);
-	p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
-
-	/* Turn remote device info into Linux-specific dev_t */
-	major = ntohl(*p++);
-	minor = ntohl(*p++);
-	fattr->rdev = MKDEV(major, minor);
-	if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
-		fattr->rdev = 0;
+	*mode = nfs_type2fmt[type];
+	return p;
+}
 
-	p = xdr_decode_hyper(p, &fattr->fsid.major);
-	fattr->fsid.minor = 0;
-	p = xdr_decode_hyper(p, &fattr->fileid);
-	p = xdr_decode_time3(p, &fattr->atime);
-	p = xdr_decode_time3(p, &fattr->mtime);
-	p = xdr_decode_time3(p, &fattr->ctime);
+/*
+ * specdata3
+ *
+ * struct specdata3 {
+ *	uint32	specdata1;
+ *	uint32	specdata2;
+ * };
+ */
+static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev)
+{
+	__be32 *p;
 
-	/* Update the mode bits */
-	fattr->valid |= NFS_ATTR_FATTR_V3;
+	p = xdr_reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(MAJOR(rdev));
+	*p = cpu_to_be32(MINOR(rdev));
+}
+
+static __be32 *xdr_decode_specdata3(__be32 *p, dev_t *rdev)
+{
+	unsigned int major, minor;
+
+	major = be32_to_cpup(p++);
+	minor = be32_to_cpup(p++);
+	*rdev = MKDEV(major, minor);
+	if (MAJOR(*rdev) != major || MINOR(*rdev) != minor)
+		*rdev = 0;
+	return p;
+}
+
+/*
+ * nfs_fh3
+ *
+ * struct nfs_fh3 {
+ *	opaque	data<NFS3_FHSIZE>;
+ * };
+ */
+static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	__be32 *p;
+
+	BUG_ON(fh->size > NFS3_FHSIZE);
+	p = xdr_reserve_space(xdr, 4 + fh->size);
+	xdr_encode_opaque(p, fh->data, fh->size);
+}
+
+static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p++);
+	if (unlikely(length > NFS3_FHSIZE))
+		goto out_toobig;
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	fh->size = length;
+	memcpy(fh->data, p, length);
+	return 0;
+out_toobig:
+	dprintk("NFS: file handle size (%u) too big\n", length);
+	return -E2BIG;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static void zero_nfs_fh3(struct nfs_fh *fh)
+{
+	memset(fh, 0, sizeof(*fh));
+}
+
+/*
+ * nfstime3
+ *
+ * struct nfstime3 {
+ *	uint32	seconds;
+ *	uint32	nseconds;
+ * };
+ */
+static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
+{
+	*p++ = cpu_to_be32(timep->tv_sec);
+	*p++ = cpu_to_be32(timep->tv_nsec);
 	return p;
 }
 
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
 {
+	timep->tv_sec = be32_to_cpup(p++);
+	timep->tv_nsec = be32_to_cpup(p++);
+	return p;
+}
+
+/*
+ * sattr3
+ *
+ * enum time_how {
+ *	DONT_CHANGE		= 0,
+ *	SET_TO_SERVER_TIME	= 1,
+ *	SET_TO_CLIENT_TIME	= 2
+ * };
+ *
+ * union set_mode3 switch (bool set_it) {
+ * case TRUE:
+ *	mode3	mode;
+ * default:
+ *	void;
+ * };
+ *
+ * union set_uid3 switch (bool set_it) {
+ * case TRUE:
+ *	uid3	uid;
+ * default:
+ *	void;
+ * };
+ *
+ * union set_gid3 switch (bool set_it) {
+ * case TRUE:
+ *	gid3	gid;
+ * default:
+ *	void;
+ * };
+ *
+ * union set_size3 switch (bool set_it) {
+ * case TRUE:
+ *	size3	size;
+ * default:
+ *	void;
+ * };
+ *
+ * union set_atime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ *	nfstime3	atime;
+ * default:
+ *	void;
+ * };
+ *
+ * union set_mtime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ *	nfstime3	mtime;
+ * default:
+ *	void;
+ * };
+ *
+ * struct sattr3 {
+ *	set_mode3	mode;
+ *	set_uid3	uid;
+ *	set_gid3	gid;
+ *	set_size3	size;
+ *	set_atime	atime;
+ *	set_mtime	mtime;
+ * };
+ */
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+{
+	u32 nbytes;
+	__be32 *p;
+
+	/*
+	 * In order to make only a single xdr_reserve_space() call,
+	 * pre-compute the total number of bytes to be reserved.
+	 * Six boolean values, one for each set_foo field, are always
+	 * present in the encoded result, so start there.
+	 */
+	nbytes = 6 * 4;
+	if (attr->ia_valid & ATTR_MODE)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_UID)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_GID)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_SIZE)
+		nbytes += 8;
+	if (attr->ia_valid & ATTR_ATIME_SET)
+		nbytes += 8;
+	if (attr->ia_valid & ATTR_MTIME_SET)
+		nbytes += 8;
+	p = xdr_reserve_space(xdr, nbytes);
+
 	if (attr->ia_valid & ATTR_MODE) {
 		*p++ = xdr_one;
-		*p++ = htonl(attr->ia_mode & S_IALLUGO);
-	} else {
+		*p++ = cpu_to_be32(attr->ia_mode & S_IALLUGO);
+	} else
 		*p++ = xdr_zero;
-	}
+
 	if (attr->ia_valid & ATTR_UID) {
 		*p++ = xdr_one;
-		*p++ = htonl(attr->ia_uid);
-	} else {
+		*p++ = cpu_to_be32(attr->ia_uid);
+	} else
 		*p++ = xdr_zero;
-	}
+
 	if (attr->ia_valid & ATTR_GID) {
 		*p++ = xdr_one;
-		*p++ = htonl(attr->ia_gid);
-	} else {
+		*p++ = cpu_to_be32(attr->ia_gid);
+	} else
 		*p++ = xdr_zero;
-	}
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		*p++ = xdr_one;
-		p = xdr_encode_hyper(p, (__u64) attr->ia_size);
-	} else {
+		p = xdr_encode_hyper(p, (u64)attr->ia_size);
+	} else
 		*p++ = xdr_zero;
-	}
+
 	if (attr->ia_valid & ATTR_ATIME_SET) {
 		*p++ = xdr_two;
-		p = xdr_encode_time3(p, &attr->ia_atime);
+		p = xdr_encode_nfstime3(p, &attr->ia_atime);
 	} else if (attr->ia_valid & ATTR_ATIME) {
 		*p++ = xdr_one;
-	} else {
+	} else
 		*p++ = xdr_zero;
-	}
+
 	if (attr->ia_valid & ATTR_MTIME_SET) {
 		*p++ = xdr_two;
-		p = xdr_encode_time3(p, &attr->ia_mtime);
+		xdr_encode_nfstime3(p, &attr->ia_mtime);
 	} else if (attr->ia_valid & ATTR_MTIME) {
-		*p++ = xdr_one;
-	} else {
-		*p++ = xdr_zero;
-	}
-	return p;
+		*p = xdr_one;
+	} else
+		*p = xdr_zero;
+}
+
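The size pre-computation behind that single xdr_reserve_space() call can be checked in isolation. A hedged userspace sketch; the ATTR_* values below are stand-ins for illustration, not the kernel's flag bits:

	#include <stdio.h>

	/* Stand-in attribute flags, illustration only. */
	#define ATTR_MODE	0x1
	#define ATTR_SIZE	0x2

	static unsigned int sattr3_encoded_size(unsigned int ia_valid)
	{
		unsigned int nbytes = 6 * 4;	/* six 4-byte discriminants, always present */

		if (ia_valid & ATTR_MODE)
			nbytes += 4;		/* mode3 */
		if (ia_valid & ATTR_SIZE)
			nbytes += 8;		/* size3 is a 64-bit hyper */
		/* uid/gid add 4 bytes each, client-supplied times 8 each (elided) */
		return nbytes;
	}

	int main(void)
	{
		/* e.g. chmod + truncate: 24 + 4 + 8 = 36 bytes on the wire */
		printf("%u\n", sattr3_encoded_size(ATTR_MODE | ATTR_SIZE));
		return 0;
	}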
+/*
+ * fattr3
+ *
+ * struct fattr3 {
+ *	ftype3		type;
+ *	mode3		mode;
+ *	uint32		nlink;
+ *	uid3		uid;
+ *	gid3		gid;
+ *	size3		size;
+ *	size3		used;
+ *	specdata3	rdev;
+ *	uint64		fsid;
+ *	fileid3		fileid;
+ *	nfstime3	atime;
+ *	nfstime3	mtime;
+ *	nfstime3	ctime;
+ * };
+ */
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	umode_t fmode;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+
+	p = xdr_decode_ftype3(p, &fmode);
+
+	fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
+	fattr->nlink = be32_to_cpup(p++);
+	fattr->uid = be32_to_cpup(p++);
+	fattr->gid = be32_to_cpup(p++);
+
+	p = xdr_decode_size3(p, &fattr->size);
+	p = xdr_decode_size3(p, &fattr->du.nfs3.used);
+	p = xdr_decode_specdata3(p, &fattr->rdev);
+
+	p = xdr_decode_hyper(p, &fattr->fsid.major);
+	fattr->fsid.minor = 0;
+
+	p = xdr_decode_fileid3(p, &fattr->fileid);
+	p = xdr_decode_nfstime3(p, &fattr->atime);
+	p = xdr_decode_nfstime3(p, &fattr->mtime);
+	xdr_decode_nfstime3(p, &fattr->ctime);
+
+	fattr->valid |= NFS_ATTR_FATTR_V3;
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 
-static inline __be32 *
-xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * post_op_attr
+ *
+ * union post_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ *	fattr3	attributes;
+ * case FALSE:
+ *	void;
+ * };
+ */
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 {
-	p = xdr_decode_hyper(p, &fattr->pre_size);
-	p = xdr_decode_time3(p, &fattr->pre_mtime);
-	p = xdr_decode_time3(p, &fattr->pre_ctime);
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p != xdr_zero)
+		return decode_fattr3(xdr, fattr);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * wcc_attr
+ * struct wcc_attr {
+ *	size3		size;
+ *	nfstime3	mtime;
+ *	nfstime3	ctime;
+ * };
+ */
+static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+
 	fattr->valid |= NFS_ATTR_FATTR_PRESIZE
 		| NFS_ATTR_FATTR_PREMTIME
 		| NFS_ATTR_FATTR_PRECTIME;
-	return p;
-}
 
-static inline __be32 *
-xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
-{
-	if (*p++)
-		p = xdr_decode_fattr(p, fattr);
-	return p;
+	p = xdr_decode_size3(p, &fattr->pre_size);
+	p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
+	xdr_decode_nfstime3(p, &fattr->pre_ctime);
+
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 
-static inline __be32 *
-xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+/*
+ * pre_op_attr
+ * union pre_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ *	wcc_attr	attributes;
+ * case FALSE:
+ *	void;
+ * };
+ *
+ * wcc_data
+ *
+ * struct wcc_data {
+ *	pre_op_attr	before;
+ *	post_op_attr	after;
+ * };
+ */
+static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 {
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
+	if (unlikely(p == NULL))
 		goto out_overflow;
-	if (ntohl(*p++)) {
-		p = xdr_inline_decode(xdr, 84);
-		if (unlikely(!p))
-			goto out_overflow;
-		p = xdr_decode_fattr(p, fattr);
-	}
-	return p;
+	if (*p != xdr_zero)
+		return decode_wcc_attr(xdr, fattr);
+	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EIO);
+	return -EIO;
 }
 
-static inline __be32 *
-xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 {
-	if (*p++)
-		return xdr_decode_wcc_attr(p, fattr);
-	return p;
+	int error;
+
+	error = decode_pre_op_attr(xdr, fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(xdr, fattr);
+out:
+	return error;
 }
 
+/*
+ * post_op_fh3
+ *
+ * union post_op_fh3 switch (bool handle_follows) {
+ * case TRUE:
+ *	nfs_fh3  handle;
+ * case FALSE:
+ *	void;
+ * };
+ */
+static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	__be32 *p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p != xdr_zero)
+		return decode_nfs_fh3(xdr, fh);
+	zero_nfs_fh3(fh);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
 
-static inline __be32 *
-xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * diropargs3
+ *
+ * struct diropargs3 {
+ *	nfs_fh3		dir;
+ *	filename3	name;
+ * };
+ */
+static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
+			      const char *name, u32 length)
 {
-	p = xdr_decode_pre_op_attr(p, fattr);
-	return xdr_decode_post_op_attr(p, fattr);
+	encode_nfs_fh3(xdr, fh);
+	encode_filename3(xdr, name, length);
 }
 
+
 /*
- * NFS encode functions
+ * NFSv3 XDR encode functions
+ *
+ * NFSv3 argument types are defined in section 3.3 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
  */
 
 /*
- * Encode file handle argument
+ * 3.3.1  GETATTR3args
+ *
+ *	struct GETATTR3args {
+ *		nfs_fh3  object;
+ *	};
  */
-static int
-nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_fh *fh)
 {
-	p = xdr_encode_fhandle(p, fh);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_nfs_fh3(xdr, fh);
 }
 
 /*
- * Encode SETATTR arguments
+ * 3.3.2  SETATTR3args
+ *
+ *	union sattrguard3 switch (bool check) {
+ *	case TRUE:
+ *		nfstime3  obj_ctime;
+ *	case FALSE:
+ *		void;
+ *	};
+ *
+ *	struct SETATTR3args {
+ *		nfs_fh3		object;
+ *		sattr3		new_attributes;
+ *		sattrguard3	guard;
+ *	};
  */
-static int
-nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
+static void encode_sattrguard3(struct xdr_stream *xdr,
+			       const struct nfs3_sattrargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_sattr(p, args->sattr);
-	*p++ = htonl(args->guard);
-	if (args->guard)
-		p = xdr_encode_time3(p, &args->guardtime);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	__be32 *p;
+
+	if (args->guard) {
+		p = xdr_reserve_space(xdr, 4 + 8);
+		*p++ = xdr_one;
+		xdr_encode_nfstime3(p, &args->guardtime);
+	} else {
+		p = xdr_reserve_space(xdr, 4);
+		*p = xdr_zero;
+	}
+}
+
+static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_sattrargs *args)
+{
+	encode_nfs_fh3(xdr, args->fh);
+	encode_sattr3(xdr, args->sattr);
+	encode_sattrguard3(xdr, args);
 }
 
 /*
- * Encode directory ops argument
+ * 3.3.3  LOOKUP3args
+ *
+ *	struct LOOKUP3args {
+ *		diropargs3  what;
+ *	};
  */
-static int
-nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args)
+static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_diropargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
 /*
- * Encode REMOVE argument
+ * 3.3.4  ACCESS3args
+ *
+ *	struct ACCESS3args {
+ *		nfs_fh3		object;
+ *		uint32		access;
+ *	};
  */
-static int
-nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void encode_access3args(struct xdr_stream *xdr,
+			       const struct nfs3_accessargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name.name, args->name.len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_nfs_fh3(xdr, args->fh);
+	encode_uint32(xdr, args->access);
+}
+
+static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_accessargs *args)
+{
+	encode_access3args(xdr, args);
 }
 
 /*
- * Encode access() argument
+ * 3.3.5  READLINK3args
+ *
+ *	struct READLINK3args {
+ *		nfs_fh3	symlink;
+ *	};
  */
-static int
-nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args)
+static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       const struct nfs3_readlinkargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->access);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_nfs_fh3(xdr, args->fh);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+					args->pglen, NFS3_readlinkres_sz);
 }
 
 /*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * 3.3.6  READ3args
+ *
+ *	struct READ3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *	};
  */
-static int
-nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void encode_read3args(struct xdr_stream *xdr,
+			     const struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-	u32 count = args->count;
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
 
-	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_reserve_space(xdr, 8 + 4);
 	p = xdr_encode_hyper(p, args->offset);
-	*p++ = htonl(count);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	*p = cpu_to_be32(args->count);
+}
 
-	/* Inline the page array */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen,
-			 args->pages, args->pgbase, count);
+static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_readargs *args)
+{
+	encode_read3args(xdr, args);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+					args->count, NFS3_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 3.3.7  WRITE3args
+ *
+ *	enum stable_how {
+ *		UNSTABLE  = 0,
+ *		DATA_SYNC = 1,
+ *		FILE_SYNC = 2
+ *	};
+ *
+ *	struct WRITE3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *		stable_how	stable;
+ *		opaque		data<>;
+ *	};
  */
-static int
-nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_write3args(struct xdr_stream *xdr,
+			      const struct nfs_writeargs *args)
 {
-	struct xdr_buf *sndbuf = &req->rq_snd_buf;
-	u32 count = args->count;
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
 
-	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4);
 	p = xdr_encode_hyper(p, args->offset);
-	*p++ = htonl(count);
-	*p++ = htonl(args->stable);
-	*p++ = htonl(count);
-	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
-
-	/* Copy the page array */
-	xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
-	sndbuf->flags |= XDRBUF_WRITE;
-	return 0;
+	*p++ = cpu_to_be32(args->count);
+	*p++ = cpu_to_be32(args->stable);
+	*p = cpu_to_be32(args->count);
+	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+}
+
+static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_writeargs *args)
+{
+	encode_write3args(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
 }
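The reserve size in encode_write3args() is the fixed tail of WRITE3args after the file handle: an 8-byte offset3 plus three 4-byte words (count3, stable_how, and the opaque byte count, which repeats count); the page data itself is spliced in by xdr_write_pages() rather than copied. A trivial standalone check of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		/* offset3 is an XDR hyper (8 bytes); count3, stable_how and
		 * the opaque length are 4-byte XDR words.
		 */
		unsigned int fixed = 8 + 4 + 4 + 4;

		printf("%u\n", fixed);	/* 20, matching xdr_reserve_space(xdr, 8 + 4 + 4 + 4) */
		return 0;
	}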
 
 /*
- * Encode CREATE arguments
+ * 3.3.8  CREATE3args
+ *
+ *	enum createmode3 {
+ *		UNCHECKED = 0,
+ *		GUARDED   = 1,
+ *		EXCLUSIVE = 2
+ *	};
+ *
+ *	union createhow3 switch (createmode3 mode) {
+ *	case UNCHECKED:
+ *	case GUARDED:
+ *		sattr3       obj_attributes;
+ *	case EXCLUSIVE:
+ *		createverf3  verf;
+ *	};
+ *
+ *	struct CREATE3args {
+ *		diropargs3	where;
+ *		createhow3	how;
+ *	};
  */
-static int
-nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args)
+static void encode_createhow3(struct xdr_stream *xdr,
+			      const struct nfs3_createargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-
-	*p++ = htonl(args->createmode);
-	if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
-		*p++ = args->verifier[0];
-		*p++ = args->verifier[1];
-	} else
-		p = xdr_encode_sattr(p, args->sattr);
+	encode_uint32(xdr, args->createmode);
+	switch (args->createmode) {
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+		encode_sattr3(xdr, args->sattr);
+		break;
+	case NFS3_CREATE_EXCLUSIVE:
+		encode_createverf3(xdr, args->verifier);
+		break;
+	default:
+		BUG();
+	}
+}
 
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_createargs *args)
+{
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_createhow3(xdr, args);
 }
 
 /*
- * Encode MKDIR arguments
+ * 3.3.9  MKDIR3args
+ *
+ *	struct MKDIR3args {
+ *		diropargs3	where;
+ *		sattr3		attributes;
+ *	};
  */
-static int
-nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args)
+static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mkdirargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_sattr3(xdr, args->sattr);
 }
 
 /*
- * Encode SYMLINK arguments
+ * 3.3.10  SYMLINK3args
+ *
+ *	struct symlinkdata3 {
+ *		sattr3		symlink_attributes;
+ *		nfspath3	symlink_data;
+ *	};
+ *
+ *	struct SYMLINK3args {
+ *		diropargs3	where;
+ *		symlinkdata3	symlink;
+ *	};
  */
-static int
-nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args)
+static void encode_symlinkdata3(struct xdr_stream *xdr,
+				const struct nfs3_symlinkargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	p = xdr_encode_sattr(p, args->sattr);
-	*p++ = htonl(args->pathlen);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	encode_sattr3(xdr, args->sattr);
+	encode_nfspath3(xdr, args->pages, args->pathlen);
+}
 
-	/* Copy the page */
-	xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
-	return 0;
+static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_symlinkargs *args)
+{
+	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_symlinkdata3(xdr, args);
 }
 
 /*
- * Encode MKNOD arguments
+ * 3.3.11  MKNOD3args
+ *
+ *	struct devicedata3 {
+ *		sattr3		dev_attributes;
+ *		specdata3	spec;
+ *	};
+ *
+ *	union mknoddata3 switch (ftype3 type) {
+ *	case NF3CHR:
+ *	case NF3BLK:
+ *		devicedata3	device;
+ *	case NF3SOCK:
+ *	case NF3FIFO:
+ *		sattr3		pipe_attributes;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct MKNOD3args {
+ *		diropargs3	where;
+ *		mknoddata3	what;
+ *	};
  */
-static int
-nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
+static void encode_devicedata3(struct xdr_stream *xdr,
+			       const struct nfs3_mknodargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_array(p, args->name, args->len);
-	*p++ = htonl(args->type);
-	p = xdr_encode_sattr(p, args->sattr);
-	if (args->type == NF3CHR || args->type == NF3BLK) {
-		*p++ = htonl(MAJOR(args->rdev));
-		*p++ = htonl(MINOR(args->rdev));
+	encode_sattr3(xdr, args->sattr);
+	encode_specdata3(xdr, args->rdev);
+}
+
+static void encode_mknoddata3(struct xdr_stream *xdr,
+			      const struct nfs3_mknodargs *args)
+{
+	encode_ftype3(xdr, args->type);
+	switch (args->type) {
+	case NF3CHR:
+	case NF3BLK:
+		encode_devicedata3(xdr, args);
+		break;
+	case NF3SOCK:
+	case NF3FIFO:
+		encode_sattr3(xdr, args->sattr);
+		break;
+	case NF3REG:
+	case NF3DIR:
+		break;
+	default:
+		BUG();
 	}
+}
 
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mknodargs *args)
+{
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_mknoddata3(xdr, args);
 }
 
 /*
- * Encode RENAME arguments
+ * 3.3.12  REMOVE3args
+ *
+ *	struct REMOVE3args {
+ *		diropargs3  object;
+ *	};
  */
-static int
-nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
-{
-	p = xdr_encode_fhandle(p, args->old_dir);
-	p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
-	p = xdr_encode_fhandle(p, args->new_dir);
-	p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_removeargs *args)
+{
+	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
- * Encode LINK arguments
+ * 3.3.14  RENAME3args
+ *
+ *	struct RENAME3args {
+ *		diropargs3	from;
+ *		diropargs3	to;
+ *	};
  */
-static int
-nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
+static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_renameargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fromfh);
-	p = xdr_encode_fhandle(p, args->tofh);
-	p = xdr_encode_array(p, args->toname, args->tolen);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	const struct qstr *old = args->old_name;
+	const struct qstr *new = args->new_name;
+
+	encode_diropargs3(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs3(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
- * Encode arguments to readdir call
+ * 3.3.15  LINK3args
+ *
+ *	struct LINK3args {
+ *		nfs_fh3		file;
+ *		diropargs3	link;
+ *	};
  */
-static int
-nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs3_linkargs *args)
 {
-	struct rpc_auth	*auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-	u32 count = args->count;
-
-	p = xdr_encode_fhandle(p, args->fh);
-	p = xdr_encode_hyper(p, args->cookie);
-	*p++ = args->verf[0];
-	*p++ = args->verf[1];
-	if (args->plus) {
-		/* readdirplus: need dircount + buffer size.
-		 * We just make sure we make dircount big enough */
-		*p++ = htonl(count >> 3);
-	}
-	*p++ = htonl(count);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
-	/* Inline the page array */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
-	return 0;
+	encode_nfs_fh3(xdr, args->fromfh);
+	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
- * Decode the result of a readdir call.
- * We just check for syntactical correctness.
+ * 3.3.16  READDIR3args
+ *
+ *	struct READDIR3args {
+ *		nfs_fh3		dir;
+ *		cookie3		cookie;
+ *		cookieverf3	cookieverf;
+ *		count3		count;
+ *	};
  */
-static int
-nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res)
+static void encode_readdir3args(struct xdr_stream *xdr,
+				const struct nfs3_readdirargs *args)
 {
-	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
-	struct kvec *iov = rcvbuf->head;
-	struct page **page;
-	size_t hdrlen;
-	u32 recvd, pglen;
-	int status;
-
-	status = ntohl(*p++);
-	/* Decode post_op_attrs */
-	p = xdr_decode_post_op_attr(p, res->dir_attr);
-	if (status)
-		return nfs_stat_to_errno(status);
-	/* Decode verifier cookie */
-	if (res->verf) {
-		res->verf[0] = *p++;
-		res->verf[1] = *p++;
-	} else {
-		p += 2;
-	}
+	__be32 *p;
 
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
-	if (iov->iov_len < hdrlen) {
-		dprintk("NFS: READDIR reply header overflowed:"
-				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
-		return -errno_NFSERR_IO;
-	} else if (iov->iov_len != hdrlen) {
-		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
-		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
-	}
+	encode_nfs_fh3(xdr, args->fh);
 
-	pglen = rcvbuf->page_len;
-	recvd = rcvbuf->len - hdrlen;
-	if (pglen > recvd)
-		pglen = recvd;
-	page = rcvbuf->pages;
-
-	return pglen;
+	p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4);
+	p = xdr_encode_cookie3(p, args->cookie);
+	p = xdr_encode_cookieverf3(p, args->verf);
+	*p = cpu_to_be32(args->count);
+}
+
+static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_readdirargs *args)
+{
+	encode_readdir3args(xdr, args);
+	prepare_reply_buffer(req, args->pages, 0,
+				args->count, NFS3_readdirres_sz);
 }
 
-__be32 *
-nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/*
+ * 3.3.17  READDIRPLUS3args
+ *
+ *	struct READDIRPLUS3args {
+ *		nfs_fh3		dir;
+ *		cookie3		cookie;
+ *		cookieverf3	cookieverf;
+ *		count3		dircount;
+ *		count3		maxcount;
+ *	};
+ */
+static void encode_readdirplus3args(struct xdr_stream *xdr,
+				    const struct nfs3_readdirargs *args)
 {
 	__be32 *p;
-	struct nfs_entry old = *entry;
-
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	if (!ntohl(*p++)) {
-		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
-			goto out_overflow;
-		if (!ntohl(*p++))
-			return ERR_PTR(-EAGAIN);
-		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
-	}
 
-	p = xdr_inline_decode(xdr, 12);
-	if (unlikely(!p))
-		goto out_overflow;
-	p = xdr_decode_hyper(p, &entry->ino);
-	entry->len = ntohl(*p++);
+	encode_nfs_fh3(xdr, args->fh);
 
-	p = xdr_inline_decode(xdr, entry->len + 8);
-	if (unlikely(!p))
-		goto out_overflow;
-	entry->name = (const char *) p;
-	p += XDR_QUADLEN(entry->len);
-	entry->prev_cookie = entry->cookie;
-	p = xdr_decode_hyper(p, &entry->cookie);
-
-	entry->d_type = DT_UNKNOWN;
-	if (plus) {
-		entry->fattr->valid = 0;
-		p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
-		if (IS_ERR(p))
-			goto out_overflow_exit;
-		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
-		/* In fact, a post_op_fh3: */
-		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
-			goto out_overflow;
-		if (*p++) {
-			p = xdr_decode_fhandle_stream(xdr, entry->fh);
-			if (IS_ERR(p))
-				goto out_overflow_exit;
-			/* Ugh -- server reply was truncated */
-			if (p == NULL) {
-				dprintk("NFS: FH truncated\n");
-				*entry = old;
-				return ERR_PTR(-EAGAIN);
-			}
-		} else
-			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
-	}
+	p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4);
+	p = xdr_encode_cookie3(p, args->cookie);
+	p = xdr_encode_cookieverf3(p, args->verf);
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
-	return p;
+	/*
+	 * readdirplus: need dircount + buffer size.
+	 * We just make sure we make dircount big enough
+	 */
+	*p++ = cpu_to_be32(args->count >> 3);
 
-out_overflow:
-	print_overflow_msg(__func__, xdr);
-out_overflow_exit:
-	return ERR_PTR(-EAGAIN);
+	*p = cpu_to_be32(args->count);
+}
+
+static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+					  const struct nfs3_readdirargs *args)
+{
+	encode_readdirplus3args(xdr, args);
+	prepare_reply_buffer(req, args->pages, 0,
+				args->count, NFS3_readdirres_sz);
 }
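READDIRPLUS carries two limits: dircount (bytes of directory entry data alone) and maxcount (the total reply size). The encoder derives dircount as count >> 3, one eighth of the reply buffer, per the in-line comment above. A small standalone illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned int count = 32768;		/* illustrative reply buffer size */
		unsigned int dircount = count >> 3;	/* heuristic: 1/8 of the buffer */

		printf("dircount=%u maxcount=%u\n", dircount, count);	/* 4096, 32768 */
		return 0;
	}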
 
 /*
- * Encode COMMIT arguments
+ * 3.3.21  COMMIT3args
+ *
+ *	struct COMMIT3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *	};
  */
-static int
-nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_commit3args(struct xdr_stream *xdr,
+			       const struct nfs_writeargs *args)
 {
-	p = xdr_encode_fhandle(p, args->fh);
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + 4);
 	p = xdr_encode_hyper(p, args->offset);
-	*p++ = htonl(args->count);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
+	*p = cpu_to_be32(args->count);
 }
 
-#ifdef CONFIG_NFS_V3_ACL
-/*
- * Encode GETACL arguments
- */
-static int
-nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
-		    struct nfs3_getaclargs *args)
+static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_writeargs *args)
 {
-	struct rpc_auth *auth = req->rq_cred->cr_auth;
-	unsigned int replen;
+	encode_commit3args(xdr, args);
+}
 
-	p = xdr_encode_fhandle(p, args->fh);
-	*p++ = htonl(args->mask);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+#ifdef CONFIG_NFS_V3_ACL
 
-	if (args->mask & (NFS_ACL | NFS_DFACL)) {
-		/* Inline the page array */
-		replen = (RPC_REPHDRSIZE + auth->au_rslack +
-			  ACL3_getaclres_sz) << 2;
-		xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
-				 NFSACL_MAXPAGES << PAGE_SHIFT);
-	}
-	return 0;
+static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_getaclargs *args)
+{
+	encode_nfs_fh3(xdr, args->fh);
+	encode_uint32(xdr, args->mask);
+	if (args->mask & (NFS_ACL | NFS_DFACL))
+		prepare_reply_buffer(req, args->pages, 0,
+					NFSACL_MAXPAGES << PAGE_SHIFT,
+					ACL3_getaclres_sz);
 }
 
-/*
- * Encode SETACL arguments
- */
-static int
-nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
-		    struct nfs3_setaclargs *args)
+static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_setaclargs *args)
 {
-	struct xdr_buf *buf = &req->rq_snd_buf;
 	unsigned int base;
-	int err;
-
-	p = xdr_encode_fhandle(p, NFS_FH(args->inode));
-	*p++ = htonl(args->mask);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	base = req->rq_slen;
+	int error;
 
+	encode_nfs_fh3(xdr, NFS_FH(args->inode));
+	encode_uint32(xdr, args->mask);
 	if (args->npages != 0)
-		xdr_encode_pages(buf, args->pages, 0, args->len);
-	else
-		req->rq_slen = xdr_adjust_iovec(req->rq_svec,
-						p + XDR_QUADLEN(args->len));
+		xdr_write_pages(xdr, args->pages, 0, args->len);
 
-	err = nfsacl_encode(buf, base, args->inode,
+	base = req->rq_slen;
+	error = nfsacl_encode(xdr->buf, base, args->inode,
 			    (args->mask & NFS_ACL) ?
 			    args->acl_access : NULL, 1, 0);
-	if (err > 0)
-		err = nfsacl_encode(buf, base + err, args->inode,
-				    (args->mask & NFS_DFACL) ?
-				    args->acl_default : NULL, 1,
-				    NFS_ACL_DEFAULT);
-	return (err > 0) ? 0 : err;
+	BUG_ON(error < 0);
+	error = nfsacl_encode(xdr->buf, base + error, args->inode,
+			    (args->mask & NFS_DFACL) ?
+			    args->acl_default : NULL, 1,
+			    NFS_ACL_DEFAULT);
+	BUG_ON(error < 0);
 }
+
 #endif /* CONFIG_NFS_V3_ACL */
735 1347
736/* 1348/*
737 * NFS XDR decode functions 1349 * NFSv3 XDR decode functions
1350 *
1351 * NFSv3 result types are defined in section 3.3 of RFC 1813:
1352 * "NFS Version 3 Protocol Specification".
738 */ 1353 */
739 1354
740/* 1355/*
741 * Decode attrstat reply. 1356 * 3.3.1 GETATTR3res
1357 *
1358 * struct GETATTR3resok {
1359 * fattr3 obj_attributes;
1360 * };
1361 *
1362 * union GETATTR3res switch (nfsstat3 status) {
1363 * case NFS3_OK:
1364 * GETATTR3resok resok;
1365 * default:
1366 * void;
1367 * };
742 */ 1368 */
743static int 1369static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
744nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1370 struct xdr_stream *xdr,
1371 struct nfs_fattr *result)
745{ 1372{
746 int status; 1373 enum nfs_stat status;
747 1374 int error;
748 if ((status = ntohl(*p++))) 1375
749 return nfs_stat_to_errno(status); 1376 error = decode_nfsstat3(xdr, &status);
750 xdr_decode_fattr(p, fattr); 1377 if (unlikely(error))
751 return 0; 1378 goto out;
1379 if (status != NFS3_OK)
1380 goto out_default;
1381 error = decode_fattr3(xdr, result);
1382out:
1383 return error;
1384out_default:
1385 return nfs_stat_to_errno(status);
752} 1386}
753 1387
754/* 1388/*
755 * Decode status+wcc_data reply 1389 * 3.3.2 SETATTR3res
756 * SATTR, REMOVE, RMDIR 1390 *
1391 * struct SETATTR3resok {
1392 * wcc_data obj_wcc;
1393 * };
1394 *
1395 * struct SETATTR3resfail {
1396 * wcc_data obj_wcc;
1397 * };
1398 *
1399 * union SETATTR3res switch (nfsstat3 status) {
1400 * case NFS3_OK:
1401 * SETATTR3resok resok;
1402 * default:
1403 * SETATTR3resfail resfail;
1404 * };
757 */ 1405 */
758static int 1406static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
759nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1407 struct xdr_stream *xdr,
1408 struct nfs_fattr *result)
760{ 1409{
761 int status; 1410 enum nfs_stat status;
762 1411 int error;
763 if ((status = ntohl(*p++))) 1412
764 status = nfs_stat_to_errno(status); 1413 error = decode_nfsstat3(xdr, &status);
765 xdr_decode_wcc_data(p, fattr); 1414 if (unlikely(error))
766 return status; 1415 goto out;
1416 error = decode_wcc_data(xdr, result);
1417 if (unlikely(error))
1418 goto out;
1419 if (status != NFS3_OK)
1420 goto out_status;
1421out:
1422 return error;
1423out_status:
1424 return nfs_stat_to_errno(status);
767} 1425}
768 1426
769static int 1427/*
770nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) 1428 * 3.3.3 LOOKUP3res
1429 *
1430 * struct LOOKUP3resok {
1431 * nfs_fh3 object;
1432 * post_op_attr obj_attributes;
1433 * post_op_attr dir_attributes;
1434 * };
1435 *
1436 * struct LOOKUP3resfail {
1437 * post_op_attr dir_attributes;
1438 * };
1439 *
1440 * union LOOKUP3res switch (nfsstat3 status) {
1441 * case NFS3_OK:
1442 * LOOKUP3resok resok;
1443 * default:
1444 * LOOKUP3resfail resfail;
1445 * };
1446 */
1447static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
1448 struct xdr_stream *xdr,
1449 struct nfs3_diropres *result)
771{ 1450{
772 return nfs3_xdr_wccstat(req, p, res->dir_attr); 1451 enum nfs_stat status;
1452 int error;
1453
1454 error = decode_nfsstat3(xdr, &status);
1455 if (unlikely(error))
1456 goto out;
1457 if (status != NFS3_OK)
1458 goto out_default;
1459 error = decode_nfs_fh3(xdr, result->fh);
1460 if (unlikely(error))
1461 goto out;
1462 error = decode_post_op_attr(xdr, result->fattr);
1463 if (unlikely(error))
1464 goto out;
1465 error = decode_post_op_attr(xdr, result->dir_attr);
1466out:
1467 return error;
1468out_default:
1469 error = decode_post_op_attr(xdr, result->dir_attr);
1470 if (unlikely(error))
1471 goto out;
1472 return nfs_stat_to_errno(status);
773} 1473}
774 1474
775/* 1475/*
776 * Decode LOOKUP reply 1476 * 3.3.4 ACCESS3res
1477 *
1478 * struct ACCESS3resok {
1479 * post_op_attr obj_attributes;
1480 * uint32 access;
1481 * };
1482 *
1483 * struct ACCESS3resfail {
1484 * post_op_attr obj_attributes;
1485 * };
1486 *
1487 * union ACCESS3res switch (nfsstat3 status) {
1488 * case NFS3_OK:
1489 * ACCESS3resok resok;
1490 * default:
1491 * ACCESS3resfail resfail;
1492 * };
777 */ 1493 */
778static int 1494static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
779nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) 1495 struct xdr_stream *xdr,
1496 struct nfs3_accessres *result)
780{ 1497{
781 int status; 1498 enum nfs_stat status;
782 1499 int error;
783 if ((status = ntohl(*p++))) { 1500
784 status = nfs_stat_to_errno(status); 1501 error = decode_nfsstat3(xdr, &status);
785 } else { 1502 if (unlikely(error))
786 if (!(p = xdr_decode_fhandle(p, res->fh))) 1503 goto out;
787 return -errno_NFSERR_IO; 1504 error = decode_post_op_attr(xdr, result->fattr);
788 p = xdr_decode_post_op_attr(p, res->fattr); 1505 if (unlikely(error))
789 } 1506 goto out;
790 xdr_decode_post_op_attr(p, res->dir_attr); 1507 if (status != NFS3_OK)
791 return status; 1508 goto out_default;
1509 error = decode_uint32(xdr, &result->access);
1510out:
1511 return error;
1512out_default:
1513 return nfs_stat_to_errno(status);
792} 1514}
793 1515
794/* 1516/*
795 * Decode ACCESS reply 1517 * 3.3.5 READLINK3res
1518 *
1519 * struct READLINK3resok {
1520 * post_op_attr symlink_attributes;
1521 * nfspath3 data;
1522 * };
1523 *
1524 * struct READLINK3resfail {
1525 * post_op_attr symlink_attributes;
1526 * };
1527 *
1528 * union READLINK3res switch (nfsstat3 status) {
1529 * case NFS3_OK:
1530 * READLINK3resok resok;
1531 * default:
1532 * READLINK3resfail resfail;
1533 * };
796 */ 1534 */
797static int 1535static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
798nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) 1536 struct xdr_stream *xdr,
1537 struct nfs_fattr *result)
799{ 1538{
800 int status = ntohl(*p++); 1539 enum nfs_stat status;
801 1540 int error;
802 p = xdr_decode_post_op_attr(p, res->fattr); 1541
803 if (status) 1542 error = decode_nfsstat3(xdr, &status);
804 return nfs_stat_to_errno(status); 1543 if (unlikely(error))
805 res->access = ntohl(*p++); 1544 goto out;
806 return 0; 1545 error = decode_post_op_attr(xdr, result);
1546 if (unlikely(error))
1547 goto out;
1548 if (status != NFS3_OK)
1549 goto out_default;
1550 error = decode_nfspath3(xdr);
1551out:
1552 return error;
1553out_default:
1554 return nfs_stat_to_errno(status);
807} 1555}
808 1556
809static int 1557/*
810nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) 1558 * 3.3.6 READ3res
1559 *
1560 * struct READ3resok {
1561 * post_op_attr file_attributes;
1562 * count3 count;
1563 * bool eof;
1564 * opaque data<>;
1565 * };
1566 *
1567 * struct READ3resfail {
1568 * post_op_attr file_attributes;
1569 * };
1570 *
1571 * union READ3res switch (nfsstat3 status) {
1572 * case NFS3_OK:
1573 * READ3resok resok;
1574 * default:
1575 * READ3resfail resfail;
1576 * };
1577 */
1578static int decode_read3resok(struct xdr_stream *xdr,
1579 struct nfs_readres *result)
811{ 1580{
812 struct rpc_auth *auth = req->rq_cred->cr_auth; 1581 u32 eof, count, ocount, recvd;
813 unsigned int replen; 1582 size_t hdrlen;
1583 __be32 *p;
814 1584
815 p = xdr_encode_fhandle(p, args->fh); 1585 p = xdr_inline_decode(xdr, 4 + 4 + 4);
816 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 1586 if (unlikely(p == NULL))
1587 goto out_overflow;
1588 count = be32_to_cpup(p++);
1589 eof = be32_to_cpup(p++);
1590 ocount = be32_to_cpup(p++);
1591 if (unlikely(ocount != count))
1592 goto out_mismatch;
1593 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
1594 recvd = xdr->buf->len - hdrlen;
1595 if (unlikely(count > recvd))
1596 goto out_cheating;
1597
1598out:
1599 xdr_read_pages(xdr, count);
1600 result->eof = eof;
1601 result->count = count;
1602 return count;
1603out_mismatch:
1604 dprintk("NFS: READ count doesn't match length of opaque: "
1605 "count %u != ocount %u\n", count, ocount);
1606 return -EIO;
1607out_cheating:
1608 dprintk("NFS: server cheating in read result: "
1609 "count %u > recvd %u\n", count, recvd);
1610 count = recvd;
1611 eof = 0;
1612 goto out;
1613out_overflow:
1614 print_overflow_msg(__func__, xdr);
1615 return -EIO;
1616}
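A worked example of the two sanity checks above, with hypothetical numbers: if the RPC and NFS reply headers occupy hdrlen = 96 bytes of a 4192-byte reply, then recvd = 4096. A server claiming count = 8192 exceeds recvd, so the decoder clamps count to 4096 and clears eof rather than trusting a short reply that pretends to be complete; the client will simply issue another READ. By contrast, an ocount that disagrees with count is a malformed reply, since both fields describe the same opaque data, and is rejected outright with -EIO.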
817 1617
818 /* Inline the page array */ 1618static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
819 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; 1619 struct nfs_readres *result)
820 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); 1620{
821 return 0; 1621 enum nfs_stat status;
1622 int error;
1623
1624 error = decode_nfsstat3(xdr, &status);
1625 if (unlikely(error))
1626 goto out;
1627 error = decode_post_op_attr(xdr, result->fattr);
1628 if (unlikely(error))
1629 goto out;
1630 if (status != NFS3_OK)
1631 goto out_status;
1632 error = decode_read3resok(xdr, result);
1633out:
1634 return error;
1635out_status:
1636 return nfs_stat_to_errno(status);
822} 1637}
823 1638
824/* 1639/*
825 * Decode READLINK reply 1640 * 3.3.7 WRITE3res
1641 *
1642 * enum stable_how {
1643 * UNSTABLE = 0,
1644 * DATA_SYNC = 1,
1645 * FILE_SYNC = 2
1646 * };
1647 *
1648 * struct WRITE3resok {
1649 * wcc_data file_wcc;
1650 * count3 count;
1651 * stable_how committed;
1652 * writeverf3 verf;
1653 * };
1654 *
1655 * struct WRITE3resfail {
1656 * wcc_data file_wcc;
1657 * };
1658 *
1659 * union WRITE3res switch (nfsstat3 status) {
1660 * case NFS3_OK:
1661 * WRITE3resok resok;
1662 * default:
1663 * WRITE3resfail resfail;
1664 * };
826 */ 1665 */
827static int 1666static int decode_write3resok(struct xdr_stream *xdr,
828nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) 1667 struct nfs_writeres *result)
829{ 1668{
830 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 1669 __be32 *p;
831 struct kvec *iov = rcvbuf->head;
832 size_t hdrlen;
833 u32 len, recvd;
834 int status;
835
836 status = ntohl(*p++);
837 p = xdr_decode_post_op_attr(p, fattr);
838
839 if (status != 0)
840 return nfs_stat_to_errno(status);
841
842 /* Convert length of symlink */
843 len = ntohl(*p++);
844 if (len >= rcvbuf->page_len) {
845 dprintk("nfs: server returned giant symlink!\n");
846 return -ENAMETOOLONG;
847 }
848 1670
849 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 1671 p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
850 if (iov->iov_len < hdrlen) { 1672 if (unlikely(p == NULL))
851 dprintk("NFS: READLINK reply header overflowed:" 1673 goto out_overflow;
852 "length %Zu > %Zu\n", hdrlen, iov->iov_len); 1674 result->count = be32_to_cpup(p++);
853 return -errno_NFSERR_IO; 1675 result->verf->committed = be32_to_cpup(p++);
854 } else if (iov->iov_len != hdrlen) { 1676 if (unlikely(result->verf->committed > NFS_FILE_SYNC))
855 dprintk("NFS: READLINK header is short. " 1677 goto out_badvalue;
856 "iovec will be shifted.\n"); 1678 memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
857 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); 1679 return result->count;
858 } 1680out_badvalue:
859 recvd = req->rq_rcv_buf.len - hdrlen; 1681 dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
860 if (recvd < len) { 1682 return -EIO;
861 dprintk("NFS: server cheating in readlink reply: " 1683out_overflow:
862 "count %u > recvd %u\n", len, recvd); 1684 print_overflow_msg(__func__, xdr);
863 return -EIO; 1685 return -EIO;
864 } 1686}
865 1687
866 xdr_terminate_string(rcvbuf, len); 1688static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
867 return 0; 1689 struct nfs_writeres *result)
1690{
1691 enum nfs_stat status;
1692 int error;
1693
1694 error = decode_nfsstat3(xdr, &status);
1695 if (unlikely(error))
1696 goto out;
1697 error = decode_wcc_data(xdr, result->fattr);
1698 if (unlikely(error))
1699 goto out;
1700 if (status != NFS3_OK)
1701 goto out_status;
1702 error = decode_write3resok(xdr, result);
1703out:
1704 return error;
1705out_status:
1706 return nfs_stat_to_errno(status);
868} 1707}
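The write verifier copied out above is opaque to the client; its only use is comparison across calls. A sketch of the consumer side, not part of this patch (saved_verf and resend_unstable_writes() are illustrative names):

	/* If the verifier changes between WRITE and COMMIT, the server
	 * rebooted and may have dropped UNSTABLE data: resend it. */
	if (memcmp(saved_verf.verifier, result->verf->verifier,
		   NFS3_WRITEVERFSIZE) != 0)
		resend_unstable_writes();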
869 1708
870/* 1709/*
871 * Decode READ reply 1710 * 3.3.8 CREATE3res
1711 *
1712 * struct CREATE3resok {
1713 * post_op_fh3 obj;
1714 * post_op_attr obj_attributes;
1715 * wcc_data dir_wcc;
1716 * };
1717 *
1718 * struct CREATE3resfail {
1719 * wcc_data dir_wcc;
1720 * };
1721 *
1722 * union CREATE3res switch (nfsstat3 status) {
1723 * case NFS3_OK:
1724 * CREATE3resok resok;
1725 * default:
1726 * CREATE3resfail resfail;
1727 * };
872 */ 1728 */
873static int 1729static int decode_create3resok(struct xdr_stream *xdr,
874nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) 1730 struct nfs3_diropres *result)
875{ 1731{
876 struct kvec *iov = req->rq_rcv_buf.head; 1732 int error;
877 size_t hdrlen; 1733
878 u32 count, ocount, recvd; 1734 error = decode_post_op_fh3(xdr, result->fh);
879 int status; 1735 if (unlikely(error))
1736 goto out;
1737 error = decode_post_op_attr(xdr, result->fattr);
1738 if (unlikely(error))
1739 goto out;
1740 /* The server isn't required to return a file handle.
1741 * If it didn't, force the client to perform a LOOKUP
1742 * to determine the correct file handle and attribute
1743 * values for the new object. */
1744 if (result->fh->size == 0)
1745 result->fattr->valid = 0;
1746 error = decode_wcc_data(xdr, result->dir_attr);
1747out:
1748 return error;
1749}
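The zero-size file handle and the cleared fattr->valid act as an in-band signal to the proc layer. A sketch of the consequence there, not part of this hunk (the exact helper name is an assumption):

	/* CREATE reply omitted the optional post_op_fh3: fetch the
	 * handle and attributes with an explicit LOOKUP. */
	if (result->fh->size == 0)
		status = nfs3_proc_lookup(dir, &dentry->d_name,
					  result->fh, result->fattr);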
880 1750
881 status = ntohl(*p++); 1751static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
882 p = xdr_decode_post_op_attr(p, res->fattr); 1752 struct xdr_stream *xdr,
1753 struct nfs3_diropres *result)
1754{
1755 enum nfs_stat status;
1756 int error;
1757
1758 error = decode_nfsstat3(xdr, &status);
1759 if (unlikely(error))
1760 goto out;
1761 if (status != NFS3_OK)
1762 goto out_default;
1763 error = decode_create3resok(xdr, result);
1764out:
1765 return error;
1766out_default:
1767 error = decode_wcc_data(xdr, result->dir_attr);
1768 if (unlikely(error))
1769 goto out;
1770 return nfs_stat_to_errno(status);
1771}
883 1772
884 if (status != 0) 1773/*
885 return nfs_stat_to_errno(status); 1774 * 3.3.12 REMOVE3res
1775 *
1776 * struct REMOVE3resok {
1777 * wcc_data dir_wcc;
1778 * };
1779 *
1780 * struct REMOVE3resfail {
1781 * wcc_data dir_wcc;
1782 * };
1783 *
1784 * union REMOVE3res switch (nfsstat3 status) {
1785 * case NFS3_OK:
1786 * REMOVE3resok resok;
1787 * default:
1788 * REMOVE3resfail resfail;
1789 * };
1790 */
1791static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1792 struct xdr_stream *xdr,
1793 struct nfs_removeres *result)
1794{
1795 enum nfs_stat status;
1796 int error;
1797
1798 error = decode_nfsstat3(xdr, &status);
1799 if (unlikely(error))
1800 goto out;
1801 error = decode_wcc_data(xdr, result->dir_attr);
1802 if (unlikely(error))
1803 goto out;
1804 if (status != NFS3_OK)
1805 goto out_status;
1806out:
1807 return error;
1808out_status:
1809 return nfs_stat_to_errno(status);
1810}
886 1811
887 /* Decode reply count and EOF flag. NFSv3 is somewhat redundant 1812/*
888 * in that it puts the count both in the res struct and in the 1813 * 3.3.14 RENAME3res
889 * opaque data count. */ 1814 *
890 count = ntohl(*p++); 1815 * struct RENAME3resok {
891 res->eof = ntohl(*p++); 1816 * wcc_data fromdir_wcc;
892 ocount = ntohl(*p++); 1817 * wcc_data todir_wcc;
1818 * };
1819 *
1820 * struct RENAME3resfail {
1821 * wcc_data fromdir_wcc;
1822 * wcc_data todir_wcc;
1823 * };
1824 *
1825 * union RENAME3res switch (nfsstat3 status) {
1826 * case NFS3_OK:
1827 * RENAME3resok resok;
1828 * default:
1829 * RENAME3resfail resfail;
1830 * };
1831 */
1832static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1833 struct xdr_stream *xdr,
1834 struct nfs_renameres *result)
1835{
1836 enum nfs_stat status;
1837 int error;
1838
1839 error = decode_nfsstat3(xdr, &status);
1840 if (unlikely(error))
1841 goto out;
1842 error = decode_wcc_data(xdr, result->old_fattr);
1843 if (unlikely(error))
1844 goto out;
1845 error = decode_wcc_data(xdr, result->new_fattr);
1846 if (unlikely(error))
1847 goto out;
1848 if (status != NFS3_OK)
1849 goto out_status;
1850out:
1851 return error;
1852out_status:
1853 return nfs_stat_to_errno(status);
1854}
893 1855
894 if (ocount != count) { 1856/*
895 dprintk("NFS: READ count doesn't match RPC opaque count.\n"); 1857 * 3.3.15 LINK3res
896 return -errno_NFSERR_IO; 1858 *
897 } 1859 * struct LINK3resok {
1860 * post_op_attr file_attributes;
1861 * wcc_data linkdir_wcc;
1862 * };
1863 *
1864 * struct LINK3resfail {
1865 * post_op_attr file_attributes;
1866 * wcc_data linkdir_wcc;
1867 * };
1868 *
1869 * union LINK3res switch (nfsstat3 status) {
1870 * case NFS3_OK:
1871 * LINK3resok resok;
1872 * default:
1873 * LINK3resfail resfail;
1874 * };
1875 */
1876static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1877 struct nfs3_linkres *result)
1878{
1879 enum nfs_stat status;
1880 int error;
1881
1882 error = decode_nfsstat3(xdr, &status);
1883 if (unlikely(error))
1884 goto out;
1885 error = decode_post_op_attr(xdr, result->fattr);
1886 if (unlikely(error))
1887 goto out;
1888 error = decode_wcc_data(xdr, result->dir_attr);
1889 if (unlikely(error))
1890 goto out;
1891 if (status != NFS3_OK)
1892 goto out_status;
1893out:
1894 return error;
1895out_status:
1896 return nfs_stat_to_errno(status);
1897}
898 1898
899 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 1899/**
900 if (iov->iov_len < hdrlen) { 1900 * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in
901 dprintk("NFS: READ reply header overflowed:" 1901 * the local page cache
902 "length %Zu > %Zu\n", hdrlen, iov->iov_len); 1902 * @xdr: XDR stream where entry resides
903 return -errno_NFSERR_IO; 1903 * @entry: buffer to fill in with entry data
904 } else if (iov->iov_len != hdrlen) { 1904 * @plus: boolean indicating whether this should be a readdirplus entry
905 dprintk("NFS: READ header is short. iovec will be shifted.\n"); 1905 *
906 xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); 1906 * Returns zero if successful, otherwise a negative errno value is
907 } 1907 * returned.
1908 *
1909 * This function is not invoked during READDIR reply decoding, but
1910 * rather whenever an application invokes the getdents(2) system call
1911 * on a directory already in our cache.
1912 *
1913 * 3.3.16 entry3
1914 *
1915 * struct entry3 {
1916 * fileid3 fileid;
1917 * filename3 name;
1918 * cookie3 cookie;
1921 * entry3 *nextentry;
1922 * };
1923 *
1924 * 3.3.17 entryplus3
1925 * struct entryplus3 {
1926 * fileid3 fileid;
1927 * filename3 name;
1928 * cookie3 cookie;
1929 * post_op_attr name_attributes;
1930 * post_op_fh3 name_handle;
1931 * entryplus3 *nextentry;
1932 * };
1933 */
1934int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1935 int plus)
1936{
1937 struct nfs_entry old = *entry;
1938 __be32 *p;
1939 int error;
908 1940
909 recvd = req->rq_rcv_buf.len - hdrlen; 1941 p = xdr_inline_decode(xdr, 4);
910 if (count > recvd) { 1942 if (unlikely(p == NULL))
911 dprintk("NFS: server cheating in read reply: " 1943 goto out_overflow;
912 "count %u > recvd %u\n", count, recvd); 1944 if (*p == xdr_zero) {
913 count = recvd; 1945 p = xdr_inline_decode(xdr, 4);
914 res->eof = 0; 1946 if (unlikely(p == NULL))
1947 goto out_overflow;
1948 if (*p == xdr_zero)
1949 return -EAGAIN;
1950 entry->eof = 1;
1951 return -EBADCOOKIE;
915 } 1952 }
916 1953
917 if (count < res->count) 1954 error = decode_fileid3(xdr, &entry->ino);
918 res->count = count; 1955 if (unlikely(error))
1956 return error;
919 1957
920 return count; 1958 error = decode_inline_filename3(xdr, &entry->name, &entry->len);
921} 1959 if (unlikely(error))
1960 return error;
922 1961
923/* 1962 entry->prev_cookie = entry->cookie;
924 * Decode WRITE response 1963 error = decode_cookie3(xdr, &entry->cookie);
925 */ 1964 if (unlikely(error))
926static int 1965 return error;
927nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
928{
929 int status;
930 1966
931 status = ntohl(*p++); 1967 entry->d_type = DT_UNKNOWN;
932 p = xdr_decode_wcc_data(p, res->fattr);
933 1968
934 if (status != 0) 1969 if (plus) {
935 return nfs_stat_to_errno(status); 1970 entry->fattr->valid = 0;
1971 error = decode_post_op_attr(xdr, entry->fattr);
1972 if (unlikely(error))
1973 return error;
1974 if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
1975 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
936 1976
937 res->count = ntohl(*p++); 1977 /* In fact, a post_op_fh3: */
938 res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); 1978 p = xdr_inline_decode(xdr, 4);
939 res->verf->verifier[0] = *p++; 1979 if (unlikely(p == NULL))
940 res->verf->verifier[1] = *p++; 1980 goto out_overflow;
1981 if (*p != xdr_zero) {
1982 error = decode_nfs_fh3(xdr, entry->fh);
1983 if (unlikely(error)) {
1984 if (error == -E2BIG)
1985 goto out_truncated;
1986 return error;
1987 }
1988 } else
1989 zero_nfs_fh3(entry->fh);
1990 }
941 1991
942 return res->count; 1992 return 0;
943}
944 1993
945/* 1994out_overflow:
946 * Decode a CREATE response 1995 print_overflow_msg(__func__, xdr);
947 */ 1996 return -EAGAIN;
948static int 1997out_truncated:
949nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) 1998 dprintk("NFS: directory entry contains invalid file handle\n");
950{ 1999 *entry = old;
951 int status; 2000 return -EAGAIN;
952
953 status = ntohl(*p++);
954 if (status == 0) {
955 if (*p++) {
956 if (!(p = xdr_decode_fhandle(p, res->fh)))
957 return -errno_NFSERR_IO;
958 p = xdr_decode_post_op_attr(p, res->fattr);
959 } else {
960 memset(res->fh, 0, sizeof(*res->fh));
961 /* Do decode post_op_attr but set it to NULL */
962 p = xdr_decode_post_op_attr(p, res->fattr);
963 res->fattr->valid = 0;
964 }
965 } else {
966 status = nfs_stat_to_errno(status);
967 }
968 p = xdr_decode_wcc_data(p, res->dir_attr);
969 return status;
970} 2001}
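For context, a minimal sketch of the loop that consumes this decoder. The real loop lives in the generic readdir code in fs/nfs/dir.c; the function name and the callback comment here are illustrative only:

static int walk_cached_entries(struct xdr_stream *xdr,
			       struct nfs_entry *entry, int plus)
{
	int status;

	for (;;) {
		status = nfs3_decode_dirent(xdr, entry, plus);
		if (status == -EAGAIN)
			return 0;	/* end of this entry list */
		if (status == -EBADCOOKIE && entry->eof)
			return 0;	/* true end of directory */
		if (status < 0)
			return status;
		/* ... emit entry->name / entry->ino to the caller ... */
	}
}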
971 2002
972/* 2003/*
973 * Decode RENAME reply 2004 * 3.3.16 READDIR3res
2005 *
2006 * struct dirlist3 {
2007 * entry3 *entries;
2008 * bool eof;
2009 * };
2010 *
2011 * struct READDIR3resok {
2012 * post_op_attr dir_attributes;
2013 * cookieverf3 cookieverf;
2014 * dirlist3 reply;
2015 * };
2016 *
2017 * struct READDIR3resfail {
2018 * post_op_attr dir_attributes;
2019 * };
2020 *
2021 * union READDIR3res switch (nfsstat3 status) {
2022 * case NFS3_OK:
2023 * READDIR3resok resok;
2024 * default:
2025 * READDIR3resfail resfail;
2026 * };
2027 *
2028 * Read the directory contents into the page cache, but otherwise
 2029 * don't touch them. The actual decoding is done by nfs3_decode_dirent()
2030 * during subsequent nfs_readdir() calls.
974 */ 2031 */
975static int 2032static int decode_dirlist3(struct xdr_stream *xdr)
976nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
977{ 2033{
978 int status; 2034 u32 recvd, pglen;
2035 size_t hdrlen;
979 2036
980 if ((status = ntohl(*p++)) != 0) 2037 pglen = xdr->buf->page_len;
981 status = nfs_stat_to_errno(status); 2038 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
982 p = xdr_decode_wcc_data(p, res->old_fattr); 2039 recvd = xdr->buf->len - hdrlen;
983 p = xdr_decode_wcc_data(p, res->new_fattr); 2040 if (unlikely(pglen > recvd))
984 return status; 2041 goto out_cheating;
2042out:
2043 xdr_read_pages(xdr, pglen);
2044 return pglen;
2045out_cheating:
2046 dprintk("NFS: server cheating in readdir result: "
2047 "pglen %u > recvd %u\n", pglen, recvd);
2048 pglen = recvd;
2049 goto out;
985} 2050}
986 2051
987/* 2052static int decode_readdir3resok(struct xdr_stream *xdr,
988 * Decode LINK reply 2053 struct nfs3_readdirres *result)
989 */
990static int
991nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
992{ 2054{
993 int status; 2055 int error;
2056
2057 error = decode_post_op_attr(xdr, result->dir_attr);
2058 if (unlikely(error))
2059 goto out;
2060 /* XXX: do we need to check if result->verf != NULL ? */
2061 error = decode_cookieverf3(xdr, result->verf);
2062 if (unlikely(error))
2063 goto out;
2064 error = decode_dirlist3(xdr);
2065out:
2066 return error;
2067}
994 2068
995 if ((status = ntohl(*p++)) != 0) 2069static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
996 status = nfs_stat_to_errno(status); 2070 struct xdr_stream *xdr,
997 p = xdr_decode_post_op_attr(p, res->fattr); 2071 struct nfs3_readdirres *result)
998 p = xdr_decode_wcc_data(p, res->dir_attr); 2072{
999 return status; 2073 enum nfs_stat status;
2074 int error;
2075
2076 error = decode_nfsstat3(xdr, &status);
2077 if (unlikely(error))
2078 goto out;
2079 if (status != NFS3_OK)
2080 goto out_default;
2081 error = decode_readdir3resok(xdr, result);
2082out:
2083 return error;
2084out_default:
2085 error = decode_post_op_attr(xdr, result->dir_attr);
2086 if (unlikely(error))
2087 goto out;
2088 return nfs_stat_to_errno(status);
1000} 2089}
1001 2090
1002/* 2091/*
1003 * Decode FSSTAT reply 2092 * 3.3.18 FSSTAT3res
2093 *
2094 * struct FSSTAT3resok {
2095 * post_op_attr obj_attributes;
2096 * size3 tbytes;
2097 * size3 fbytes;
2098 * size3 abytes;
2099 * size3 tfiles;
2100 * size3 ffiles;
2101 * size3 afiles;
2102 * uint32 invarsec;
2103 * };
2104 *
2105 * struct FSSTAT3resfail {
2106 * post_op_attr obj_attributes;
2107 * };
2108 *
2109 * union FSSTAT3res switch (nfsstat3 status) {
2110 * case NFS3_OK:
2111 * FSSTAT3resok resok;
2112 * default:
2113 * FSSTAT3resfail resfail;
2114 * };
1004 */ 2115 */
1005static int 2116static int decode_fsstat3resok(struct xdr_stream *xdr,
1006nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res) 2117 struct nfs_fsstat *result)
1007{ 2118{
1008 int status; 2119 __be32 *p;
1009
1010 status = ntohl(*p++);
1011
1012 p = xdr_decode_post_op_attr(p, res->fattr);
1013 if (status != 0)
1014 return nfs_stat_to_errno(status);
1015
1016 p = xdr_decode_hyper(p, &res->tbytes);
1017 p = xdr_decode_hyper(p, &res->fbytes);
1018 p = xdr_decode_hyper(p, &res->abytes);
1019 p = xdr_decode_hyper(p, &res->tfiles);
1020 p = xdr_decode_hyper(p, &res->ffiles);
1021 p = xdr_decode_hyper(p, &res->afiles);
1022 2120
2121 p = xdr_inline_decode(xdr, 8 * 6 + 4);
2122 if (unlikely(p == NULL))
2123 goto out_overflow;
2124 p = xdr_decode_size3(p, &result->tbytes);
2125 p = xdr_decode_size3(p, &result->fbytes);
2126 p = xdr_decode_size3(p, &result->abytes);
2127 p = xdr_decode_size3(p, &result->tfiles);
2128 p = xdr_decode_size3(p, &result->ffiles);
2129 xdr_decode_size3(p, &result->afiles);
1023 /* ignore invarsec */ 2130 /* ignore invarsec */
1024 return 0; 2131 return 0;
2132out_overflow:
2133 print_overflow_msg(__func__, xdr);
2134 return -EIO;
2135}
2136
2137static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2138 struct xdr_stream *xdr,
2139 struct nfs_fsstat *result)
2140{
2141 enum nfs_stat status;
2142 int error;
2143
2144 error = decode_nfsstat3(xdr, &status);
2145 if (unlikely(error))
2146 goto out;
2147 error = decode_post_op_attr(xdr, result->fattr);
2148 if (unlikely(error))
2149 goto out;
2150 if (status != NFS3_OK)
2151 goto out_status;
2152 error = decode_fsstat3resok(xdr, result);
2153out:
2154 return error;
2155out_status:
2156 return nfs_stat_to_errno(status);
1025} 2157}
1026 2158
1027/* 2159/*
1028 * Decode FSINFO reply 2160 * 3.3.19 FSINFO3res
2161 *
2162 * struct FSINFO3resok {
2163 * post_op_attr obj_attributes;
2164 * uint32 rtmax;
2165 * uint32 rtpref;
2166 * uint32 rtmult;
2167 * uint32 wtmax;
2168 * uint32 wtpref;
2169 * uint32 wtmult;
2170 * uint32 dtpref;
2171 * size3 maxfilesize;
2172 * nfstime3 time_delta;
2173 * uint32 properties;
2174 * };
2175 *
2176 * struct FSINFO3resfail {
2177 * post_op_attr obj_attributes;
2178 * };
2179 *
2180 * union FSINFO3res switch (nfsstat3 status) {
2181 * case NFS3_OK:
2182 * FSINFO3resok resok;
2183 * default:
2184 * FSINFO3resfail resfail;
2185 * };
1029 */ 2186 */
1030static int 2187static int decode_fsinfo3resok(struct xdr_stream *xdr,
1031nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) 2188 struct nfs_fsinfo *result)
1032{ 2189{
1033 int status; 2190 __be32 *p;
1034
1035 status = ntohl(*p++);
1036
1037 p = xdr_decode_post_op_attr(p, res->fattr);
1038 if (status != 0)
1039 return nfs_stat_to_errno(status);
1040 2191
1041 res->rtmax = ntohl(*p++); 2192 p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
1042 res->rtpref = ntohl(*p++); 2193 if (unlikely(p == NULL))
1043 res->rtmult = ntohl(*p++); 2194 goto out_overflow;
1044 res->wtmax = ntohl(*p++); 2195 result->rtmax = be32_to_cpup(p++);
1045 res->wtpref = ntohl(*p++); 2196 result->rtpref = be32_to_cpup(p++);
1046 res->wtmult = ntohl(*p++); 2197 result->rtmult = be32_to_cpup(p++);
1047 res->dtpref = ntohl(*p++); 2198 result->wtmax = be32_to_cpup(p++);
1048 p = xdr_decode_hyper(p, &res->maxfilesize); 2199 result->wtpref = be32_to_cpup(p++);
1049 p = xdr_decode_time3(p, &res->time_delta); 2200 result->wtmult = be32_to_cpup(p++);
2201 result->dtpref = be32_to_cpup(p++);
2202 p = xdr_decode_size3(p, &result->maxfilesize);
2203 xdr_decode_nfstime3(p, &result->time_delta);
1050 2204
1051 /* ignore properties */ 2205 /* ignore properties */
1052 res->lease_time = 0; 2206 result->lease_time = 0;
1053 return 0; 2207 return 0;
2208out_overflow:
2209 print_overflow_msg(__func__, xdr);
2210 return -EIO;
2211}
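The byte count passed to xdr_inline_decode() above is simply the fixed XDR size of the FSINFO3resok tail that follows obj_attributes:

	/*
	 * rtmax .. dtpref : 7 x uint32  = 28 bytes
	 * maxfilesize     : size3       =  8 bytes
	 * time_delta      : nfstime3    =  8 bytes
	 * properties      : uint32      =  4 bytes
	 *                   total       = 48 = 4 * 7 + 8 + 8 + 4
	 */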
2212
2213static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2214 struct xdr_stream *xdr,
2215 struct nfs_fsinfo *result)
2216{
2217 enum nfs_stat status;
2218 int error;
2219
2220 error = decode_nfsstat3(xdr, &status);
2221 if (unlikely(error))
2222 goto out;
2223 error = decode_post_op_attr(xdr, result->fattr);
2224 if (unlikely(error))
2225 goto out;
2226 if (status != NFS3_OK)
2227 goto out_status;
2228 error = decode_fsinfo3resok(xdr, result);
2229out:
2230 return error;
2231out_status:
2232 return nfs_stat_to_errno(status);
1054} 2233}
1055 2234
1056/* 2235/*
1057 * Decode PATHCONF reply 2236 * 3.3.20 PATHCONF3res
2237 *
2238 * struct PATHCONF3resok {
2239 * post_op_attr obj_attributes;
2240 * uint32 linkmax;
2241 * uint32 name_max;
2242 * bool no_trunc;
2243 * bool chown_restricted;
2244 * bool case_insensitive;
2245 * bool case_preserving;
2246 * };
2247 *
2248 * struct PATHCONF3resfail {
2249 * post_op_attr obj_attributes;
2250 * };
2251 *
2252 * union PATHCONF3res switch (nfsstat3 status) {
2253 * case NFS3_OK:
2254 * PATHCONF3resok resok;
2255 * default:
2256 * PATHCONF3resfail resfail;
2257 * };
1058 */ 2258 */
1059static int 2259static int decode_pathconf3resok(struct xdr_stream *xdr,
1060nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res) 2260 struct nfs_pathconf *result)
1061{ 2261{
1062 int status; 2262 __be32 *p;
1063
1064 status = ntohl(*p++);
1065
1066 p = xdr_decode_post_op_attr(p, res->fattr);
1067 if (status != 0)
1068 return nfs_stat_to_errno(status);
1069 res->max_link = ntohl(*p++);
1070 res->max_namelen = ntohl(*p++);
1071 2263
2264 p = xdr_inline_decode(xdr, 4 * 6);
2265 if (unlikely(p == NULL))
2266 goto out_overflow;
2267 result->max_link = be32_to_cpup(p++);
2268 result->max_namelen = be32_to_cpup(p);
1072 /* ignore remaining fields */ 2269 /* ignore remaining fields */
1073 return 0; 2270 return 0;
2271out_overflow:
2272 print_overflow_msg(__func__, xdr);
2273 return -EIO;
2274}
2275
2276static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2277 struct xdr_stream *xdr,
2278 struct nfs_pathconf *result)
2279{
2280 enum nfs_stat status;
2281 int error;
2282
2283 error = decode_nfsstat3(xdr, &status);
2284 if (unlikely(error))
2285 goto out;
2286 error = decode_post_op_attr(xdr, result->fattr);
2287 if (unlikely(error))
2288 goto out;
2289 if (status != NFS3_OK)
2290 goto out_status;
2291 error = decode_pathconf3resok(xdr, result);
2292out:
2293 return error;
2294out_status:
2295 return nfs_stat_to_errno(status);
1074} 2296}
1075 2297
1076/* 2298/*
1077 * Decode COMMIT reply 2299 * 3.3.21 COMMIT3res
2300 *
2301 * struct COMMIT3resok {
2302 * wcc_data file_wcc;
2303 * writeverf3 verf;
2304 * };
2305 *
2306 * struct COMMIT3resfail {
2307 * wcc_data file_wcc;
2308 * };
2309 *
2310 * union COMMIT3res switch (nfsstat3 status) {
2311 * case NFS3_OK:
2312 * COMMIT3resok resok;
2313 * default:
2314 * COMMIT3resfail resfail;
2315 * };
1078 */ 2316 */
1079static int 2317static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
1080nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) 2318 struct xdr_stream *xdr,
2319 struct nfs_writeres *result)
1081{ 2320{
1082 int status; 2321 enum nfs_stat status;
1083 2322 int error;
1084 status = ntohl(*p++); 2323
1085 p = xdr_decode_wcc_data(p, res->fattr); 2324 error = decode_nfsstat3(xdr, &status);
1086 if (status != 0) 2325 if (unlikely(error))
1087 return nfs_stat_to_errno(status); 2326 goto out;
1088 2327 error = decode_wcc_data(xdr, result->fattr);
1089 res->verf->verifier[0] = *p++; 2328 if (unlikely(error))
1090 res->verf->verifier[1] = *p++; 2329 goto out;
1091 return 0; 2330 if (status != NFS3_OK)
2331 goto out_status;
2332 error = decode_writeverf3(xdr, result->verf->verifier);
2333out:
2334 return error;
2335out_status:
2336 return nfs_stat_to_errno(status);
1092} 2337}
1093 2338
1094#ifdef CONFIG_NFS_V3_ACL 2339#ifdef CONFIG_NFS_V3_ACL
1095/* 2340
1096 * Decode GETACL reply 2341static inline int decode_getacl3resok(struct xdr_stream *xdr,
1097 */ 2342 struct nfs3_getaclres *result)
1098static int
1099nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
1100 struct nfs3_getaclres *res)
1101{ 2343{
1102 struct xdr_buf *buf = &req->rq_rcv_buf;
1103 int status = ntohl(*p++);
1104 struct posix_acl **acl; 2344 struct posix_acl **acl;
1105 unsigned int *aclcnt; 2345 unsigned int *aclcnt;
1106 int err, base; 2346 size_t hdrlen;
1107 2347 int error;
1108 if (status != 0) 2348
1109 return nfs_stat_to_errno(status); 2349 error = decode_post_op_attr(xdr, result->fattr);
1110 p = xdr_decode_post_op_attr(p, res->fattr); 2350 if (unlikely(error))
1111 res->mask = ntohl(*p++); 2351 goto out;
1112 if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 2352 error = decode_uint32(xdr, &result->mask);
1113 return -EINVAL; 2353 if (unlikely(error))
1114 base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; 2354 goto out;
1115 2355 error = -EINVAL;
1116 acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; 2356 if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
1117 aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; 2357 goto out;
1118 err = nfsacl_decode(buf, base, aclcnt, acl); 2358
1119 2359 hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
1120 acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; 2360
1121 aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; 2361 acl = NULL;
1122 if (err > 0) 2362 if (result->mask & NFS_ACL)
1123 err = nfsacl_decode(buf, base + err, aclcnt, acl); 2363 acl = &result->acl_access;
1124 return (err > 0) ? 0 : err; 2364 aclcnt = NULL;
2365 if (result->mask & NFS_ACLCNT)
2366 aclcnt = &result->acl_access_count;
2367 error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);
2368 if (unlikely(error <= 0))
2369 goto out;
2370
2371 acl = NULL;
2372 if (result->mask & NFS_DFACL)
2373 acl = &result->acl_default;
2374 aclcnt = NULL;
2375 if (result->mask & NFS_DFACLCNT)
2376 aclcnt = &result->acl_default_count;
2377 error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl);
2378 if (unlikely(error <= 0))
2379 return error;
2380 error = 0;
2381out:
2382 return error;
1125} 2383}
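Note the offset arithmetic in the two nfsacl_decode() calls above: on success the function returns the number of XDR bytes it consumed, so the second call starts decoding the default ACL at hdrlen plus that count, immediately after the access ACL. Schematically (n is a hypothetical byte count):

	n = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);	/* access ACL */
	if (n > 0)						/* consumed n bytes */
		n = nfsacl_decode(xdr->buf, hdrlen + n, aclcnt, acl);	/* default ACL */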
1126 2384
1127/* 2385static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
1128 * Decode setacl reply. 2386 struct xdr_stream *xdr,
1129 */ 2387 struct nfs3_getaclres *result)
1130static int
1131nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
1132{ 2388{
1133 int status = ntohl(*p++); 2389 enum nfs_stat status;
2390 int error;
2391
2392 error = decode_nfsstat3(xdr, &status);
2393 if (unlikely(error))
2394 goto out;
2395 if (status != NFS3_OK)
2396 goto out_default;
2397 error = decode_getacl3resok(xdr, result);
2398out:
2399 return error;
2400out_default:
2401 return nfs_stat_to_errno(status);
2402}
1134 2403
1135 if (status) 2404static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
1136 return nfs_stat_to_errno(status); 2405 struct xdr_stream *xdr,
1137 xdr_decode_post_op_attr(p, fattr); 2406 struct nfs_fattr *result)
1138 return 0; 2407{
2408 enum nfs_stat status;
2409 int error;
2410
2411 error = decode_nfsstat3(xdr, &status);
2412 if (unlikely(error))
2413 goto out;
2414 if (status != NFS3_OK)
2415 goto out_default;
2416 error = decode_post_op_attr(xdr, result);
2417out:
2418 return error;
2419out_default:
2420 return nfs_stat_to_errno(status);
1139} 2421}
2422
1140#endif /* CONFIG_NFS_V3_ACL */ 2423#endif /* CONFIG_NFS_V3_ACL */
1141 2424
1142#define PROC(proc, argtype, restype, timer) \ 2425#define PROC(proc, argtype, restype, timer) \
1143[NFS3PROC_##proc] = { \ 2426[NFS3PROC_##proc] = { \
1144 .p_proc = NFS3PROC_##proc, \ 2427 .p_proc = NFS3PROC_##proc, \
1145 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ 2428 .p_encode = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args, \
1146 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ 2429 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res, \
1147 .p_arglen = NFS3_##argtype##_sz, \ 2430 .p_arglen = NFS3_##argtype##args_sz, \
1148 .p_replen = NFS3_##restype##_sz, \ 2431 .p_replen = NFS3_##restype##res_sz, \
1149 .p_timer = timer, \ 2432 .p_timer = timer, \
1150 .p_statidx = NFS3PROC_##proc, \ 2433 .p_statidx = NFS3PROC_##proc, \
1151 .p_name = #proc, \ 2434 .p_name = #proc, \
1152 } 2435 }
1153 2436
1154struct rpc_procinfo nfs3_procedures[] = { 2437struct rpc_procinfo nfs3_procedures[] = {
1155 PROC(GETATTR, fhandle, attrstat, 1), 2438 PROC(GETATTR, getattr, getattr, 1),
1156 PROC(SETATTR, sattrargs, wccstat, 0), 2439 PROC(SETATTR, setattr, setattr, 0),
1157 PROC(LOOKUP, diropargs, lookupres, 2), 2440 PROC(LOOKUP, lookup, lookup, 2),
1158 PROC(ACCESS, accessargs, accessres, 1), 2441 PROC(ACCESS, access, access, 1),
1159 PROC(READLINK, readlinkargs, readlinkres, 3), 2442 PROC(READLINK, readlink, readlink, 3),
1160 PROC(READ, readargs, readres, 3), 2443 PROC(READ, read, read, 3),
1161 PROC(WRITE, writeargs, writeres, 4), 2444 PROC(WRITE, write, write, 4),
1162 PROC(CREATE, createargs, createres, 0), 2445 PROC(CREATE, create, create, 0),
1163 PROC(MKDIR, mkdirargs, createres, 0), 2446 PROC(MKDIR, mkdir, create, 0),
1164 PROC(SYMLINK, symlinkargs, createres, 0), 2447 PROC(SYMLINK, symlink, create, 0),
1165 PROC(MKNOD, mknodargs, createres, 0), 2448 PROC(MKNOD, mknod, create, 0),
1166 PROC(REMOVE, removeargs, removeres, 0), 2449 PROC(REMOVE, remove, remove, 0),
1167 PROC(RMDIR, diropargs, wccstat, 0), 2450 PROC(RMDIR, lookup, setattr, 0),
1168 PROC(RENAME, renameargs, renameres, 0), 2451 PROC(RENAME, rename, rename, 0),
1169 PROC(LINK, linkargs, linkres, 0), 2452 PROC(LINK, link, link, 0),
1170 PROC(READDIR, readdirargs, readdirres, 3), 2453 PROC(READDIR, readdir, readdir, 3),
1171 PROC(READDIRPLUS, readdirargs, readdirres, 3), 2454 PROC(READDIRPLUS, readdirplus, readdir, 3),
1172 PROC(FSSTAT, fhandle, fsstatres, 0), 2455 PROC(FSSTAT, getattr, fsstat, 0),
1173 PROC(FSINFO, fhandle, fsinfores, 0), 2456 PROC(FSINFO, getattr, fsinfo, 0),
1174 PROC(PATHCONF, fhandle, pathconfres, 0), 2457 PROC(PATHCONF, getattr, pathconf, 0),
1175 PROC(COMMIT, commitargs, commitres, 5), 2458 PROC(COMMIT, commit, commit, 5),
1176}; 2459};
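For reference, the first entry above is a mechanical expansion of the PROC() macro:

[NFS3PROC_GETATTR] = {
	.p_proc    = NFS3PROC_GETATTR,
	.p_encode  = (kxdreproc_t)nfs3_xdr_enc_getattr3args,
	.p_decode  = (kxdrdproc_t)nfs3_xdr_dec_getattr3res,
	.p_arglen  = NFS3_getattrargs_sz,
	.p_replen  = NFS3_getattrres_sz,
	.p_timer   = 1,
	.p_statidx = NFS3PROC_GETATTR,
	.p_name    = "GETATTR",
},

The renamed encode/decode pairs also make the sharing in the table explicit: RMDIR reuses the lookup encoder (its arguments are a plain diropargs3) and the setattr decoder (its result is bare wcc_data), while FSSTAT, FSINFO, and PATHCONF all reuse the getattr encoder, which only emits a file handle.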
1177 2460
1178struct rpc_version nfs_version3 = { 2461struct rpc_version nfs_version3 = {
@@ -1185,8 +2468,8 @@ struct rpc_version nfs_version3 = {
1185static struct rpc_procinfo nfs3_acl_procedures[] = { 2468static struct rpc_procinfo nfs3_acl_procedures[] = {
1186 [ACLPROC3_GETACL] = { 2469 [ACLPROC3_GETACL] = {
1187 .p_proc = ACLPROC3_GETACL, 2470 .p_proc = ACLPROC3_GETACL,
1188 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, 2471 .p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
1189 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, 2472 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
1190 .p_arglen = ACL3_getaclargs_sz, 2473 .p_arglen = ACL3_getaclargs_sz,
1191 .p_replen = ACL3_getaclres_sz, 2474 .p_replen = ACL3_getaclres_sz,
1192 .p_timer = 1, 2475 .p_timer = 1,
@@ -1194,8 +2477,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1194 }, 2477 },
1195 [ACLPROC3_SETACL] = { 2478 [ACLPROC3_SETACL] = {
1196 .p_proc = ACLPROC3_SETACL, 2479 .p_proc = ACLPROC3_SETACL,
1197 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, 2480 .p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
1198 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, 2481 .p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
1199 .p_arglen = ACL3_setaclargs_sz, 2482 .p_arglen = ACL3_setaclargs_sz,
1200 .p_replen = ACL3_setaclres_sz, 2483 .p_replen = ACL3_setaclres_sz,
1201 .p_timer = 0, 2484 .p_timer = 0,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9fa496387fdf..7a7474073148 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
44 NFS4CLNT_RECLAIM_REBOOT, 44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_LAYOUTRECALL,
47 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
48 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
49}; 50};
@@ -109,7 +110,7 @@ struct nfs_unique_id {
109struct nfs4_state_owner { 110struct nfs4_state_owner {
110 struct nfs_unique_id so_owner_id; 111 struct nfs_unique_id so_owner_id;
111 struct nfs_server *so_server; 112 struct nfs_server *so_server;
112 struct rb_node so_client_node; 113 struct rb_node so_server_node;
113 114
114 struct rpc_cred *so_cred; /* Associated cred */ 115 struct rpc_cred *so_cred; /* Associated cred */
115 116
@@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops {
227extern const struct dentry_operations nfs4_dentry_operations; 228extern const struct dentry_operations nfs4_dentry_operations;
228extern const struct inode_operations nfs4_dir_inode_operations; 229extern const struct inode_operations nfs4_dir_inode_operations;
229 230
230/* inode.c */
231extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
232extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
233extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
234
235
236/* nfs4proc.c */ 231/* nfs4proc.c */
237extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 232extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
238extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 233extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -241,11 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
241extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 236extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 237extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 238extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 239extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 240extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 241extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
247 struct nfs4_fs_locations *fs_locations, struct page *page); 242 struct nfs4_fs_locations *fs_locations, struct page *page);
248extern void nfs4_release_lockowner(const struct nfs4_lock_state *); 243extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
244extern const struct xattr_handler *nfs4_xattr_handlers[];
249 245
250#if defined(CONFIG_NFS_V4_1) 246#if defined(CONFIG_NFS_V4_1)
251static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 247static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -331,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
331extern const nfs4_stateid zero_stateid; 327extern const nfs4_stateid zero_stateid;
332 328
333/* nfs4xdr.c */ 329/* nfs4xdr.c */
334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
335extern struct rpc_procinfo nfs4_procedures[]; 330extern struct rpc_procinfo nfs4_procedures[];
336 331
337struct nfs4_mount_data; 332struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2e92f0d8d654..23f930caf1e2 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,7 +82,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
82{ 82{
83 struct nfs4_file_layout_dsaddr *dsaddr; 83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL; 84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode); 85 struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
86 86
87 dprintk("--> %s\n", __func__); 87 dprintk("--> %s\n", __func__);
88 88
@@ -101,7 +101,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
101 /* find and reference the deviceid */ 101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) { 103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id); 104 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 105 if (dsaddr == NULL)
106 goto out; 106 goto out;
107 } 107 }
@@ -243,7 +243,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
243static void 243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{ 245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode); 246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248 248
249 dprintk("--> %s\n", __func__); 249 dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4435e5e1f904..9d992b0346e3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -49,6 +49,7 @@
49#include <linux/mount.h> 49#include <linux/mount.h>
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h>
52 53
53#include "nfs4_fs.h" 54#include "nfs4_fs.h"
54#include "delegation.h" 55#include "delegation.h"
@@ -355,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
355} 356}
356 357
357/* 358/*
358 * Signal state manager thread if session is drained 359 * Signal state manager thread if session fore channel is drained
359 */ 360 */
360static void nfs41_check_drain_session_complete(struct nfs4_session *ses) 361static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
361{ 362{
362 struct rpc_task *task; 363 struct rpc_task *task;
363 364
@@ -371,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
371 if (ses->fc_slot_table.highest_used_slotid != -1) 372 if (ses->fc_slot_table.highest_used_slotid != -1)
372 return; 373 return;
373 374
374 dprintk("%s COMPLETE: Session Drained\n", __func__); 375 dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
375 complete(&ses->complete); 376 complete(&ses->fc_slot_table.complete);
377}
378
379/*
380 * Signal state manager thread if session back channel is drained
381 */
382void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
383{
384 if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
385 ses->bc_slot_table.highest_used_slotid != -1)
386 return;
387 dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
388 complete(&ses->bc_slot_table.complete);
376} 389}
377 390
378static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) 391static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -389,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
389 402
390 spin_lock(&tbl->slot_tbl_lock); 403 spin_lock(&tbl->slot_tbl_lock);
391 nfs4_free_slot(tbl, res->sr_slot); 404 nfs4_free_slot(tbl, res->sr_slot);
392 nfs41_check_drain_session_complete(res->sr_session); 405 nfs4_check_drain_fc_complete(res->sr_session);
393 spin_unlock(&tbl->slot_tbl_lock); 406 spin_unlock(&tbl->slot_tbl_lock);
394 res->sr_slot = NULL; 407 res->sr_slot = NULL;
395} 408}
@@ -1826,6 +1839,8 @@ struct nfs4_closedata {
1826 struct nfs_closeres res; 1839 struct nfs_closeres res;
1827 struct nfs_fattr fattr; 1840 struct nfs_fattr fattr;
1828 unsigned long timestamp; 1841 unsigned long timestamp;
1842 bool roc;
1843 u32 roc_barrier;
1829}; 1844};
1830 1845
1831static void nfs4_free_closedata(void *data) 1846static void nfs4_free_closedata(void *data)
@@ -1833,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
1833 struct nfs4_closedata *calldata = data; 1848 struct nfs4_closedata *calldata = data;
1834 struct nfs4_state_owner *sp = calldata->state->owner; 1849 struct nfs4_state_owner *sp = calldata->state->owner;
1835 1850
1851 if (calldata->roc)
1852 pnfs_roc_release(calldata->state->inode);
1836 nfs4_put_open_state(calldata->state); 1853 nfs4_put_open_state(calldata->state);
1837 nfs_free_seqid(calldata->arg.seqid); 1854 nfs_free_seqid(calldata->arg.seqid);
1838 nfs4_put_state_owner(sp); 1855 nfs4_put_state_owner(sp);
@@ -1865,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1865 */ 1882 */
1866 switch (task->tk_status) { 1883 switch (task->tk_status) {
1867 case 0: 1884 case 0:
1885 if (calldata->roc)
1886 pnfs_roc_set_barrier(state->inode,
1887 calldata->roc_barrier);
1868 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 1888 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
1869 renew_lease(server, calldata->timestamp); 1889 renew_lease(server, calldata->timestamp);
1870 nfs4_close_clear_stateid_flags(state, 1890 nfs4_close_clear_stateid_flags(state,
@@ -1917,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1917 return; 1937 return;
1918 } 1938 }
1919 1939
1920 if (calldata->arg.fmode == 0) 1940 if (calldata->arg.fmode == 0) {
1921 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 1941 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
1942 if (calldata->roc &&
1943 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
1944 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
1945 task, NULL);
1946 return;
1947 }
1948 }
1922 1949
1923 nfs_fattr_init(calldata->res.fattr); 1950 nfs_fattr_init(calldata->res.fattr);
1924 calldata->timestamp = jiffies; 1951 calldata->timestamp = jiffies;
@@ -1946,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
1946 * 1973 *
1947 * NOTE: Caller must be holding the sp->so_owner semaphore! 1974 * NOTE: Caller must be holding the sp->so_owner semaphore!
1948 */ 1975 */
1949int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait) 1976int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
1950{ 1977{
1951 struct nfs_server *server = NFS_SERVER(state->inode); 1978 struct nfs_server *server = NFS_SERVER(state->inode);
1952 struct nfs4_closedata *calldata; 1979 struct nfs4_closedata *calldata;
@@ -1981,11 +2008,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1981 calldata->res.fattr = &calldata->fattr; 2008 calldata->res.fattr = &calldata->fattr;
1982 calldata->res.seqid = calldata->arg.seqid; 2009 calldata->res.seqid = calldata->arg.seqid;
1983 calldata->res.server = server; 2010 calldata->res.server = server;
2011 calldata->roc = roc;
1984 path_get(path); 2012 path_get(path);
1985 calldata->path = *path; 2013 calldata->path = *path;
1986 2014
1987 msg.rpc_argp = &calldata->arg, 2015 msg.rpc_argp = &calldata->arg;
1988 msg.rpc_resp = &calldata->res, 2016 msg.rpc_resp = &calldata->res;
1989 task_setup_data.callback_data = calldata; 2017 task_setup_data.callback_data = calldata;
1990 task = rpc_run_task(&task_setup_data); 2018 task = rpc_run_task(&task_setup_data);
1991 if (IS_ERR(task)) 2019 if (IS_ERR(task))
@@ -1998,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1998out_free_calldata: 2026out_free_calldata:
1999 kfree(calldata); 2027 kfree(calldata);
2000out: 2028out:
2029 if (roc)
2030 pnfs_roc_release(state->inode);
2001 nfs4_put_open_state(state); 2031 nfs4_put_open_state(state);
2002 nfs4_put_state_owner(sp); 2032 nfs4_put_state_owner(sp);
2003 return status; 2033 return status;
@@ -2486,6 +2516,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2486 path = &ctx->path; 2516 path = &ctx->path;
2487 fmode = ctx->mode; 2517 fmode = ctx->mode;
2488 } 2518 }
2519 sattr->ia_mode &= ~current_umask();
2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); 2520 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2490 d_drop(dentry); 2521 d_drop(dentry);
2491 if (IS_ERR(state)) { 2522 if (IS_ERR(state)) {
@@ -2816,6 +2847,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2816{ 2847{
2817 struct nfs4_exception exception = { }; 2848 struct nfs4_exception exception = { };
2818 int err; 2849 int err;
2850
2851 sattr->ia_mode &= ~current_umask();
2819 do { 2852 do {
2820 err = nfs4_handle_exception(NFS_SERVER(dir), 2853 err = nfs4_handle_exception(NFS_SERVER(dir),
2821 _nfs4_proc_mkdir(dir, dentry, sattr), 2854 _nfs4_proc_mkdir(dir, dentry, sattr),
@@ -2916,6 +2949,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
2916{ 2949{
2917 struct nfs4_exception exception = { }; 2950 struct nfs4_exception exception = { };
2918 int err; 2951 int err;
2952
2953 sattr->ia_mode &= ~current_umask();
2919 do { 2954 do {
2920 err = nfs4_handle_exception(NFS_SERVER(dir), 2955 err = nfs4_handle_exception(NFS_SERVER(dir),
2921 _nfs4_proc_mknod(dir, dentry, sattr, rdev), 2956 _nfs4_proc_mknod(dir, dentry, sattr, rdev),
@@ -3478,6 +3513,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3478 struct nfs4_setclientid setclientid = { 3513 struct nfs4_setclientid setclientid = {
3479 .sc_verifier = &sc_verifier, 3514 .sc_verifier = &sc_verifier,
3480 .sc_prog = program, 3515 .sc_prog = program,
3516 .sc_cb_ident = clp->cl_cb_ident,
3481 }; 3517 };
3482 struct rpc_message msg = { 3518 struct rpc_message msg = {
3483 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], 3519 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -3517,7 +3553,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3517 if (signalled()) 3553 if (signalled())
3518 break; 3554 break;
3519 if (loop++ & 1) 3555 if (loop++ & 1)
3520 ssleep(clp->cl_lease_time + 1); 3556 ssleep(clp->cl_lease_time / HZ + 1);
3521 else 3557 else
3522 if (++clp->cl_id_uniquifier == 0) 3558 if (++clp->cl_id_uniquifier == 0)
3523 break; 3559 break;
@@ -3663,8 +3699,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3663 data->rpc_status = 0; 3699 data->rpc_status = 0;
3664 3700
3665 task_setup_data.callback_data = data; 3701 task_setup_data.callback_data = data;
3666 msg.rpc_argp = &data->args, 3702 msg.rpc_argp = &data->args;
3667 msg.rpc_resp = &data->res, 3703 msg.rpc_resp = &data->res;
3668 task = rpc_run_task(&task_setup_data); 3704 task = rpc_run_task(&task_setup_data);
3669 if (IS_ERR(task)) 3705 if (IS_ERR(task))
3670 return PTR_ERR(task); 3706 return PTR_ERR(task);
@@ -3743,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3743 goto out; 3779 goto out;
3744 lsp = request->fl_u.nfs4_fl.owner; 3780 lsp = request->fl_u.nfs4_fl.owner;
3745 arg.lock_owner.id = lsp->ls_id.id; 3781 arg.lock_owner.id = lsp->ls_id.id;
3782 arg.lock_owner.s_dev = server->s_dev;
3746 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3783 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
3747 switch (status) { 3784 switch (status) {
3748 case 0: 3785 case 0:
@@ -3908,8 +3945,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3908 return ERR_PTR(-ENOMEM); 3945 return ERR_PTR(-ENOMEM);
3909 } 3946 }
3910 3947
3911 msg.rpc_argp = &data->arg, 3948 msg.rpc_argp = &data->arg;
3912 msg.rpc_resp = &data->res, 3949 msg.rpc_resp = &data->res;
3913 task_setup_data.callback_data = data; 3950 task_setup_data.callback_data = data;
3914 return rpc_run_task(&task_setup_data); 3951 return rpc_run_task(&task_setup_data);
3915} 3952}
@@ -3988,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3988 p->arg.lock_stateid = &lsp->ls_stateid; 4025 p->arg.lock_stateid = &lsp->ls_stateid;
3989 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 4026 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3990 p->arg.lock_owner.id = lsp->ls_id.id; 4027 p->arg.lock_owner.id = lsp->ls_id.id;
4028 p->arg.lock_owner.s_dev = server->s_dev;
3991 p->res.lock_seqid = p->arg.lock_seqid; 4029 p->res.lock_seqid = p->arg.lock_seqid;
3992 p->lsp = lsp; 4030 p->lsp = lsp;
3993 p->server = server; 4031 p->server = server;
@@ -4145,8 +4183,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4145 data->arg.reclaim = NFS_LOCK_RECLAIM; 4183 data->arg.reclaim = NFS_LOCK_RECLAIM;
4146 task_setup_data.callback_ops = &nfs4_recover_lock_ops; 4184 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4147 } 4185 }
4148 msg.rpc_argp = &data->arg, 4186 msg.rpc_argp = &data->arg;
4149 msg.rpc_resp = &data->res, 4187 msg.rpc_resp = &data->res;
4150 task_setup_data.callback_data = data; 4188 task_setup_data.callback_data = data;
4151 task = rpc_run_task(&task_setup_data); 4189 task = rpc_run_task(&task_setup_data);
4152 if (IS_ERR(task)) 4190 if (IS_ERR(task))
@@ -4392,48 +4430,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
4392 return; 4430 return;
4393 args->lock_owner.clientid = server->nfs_client->cl_clientid; 4431 args->lock_owner.clientid = server->nfs_client->cl_clientid;
4394 args->lock_owner.id = lsp->ls_id.id; 4432 args->lock_owner.id = lsp->ls_id.id;
4433 args->lock_owner.s_dev = server->s_dev;
4395 msg.rpc_argp = args; 4434 msg.rpc_argp = args;
4396 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); 4435 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
4397} 4436}
4398 4437
4399#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 4438#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
4400 4439
4401int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, 4440static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
4402 size_t buflen, int flags) 4441 const void *buf, size_t buflen,
4442 int flags, int type)
4403{ 4443{
4404 struct inode *inode = dentry->d_inode; 4444 if (strcmp(key, "") != 0)
4405 4445 return -EINVAL;
4406 if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
4407 return -EOPNOTSUPP;
4408 4446
4409 return nfs4_proc_set_acl(inode, buf, buflen); 4447 return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
4410} 4448}
4411 4449
4412/* The getxattr man page suggests returning -ENODATA for unknown attributes, 4450static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
4413 * and that's what we'll do for e.g. user attributes that haven't been set. 4451 void *buf, size_t buflen, int type)
4414 * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
4415 * attributes in kernel-managed attribute namespaces. */
4416ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
4417 size_t buflen)
4418{ 4452{
4419 struct inode *inode = dentry->d_inode; 4453 if (strcmp(key, "") != 0)
4420 4454 return -EINVAL;
4421 if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
4422 return -EOPNOTSUPP;
4423 4455
4424 return nfs4_proc_get_acl(inode, buf, buflen); 4456 return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
4425} 4457}
4426 4458
4427ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) 4459static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
4460 size_t list_len, const char *name,
4461 size_t name_len, int type)
4428{ 4462{
4429 size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; 4463 size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
4430 4464
4431 if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) 4465 if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
4432 return 0; 4466 return 0;
4433 if (buf && buflen < len) 4467
4434 return -ERANGE; 4468 if (list && len <= list_len)
4435 if (buf) 4469 memcpy(list, XATTR_NAME_NFSV4_ACL, len);
4436 memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
4437 return len; 4470 return len;
4438} 4471}
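These three static functions plug into the generic xattr handler mechanism; the nfs4_xattr_handlers table this series declares is presumably populated along these lines (a sketch, not a literal hunk from the patch):

static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
	.prefix	= XATTR_NAME_NFSV4_ACL,
	.list	= nfs4_xattr_list_nfs4_acl,
	.get	= nfs4_xattr_get_nfs4_acl,
	.set	= nfs4_xattr_set_nfs4_acl,
};

const struct xattr_handler *nfs4_xattr_handlers[] = {
	&nfs4_xattr_nfs4_acl_handler,
	NULL,
};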
4439 4472
@@ -4486,6 +4519,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4486 4519
4487#ifdef CONFIG_NFS_V4_1 4520#ifdef CONFIG_NFS_V4_1
4488/* 4521/*
 4522 * Validate the exchange flags returned by the server.  Fail if the server
 4523 * set any invalid flags, set both the PNFS and NON_PNFS flags, or set none
 4524 * of the NON_PNFS, PNFS, or DS flags.
4525 */
4526static int nfs4_check_cl_exchange_flags(u32 flags)
4527{
4528 if (flags & ~EXCHGID4_FLAG_MASK_R)
4529 goto out_inval;
4530 if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
4531 (flags & EXCHGID4_FLAG_USE_NON_PNFS))
4532 goto out_inval;
4533 if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
4534 goto out_inval;
4535 return NFS_OK;
4536out_inval:
4537 return -NFS4ERR_INVAL;
4538}
4539
4540/*
4489 * nfs4_proc_exchange_id() 4541 * nfs4_proc_exchange_id()
4490 * 4542 *
4491 * Since the clientid has expired, all compounds using sessions 4543 * Since the clientid has expired, all compounds using sessions
@@ -4498,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	nfs4_verifier verifier;
 	struct nfs41_exchange_id_args args = {
 		.client = clp,
-		.flags = clp->cl_exchange_flags,
+		.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
 	};
 	struct nfs41_exchange_id_res res = {
 		.client = clp,
@@ -4515,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	dprintk("--> %s\n", __func__);
 	BUG_ON(clp == NULL);
 
-	/* Remove server-only flags */
-	args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
-
 	p = (u32 *)verifier.data;
 	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
 	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4543,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 		break;
 	}
 
+	status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 }
@@ -4776,17 +4826,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	if (!session)
 		return NULL;
 
-	init_completion(&session->complete);
-
 	tbl = &session->fc_slot_table;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+	init_completion(&tbl->complete);
 
 	tbl = &session->bc_slot_table;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+	init_completion(&tbl->complete);
 
 	session->session_state = 1<<NFS4_SESSION_INITING;
 
@@ -5280,13 +5330,23 @@ static void
 nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_layoutget *lgp = calldata;
-	struct inode *ino = lgp->args.inode;
-	struct nfs_server *server = NFS_SERVER(ino);
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
 
 	dprintk("--> %s\n", __func__);
+	/* Note that there is a race here, where a CB_LAYOUTRECALL can come
+	 * in right now covering the LAYOUTGET we are about to send.
+	 * However, that is not so catastrophic, and there seems
+	 * to be no way to prevent it completely.
+	 */
 	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
 				&lgp->res.seq_res, 0, task))
 		return;
+	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+					  NFS_I(lgp->args.inode)->layout,
+					  lgp->args.ctx->state)) {
+		rpc_exit(task, NFS4_OK);
+		return;
+	}
 	rpc_call_start(task);
 }
 
@@ -5313,7 +5373,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			return;
 		}
 	}
-	lgp->status = task->tk_status;
 	dprintk("<-- %s\n", __func__);
 }
 
@@ -5322,7 +5381,6 @@ static void nfs4_layoutget_release(void *calldata)
 	struct nfs4_layoutget *lgp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	put_layout_hdr(lgp->args.inode);
 	if (lgp->res.layout.buf != NULL)
 		free_page((unsigned long) lgp->res.layout.buf);
 	put_nfs_open_context(lgp->args.ctx);
@@ -5367,13 +5425,10 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status != 0)
-		goto out;
-	status = lgp->status;
-	if (status != 0)
-		goto out;
-	status = pnfs_layout_process(lgp);
-out:
+	if (status == 0)
+		status = task->tk_status;
+	if (status == 0)
+		status = pnfs_layout_process(lgp);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
@@ -5504,9 +5559,10 @@ static const struct inode_operations nfs4_file_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
-	.getxattr	= nfs4_getxattr,
-	.setxattr	= nfs4_setxattr,
-	.listxattr	= nfs4_listxattr,
+	.getxattr	= generic_getxattr,
+	.setxattr	= generic_setxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
 };
 
 const struct nfs_rpc_ops nfs_v4_clientops = {
@@ -5551,6 +5607,18 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.open_context	= nfs4_atomic_open,
 };
 
+static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+	.prefix	= XATTR_NAME_NFSV4_ACL,
+	.list	= nfs4_xattr_list_nfs4_acl,
+	.get	= nfs4_xattr_get_nfs4_acl,
+	.set	= nfs4_xattr_set_nfs4_acl,
+};
+
+const struct xattr_handler *nfs4_xattr_handlers[] = {
+	&nfs4_xattr_nfs4_acl_handler,
+	NULL
+};
+
 /*
  * Local variables:
  *  c-basic-offset: 8
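The hunks above convert NFSv4 ACL access from dedicated getxattr/listxattr
inode operations to the VFS's prefix-dispatched xattr handler tables. As a
rough sketch of the consumer side, assuming the generic_getxattr()
conventions of this kernel generation: the VFS walks the NULL-terminated
sb->s_xattr array, matches the attribute name against each handler's
->prefix, and invokes ->get() with only the remainder of the name, which is
why nfs4_xattr_get_nfs4_acl() accepts nothing but an empty suffix. The
fill_super helper below is illustrative only, not part of this patch:

	static int nfs4_example_fill_super(struct super_block *sb)
	{
		/* hand the prefix-dispatched handler table to the VFS */
		sb->s_xattr = nfs4_xattr_handlers;
		return 0;
	}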
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 72b6c580af13..402143d75fc5 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work)
 
 	ops = clp->cl_mvops->state_renewal_ops;
 	dprintk("%s: start\n", __func__);
-	/* Are there any active superblocks? */
-	if (list_empty(&clp->cl_superblocks))
+
+	rcu_read_lock();
+	if (list_empty(&clp->cl_superblocks)) {
+		rcu_read_unlock();
 		goto out;
+	}
+	rcu_read_unlock();
+
 	spin_lock(&clp->cl_lock);
 	lease = clp->cl_lease_time;
 	last = clp->cl_last_renewal;
@@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
 	cred = ops->get_state_renewal_cred_locked(clp);
 	spin_unlock(&clp->cl_lock);
 	if (cred == NULL) {
-		if (list_empty(&clp->cl_delegations)) {
+		if (!nfs_delegations_present(clp)) {
 			set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 			goto out;
 		}
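nfs4_renew_state() now asks nfs_delegations_present() instead of peeking at
clp->cl_delegations because this series moves the delegation list from the
nfs_client to each nfs_server. A minimal sketch of the shape such a helper
takes, assuming a per-server list named "delegations" and the same
RCU-protected cl_superblocks walk used elsewhere in this diff:

	int nfs_delegations_present(struct nfs_client *clp)
	{
		struct nfs_server *server;
		int ret = 0;

		/* walk every superblock sharing this client state */
		rcu_read_lock();
		list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
			if (!list_empty(&server->delegations)) {
				ret = 1;
				break;
			}
		rcu_read_unlock();
		return ret;
	}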
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a3126737..2336d532cf66 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
 	put_rpccred(cred);
 }
 
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_renew_cred_server_locked(struct nfs_server *server)
 {
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
-	struct rpc_cred *cred = NULL;
 
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		if (list_empty(&sp->so_states))
 			continue;
 		cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
 	return cred;
 }
 
+/**
+ * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ * Caller must hold clp->cl_lock.
+ */
+struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+{
+	struct rpc_cred *cred = NULL;
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_renew_cred_server_locked(server);
+		if (cred != NULL)
+			break;
+	}
+	rcu_read_unlock();
+	return cred;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -142,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 	return status;
 }
 
+/*
+ * Back channel returns NFS4ERR_DELAY for new requests when
+ * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
+ * is ended.
+ */
 static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 	struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 	}
 }
 
-static int nfs4_begin_drain_session(struct nfs_client *clp)
+static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
 {
-	struct nfs4_session *ses = clp->cl_session;
-	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
-
 	spin_lock(&tbl->slot_tbl_lock);
-	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
 	if (tbl->highest_used_slotid != -1) {
-		INIT_COMPLETION(ses->complete);
+		INIT_COMPLETION(tbl->complete);
 		spin_unlock(&tbl->slot_tbl_lock);
-		return wait_for_completion_interruptible(&ses->complete);
+		return wait_for_completion_interruptible(&tbl->complete);
 	}
 	spin_unlock(&tbl->slot_tbl_lock);
 	return 0;
 }
 
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+	struct nfs4_session *ses = clp->cl_session;
+	int ret = 0;
+
+	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+	/* back channel */
+	ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+	if (ret)
+		return ret;
+	/* fore channel */
+	return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+}
+
 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status;
@@ -192,6 +232,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	status = nfs4_proc_create_session(clp);
 	if (status != 0)
 		goto out;
+	status = nfs4_set_callback_sessionid(clp);
+	if (status != 0) {
+		printk(KERN_WARNING "Sessionid not set. No callback service\n");
+		nfs_callback_down(1);
+		status = 0;
+	}
 	nfs41_setup_state_renewal(clp);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
@@ -210,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
 
 #endif /* CONFIG_NFS_V4_1 */
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_setclientid_cred_server(struct nfs_server *server)
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
+
+	spin_lock(&clp->cl_lock);
+	pos = rb_first(&server->state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
+		cred = get_rpccred(sp->so_cred);
+	}
+	spin_unlock(&clp->cl_lock);
+	return cred;
+}
+
+/**
+ * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+{
+	struct nfs_server *server;
 	struct rpc_cred *cred;
 
 	spin_lock(&clp->cl_lock);
 	cred = nfs4_get_machine_cred_locked(clp);
+	spin_unlock(&clp->cl_lock);
 	if (cred != NULL)
 		goto out;
-	pos = rb_first(&clp->cl_state_owners);
-	if (pos != NULL) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		cred = get_rpccred(sp->so_cred);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_setclientid_cred_server(server);
+		if (cred != NULL)
+			break;
 	}
+	rcu_read_unlock();
+
 out:
-	spin_unlock(&clp->cl_lock);
 	return cred;
 }
 
-static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
-		__u64 minval, int maxbits)
+static void nfs_alloc_unique_id_locked(struct rb_root *root,
+				       struct nfs_unique_id *new,
+				       __u64 minval, int maxbits)
 {
 	struct rb_node **p, *parent;
 	struct nfs_unique_id *pos;
@@ -286,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs_client *clp = server->nfs_client;
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
 	while (*p != NULL) {
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
 		if (server < sp->so_server) {
 			p = &parent->rb_left;
@@ -319,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 }
 
 static struct nfs4_state_owner *
-nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 {
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct nfs_server *server = new->so_server;
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 	struct nfs4_state_owner *sp;
 
 	while (*p != NULL) {
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
-		if (new->so_server < sp->so_server) {
-			p = &parent->rb_left;
-			continue;
-		}
-		if (new->so_server > sp->so_server) {
-			p = &parent->rb_right;
-			continue;
-		}
 		if (new->so_cred < sp->so_cred)
 			p = &parent->rb_left;
 		else if (new->so_cred > sp->so_cred)
@@ -346,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
 			return sp;
 		}
 	}
-	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
-	rb_link_node(&new->so_client_node, parent, p);
-	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	nfs_alloc_unique_id_locked(&server->openowner_id,
+				   &new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_server_node, parent, p);
+	rb_insert_color(&new->so_server_node, &server->state_owners);
 	return new;
 }
 
 static void
-nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node))
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+	struct nfs_server *server = sp->so_server;
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node))
+		rb_erase(&sp->so_server_node, &server->state_owners);
+	nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
 }
 
 /*
@@ -386,23 +455,32 @@ nfs4_alloc_state_owner(void)
 static void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
-		struct nfs_client *clp = sp->so_server->nfs_client;
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		struct nfs_server *server = sp->so_server;
+		struct nfs_client *clp = server->nfs_client;
 
 		spin_lock(&clp->cl_lock);
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-		RB_CLEAR_NODE(&sp->so_client_node);
+		rb_erase(&sp->so_server_node, &server->state_owners);
+		RB_CLEAR_NODE(&sp->so_server_node);
 		spin_unlock(&clp->cl_lock);
 	}
 }
 
-struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+/**
+ * nfs4_get_state_owner - Look up a state owner given a credential
+ * @server: nfs_server to search
+ * @cred: RPC credential to match
+ *
+ * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
+					      struct rpc_cred *cred)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(server, cred);
+	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
 		return sp;
@@ -412,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	new->so_server = server;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_insert_state_owner(clp, new);
+	sp = nfs4_insert_state_owner_locked(new);
 	spin_unlock(&clp->cl_lock);
 	if (sp == new)
 		get_rpccred(cred);
@@ -423,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	return sp;
 }
 
+/**
+ * nfs4_put_state_owner - Release a nfs4_state_owner
+ * @sp: state owner data to release
+ *
+ */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
 	struct nfs_client *clp = sp->so_server->nfs_client;
@@ -430,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	nfs4_remove_state_owner(clp, sp);
+	nfs4_remove_state_owner_locked(sp);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	put_rpccred(cred);
@@ -585,8 +668,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
 	if (!call_close) {
 		nfs4_put_open_state(state);
 		nfs4_put_state_owner(owner);
-	} else
-		nfs4_do_close(path, state, gfp_mask, wait);
+	} else {
+		bool roc = pnfs_roc(state->inode);
+
+		nfs4_do_close(path, state, gfp_mask, wait, roc);
+	}
 }
 
 void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -633,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs_client *clp = state->owner->so_server->nfs_client;
+	struct nfs_server *server = state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
 	if (lsp == NULL)
@@ -657,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 	}
 	spin_lock(&clp->cl_lock);
-	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+	nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
@@ -665,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 
 static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 {
-	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+	struct nfs_server *server = lsp->ls_state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	spin_lock(&clp->cl_lock);
-	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	kfree(lsp);
@@ -1114,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
 	}
 }
 
-static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+static void nfs4_reset_seqids(struct nfs_server *server,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 
-	/* Reset all sequence ids to zero */
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1131,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
 		}
 		spin_unlock(&sp->so_lock);
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_reset_seqids(server, mark_reclaim);
+	rcu_read_unlock();
 }
 
 static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1148,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
 	(void)ops->reclaim_complete(clp);
 }
 
-static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+static void nfs4_clear_reclaim_server(struct nfs_server *server)
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 
-	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
-		return 0;
-
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
-			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
+						&state->flags))
 				continue;
 			nfs4_state_mark_reclaim_nograce(clp, state);
 		}
 		spin_unlock(&sp->so_lock);
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+
+	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+		return 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_clear_reclaim_server(server);
+	rcu_read_unlock();
 
 	nfs_delegation_reap_unclaimed(clp);
 	return 1;
@@ -1238,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 
 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
 {
+	struct nfs4_state_owner *sp;
+	struct nfs_server *server;
 	struct rb_node *pos;
 	int status = 0;
 
 restart:
-	spin_lock(&clp->cl_lock);
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
-			continue;
-		atomic_inc(&sp->so_count);
-		spin_unlock(&clp->cl_lock);
-		status = nfs4_reclaim_open_state(sp, ops);
-		if (status < 0) {
-			set_bit(ops->owner_flag_bit, &sp->so_flags);
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		spin_lock(&clp->cl_lock);
+		for (pos = rb_first(&server->state_owners);
+		     pos != NULL;
+		     pos = rb_next(pos)) {
+			sp = rb_entry(pos,
+			     struct nfs4_state_owner, so_server_node);
+			if (!test_and_clear_bit(ops->owner_flag_bit,
+						&sp->so_flags))
+				continue;
+			atomic_inc(&sp->so_count);
+			spin_unlock(&clp->cl_lock);
+			rcu_read_unlock();
+
+			status = nfs4_reclaim_open_state(sp, ops);
+			if (status < 0) {
+				set_bit(ops->owner_flag_bit, &sp->so_flags);
+				nfs4_put_state_owner(sp);
+				return nfs4_recovery_handle_error(clp, status);
+			}
+
 			nfs4_put_state_owner(sp);
-			return nfs4_recovery_handle_error(clp, status);
+			goto restart;
 		}
-		nfs4_put_state_owner(sp);
-		goto restart;
+		spin_unlock(&clp->cl_lock);
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return status;
 }
 
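Every hunk in fs/nfs/nfs4state.c above is one mechanical move: the red-black
tree of state owners and the unique-ID allocators migrate from the shared
nfs_client to each nfs_server, so each walk becomes an RCU iteration over
clp->cl_superblocks plus a per-server tree walk under clp->cl_lock. A sketch
of the nfs_server fields this code relies on, assuming the header side of
the series (the comments are explanatory, not quoted from it):

	struct nfs_server {
		/* ... existing fields ... */
		struct list_head	client_link;	/* entry on clp->cl_superblocks, walked under RCU */
		struct rb_root		state_owners;	/* was clp->cl_state_owners */
		struct rb_root		openowner_id;	/* was clp->cl_openowner_id */
		struct rb_root		lockowner_id;	/* was clp->cl_lockowner_id */
		/* ... */
	};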
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f1826b012e6..2ab8e5cb8f59 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -71,8 +71,8 @@ static int nfs4_stat_to_errno(int);
 /* lock,open owner id:
  * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
  */
-#define open_owner_id_maxsz	(1 + 4)
-#define lock_owner_id_maxsz	(1 + 4)
+#define open_owner_id_maxsz	(1 + 1 + 4)
+#define lock_owner_id_maxsz	(1 + 1 + 4)
 #define decode_lockowner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 #define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
@@ -1088,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo
 {
 	__be32 *p;
 
-	p = reserve_space(xdr, 28);
+	p = reserve_space(xdr, 32);
 	p = xdr_encode_hyper(p, lowner->clientid);
-	*p++ = cpu_to_be32(16);
+	*p++ = cpu_to_be32(20);
 	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+	*p++ = cpu_to_be32(lowner->s_dev);
 	xdr_encode_hyper(p, lowner->id);
 }
 
@@ -1210,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	*p++ = cpu_to_be32(OP_OPEN);
 	*p = cpu_to_be32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->fmode);
-	p = reserve_space(xdr, 28);
+	p = reserve_space(xdr, 32);
 	p = xdr_encode_hyper(p, arg->clientid);
-	*p++ = cpu_to_be32(16);
+	*p++ = cpu_to_be32(20);
 	p = xdr_encode_opaque_fixed(p, "open id:", 8);
+	*p++ = cpu_to_be32(arg->server->s_dev);
 	xdr_encode_hyper(p, arg->id);
 }
 
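(Worked size arithmetic for the two owner-encoding hunks above: the encoded
owner is clientid[8] + opaque length word[4] + "open id:" or "lock id:"[8] +
s_dev[4] + id[8], so the opaque body grows from 16 to 20 bytes, the reserved
buffer from 28 to 32, and the *_owner_id_maxsz macros earlier in this file
each gain one extra 32-bit word.)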
@@ -1510,7 +1512,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	hdr->replen += decode_restorefh_maxsz;
 }
 
-static int
+static void
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1521,14 +1523,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 	p = reserve_space(xdr, 2*4);
 	*p++ = cpu_to_be32(1);
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
-	if (arg->acl_len % 4)
-		return -EINVAL;
+	BUG_ON(arg->acl_len % 4);
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(arg->acl_len);
 	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
 	hdr->nops++;
 	hdr->replen += decode_setacl_maxsz;
-	return 0;
 }
 
 static void
@@ -1789,7 +1789,6 @@ encode_layoutget(struct xdr_stream *xdr,
 		const struct nfs4_layoutget_args *args,
 		struct compound_hdr *hdr)
 {
-	nfs4_stateid stateid;
 	__be32 *p;
 
 	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
@@ -1800,9 +1799,7 @@ encode_layoutget(struct xdr_stream *xdr,
 	p = xdr_encode_hyper(p, args->range.offset);
 	p = xdr_encode_hyper(p, args->range.length);
 	p = xdr_encode_hyper(p, args->minlength);
-	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
-				args->ctx->state);
-	p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+	p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
 	*p = cpu_to_be32(args->maxcount);
 
 	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1833,393 +1830,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 /*
  * Encode an ACCESS request
  */
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
+static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_accessargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_access(&xdr, args->access, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_access(xdr, args->access, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP request
  */
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
+static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_lookup_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP_ROOT request
  */
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
+static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_lookup_root_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode REMOVE request
  */
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_remove(&xdr, &args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_remove(xdr, &args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode RENAME request
  */
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
+static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_renameargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->old_dir, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->new_dir, &hdr);
-	encode_rename(&xdr, args->old_name, args->new_name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->old_dir, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->new_dir, &hdr);
+	encode_rename(xdr, args->old_name, args->new_name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LINK request
  */
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
+static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      const struct nfs4_link_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_link(&xdr, args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_link(xdr, args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode CREATE request
  */
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_create_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_create(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_create(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode SYMLINK request
  */
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_create_arg *args)
 {
-	return nfs4_xdr_enc_create(req, p, args);
+	nfs4_xdr_enc_create(req, xdr, args);
 }
 
 /*
  * Encode GETATTR request
  */
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
+static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_getattr_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a CLOSE request
  */
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_close(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_close(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request
  */
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_CONFIRM request
  */
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
+static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs_open_confirmargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_confirm(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_confirm(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request with no attributes.
  */
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_DOWNGRADE request
  */
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_downgrade(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_downgrade(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
+static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_lock_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lock(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lock(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
+static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_lockt_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lockt(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lockt(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
+static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_locku_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_locku(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_locku(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
-static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
+					   struct xdr_stream *xdr,
+					   struct nfs_release_lockowner_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_release_lockowner(xdr, &args->lock_owner, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READLINK request
  */
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
+static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_readlink *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readlink(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readlink(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			 args->pgbase, args->pglen);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READDIR request
  */
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
+static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_readdir_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readdir(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readdir(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			 args->pgbase, args->count);
@@ -2227,428 +2193,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
2227 __func__, hdr.replen << 2, args->pages, 2193 __func__, hdr.replen << 2, args->pages,
2228 args->pgbase, args->count); 2194 args->pgbase, args->count);
2229 encode_nops(&hdr); 2195 encode_nops(&hdr);
2230 return 0;
2231} 2196}
 
 /*
  * Encode a READ request
  */
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_read(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_read(xdr, args, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
 			 args->pages, args->pgbase, args->count);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an SETATTR request
  */
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
+static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_setattrargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_setattr(&xdr, args, args->server, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setattr(xdr, args, args->server, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a GETACL request
  */
-static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
-		struct nfs_getaclargs *args)
+static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_getaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
 	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
-	encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
+	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a WRITE request
  */
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_write(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_write(xdr, args, &hdr);
 	req->rq_snd_buf.flags |= XDRBUF_WRITE;
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  *  a COMMIT request
  */
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_commit(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_commit(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * FSINFO request
  */
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
+static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs4_fsinfo_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_fsinfo(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_fsinfo(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a PATHCONF request
  */
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
+static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_pathconf_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
 			   &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a STATFS request
  */
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
+static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_statfs_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
 			   args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
-				    struct nfs4_server_caps_arg *args)
+static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_server_caps_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
 			   FATTR4_WORD0_LINK_SUPPORT|
 			   FATTR4_WORD0_SYMLINK_SUPPORT|
 			   FATTR4_WORD0_ACLSUPPORT, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_client *clp)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_renew(&xdr, clp, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_renew(xdr, clp, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID request
  */
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
+static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_setclientid *sc)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid(&xdr, sc, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid(xdr, sc, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
+					     struct xdr_stream *xdr,
+					     struct nfs4_setclientid_res *arg)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 0,
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid_confirm(&xdr, arg, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid_confirm(xdr, arg, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
+static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_delegreturnargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_delegreturn(&xdr, args->stateid, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_delegreturn(xdr, args->stateid, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode FS_LOCATIONS request
  */
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
+static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_fs_locations_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
 	replen = hdr.replen;	/* get the attribute into args->page */
-	encode_fs_locations(&xdr, args->bitmask, &hdr);
+	encode_fs_locations(xdr, args->bitmask, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
 			0, PAGE_SIZE);
 	encode_nops(&hdr);
-	return 0;
 }
 
 #if defined(CONFIG_NFS_V4_1)
 /*
  * EXCHANGE_ID request
  */
-static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
-				    struct nfs41_exchange_id_args *args)
+static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs41_exchange_id_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_exchange_id(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_exchange_id(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a CREATE_SESSION request
  */
-static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs41_create_session_args *args)
+static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs41_create_session_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_create_session(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_create_session(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a DESTROY_SESSION request
  */
-static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
-					struct nfs4_session *session)
+static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
+					 struct xdr_stream *xdr,
+					 struct nfs4_session *session)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_destroy_session(&xdr, session, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_destroy_session(xdr, session, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SEQUENCE request
  */
-static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
-				 struct nfs4_sequence_args *args)
+static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  struct nfs4_sequence_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a GET_LEASE_TIME request
  */
-static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs4_get_lease_time_args *args)
+static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs4_get_lease_time_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->la_seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->la_seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RECLAIM_COMPLETE request
  */
-static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
-				     struct nfs41_reclaim_complete_args *args)
+static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+				struct nfs41_reclaim_complete_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_reclaim_complete(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_reclaim_complete(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode GETDEVICEINFO request
  */
-static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
-				      struct nfs4_getdeviceinfo_args *args)
+static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       struct nfs4_getdeviceinfo_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_getdeviceinfo(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_getdeviceinfo(xdr, args, &hdr);
 
 	/* set up reply kvec. Subtract notification bitmap max size (2)
 	 * so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2582,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
 			args->pdev->pglen);
 
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LAYOUTGET request
  */
-static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
-				  struct nfs4_layoutget_args *args)
+static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   struct nfs4_layoutget_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
-	encode_layoutget(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutget(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -4475,7 +4397,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
 		goto out_overflow;
 	eof = be32_to_cpup(p++);
 	count = be32_to_cpup(p);
-	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
 		dprintk("NFS: server cheating in read reply: "
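
Note: the decode_read() hunk above is a knock-on effect of the same conversion: the routine no longer holds the `__be32 *p` that was passed to xdr_init_decode(), so the count of reply-header bytes already consumed has to be recomputed from the stream's cursor. Assuming `iov` is the head iovec of `rqstp->rq_rcv_buf`, as in the surrounding code, the relationship is:

    /* bytes consumed so far = current stream position - buffer start */
    hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
    recvd = req->rq_rcv_buf.len - hdrlen;	/* payload actually received */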
@@ -5000,7 +4922,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
 		goto out_overflow;
 	len = be32_to_cpup(p);
 	if (len) {
-		int i;
+		uint32_t i;
 
 		p = xdr_inline_decode(xdr, 4 * len);
 		if (unlikely(!p))
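
Note: the `int i` to `uint32_t i` change in decode_getdeviceinfo() looks like a small type-correctness fix riding along with the interface work: the count is decoded from the wire with be32_to_cpup() as an unsigned 32-bit value, so the loop index is given the matching unsigned type. A hypothetical illustration (the helper name is invented):

    uint32_t len = be32_to_cpup(p++);	/* unsigned count off the wire */
    uint32_t i;				/* index type now matches len */

    for (i = 0; i < len; i++)
            handle_notify_type(be32_to_cpup(p++));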
@@ -5090,26 +5012,26 @@ out_overflow:
 /*
  * Decode OPEN_DOWNGRADE response
  */
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_downgrade(&xdr, res);
+	status = decode_open_downgrade(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
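
Note: the decoders keep their `int` return because the COMPOUND status must still propagate, but they likewise lose the local stream and the xdr_init_decode() call. A sketch of the converted decoder shape, again with a hypothetical FOO procedure (names invented for illustration):

    static int nfs4_xdr_dec_foo(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
                                struct nfs_foo_res *res)
    {
            struct compound_hdr hdr;
            int status;

            /* No xdr_init_decode() here: the transport wrapped
             * rqstp->rq_rcv_buf in *xdr before calling this routine. */
            status = decode_compound_hdr(xdr, &hdr);
            if (status)
                    goto out;
            status = decode_putfh(xdr);
            if (status)
                    goto out;
            status = decode_foo(xdr, res);
    out:
            return status;
    }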
@@ -5118,26 +5040,25 @@ out:
 /*
  * Decode ACCESS response
  */
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_accessres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
-	status = decode_access(&xdr, res);
+	status = decode_access(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5146,26 +5067,28 @@ out:
 /*
  * Decode LOOKUP response
  */
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_lookup(&xdr)) != 0)
+	status = decode_lookup(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server
+	status = decode_getfattr(xdr, res->fattr, res->server
 			,!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
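
Note: while these decoders are being touched, the patch also unpicks the `if ((status = f()) != 0)` assignments inside conditionals, which checkpatch flags, into the usual two-line kernel form:

    /* before */
    if ((status = decode_putfh(&xdr)) != 0)
            goto out;

    /* after */
    status = decode_putfh(xdr);
    if (status)
            goto out;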
@@ -5174,23 +5097,25 @@ out:
 /*
  * Decode LOOKUP_ROOT response
  */
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putrootfh(&xdr)) != 0)
+	status = decode_putrootfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) == 0)
-		status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfh(xdr, res->fh);
+	if (status == 0)
+		status = decode_getfattr(xdr, res->fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5199,24 +5124,25 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_removeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+	status = decode_remove(xdr, &res->cinfo);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5225,34 +5151,38 @@ out:
 /*
  * Decode RENAME response
  */
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_renameres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+	status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
+	if (status)
 		goto out;
 	/* Current FH is target directory */
-	if (decode_getfattr(&xdr, res->new_fattr, res->server,
+	if (decode_getfattr(xdr, res->new_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->old_fattr, res->server,
+	decode_getfattr(xdr, res->old_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5261,37 +5191,41 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs4_link_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+	status = decode_link(xdr, &res->cinfo);
+	if (status)
 		goto out;
 	/*
 	 * Note order: OP_LINK leaves the directory as the current
 	 * filehandle.
 	 */
-	if (decode_getfattr(&xdr, res->dir_attr, res->server,
+	if (decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5300,33 +5234,37 @@ out:
 /*
  * Decode CREATE response
  */
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_create_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+	status = decode_create(xdr, &res->dir_cinfo);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	if (decode_getfattr(&xdr, res->fattr, res->server,
+	if (decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_fattr, res->server,
+	decode_getfattr(xdr, res->dir_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5335,31 +5273,31 @@ out:
 /*
  * Decode SYMLINK response
  */
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_create_res *res)
 {
-	return nfs4_xdr_dec_create(rqstp, p, res);
+	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
 
 /*
  * Decode GETATTR response
  */
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_getattr_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5368,46 +5306,40 @@ out:
 /*
  * Encode an SETACL request
  */
-static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
+static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_setaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	int status;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	status = encode_setacl(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setacl(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return status;
 }
 
 /*
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_setaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 out:
 	return status;
 }
@@ -5416,24 +5348,22 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_getaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(&xdr, rqstp, &res->acl_len);
+	status = decode_getacl(xdr, rqstp, &res->acl_len);
 
 out:
 	return status;
@@ -5442,23 +5372,22 @@ out:
 /*
  * Decode CLOSE response
  */
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_close(&xdr, res);
+	status = decode_close(xdr, res);
 	if (status != 0)
 		goto out;
 	/*
@@ -5467,7 +5396,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
 	 * an ESTALE error. Shouldn't be a problem,
 	 * though, since fattr->valid will remain unset.
 	 */
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5476,36 +5405,35 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_savefh(&xdr);
+	status = decode_savefh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	if (decode_getfh(&xdr, &res->fh) != 0)
+	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(&xdr, res->f_attr, res->server,
+	if (decode_getfattr(xdr, res->f_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if (decode_restorefh(&xdr) != 0)
+	if (decode_restorefh(xdr) != 0)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5514,20 +5442,20 @@ out:
 /*
  * Decode OPEN_CONFIRM response
  */
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs_open_confirmres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_confirm(&xdr, res);
+	status = decode_open_confirm(xdr, res);
 out:
 	return status;
 }
@@ -5535,26 +5463,26 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->f_attr, res->server,
+	decode_getfattr(xdr, res->f_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5563,26 +5491,26 @@ out:
 /*
  * Decode SETATTR response
  */
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nfs_setattrres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5591,23 +5519,22 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_lock_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lock(&xdr, res);
+	status = decode_lock(xdr, res);
 out:
 	return status;
 }
@@ -5615,23 +5542,22 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_lockt_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lockt(&xdr, res);
+	status = decode_lockt(xdr, res);
 out:
 	return status;
 }
@@ -5639,61 +5565,58 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_locku_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_locku(&xdr, res);
+	status = decode_locku(xdr, res);
 out:
 	return status;
 }
 
-static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
+					  struct xdr_stream *xdr, void *dummy)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_release_lockowner(&xdr);
+		status = decode_release_lockowner(xdr);
 	return status;
 }
 
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
 				 struct nfs4_readlink_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_readlink(&xdr, rqstp);
+	status = decode_readlink(xdr, rqstp);
 out:
 	return status;
 }
@@ -5701,23 +5624,22 @@ out:
5701/* 5624/*
5702 * Decode READDIR response 5625 * Decode READDIR response
5703 */ 5626 */
5704static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res) 5627static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5628 struct nfs4_readdir_res *res)
5705{ 5629{
5706 struct xdr_stream xdr;
5707 struct compound_hdr hdr; 5630 struct compound_hdr hdr;
5708 int status; 5631 int status;
5709 5632
5710 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5633 status = decode_compound_hdr(xdr, &hdr);
5711 status = decode_compound_hdr(&xdr, &hdr);
5712 if (status) 5634 if (status)
5713 goto out; 5635 goto out;
5714 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5636 status = decode_sequence(xdr, &res->seq_res, rqstp);
5715 if (status) 5637 if (status)
5716 goto out; 5638 goto out;
5717 status = decode_putfh(&xdr); 5639 status = decode_putfh(xdr);
5718 if (status) 5640 if (status)
5719 goto out; 5641 goto out;
5720 status = decode_readdir(&xdr, rqstp, res); 5642 status = decode_readdir(xdr, rqstp, res);
5721out: 5643out:
5722 return status; 5644 return status;
5723} 5645}
@@ -5725,23 +5647,22 @@ out:
5725/* 5647/*
5726 * Decode Read response 5648 * Decode Read response
5727 */ 5649 */
5728static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res) 5650static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5651 struct nfs_readres *res)
5729{ 5652{
5730 struct xdr_stream xdr;
5731 struct compound_hdr hdr; 5653 struct compound_hdr hdr;
5732 int status; 5654 int status;
5733 5655
5734 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5656 status = decode_compound_hdr(xdr, &hdr);
5735 status = decode_compound_hdr(&xdr, &hdr);
5736 if (status) 5657 if (status)
5737 goto out; 5658 goto out;
5738 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5659 status = decode_sequence(xdr, &res->seq_res, rqstp);
5739 if (status) 5660 if (status)
5740 goto out; 5661 goto out;
5741 status = decode_putfh(&xdr); 5662 status = decode_putfh(xdr);
5742 if (status) 5663 if (status)
5743 goto out; 5664 goto out;
5744 status = decode_read(&xdr, rqstp, res); 5665 status = decode_read(xdr, rqstp, res);
5745 if (!status) 5666 if (!status)
5746 status = res->count; 5667 status = res->count;
5747out: 5668out:
@@ -5751,26 +5672,25 @@ out:
5751/* 5672/*
5752 * Decode WRITE response 5673 * Decode WRITE response
5753 */ 5674 */
5754static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) 5675static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5676 struct nfs_writeres *res)
5755{ 5677{
5756 struct xdr_stream xdr;
5757 struct compound_hdr hdr; 5678 struct compound_hdr hdr;
5758 int status; 5679 int status;
5759 5680
5760 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5681 status = decode_compound_hdr(xdr, &hdr);
5761 status = decode_compound_hdr(&xdr, &hdr);
5762 if (status) 5682 if (status)
5763 goto out; 5683 goto out;
5764 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5684 status = decode_sequence(xdr, &res->seq_res, rqstp);
5765 if (status) 5685 if (status)
5766 goto out; 5686 goto out;
5767 status = decode_putfh(&xdr); 5687 status = decode_putfh(xdr);
5768 if (status) 5688 if (status)
5769 goto out; 5689 goto out;
5770 status = decode_write(&xdr, res); 5690 status = decode_write(xdr, res);
5771 if (status) 5691 if (status)
5772 goto out; 5692 goto out;
5773 decode_getfattr(&xdr, res->fattr, res->server, 5693 decode_getfattr(xdr, res->fattr, res->server,
5774 !RPC_IS_ASYNC(rqstp->rq_task)); 5694 !RPC_IS_ASYNC(rqstp->rq_task));
5775 if (!status) 5695 if (!status)
5776 status = res->count; 5696 status = res->count;
@@ -5781,26 +5701,25 @@ out:
5781/* 5701/*
5782 * Decode COMMIT response 5702 * Decode COMMIT response
5783 */ 5703 */
5784static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res) 5704static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5705 struct nfs_writeres *res)
5785{ 5706{
5786 struct xdr_stream xdr;
5787 struct compound_hdr hdr; 5707 struct compound_hdr hdr;
5788 int status; 5708 int status;
5789 5709
5790 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5710 status = decode_compound_hdr(xdr, &hdr);
5791 status = decode_compound_hdr(&xdr, &hdr);
5792 if (status) 5711 if (status)
5793 goto out; 5712 goto out;
5794 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5713 status = decode_sequence(xdr, &res->seq_res, rqstp);
5795 if (status) 5714 if (status)
5796 goto out; 5715 goto out;
5797 status = decode_putfh(&xdr); 5716 status = decode_putfh(xdr);
5798 if (status) 5717 if (status)
5799 goto out; 5718 goto out;
5800 status = decode_commit(&xdr, res); 5719 status = decode_commit(xdr, res);
5801 if (status) 5720 if (status)
5802 goto out; 5721 goto out;
5803 decode_getfattr(&xdr, res->fattr, res->server, 5722 decode_getfattr(xdr, res->fattr, res->server,
5804 !RPC_IS_ASYNC(rqstp->rq_task)); 5723 !RPC_IS_ASYNC(rqstp->rq_task));
5805out: 5724out:
5806 return status; 5725 return status;
@@ -5809,85 +5728,80 @@ out:
5809/* 5728/*
5810 * Decode FSINFO response 5729 * Decode FSINFO response
5811 */ 5730 */
5812static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, 5731static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
5813 struct nfs4_fsinfo_res *res) 5732 struct nfs4_fsinfo_res *res)
5814{ 5733{
5815 struct xdr_stream xdr;
5816 struct compound_hdr hdr; 5734 struct compound_hdr hdr;
5817 int status; 5735 int status;
5818 5736
5819 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5737 status = decode_compound_hdr(xdr, &hdr);
5820 status = decode_compound_hdr(&xdr, &hdr);
5821 if (!status) 5738 if (!status)
5822 status = decode_sequence(&xdr, &res->seq_res, req); 5739 status = decode_sequence(xdr, &res->seq_res, req);
5823 if (!status) 5740 if (!status)
5824 status = decode_putfh(&xdr); 5741 status = decode_putfh(xdr);
5825 if (!status) 5742 if (!status)
5826 status = decode_fsinfo(&xdr, res->fsinfo); 5743 status = decode_fsinfo(xdr, res->fsinfo);
5827 return status; 5744 return status;
5828} 5745}
5829 5746
5830/* 5747/*
5831 * Decode PATHCONF response 5748 * Decode PATHCONF response
5832 */ 5749 */
5833static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, 5750static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
5834 struct nfs4_pathconf_res *res) 5751 struct nfs4_pathconf_res *res)
5835{ 5752{
5836 struct xdr_stream xdr;
5837 struct compound_hdr hdr; 5753 struct compound_hdr hdr;
5838 int status; 5754 int status;
5839 5755
5840 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5756 status = decode_compound_hdr(xdr, &hdr);
5841 status = decode_compound_hdr(&xdr, &hdr);
5842 if (!status) 5757 if (!status)
5843 status = decode_sequence(&xdr, &res->seq_res, req); 5758 status = decode_sequence(xdr, &res->seq_res, req);
5844 if (!status) 5759 if (!status)
5845 status = decode_putfh(&xdr); 5760 status = decode_putfh(xdr);
5846 if (!status) 5761 if (!status)
5847 status = decode_pathconf(&xdr, res->pathconf); 5762 status = decode_pathconf(xdr, res->pathconf);
5848 return status; 5763 return status;
5849} 5764}
5850 5765
5851/* 5766/*
5852 * Decode STATFS response 5767 * Decode STATFS response
5853 */ 5768 */
5854static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, 5769static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
5855 struct nfs4_statfs_res *res) 5770 struct nfs4_statfs_res *res)
5856{ 5771{
5857 struct xdr_stream xdr;
5858 struct compound_hdr hdr; 5772 struct compound_hdr hdr;
5859 int status; 5773 int status;
5860 5774
5861 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5775 status = decode_compound_hdr(xdr, &hdr);
5862 status = decode_compound_hdr(&xdr, &hdr);
5863 if (!status) 5776 if (!status)
5864 status = decode_sequence(&xdr, &res->seq_res, req); 5777 status = decode_sequence(xdr, &res->seq_res, req);
5865 if (!status) 5778 if (!status)
5866 status = decode_putfh(&xdr); 5779 status = decode_putfh(xdr);
5867 if (!status) 5780 if (!status)
5868 status = decode_statfs(&xdr, res->fsstat); 5781 status = decode_statfs(xdr, res->fsstat);
5869 return status; 5782 return status;
5870} 5783}
5871 5784
5872/* 5785/*
5873 * Decode GETATTR_BITMAP response 5786 * Decode GETATTR_BITMAP response
5874 */ 5787 */
5875static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) 5788static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
5789 struct xdr_stream *xdr,
5790 struct nfs4_server_caps_res *res)
5876{ 5791{
5877 struct xdr_stream xdr;
5878 struct compound_hdr hdr; 5792 struct compound_hdr hdr;
5879 int status; 5793 int status;
5880 5794
5881 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5795 status = decode_compound_hdr(xdr, &hdr);
5882 status = decode_compound_hdr(&xdr, &hdr);
5883 if (status) 5796 if (status)
5884 goto out; 5797 goto out;
5885 status = decode_sequence(&xdr, &res->seq_res, req); 5798 status = decode_sequence(xdr, &res->seq_res, req);
5886 if (status) 5799 if (status)
5887 goto out; 5800 goto out;
5888 if ((status = decode_putfh(&xdr)) != 0) 5801 status = decode_putfh(xdr);
5802 if (status)
5889 goto out; 5803 goto out;
5890 status = decode_server_caps(&xdr, res); 5804 status = decode_server_caps(xdr, res);
5891out: 5805out:
5892 return status; 5806 return status;
5893} 5807}
@@ -5895,79 +5809,77 @@ out:
5895/* 5809/*
5896 * Decode RENEW response 5810 * Decode RENEW response
5897 */ 5811 */
5898static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy) 5812static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5813 void *__unused)
5899{ 5814{
5900 struct xdr_stream xdr;
5901 struct compound_hdr hdr; 5815 struct compound_hdr hdr;
5902 int status; 5816 int status;
5903 5817
5904 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5818 status = decode_compound_hdr(xdr, &hdr);
5905 status = decode_compound_hdr(&xdr, &hdr);
5906 if (!status) 5819 if (!status)
5907 status = decode_renew(&xdr); 5820 status = decode_renew(xdr);
5908 return status; 5821 return status;
5909} 5822}
5910 5823
5911/* 5824/*
5912 * Decode SETCLIENTID response 5825 * Decode SETCLIENTID response
5913 */ 5826 */
5914static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, 5827static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
5915 struct nfs4_setclientid_res *res) 5828 struct xdr_stream *xdr,
5829 struct nfs4_setclientid_res *res)
5916{ 5830{
5917 struct xdr_stream xdr;
5918 struct compound_hdr hdr; 5831 struct compound_hdr hdr;
5919 int status; 5832 int status;
5920 5833
5921 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5834 status = decode_compound_hdr(xdr, &hdr);
5922 status = decode_compound_hdr(&xdr, &hdr);
5923 if (!status) 5835 if (!status)
5924 status = decode_setclientid(&xdr, res); 5836 status = decode_setclientid(xdr, res);
5925 return status; 5837 return status;
5926} 5838}
5927 5839
5928/* 5840/*
5929 * Decode SETCLIENTID_CONFIRM response 5841 * Decode SETCLIENTID_CONFIRM response
5930 */ 5842 */
5931static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) 5843static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
5844 struct xdr_stream *xdr,
5845 struct nfs_fsinfo *fsinfo)
5932{ 5846{
5933 struct xdr_stream xdr;
5934 struct compound_hdr hdr; 5847 struct compound_hdr hdr;
5935 int status; 5848 int status;
5936 5849
5937 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5850 status = decode_compound_hdr(xdr, &hdr);
5938 status = decode_compound_hdr(&xdr, &hdr);
5939 if (!status) 5851 if (!status)
5940 status = decode_setclientid_confirm(&xdr); 5852 status = decode_setclientid_confirm(xdr);
5941 if (!status) 5853 if (!status)
5942 status = decode_putrootfh(&xdr); 5854 status = decode_putrootfh(xdr);
5943 if (!status) 5855 if (!status)
5944 status = decode_fsinfo(&xdr, fsinfo); 5856 status = decode_fsinfo(xdr, fsinfo);
5945 return status; 5857 return status;
5946} 5858}
5947 5859
5948/* 5860/*
5949 * Decode DELEGRETURN response 5861 * Decode DELEGRETURN response
5950 */ 5862 */
5951static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) 5863static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
5864 struct xdr_stream *xdr,
5865 struct nfs4_delegreturnres *res)
5952{ 5866{
5953 struct xdr_stream xdr;
5954 struct compound_hdr hdr; 5867 struct compound_hdr hdr;
5955 int status; 5868 int status;
5956 5869
5957 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5870 status = decode_compound_hdr(xdr, &hdr);
5958 status = decode_compound_hdr(&xdr, &hdr);
5959 if (status) 5871 if (status)
5960 goto out; 5872 goto out;
5961 status = decode_sequence(&xdr, &res->seq_res, rqstp); 5873 status = decode_sequence(xdr, &res->seq_res, rqstp);
5962 if (status) 5874 if (status)
5963 goto out; 5875 goto out;
5964 status = decode_putfh(&xdr); 5876 status = decode_putfh(xdr);
5965 if (status != 0) 5877 if (status != 0)
5966 goto out; 5878 goto out;
5967 status = decode_delegreturn(&xdr); 5879 status = decode_delegreturn(xdr);
5968 if (status != 0) 5880 if (status != 0)
5969 goto out; 5881 goto out;
5970 decode_getfattr(&xdr, res->fattr, res->server, 5882 decode_getfattr(xdr, res->fattr, res->server,
5971 !RPC_IS_ASYNC(rqstp->rq_task)); 5883 !RPC_IS_ASYNC(rqstp->rq_task));
5972out: 5884out:
5973 return status; 5885 return status;
@@ -5976,26 +5888,27 @@ out:
5976/* 5888/*
5977 * Decode FS_LOCATIONS response 5889 * Decode FS_LOCATIONS response
5978 */ 5890 */
5979static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, 5891static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
5892 struct xdr_stream *xdr,
5980 struct nfs4_fs_locations_res *res) 5893 struct nfs4_fs_locations_res *res)
5981{ 5894{
5982 struct xdr_stream xdr;
5983 struct compound_hdr hdr; 5895 struct compound_hdr hdr;
5984 int status; 5896 int status;
5985 5897
5986 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5898 status = decode_compound_hdr(xdr, &hdr);
5987 status = decode_compound_hdr(&xdr, &hdr);
5988 if (status) 5899 if (status)
5989 goto out; 5900 goto out;
5990 status = decode_sequence(&xdr, &res->seq_res, req); 5901 status = decode_sequence(xdr, &res->seq_res, req);
5991 if (status) 5902 if (status)
5992 goto out; 5903 goto out;
5993 if ((status = decode_putfh(&xdr)) != 0) 5904 status = decode_putfh(xdr);
5905 if (status)
5994 goto out; 5906 goto out;
5995 if ((status = decode_lookup(&xdr)) != 0) 5907 status = decode_lookup(xdr);
5908 if (status)
5996 goto out; 5909 goto out;
5997 xdr_enter_page(&xdr, PAGE_SIZE); 5910 xdr_enter_page(xdr, PAGE_SIZE);
5998 status = decode_getfattr(&xdr, &res->fs_locations->fattr, 5911 status = decode_getfattr(xdr, &res->fs_locations->fattr,
5999 res->fs_locations->server, 5912 res->fs_locations->server,
6000 !RPC_IS_ASYNC(req->rq_task)); 5913 !RPC_IS_ASYNC(req->rq_task));
6001out: 5914out:
@@ -6006,129 +5919,122 @@ out:
6006/* 5919/*
6007 * Decode EXCHANGE_ID response 5920 * Decode EXCHANGE_ID response
6008 */ 5921 */
6009static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, 5922static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
5923 struct xdr_stream *xdr,
6010 void *res) 5924 void *res)
6011{ 5925{
6012 struct xdr_stream xdr;
6013 struct compound_hdr hdr; 5926 struct compound_hdr hdr;
6014 int status; 5927 int status;
6015 5928
6016 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5929 status = decode_compound_hdr(xdr, &hdr);
6017 status = decode_compound_hdr(&xdr, &hdr);
6018 if (!status) 5930 if (!status)
6019 status = decode_exchange_id(&xdr, res); 5931 status = decode_exchange_id(xdr, res);
6020 return status; 5932 return status;
6021} 5933}
6022 5934
6023/* 5935/*
6024 * Decode CREATE_SESSION response 5936 * Decode CREATE_SESSION response
6025 */ 5937 */
6026static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, 5938static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
5939 struct xdr_stream *xdr,
6027 struct nfs41_create_session_res *res) 5940 struct nfs41_create_session_res *res)
6028{ 5941{
6029 struct xdr_stream xdr;
6030 struct compound_hdr hdr; 5942 struct compound_hdr hdr;
6031 int status; 5943 int status;
6032 5944
6033 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5945 status = decode_compound_hdr(xdr, &hdr);
6034 status = decode_compound_hdr(&xdr, &hdr);
6035 if (!status) 5946 if (!status)
6036 status = decode_create_session(&xdr, res); 5947 status = decode_create_session(xdr, res);
6037 return status; 5948 return status;
6038} 5949}
6039 5950
6040/* 5951/*
6041 * Decode DESTROY_SESSION response 5952 * Decode DESTROY_SESSION response
6042 */ 5953 */
6043static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, 5954static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
6044 void *dummy) 5955 struct xdr_stream *xdr,
5956 void *res)
6045{ 5957{
6046 struct xdr_stream xdr;
6047 struct compound_hdr hdr; 5958 struct compound_hdr hdr;
6048 int status; 5959 int status;
6049 5960
6050 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5961 status = decode_compound_hdr(xdr, &hdr);
6051 status = decode_compound_hdr(&xdr, &hdr);
6052 if (!status) 5962 if (!status)
6053 status = decode_destroy_session(&xdr, dummy); 5963 status = decode_destroy_session(xdr, res);
6054 return status; 5964 return status;
6055} 5965}
6056 5966
6057/* 5967/*
6058 * Decode SEQUENCE response 5968 * Decode SEQUENCE response
6059 */ 5969 */
6060static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, 5970static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
5971 struct xdr_stream *xdr,
6061 struct nfs4_sequence_res *res) 5972 struct nfs4_sequence_res *res)
6062{ 5973{
6063 struct xdr_stream xdr;
6064 struct compound_hdr hdr; 5974 struct compound_hdr hdr;
6065 int status; 5975 int status;
6066 5976
6067 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5977 status = decode_compound_hdr(xdr, &hdr);
6068 status = decode_compound_hdr(&xdr, &hdr);
6069 if (!status) 5978 if (!status)
6070 status = decode_sequence(&xdr, res, rqstp); 5979 status = decode_sequence(xdr, res, rqstp);
6071 return status; 5980 return status;
6072} 5981}
6073 5982
6074/* 5983/*
6075 * Decode GET_LEASE_TIME response 5984 * Decode GET_LEASE_TIME response
6076 */ 5985 */
6077static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, 5986static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
5987 struct xdr_stream *xdr,
6078 struct nfs4_get_lease_time_res *res) 5988 struct nfs4_get_lease_time_res *res)
6079{ 5989{
6080 struct xdr_stream xdr;
6081 struct compound_hdr hdr; 5990 struct compound_hdr hdr;
6082 int status; 5991 int status;
6083 5992
6084 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5993 status = decode_compound_hdr(xdr, &hdr);
6085 status = decode_compound_hdr(&xdr, &hdr);
6086 if (!status) 5994 if (!status)
6087 status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); 5995 status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
6088 if (!status) 5996 if (!status)
6089 status = decode_putrootfh(&xdr); 5997 status = decode_putrootfh(xdr);
6090 if (!status) 5998 if (!status)
6091 status = decode_fsinfo(&xdr, res->lr_fsinfo); 5999 status = decode_fsinfo(xdr, res->lr_fsinfo);
6092 return status; 6000 return status;
6093} 6001}
6094 6002
6095/* 6003/*
6096 * Decode RECLAIM_COMPLETE response 6004 * Decode RECLAIM_COMPLETE response
6097 */ 6005 */
6098static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, 6006static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
6007 struct xdr_stream *xdr,
6099 struct nfs41_reclaim_complete_res *res) 6008 struct nfs41_reclaim_complete_res *res)
6100{ 6009{
6101 struct xdr_stream xdr;
6102 struct compound_hdr hdr; 6010 struct compound_hdr hdr;
6103 int status; 6011 int status;
6104 6012
6105 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6013 status = decode_compound_hdr(xdr, &hdr);
6106 status = decode_compound_hdr(&xdr, &hdr);
6107 if (!status) 6014 if (!status)
6108 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6015 status = decode_sequence(xdr, &res->seq_res, rqstp);
6109 if (!status) 6016 if (!status)
6110 status = decode_reclaim_complete(&xdr, (void *)NULL); 6017 status = decode_reclaim_complete(xdr, (void *)NULL);
6111 return status; 6018 return status;
6112} 6019}
6113 6020
6114/* 6021/*
6115 * Decode GETDEVINFO response 6022 * Decode GETDEVINFO response
6116 */ 6023 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, 6024static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
6025 struct xdr_stream *xdr,
6118 struct nfs4_getdeviceinfo_res *res) 6026 struct nfs4_getdeviceinfo_res *res)
6119{ 6027{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr; 6028 struct compound_hdr hdr;
6122 int status; 6029 int status;
6123 6030
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6031 status = decode_compound_hdr(xdr, &hdr);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0) 6032 if (status != 0)
6127 goto out; 6033 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6034 status = decode_sequence(xdr, &res->seq_res, rqstp);
6129 if (status != 0) 6035 if (status != 0)
6130 goto out; 6036 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev); 6037 status = decode_getdeviceinfo(xdr, res->pdev);
6132out: 6038out:
6133 return status; 6039 return status;
6134} 6040}
@@ -6136,31 +6042,44 @@ out:
6136/* 6042/*
6137 * Decode LAYOUTGET response 6043 * Decode LAYOUTGET response
6138 */ 6044 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, 6045static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6046 struct xdr_stream *xdr,
6140 struct nfs4_layoutget_res *res) 6047 struct nfs4_layoutget_res *res)
6141{ 6048{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr; 6049 struct compound_hdr hdr;
6144 int status; 6050 int status;
6145 6051
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 6052 status = decode_compound_hdr(xdr, &hdr);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status) 6053 if (status)
6149 goto out; 6054 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp); 6055 status = decode_sequence(xdr, &res->seq_res, rqstp);
6151 if (status) 6056 if (status)
6152 goto out; 6057 goto out;
6153 status = decode_putfh(&xdr); 6058 status = decode_putfh(xdr);
6154 if (status) 6059 if (status)
6155 goto out; 6060 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res); 6061 status = decode_layoutget(xdr, rqstp, res);
6157out: 6062out:
6158 return status; 6063 return status;
6159} 6064}
6160#endif /* CONFIG_NFS_V4_1 */ 6065#endif /* CONFIG_NFS_V4_1 */
6161 6066
6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, 6067/**
6163 struct nfs_server *server, int plus) 6068 * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in
6069 * the local page cache.
6070 * @xdr: XDR stream where entry resides
6071 * @entry: buffer to fill in with entry data
6072 * @plus: boolean indicating whether this should be a readdirplus entry
6073 *
6074 * Returns zero if successful, otherwise a negative errno value is
6075 * returned.
6076 *
6077 * This function is not invoked during READDIR reply decoding, but
6078 * rather whenever an application invokes the getdents(2) system call
6079 * on a directory already in our cache.
6080 */
6081int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6082 int plus)
6164{ 6083{
6165 uint32_t bitmap[2] = {0}; 6084 uint32_t bitmap[2] = {0};
6166 uint32_t len; 6085 uint32_t len;
@@ -6172,9 +6091,9 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6172 if (unlikely(!p)) 6091 if (unlikely(!p))
6173 goto out_overflow; 6092 goto out_overflow;
6174 if (!ntohl(*p++)) 6093 if (!ntohl(*p++))
6175 return ERR_PTR(-EAGAIN); 6094 return -EAGAIN;
6176 entry->eof = 1; 6095 entry->eof = 1;
6177 return ERR_PTR(-EBADCOOKIE); 6096 return -EBADCOOKIE;
6178 } 6097 }
6179 6098
6180 p = xdr_inline_decode(xdr, 12); 6099 p = xdr_inline_decode(xdr, 12);
@@ -6203,7 +6122,8 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6203 if (decode_attr_length(xdr, &len, &p) < 0) 6122 if (decode_attr_length(xdr, &len, &p) < 0)
6204 goto out_overflow; 6123 goto out_overflow;
6205 6124
6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0) 6125 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6126 entry->server, 1) < 0)
6207 goto out_overflow; 6127 goto out_overflow;
6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6128 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6209 entry->ino = entry->fattr->fileid; 6129 entry->ino = entry->fattr->fileid;
@@ -6215,17 +6135,11 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6215 if (verify_attr_len(xdr, p, len) < 0) 6135 if (verify_attr_len(xdr, p, len) < 0)
6216 goto out_overflow; 6136 goto out_overflow;
6217 6137
6218 p = xdr_inline_peek(xdr, 8); 6138 return 0;
6219 if (p != NULL)
6220 entry->eof = !p[0] && p[1];
6221 else
6222 entry->eof = 0;
6223
6224 return p;
6225 6139
6226out_overflow: 6140out_overflow:
6227 print_overflow_msg(__func__, xdr); 6141 print_overflow_msg(__func__, xdr);
6228 return ERR_PTR(-EAGAIN); 6142 return -EAGAIN;
6229} 6143}
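
Two conventions change in nfs4_decode_dirent() above: errors are now
reported as a plain negative errno instead of an ERR_PTR-encoded __be32
pointer, and the function no longer peeks ahead to set entry->eof itself.
A hypothetical caller loop showing the new contract (consume_entry is an
assumed helper, not part of this patch):

	static int fill_dir_entries(struct xdr_stream *xdr,
				    struct nfs_entry *entry, int plus)
	{
		int status;

		for (;;) {
			status = nfs4_decode_dirent(xdr, entry, plus);
			if (status)
				break;
			consume_entry(entry);	/* assumed helper */
		}
		/* -EBADCOOKIE with entry->eof set means a clean end of
		 * directory; -EAGAIN means refill the stream and retry. */
		return status;
	}
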
6230 6144
6231/* 6145/*
@@ -6301,8 +6215,8 @@ nfs4_stat_to_errno(int stat)
6301#define PROC(proc, argtype, restype) \ 6215#define PROC(proc, argtype, restype) \
6302[NFSPROC4_CLNT_##proc] = { \ 6216[NFSPROC4_CLNT_##proc] = { \
6303 .p_proc = NFSPROC4_COMPOUND, \ 6217 .p_proc = NFSPROC4_COMPOUND, \
6304 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 6218 .p_encode = (kxdreproc_t)nfs4_xdr_##argtype, \
6305 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 6219 .p_decode = (kxdrdproc_t)nfs4_xdr_##restype, \
6306 .p_arglen = NFS4_##argtype##_sz, \ 6220 .p_arglen = NFS4_##argtype##_sz, \
6307 .p_replen = NFS4_##restype##_sz, \ 6221 .p_replen = NFS4_##restype##_sz, \
6308 .p_statidx = NFSPROC4_CLNT_##proc, \ 6222 .p_statidx = NFSPROC4_CLNT_##proc, \
@@ -6310,50 +6224,50 @@ nfs4_stat_to_errno(int stat)
6310} 6224}
6311 6225
6312struct rpc_procinfo nfs4_procedures[] = { 6226struct rpc_procinfo nfs4_procedures[] = {
6313 PROC(READ, enc_read, dec_read), 6227 PROC(READ, enc_read, dec_read),
6314 PROC(WRITE, enc_write, dec_write), 6228 PROC(WRITE, enc_write, dec_write),
6315 PROC(COMMIT, enc_commit, dec_commit), 6229 PROC(COMMIT, enc_commit, dec_commit),
6316 PROC(OPEN, enc_open, dec_open), 6230 PROC(OPEN, enc_open, dec_open),
6317 PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), 6231 PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
6318 PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), 6232 PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
6319 PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), 6233 PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
6320 PROC(CLOSE, enc_close, dec_close), 6234 PROC(CLOSE, enc_close, dec_close),
6321 PROC(SETATTR, enc_setattr, dec_setattr), 6235 PROC(SETATTR, enc_setattr, dec_setattr),
6322 PROC(FSINFO, enc_fsinfo, dec_fsinfo), 6236 PROC(FSINFO, enc_fsinfo, dec_fsinfo),
6323 PROC(RENEW, enc_renew, dec_renew), 6237 PROC(RENEW, enc_renew, dec_renew),
6324 PROC(SETCLIENTID, enc_setclientid, dec_setclientid), 6238 PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
6325 PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), 6239 PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
6326 PROC(LOCK, enc_lock, dec_lock), 6240 PROC(LOCK, enc_lock, dec_lock),
6327 PROC(LOCKT, enc_lockt, dec_lockt), 6241 PROC(LOCKT, enc_lockt, dec_lockt),
6328 PROC(LOCKU, enc_locku, dec_locku), 6242 PROC(LOCKU, enc_locku, dec_locku),
6329 PROC(ACCESS, enc_access, dec_access), 6243 PROC(ACCESS, enc_access, dec_access),
6330 PROC(GETATTR, enc_getattr, dec_getattr), 6244 PROC(GETATTR, enc_getattr, dec_getattr),
6331 PROC(LOOKUP, enc_lookup, dec_lookup), 6245 PROC(LOOKUP, enc_lookup, dec_lookup),
6332 PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), 6246 PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
6333 PROC(REMOVE, enc_remove, dec_remove), 6247 PROC(REMOVE, enc_remove, dec_remove),
6334 PROC(RENAME, enc_rename, dec_rename), 6248 PROC(RENAME, enc_rename, dec_rename),
6335 PROC(LINK, enc_link, dec_link), 6249 PROC(LINK, enc_link, dec_link),
6336 PROC(SYMLINK, enc_symlink, dec_symlink), 6250 PROC(SYMLINK, enc_symlink, dec_symlink),
6337 PROC(CREATE, enc_create, dec_create), 6251 PROC(CREATE, enc_create, dec_create),
6338 PROC(PATHCONF, enc_pathconf, dec_pathconf), 6252 PROC(PATHCONF, enc_pathconf, dec_pathconf),
6339 PROC(STATFS, enc_statfs, dec_statfs), 6253 PROC(STATFS, enc_statfs, dec_statfs),
6340 PROC(READLINK, enc_readlink, dec_readlink), 6254 PROC(READLINK, enc_readlink, dec_readlink),
6341 PROC(READDIR, enc_readdir, dec_readdir), 6255 PROC(READDIR, enc_readdir, dec_readdir),
6342 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), 6256 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
6343 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), 6257 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
6344 PROC(GETACL, enc_getacl, dec_getacl), 6258 PROC(GETACL, enc_getacl, dec_getacl),
6345 PROC(SETACL, enc_setacl, dec_setacl), 6259 PROC(SETACL, enc_setacl, dec_setacl),
6346 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6260 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6347 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6261 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6348#if defined(CONFIG_NFS_V4_1) 6262#if defined(CONFIG_NFS_V4_1)
6349 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6263 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6350 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6264 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
6351 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), 6265 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
6352 PROC(SEQUENCE, enc_sequence, dec_sequence), 6266 PROC(SEQUENCE, enc_sequence, dec_sequence),
6353 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6267 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
6354 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6268 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6355 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6269 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6356 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6270 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6357#endif /* CONFIG_NFS_V4_1 */ 6271#endif /* CONFIG_NFS_V4_1 */
6358}; 6272};
6359 6273
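
For reference, each entry in the table above is generated by the PROC()
macro; after this patch PROC(READ, enc_read, dec_read) expands to roughly
the following (fields hidden by the hunk boundary, such as the procedure
name, are omitted):

	[NFSPROC4_CLNT_READ] = {
		.p_proc    = NFSPROC4_COMPOUND,
		.p_encode  = (kxdreproc_t)nfs4_xdr_enc_read,
		.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_read,
		.p_arglen  = NFS4_enc_read_sz,
		.p_replen  = NFS4_dec_read_sz,
		.p_statidx = NFSPROC4_CLNT_READ,
	},
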
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b68536cc9046..e1164e3f9e69 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep;
26static inline struct nfs_page * 26static inline struct nfs_page *
27nfs_page_alloc(void) 27nfs_page_alloc(void)
28{ 28{
29 struct nfs_page *p; 29 struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
30 p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); 30 if (p)
31 if (p) {
32 memset(p, 0, sizeof(*p));
33 INIT_LIST_HEAD(&p->wb_list); 31 INIT_LIST_HEAD(&p->wb_list);
34 }
35 return p; 32 return p;
36} 33}
37 34
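
The nfs_page_alloc() change is a pure consolidation: kmem_cache_zalloc()
is defined in the slab headers as an allocation with __GFP_ZERO, so the
explicit memset() becomes redundant. Equivalent definition, for reference:

	static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
	{
		return kmem_cache_alloc(k, flags | __GFP_ZERO);
	}
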
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index db773428f95f..bc4089769735 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
177 * pNFS client layout cache 177 * pNFS client layout cache
178 */ 178 */
179 179
180/* Need to hold i_lock if caller does not already hold reference */
181void
182get_layout_hdr(struct pnfs_layout_hdr *lo)
183{
184 atomic_inc(&lo->plh_refcount);
185}
186
180static void 187static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo) 188destroy_layout_hdr(struct pnfs_layout_hdr *lo)
182{ 189{
183 assert_spin_locked(&lo->inode->i_lock); 190 dprintk("%s: freeing layout cache %p\n", __func__, lo);
184 lo->refcount++; 191 BUG_ON(!list_empty(&lo->plh_layouts));
192 NFS_I(lo->plh_inode)->layout = NULL;
193 kfree(lo);
185} 194}
186 195
187static void 196static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo) 197put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{ 198{
190 assert_spin_locked(&lo->inode->i_lock); 199 if (atomic_dec_and_test(&lo->plh_refcount))
191 BUG_ON(lo->refcount == 0); 200 destroy_layout_hdr(lo);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200} 201}
201 202
202void 203void
203put_layout_hdr(struct inode *inode) 204put_layout_hdr(struct pnfs_layout_hdr *lo)
204{ 205{
205 spin_lock(&inode->i_lock); 206 struct inode *inode = lo->plh_inode;
206 put_layout_hdr_locked(NFS_I(inode)->layout); 207
207 spin_unlock(&inode->i_lock); 208 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
209 destroy_layout_hdr(lo);
210 spin_unlock(&inode->i_lock);
211 }
208} 212}
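
put_layout_hdr() now relies on atomic_dec_and_lock(), which takes i_lock
only when the refcount actually drops to zero. A sketch of that helper's
logic, open-coded (the real implementation lives in the generic kernel
library, not in this patch):

	static int dec_and_lock_sketch(atomic_t *cnt, spinlock_t *lock)
	{
		/* Fast path: decrement while the count stays above one. */
		if (atomic_add_unless(cnt, -1, 1))
			return 0;
		/* Slow path: possibly the last reference; decide under the lock. */
		spin_lock(lock);
		if (atomic_dec_and_test(cnt))
			return 1;	/* count hit zero; caller must unlock */
		spin_unlock(lock);
		return 0;
	}
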
209 213
210static void 214static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) 215init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{ 216{
213 INIT_LIST_HEAD(&lseg->fi_list); 217 INIT_LIST_HEAD(&lseg->pls_list);
214 kref_init(&lseg->kref); 218 atomic_set(&lseg->pls_refcount, 1);
215 lseg->layout = lo; 219 smp_mb();
220 set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
221 lseg->pls_layout = lo;
216} 222}
217 223
218/* Called without i_lock held, as the free_lseg call may sleep */ 224static void free_lseg(struct pnfs_layout_segment *lseg)
219static void
220destroy_lseg(struct kref *kref)
221{ 225{
222 struct pnfs_layout_segment *lseg = 226 struct inode *ino = lseg->pls_layout->plh_inode;
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225 227
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 228 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ 229 /* Matched by get_layout_hdr in pnfs_insert_layout */
229 put_layout_hdr(ino); 230 put_layout_hdr(NFS_I(ino)->layout);
230} 231}
231 232
232static void 233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
233put_lseg(struct pnfs_layout_segment *lseg) 234 * could sleep, so must be called outside of the lock.
 235 * Returns 1 if the object was removed, otherwise 0.
236 */
237static int
238put_lseg_locked(struct pnfs_layout_segment *lseg,
239 struct list_head *tmp_list)
240{
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) {
245 struct inode *ino = lseg->pls_layout->plh_inode;
246
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
248 list_del(&lseg->pls_list);
249 if (list_empty(&lseg->pls_layout->plh_segs)) {
250 struct nfs_client *clp;
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 }
263 return 0;
264}
265
266static bool
267should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
234{ 268{
235 if (!lseg) 269 return (recall_iomode == IOMODE_ANY ||
236 return; 270 lseg_iomode == recall_iomode);
271}
237 272
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 273/* Returns 1 if lseg is removed from list, 0 otherwise */
239 atomic_read(&lseg->kref.refcount)); 274static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
240 kref_put(&lseg->kref, destroy_lseg); 275 struct list_head *tmp_list)
276{
277 int rv = 0;
278
279 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
280 /* Remove the reference keeping the lseg in the
281 * list. It will now be removed when all
282 * outstanding io is finished.
283 */
284 rv = put_lseg_locked(lseg, tmp_list);
285 }
286 return rv;
241} 287}
242 288
243static void 289/* Returns count of number of matching invalid lsegs remaining in list
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) 290 * after call.
291 */
292int
293mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
294 struct list_head *tmp_list,
295 u32 iomode)
245{ 296{
246 struct pnfs_layout_segment *lseg, *next; 297 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp; 298 int invalid = 0, removed = 0;
248 299
249 dprintk("%s:Begin lo %p\n", __func__, lo); 300 dprintk("%s:Begin lo %p\n", __func__, lo);
250 301
251 assert_spin_locked(&lo->inode->i_lock); 302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { 303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg); 304 dprintk("%s: freeing lseg %p iomode %d "
254 list_move(&lseg->fi_list, tmp_list); 305 "offset %llu length %llu\n", __func__,
255 } 306 lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
256 clp = NFS_SERVER(lo->inode)->nfs_client; 307 lseg->pls_range.length);
257 spin_lock(&clp->cl_lock); 308 invalid++;
258 /* List does not take a reference, so no need for put here */ 309 removed += mark_lseg_invalid(lseg, tmp_list);
259 list_del_init(&lo->layouts); 310 }
260 spin_unlock(&clp->cl_lock); 311 dprintk("%s:Return %i\n", __func__, invalid - removed);
261 write_seqlock(&lo->seqlock); 312 return invalid - removed;
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266} 313}
267 314
268static void 315void
269pnfs_free_lseg_list(struct list_head *tmp_list) 316pnfs_free_lseg_list(struct list_head *free_me)
270{ 317{
271 struct pnfs_layout_segment *lseg; 318 struct pnfs_layout_segment *lseg, *tmp;
272 319
273 while (!list_empty(tmp_list)) { 320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, 321 list_del(&lseg->pls_list);
275 fi_list); 322 free_lseg(lseg);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 } 323 }
280} 324}
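
The tmp_list comment above describes the locking discipline every caller
in this patch follows: segments are unhooked from the layout under
i_lock, but ->free_lseg() may sleep, so the actual freeing happens only
after the unlock. Distilled into the caller pattern (pnfs_roc() below is
a concrete instance of it):

	LIST_HEAD(tmp_list);

	spin_lock(&ino->i_lock);
	/* detach victims under the lock; nothing is freed yet */
	mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
	spin_unlock(&ino->i_lock);

	/* now safe to call into the layout driver, which may sleep */
	pnfs_free_lseg_list(&tmp_list);
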
281 325
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
288 spin_lock(&nfsi->vfs_inode.i_lock); 332 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout; 333 lo = nfsi->layout;
290 if (lo) { 334 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list); 335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */ 337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo); 338 put_layout_hdr_locked(lo);
294 } 339 }
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
312 357
313 while (!list_empty(&tmp_list)) { 358 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 359 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts); 360 plh_layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__, 361 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino); 362 lo->plh_inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode)); 363 pnfs_destroy_layout(NFS_I(lo->plh_inode));
319 } 364 }
320} 365}
321 366
322/* update lo->stateid with new if is more recent 367/* update lo->plh_stateid with new if is more recent */
323 * 368void
324 * lo->stateid could be the open stateid, in which case we just use what given. 369pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
325 */ 370 bool update_barrier)
326static void 371{
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 372 u32 oldseq, newseq;
328 const nfs4_stateid *new) 373
329{ 374 oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
330 nfs4_stateid *old = &lo->stateid; 375 newseq = be32_to_cpu(new->stateid.seqid);
331 bool overwrite = false; 376 if ((int)(newseq - oldseq) > 0) {
332 377 memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
333 write_seqlock(&lo->seqlock); 378 if (update_barrier) {
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || 379 u32 new_barrier = be32_to_cpu(new->stateid.seqid);
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) 380
336 overwrite = true; 381 if ((int)(new_barrier - lo->plh_barrier))
337 else { 382 lo->plh_barrier = new_barrier;
338 u32 oldseq, newseq; 383 } else {
339 384 /* Because of wraparound, we want to keep the barrier
340 oldseq = be32_to_cpu(old->stateid.seqid); 385 * "close" to the current seqids. It needs to be
341 newseq = be32_to_cpu(new->stateid.seqid); 386 * within 2**31 to count as "behind", so if it
 342 if ((int)(newseq - oldseq) > 0) 387 * gets too near that limit, give us a little leeway
343 overwrite = true; 388 * and bring it to within 2**30.
389 * NOTE - and yes, this is all unsigned arithmetic.
390 */
391 if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
392 lo->plh_barrier = newseq - (1 << 30);
393 }
344 } 394 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348} 395}
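
The signed-cast comparison used throughout pnfs_set_layout_stateid() is
the standard serial-number trick: a u32 seqid counts as "more recent"
when the unsigned difference, reinterpreted as a signed int, is positive,
and this stays correct across wraparound as long as the two values are
within 2**31 of each other. A worked example with values straddling the
wrap:

	u32 oldseq = 0xfffffffeU;	/* two steps before wraparound */
	u32 newseq = 0x00000003U;	/* five increments later */

	/* Unsigned subtraction gives 0x00000003 - 0xfffffffe == 5 (mod 2^32),
	 * so the signed cast yields +5 and newseq is correctly treated as
	 * newer, even though newseq < oldseq as plain unsigned values. */
	BUG_ON((int)(newseq - oldseq) <= 0);
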
349 396
350static void 397/* lget is set to 1 if called from inside send_layoutget call chain */
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, 398static bool
352 struct nfs4_state *state) 399pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
400 int lget)
353{ 401{
354 int seq; 402 if ((stateid) &&
355 403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
356 dprintk("--> %s\n", __func__); 404 return true;
357 write_seqlock(&lo->seqlock); 405 return lo->plh_block_lgets ||
358 do { 406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
359 seq = read_seqbegin(&state->seqlock); 407 (list_empty(&lo->plh_segs) &&
360 memcpy(lo->stateid.data, state->stateid.data, 408 (atomic_read(&lo->plh_outstanding) > lget));
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366} 409}
367 410
368void 411int
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 412pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state) 413 struct nfs4_state *open_state)
371{ 414{
372 int seq; 415 int status = 0;
373 416
374 dprintk("--> %s\n", __func__); 417 dprintk("--> %s\n", __func__);
375 do { 418 spin_lock(&lo->plh_inode->i_lock);
376 seq = read_seqbegin(&lo->seqlock); 419 if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { 420 status = -EAGAIN;
378 /* This will trigger retry of the read */ 421 } else if (list_empty(&lo->plh_segs)) {
379 pnfs_layout_from_open_stateid(lo, open_state); 422 int seq;
380 } else 423
381 memcpy(dst->data, lo->stateid.data, 424 do {
382 sizeof(lo->stateid.data)); 425 seq = read_seqbegin(&open_state->seqlock);
383 } while (read_seqretry(&lo->seqlock, seq)); 426 memcpy(dst->data, open_state->stateid.data,
427 sizeof(open_state->stateid.data));
428 } while (read_seqretry(&open_state->seqlock, seq));
429 } else
430 memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
431 spin_unlock(&lo->plh_inode->i_lock);
384 dprintk("<-- %s\n", __func__); 432 dprintk("<-- %s\n", __func__);
433 return status;
385} 434}
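
The do/while loop in pnfs_choose_layoutget_stateid() is the seqlock
reader idiom: copy the open stateid, then retry if read_seqretry()
reports that a writer raced with the copy. In isolation (a sketch; the
field names are those used above):

	static void copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
	{
		unsigned int seq;

		do {
			seq = read_seqbegin(&state->seqlock);
			memcpy(dst->data, state->stateid.data, sizeof(dst->data));
		} while (read_seqretry(&state->seqlock, seq));
	}
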
386 435
387/* 436/*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx, 444 struct nfs_open_context *ctx,
396 u32 iomode) 445 u32 iomode)
397{ 446{
398 struct inode *ino = lo->inode; 447 struct inode *ino = lo->plh_inode;
399 struct nfs_server *server = NFS_SERVER(ino); 448 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp; 449 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL; 450 struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
404 453
405 BUG_ON(ctx == NULL); 454 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 455 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) { 456 if (lgp == NULL)
408 put_layout_hdr(lo->inode);
409 return NULL; 457 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64; 458 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 459 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode; 460 lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
424 nfs4_proc_layoutget(lgp); 471 nfs4_proc_layoutget(lgp);
425 if (!lseg) { 472 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */ 473 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state); 474 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
428 } 475 }
429 return lseg; 476 return lseg;
430} 477}
431 478
479bool pnfs_roc(struct inode *ino)
480{
481 struct pnfs_layout_hdr *lo;
482 struct pnfs_layout_segment *lseg, *tmp;
483 LIST_HEAD(tmp_list);
484 bool found = false;
485
486 spin_lock(&ino->i_lock);
487 lo = NFS_I(ino)->layout;
488 if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
489 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
490 goto out_nolayout;
491 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
492 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
493 mark_lseg_invalid(lseg, &tmp_list);
494 found = true;
495 }
496 if (!found)
497 goto out_nolayout;
498 lo->plh_block_lgets++;
499 get_layout_hdr(lo); /* matched in pnfs_roc_release */
500 spin_unlock(&ino->i_lock);
501 pnfs_free_lseg_list(&tmp_list);
502 return true;
503
504out_nolayout:
505 spin_unlock(&ino->i_lock);
506 return false;
507}
508
509void pnfs_roc_release(struct inode *ino)
510{
511 struct pnfs_layout_hdr *lo;
512
513 spin_lock(&ino->i_lock);
514 lo = NFS_I(ino)->layout;
515 lo->plh_block_lgets--;
516 put_layout_hdr_locked(lo);
517 spin_unlock(&ino->i_lock);
518}
519
520void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
521{
522 struct pnfs_layout_hdr *lo;
523
524 spin_lock(&ino->i_lock);
525 lo = NFS_I(ino)->layout;
526 if ((int)(barrier - lo->plh_barrier) > 0)
527 lo->plh_barrier = barrier;
528 spin_unlock(&ino->i_lock);
529}
530
531bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
532{
533 struct nfs_inode *nfsi = NFS_I(ino);
534 struct pnfs_layout_segment *lseg;
535 bool found = false;
536
537 spin_lock(&ino->i_lock);
538 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
539 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
540 found = true;
541 break;
542 }
543 if (!found) {
544 struct pnfs_layout_hdr *lo = nfsi->layout;
545 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
546
547 /* Since close does not return a layout stateid for use as
548 * a barrier, we choose the worst-case barrier.
549 */
550 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
551 }
552 spin_unlock(&ino->i_lock);
553 return found;
554}
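
Taken together, pnfs_roc(), pnfs_roc_release(), pnfs_roc_set_barrier()
and pnfs_roc_drain() implement the return-on-close handshake for CLOSE.
A hypothetical caller, shown only to make the intended ordering explicit
(the real wiring lives in the NFSv4 state machine, and a real caller
sleeps instead of spinning):

	void close_roc_sketch(struct inode *ino)
	{
		u32 barrier = 0;

		if (!pnfs_roc(ino))
			return;		/* nothing marked return-on-close */

		/* ...CLOSE is sent to the server at this point... */

		while (pnfs_roc_drain(ino, &barrier))
			;		/* ROC segments still busy; really: wait */

		pnfs_roc_set_barrier(ino, barrier);
		pnfs_roc_release(ino);	/* drops the ref taken in pnfs_roc() */
	}
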
555
432/* 556/*
433 * Compare two layout segments for sorting into layout cache. 557 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those 558 * We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
450 574
451 dprintk("%s:Begin\n", __func__); 575 dprintk("%s:Begin\n", __func__);
452 576
453 assert_spin_locked(&lo->inode->i_lock); 577 assert_spin_locked(&lo->plh_inode->i_lock);
454 if (list_empty(&lo->segs)) { 578 list_for_each_entry(lp, &lo->plh_segs, pls_list) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; 579 if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue; 580 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list); 581 list_add_tail(&lseg->pls_list, &lp->pls_list);
466 dprintk("%s: inserted lseg %p " 582 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before " 583 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n", 584 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode, 585 __func__, lseg, lseg->pls_range.iomode,
470 lseg->range.offset, lseg->range.length, 586 lseg->pls_range.offset, lseg->pls_range.length,
471 lp, lp->range.iomode, lp->range.offset, 587 lp, lp->pls_range.iomode, lp->pls_range.offset,
472 lp->range.length); 588 lp->pls_range.length);
473 found = 1; 589 found = 1;
474 break; 590 break;
475 } 591 }
476 if (!found) { 592 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs); 593 list_add_tail(&lseg->pls_list, &lo->plh_segs);
478 dprintk("%s: inserted lseg %p " 594 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n", 595 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode, 596 __func__, lseg, lseg->pls_range.iomode,
481 lseg->range.offset, lseg->range.length); 597 lseg->pls_range.offset, lseg->pls_range.length);
482 } 598 }
483 get_layout_hdr_locked(lo); 599 get_layout_hdr(lo);
484 600
485 dprintk("%s:Return\n", __func__); 601 dprintk("%s:Return\n", __func__);
486} 602}
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); 609 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo) 610 if (!lo)
495 return NULL; 611 return NULL;
496 lo->refcount = 1; 612 atomic_set(&lo->plh_refcount, 1);
497 INIT_LIST_HEAD(&lo->layouts); 613 INIT_LIST_HEAD(&lo->plh_layouts);
498 INIT_LIST_HEAD(&lo->segs); 614 INIT_LIST_HEAD(&lo->plh_segs);
499 seqlock_init(&lo->seqlock); 615 INIT_LIST_HEAD(&lo->plh_bulk_recall);
500 lo->inode = ino; 616 lo->plh_inode = ino;
501 return lo; 617 return lo;
502} 618}
503 619
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 626 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511 627
512 assert_spin_locked(&ino->i_lock); 628 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout) 629 if (nfsi->layout) {
514 return nfsi->layout; 630 if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
515 631 return NULL;
632 else
633 return nfsi->layout;
634 }
516 spin_unlock(&ino->i_lock); 635 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino); 636 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock); 637 spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
538static int 657static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) 658is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{ 659{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); 660 return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
542} 661}
543 662
544/* 663/*
545 * lookup range in layout 664 * lookup range in layout
546 */ 665 */
547static struct pnfs_layout_segment * 666static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) 667pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
549{ 668{
550 struct pnfs_layout_segment *lseg, *ret = NULL; 669 struct pnfs_layout_segment *lseg, *ret = NULL;
551 670
552 dprintk("%s:Begin\n", __func__); 671 dprintk("%s:Begin\n", __func__);
553 672
554 assert_spin_locked(&lo->inode->i_lock); 673 assert_spin_locked(&lo->plh_inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) { 674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
556 if (is_matching_lseg(lseg, iomode)) { 675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) {
557 ret = lseg; 677 ret = lseg;
558 break; 678 break;
559 } 679 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0) 680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
561 break; 681 break;
562 } 682 }
563 683
564 dprintk("%s:Return lseg %p ref %d\n", 684 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); 685 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
566 return ret; 686 return ret;
567} 687}
568 688
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
576 enum pnfs_iomode iomode) 696 enum pnfs_iomode iomode)
577{ 697{
578 struct nfs_inode *nfsi = NFS_I(ino); 698 struct nfs_inode *nfsi = NFS_I(ino);
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
579 struct pnfs_layout_hdr *lo; 700 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL; 701 struct pnfs_layout_segment *lseg = NULL;
581 702
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
588 goto out_unlock; 709 goto out_unlock;
589 } 710 }
590 711
591 /* Check to see if the layout for the given range already exists */ 712 /* Do we even need to bother with this? */
592 lseg = pnfs_has_layout(lo, iomode); 713 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
593 if (lseg) { 714 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n", 715 dprintk("%s matches recall, use MDS\n", __func__);
595 __func__, lseg, iomode);
596 goto out_unlock; 716 goto out_unlock;
597 } 717 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
598 722
599 /* if LAYOUTGET already failed once we don't try again */ 723 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) 724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock;
726
727 if (pnfs_layoutgets_blocked(lo, NULL, 0))
601 goto out_unlock; 728 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding);
602 730
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ 731 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) {
733 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */
736 spin_lock(&clp->cl_lock);
737 BUG_ON(!list_empty(&lo->plh_layouts));
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock);
740 }
604 spin_unlock(&ino->i_lock); 741 spin_unlock(&ino->i_lock);
605 742
606 lseg = send_layoutget(lo, ctx, iomode); 743 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) {
745 spin_lock(&ino->i_lock);
746 if (list_empty(&lo->plh_segs)) {
747 spin_lock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 }
754 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo);
607out: 756out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 757 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg); 758 nfsi->layout->plh_flags, lseg);
610 return lseg; 759 return lseg;
611out_unlock: 760out_unlock:
612 spin_unlock(&ino->i_lock); 761 spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 768 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res; 769 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg; 770 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode; 771 struct inode *ino = lo->plh_inode;
772 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
623 int status = 0; 773 int status = 0;
624 774
775 /* Verify we got what we asked for.
776 * Note that because the xdr parsing only accepts a single
777 * element array, this can fail even if the server is behaving
778 * correctly.
779 */
780 if (lgp->args.range.iomode > res->range.iomode ||
781 res->range.offset != 0 ||
782 res->range.length != NFS4_MAX_UINT64) {
783 status = -EINVAL;
784 goto out;
785 }
625 /* Inject layout blob into I/O device driver */ 786 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); 787 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) { 788 if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
635 } 796 }
636 797
637 spin_lock(&ino->i_lock); 798 spin_lock(&ino->i_lock);
799 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
800 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
801 dprintk("%s forget reply due to recall\n", __func__);
802 goto out_forget_reply;
803 }
804
805 if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
806 dprintk("%s forget reply due to state\n", __func__);
807 goto out_forget_reply;
808 }
638 init_lseg(lo, lseg); 809 init_lseg(lo, lseg);
639 lseg->range = res->range; 810 lseg->pls_range = res->range;
640 *lgp->lsegpp = lseg; 811 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg); 812 pnfs_insert_layout(lo, lseg);
642 813
814 if (res->return_on_close) {
815 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
816 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
817 }
818
643 /* Done processing layoutget. Set the layout stateid */ 819 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid); 820 pnfs_set_layout_stateid(lo, &res->stateid, false);
645 spin_unlock(&ino->i_lock); 821 spin_unlock(&ino->i_lock);
646out: 822out:
647 return status; 823 return status;
824
825out_forget_reply:
826 spin_unlock(&ino->i_lock);
827 lseg->pls_layout = lo;
828 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
829 goto out;
648} 830}
649 831
650/* 832/*
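The hunks above also switch the segment refcount from a struct kref to a bare
atomic_t (pls_refcount), so segment lifetime is now managed by hand under the
inode's i_lock rather than through kref release callbacks. A minimal sketch of
the get/put pair this implies — the helper names here are hypothetical, not
taken from the patch:

	static void example_get_lseg(struct pnfs_layout_segment *lseg)
	{
		atomic_inc(&lseg->pls_refcount);
	}

	/* Assumes the caller holds the inode's i_lock, as pnfs_find_lseg()
	 * does above; the final teardown is driver-specific (free_lseg). */
	static void example_put_lseg_locked(struct pnfs_layout_segment *lseg)
	{
		if (atomic_dec_and_test(&lseg->pls_refcount))
			list_del_init(&lseg->pls_list);
	}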
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e12367d50489..e2612ea0cbed 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,11 +30,17 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */
36};
37
33struct pnfs_layout_segment { 38struct pnfs_layout_segment {
34 struct list_head fi_list; 39 struct list_head pls_list;
35 struct pnfs_layout_range range; 40 struct pnfs_layout_range pls_range;
36 struct kref kref; 41 atomic_t pls_refcount;
37 struct pnfs_layout_hdr *layout; 42 unsigned long pls_flags;
43 struct pnfs_layout_hdr *pls_layout;
38}; 44};
39 45
40#ifdef CONFIG_NFS_V4_1 46#ifdef CONFIG_NFS_V4_1
@@ -44,7 +50,9 @@ struct pnfs_layout_segment {
44enum { 50enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ 51 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 52 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ 53 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
54 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
55 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
48}; 56};
49 57
50/* Per-layout driver specific registration structure */ 58/* Per-layout driver specific registration structure */
@@ -60,13 +68,16 @@ struct pnfs_layoutdriver_type {
60}; 68};
61 69
62struct pnfs_layout_hdr { 70struct pnfs_layout_hdr {
63 unsigned long refcount; 71 atomic_t plh_refcount;
64 struct list_head layouts; /* other client layouts */ 72 struct list_head plh_layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */ 73 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
66 seqlock_t seqlock; /* Protects the stateid */ 74 struct list_head plh_segs; /* layout segments list */
67 nfs4_stateid stateid; 75 nfs4_stateid plh_stateid;
68 unsigned long state; 76 atomic_t plh_outstanding; /* number of RPCs out */
69 struct inode *inode; 77 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
78 u32 plh_barrier; /* ignore lower seqids */
79 unsigned long plh_flags;
80 struct inode *plh_inode;
70}; 81};
71 82
72struct pnfs_device { 83struct pnfs_device {
@@ -134,17 +145,30 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); 145extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135 146
136/* pnfs.c */ 147/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo);
137struct pnfs_layout_segment * 149struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type); 151 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 152void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *); 153void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp); 154int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list);
143void pnfs_destroy_layout(struct nfs_inode *); 156void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *); 157void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode); 158void put_layout_hdr(struct pnfs_layout_hdr *lo);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 159void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state); 160 const nfs4_stateid *new,
161 bool update_barrier);
162int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
163 struct pnfs_layout_hdr *lo,
164 struct nfs4_state *open_state);
165int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
166 struct list_head *tmp_list,
167 u32 iomode);
168bool pnfs_roc(struct inode *ino);
169void pnfs_roc_release(struct inode *ino);
170void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
171bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 172
149 173
150static inline int lo_fail_bit(u32 iomode) 174static inline int lo_fail_bit(u32 iomode)
@@ -176,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
176 return NULL; 200 return NULL;
177} 201}
178 202
203static inline bool
204pnfs_roc(struct inode *ino)
205{
206 return false;
207}
208
209static inline void
210pnfs_roc_release(struct inode *ino)
211{
212}
213
214static inline void
215pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
216{
217}
218
219static inline bool
220pnfs_roc_drain(struct inode *ino, u32 *barrier)
221{
222 return false;
223}
224
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) 225static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{ 226{
181} 227}
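The !CONFIG_NFS_V4_1 stubs above let generic NFS code call the pnfs_roc*()
entry points unconditionally: when pNFS is compiled out, the inline bodies
collapse to constants and the compiler discards the branches. A hedged sketch
of a caller that builds in both configurations (example_close() is
illustrative, not from the patch):

	static void example_close(struct inode *inode)
	{
		/* With CONFIG_NFS_V4_1 unset this resolves to the inline
		 * stub, pnfs_roc() returns false, and the branch is dead
		 * code — no #ifdef needed at the call site. */
		if (pnfs_roc(inode)) {
			/* layoutreturn-on-close handling (v4.1 only) */
			return;
		}
		/* ordinary CLOSE path */
	}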
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58e7f84fc1fd..77d5e21c4ad6 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -458,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
458 fattr = nfs_alloc_fattr(); 458 fattr = nfs_alloc_fattr();
459 status = -ENOMEM; 459 status = -ENOMEM;
460 if (fh == NULL || fattr == NULL) 460 if (fh == NULL || fattr == NULL)
461 goto out; 461 goto out_free;
462 462
463 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 463 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
464 nfs_mark_for_revalidate(dir); 464 nfs_mark_for_revalidate(dir);
@@ -471,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
471 if (status == 0) 471 if (status == 0)
472 status = nfs_instantiate(dentry, fh, fattr); 472 status = nfs_instantiate(dentry, fh, fattr);
473 473
474out_free:
474 nfs_free_fattr(fattr); 475 nfs_free_fattr(fattr);
475 nfs_free_fhandle(fh); 476 nfs_free_fhandle(fh);
476out: 477out:
@@ -731,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
731 .statfs = nfs_proc_statfs, 732 .statfs = nfs_proc_statfs,
732 .fsinfo = nfs_proc_fsinfo, 733 .fsinfo = nfs_proc_fsinfo,
733 .pathconf = nfs_proc_pathconf, 734 .pathconf = nfs_proc_pathconf,
734 .decode_dirent = nfs_decode_dirent, 735 .decode_dirent = nfs2_decode_dirent,
735 .read_setup = nfs_proc_read_setup, 736 .read_setup = nfs_proc_read_setup,
736 .read_done = nfs_read_done, 737 .read_done = nfs_read_done,
737 .write_setup = nfs_proc_write_setup, 738 .write_setup = nfs_proc_write_setup,
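The nfs_proc_symlink() fix above is the classic two-label unwind: jumping to
the plain `out` label on allocation failure skipped the free calls, leaking
whichever of the two objects had been allocated. A minimal sketch of the
corrected shape (illustrative names, not the real function body):

	static int example_op(void)
	{
		void *fh = kmalloc(16, GFP_KERNEL);
		void *fattr = kmalloc(32, GFP_KERNEL);
		int status = -ENOMEM;

		if (fh == NULL || fattr == NULL)
			goto out_free;	/* kfree(NULL) is a no-op, so
					 * freeing both is always safe */
		status = 0;		/* ... do the real work here ... */
	out_free:
		kfree(fattr);
		kfree(fh);
		return status;
	}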
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4100630c9a5b..0f9ea73e7789 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -598,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
598 598
599 if (nfss->mountd_version || showdefaults) 599 if (nfss->mountd_version || showdefaults)
600 seq_printf(m, ",mountvers=%u", nfss->mountd_version); 600 seq_printf(m, ",mountvers=%u", nfss->mountd_version);
601 if (nfss->mountd_port || showdefaults) 601 if ((nfss->mountd_port &&
602 nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) ||
603 showdefaults)
602 seq_printf(m, ",mountport=%u", nfss->mountd_port); 604 seq_printf(m, ",mountport=%u", nfss->mountd_port);
603 605
604 nfs_show_mountd_netid(m, nfss, showdefaults); 606 nfs_show_mountd_netid(m, nfss, showdefaults);
@@ -2494,7 +2496,13 @@ static void nfs4_clone_super(struct super_block *sb,
2494 sb->s_maxbytes = old_sb->s_maxbytes; 2496 sb->s_maxbytes = old_sb->s_maxbytes;
2495 sb->s_time_gran = 1; 2497 sb->s_time_gran = 1;
2496 sb->s_op = old_sb->s_op; 2498 sb->s_op = old_sb->s_op;
2497 nfs_initialise_sb(sb); 2499 /*
2500 * The VFS shouldn't apply the umask to mode bits. We will do
2501 * so ourselves when necessary.
2502 */
2503 sb->s_flags |= MS_POSIXACL;
2504 sb->s_xattr = old_sb->s_xattr;
2505 nfs_initialise_sb(sb);
2498} 2506}
2499 2507
2500/* 2508/*
@@ -2504,6 +2512,12 @@ static void nfs4_fill_super(struct super_block *sb)
2504{ 2512{
2505 sb->s_time_gran = 1; 2513 sb->s_time_gran = 1;
2506 sb->s_op = &nfs4_sops; 2514 sb->s_op = &nfs4_sops;
2515 /*
2516 * The VFS shouldn't apply the umask to mode bits. We will do
2517 * so ourselves when necessary.
2518 */
2519 sb->s_flags |= MS_POSIXACL;
2520 sb->s_xattr = nfs4_xattr_handlers;
2507 nfs_initialise_sb(sb); 2521 nfs_initialise_sb(sb);
2508} 2522}
2509 2523
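Setting MS_POSIXACL here works because the VFS only applies the process umask
to create modes when the superblock lacks that flag; with it set, the
filesystem applies the umask itself once it knows whether a default ACL
supplies the permissions. Roughly the check the VFS performs at create time
(paraphrased from the fs/namei.c pattern of this era, not part of this patch):

	if (!IS_POSIXACL(dir->d_inode))
		mode &= ~current_umask();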
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8fe9eb47a97f..e313a51acdd1 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -429,7 +429,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
429 data = kzalloc(sizeof(*data), GFP_KERNEL); 429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL) 430 if (data == NULL)
431 return ERR_PTR(-ENOMEM); 431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data, 432 task_setup_data.callback_data = data;
433 433
434 data->cred = rpc_lookup_cred(); 434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) { 435 if (IS_ERR(data->cred)) {
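The one-character unlink.c fix replaces a comma operator with a semicolon.
The original compiled cleanly because `a = b, c;` is a single legal
expression, which is exactly why the typo survived review. A tiny standalone
illustration (not from the patch):

	int x;

	x = 1,		/* comma operator: still one statement ... */
	x = 2;		/* ... so both assignments run; harmless here, but
			 * it silently splices in whatever the next line
			 * happens to be */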
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 143da2eecd7b..21a63da305ff 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -50,11 +50,6 @@ enum {
50 NFSPROC4_CLNT_CB_SEQUENCE, 50 NFSPROC4_CLNT_CB_SEQUENCE,
51}; 51};
52 52
53enum nfs_cb_opnum4 {
54 OP_CB_RECALL = 4,
55 OP_CB_SEQUENCE = 11,
56};
57
58#define NFS4_MAXTAGLEN 20 53#define NFS4_MAXTAGLEN 20
59 54
60#define NFS4_enc_cb_null_sz 0 55#define NFS4_enc_cb_null_sz 0
@@ -79,61 +74,6 @@ enum nfs_cb_opnum4 {
79 cb_sequence_dec_sz + \ 74 cb_sequence_dec_sz + \
80 op_dec_sz) 75 op_dec_sz)
81 76
82/*
83* Generic encode routines from fs/nfs/nfs4xdr.c
84*/
85static inline __be32 *
86xdr_writemem(__be32 *p, const void *ptr, int nbytes)
87{
88 int tmp = XDR_QUADLEN(nbytes);
89 if (!tmp)
90 return p;
91 p[tmp-1] = 0;
92 memcpy(p, ptr, nbytes);
93 return p + tmp;
94}
95
96#define WRITE32(n) *p++ = htonl(n)
97#define WRITEMEM(ptr,nbytes) do { \
98 p = xdr_writemem(p, ptr, nbytes); \
99} while (0)
100#define RESERVE_SPACE(nbytes) do { \
101 p = xdr_reserve_space(xdr, nbytes); \
102 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
103 BUG_ON(!p); \
104} while (0)
105
106/*
107 * Generic decode routines from fs/nfs/nfs4xdr.c
108 */
109#define DECODE_TAIL \
110 status = 0; \
111out: \
112 return status; \
113xdr_error: \
114 dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
115 status = -EIO; \
116 goto out
117
118#define READ32(x) (x) = ntohl(*p++)
119#define READ64(x) do { \
120 (x) = (u64)ntohl(*p++) << 32; \
121 (x) |= ntohl(*p++); \
122} while (0)
123#define READTIME(x) do { \
124 p++; \
125 (x.tv_sec) = ntohl(*p++); \
126 (x.tv_nsec) = ntohl(*p++); \
127} while (0)
128#define READ_BUF(nbytes) do { \
129 p = xdr_inline_decode(xdr, nbytes); \
130 if (!p) { \
131 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
132 __func__, __LINE__); \
133 return -EIO; \
134 } \
135} while (0)
136
137struct nfs4_cb_compound_hdr { 77struct nfs4_cb_compound_hdr {
138 /* args */ 78 /* args */
139 u32 ident; /* minorversion 0 only */ 79 u32 ident; /* minorversion 0 only */
@@ -144,295 +84,513 @@ struct nfs4_cb_compound_hdr {
144 int status; 84 int status;
145}; 85};
146 86
147static struct { 87/*
148int stat; 88 * Handle decode buffer overflows out-of-line.
149int errno; 89 */
150} nfs_cb_errtbl[] = { 90static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
151 { NFS4_OK, 0 }, 91{
152 { NFS4ERR_PERM, EPERM }, 92 dprintk("NFS: %s prematurely hit the end of our receive buffer. "
153 { NFS4ERR_NOENT, ENOENT }, 93 "Remaining buffer length is %tu words.\n",
154 { NFS4ERR_IO, EIO }, 94 func, xdr->end - xdr->p);
155 { NFS4ERR_NXIO, ENXIO }, 95}
156 { NFS4ERR_ACCESS, EACCES },
157 { NFS4ERR_EXIST, EEXIST },
158 { NFS4ERR_XDEV, EXDEV },
159 { NFS4ERR_NOTDIR, ENOTDIR },
160 { NFS4ERR_ISDIR, EISDIR },
161 { NFS4ERR_INVAL, EINVAL },
162 { NFS4ERR_FBIG, EFBIG },
163 { NFS4ERR_NOSPC, ENOSPC },
164 { NFS4ERR_ROFS, EROFS },
165 { NFS4ERR_MLINK, EMLINK },
166 { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
167 { NFS4ERR_NOTEMPTY, ENOTEMPTY },
168 { NFS4ERR_DQUOT, EDQUOT },
169 { NFS4ERR_STALE, ESTALE },
170 { NFS4ERR_BADHANDLE, EBADHANDLE },
171 { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
172 { NFS4ERR_NOTSUPP, ENOTSUPP },
173 { NFS4ERR_TOOSMALL, ETOOSMALL },
174 { NFS4ERR_SERVERFAULT, ESERVERFAULT },
175 { NFS4ERR_BADTYPE, EBADTYPE },
176 { NFS4ERR_LOCKED, EAGAIN },
177 { NFS4ERR_RESOURCE, EREMOTEIO },
178 { NFS4ERR_SYMLINK, ELOOP },
179 { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
180 { NFS4ERR_DEADLOCK, EDEADLK },
181 { -1, EIO }
182};
183 96
184static int 97static __be32 *xdr_encode_empty_array(__be32 *p)
185nfs_cb_stat_to_errno(int stat)
186{ 98{
187 int i; 99 *p++ = xdr_zero;
188 for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { 100 return p;
189 if (nfs_cb_errtbl[i].stat == stat)
190 return nfs_cb_errtbl[i].errno;
191 }
192 /* If we cannot translate the error, the recovery routines should
193 * handle it.
194 * Note: remaining NFSv4 error codes have values > 10000, so should
195 * not conflict with native Linux error codes.
196 */
197 return stat;
198} 101}
199 102
200/* 103/*
201 * XDR encode 104 * Encode/decode NFSv4 CB basic data types
105 *
106 * Basic NFSv4 callback data types are defined in section 15 of RFC
107 * 3530: "Network File System (NFS) version 4 Protocol" and section
108 * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
109 * 1 Protocol"
110 */
111
112/*
113 * nfs_cb_opnum4
114 *
115 * enum nfs_cb_opnum4 {
116 * OP_CB_GETATTR = 3,
117 * ...
118 * };
202 */ 119 */
120enum nfs_cb_opnum4 {
121 OP_CB_GETATTR = 3,
122 OP_CB_RECALL = 4,
123 OP_CB_LAYOUTRECALL = 5,
124 OP_CB_NOTIFY = 6,
125 OP_CB_PUSH_DELEG = 7,
126 OP_CB_RECALL_ANY = 8,
127 OP_CB_RECALLABLE_OBJ_AVAIL = 9,
128 OP_CB_RECALL_SLOT = 10,
129 OP_CB_SEQUENCE = 11,
130 OP_CB_WANTS_CANCELLED = 12,
131 OP_CB_NOTIFY_LOCK = 13,
132 OP_CB_NOTIFY_DEVICEID = 14,
133 OP_CB_ILLEGAL = 10044
134};
203 135
204static void 136static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
205encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
206{ 137{
207 __be32 *p; 138 __be32 *p;
208 139
209 RESERVE_SPACE(sizeof(stateid_t)); 140 p = xdr_reserve_space(xdr, 4);
210 WRITE32(sid->si_generation); 141 *p = cpu_to_be32(op);
211 WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
212} 142}
213 143
214static void 144/*
215encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) 145 * nfs_fh4
146 *
147 * typedef opaque nfs_fh4<NFS4_FHSIZE>;
148 */
149static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
216{ 150{
217 __be32 * p; 151 u32 length = fh->fh_size;
152 __be32 *p;
218 153
219 RESERVE_SPACE(16); 154 BUG_ON(length > NFS4_FHSIZE);
220 WRITE32(0); /* tag length is always 0 */ 155 p = xdr_reserve_space(xdr, 4 + length);
221 WRITE32(hdr->minorversion); 156 xdr_encode_opaque(p, &fh->fh_base, length);
222 WRITE32(hdr->ident);
223 hdr->nops_p = p;
224 WRITE32(hdr->nops);
225} 157}
226 158
227static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) 159/*
160 * stateid4
161 *
162 * struct stateid4 {
163 * uint32_t seqid;
164 * opaque other[12];
165 * };
166 */
167static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
228{ 168{
229 *hdr->nops_p = htonl(hdr->nops); 169 __be32 *p;
170
171 p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
172 *p++ = cpu_to_be32(sid->si_generation);
173 xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
230} 174}
231 175
232static void 176/*
233encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, 177 * sessionid4
234 struct nfs4_cb_compound_hdr *hdr) 178 *
179 * typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
180 */
181static void encode_sessionid4(struct xdr_stream *xdr,
182 const struct nfsd4_session *session)
235{ 183{
236 __be32 *p; 184 __be32 *p;
237 int len = dp->dl_fh.fh_size; 185
238 186 p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
239 RESERVE_SPACE(4); 187 xdr_encode_opaque_fixed(p, session->se_sessionid.data,
240 WRITE32(OP_CB_RECALL); 188 NFS4_MAX_SESSIONID_LEN);
241 encode_stateid(xdr, &dp->dl_stateid);
242 RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
243 WRITE32(0); /* truncate optimization not implemented */
244 WRITE32(len);
245 WRITEMEM(&dp->dl_fh.fh_base, len);
246 hdr->nops++;
247} 189}
248 190
249static void 191/*
250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, 192 * nfsstat4
251 struct nfs4_cb_compound_hdr *hdr) 193 */
252{ 194static const struct {
253 __be32 *p; 195 int stat;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; 196 int errno;
197} nfs_cb_errtbl[] = {
198 { NFS4_OK, 0 },
199 { NFS4ERR_PERM, -EPERM },
200 { NFS4ERR_NOENT, -ENOENT },
201 { NFS4ERR_IO, -EIO },
202 { NFS4ERR_NXIO, -ENXIO },
203 { NFS4ERR_ACCESS, -EACCES },
204 { NFS4ERR_EXIST, -EEXIST },
205 { NFS4ERR_XDEV, -EXDEV },
206 { NFS4ERR_NOTDIR, -ENOTDIR },
207 { NFS4ERR_ISDIR, -EISDIR },
208 { NFS4ERR_INVAL, -EINVAL },
209 { NFS4ERR_FBIG, -EFBIG },
210 { NFS4ERR_NOSPC, -ENOSPC },
211 { NFS4ERR_ROFS, -EROFS },
212 { NFS4ERR_MLINK, -EMLINK },
213 { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
214 { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
215 { NFS4ERR_DQUOT, -EDQUOT },
216 { NFS4ERR_STALE, -ESTALE },
217 { NFS4ERR_BADHANDLE, -EBADHANDLE },
218 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
219 { NFS4ERR_NOTSUPP, -ENOTSUPP },
220 { NFS4ERR_TOOSMALL, -ETOOSMALL },
221 { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
222 { NFS4ERR_BADTYPE, -EBADTYPE },
223 { NFS4ERR_LOCKED, -EAGAIN },
224 { NFS4ERR_RESOURCE, -EREMOTEIO },
225 { NFS4ERR_SYMLINK, -ELOOP },
226 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
227 { NFS4ERR_DEADLOCK, -EDEADLK },
228 { -1, -EIO }
229};
255 230
256 if (hdr->minorversion == 0) 231/*
257 return; 232 * If we cannot translate the error, the recovery routines should
233 * handle it.
234 *
235 * Note: remaining NFSv4 error codes have values > 10000, so should
236 * not conflict with native Linux error codes.
237 */
238static int nfs_cb_stat_to_errno(int status)
239{
240 int i;
258 241
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 242 for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
243 if (nfs_cb_errtbl[i].stat == status)
244 return nfs_cb_errtbl[i].errno;
245 }
260 246
261 WRITE32(OP_CB_SEQUENCE); 247 dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); 248 return -status;
263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */
267 WRITE32(0); /* FIXME: support referring_call_lists */
268 hdr->nops++;
269} 249}
270 250
271static int 251static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
272nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 252 enum nfsstat4 *status)
273{ 253{
274 struct xdr_stream xdrs, *xdr = &xdrs; 254 __be32 *p;
255 u32 op;
275 256
276 xdr_init_encode(&xdrs, &req->rq_snd_buf, p); 257 p = xdr_inline_decode(xdr, 4 + 4);
277 RESERVE_SPACE(0); 258 if (unlikely(p == NULL))
259 goto out_overflow;
260 op = be32_to_cpup(p++);
261 if (unlikely(op != expected))
262 goto out_unexpected;
263 *status = be32_to_cpup(p);
278 return 0; 264 return 0;
265out_overflow:
266 print_overflow_msg(__func__, xdr);
267 return -EIO;
268out_unexpected:
269 dprintk("NFSD: Callback server returned operation %d but "
270 "we issued a request for %d\n", op, expected);
271 return -EIO;
279} 272}
280 273
281static int 274/*
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 275 * CB_COMPOUND4args
283 struct nfsd4_callback *cb) 276 *
277 * struct CB_COMPOUND4args {
278 * utf8str_cs tag;
279 * uint32_t minorversion;
280 * uint32_t callback_ident;
281 * nfs_cb_argop4 argarray<>;
282 * };
283*/
284static void encode_cb_compound4args(struct xdr_stream *xdr,
285 struct nfs4_cb_compound_hdr *hdr)
284{ 286{
285 struct xdr_stream xdr; 287 __be32 *p;
286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = cb->cb_minorversion,
290 };
291 288
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 289 p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
293 encode_cb_compound_hdr(&xdr, &hdr); 290 p = xdr_encode_empty_array(p); /* empty tag */
294 encode_cb_sequence(&xdr, cb, &hdr); 291 *p++ = cpu_to_be32(hdr->minorversion);
295 encode_cb_recall(&xdr, args, &hdr); 292 *p++ = cpu_to_be32(hdr->ident);
296 encode_cb_nops(&hdr); 293
294 hdr->nops_p = p;
295 *p = cpu_to_be32(hdr->nops); /* argarray element count */
296}
297
298/*
299 * Update argarray element count
300 */
301static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
302{
303 BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
304 *hdr->nops_p = cpu_to_be32(hdr->nops);
305}
306
307/*
308 * CB_COMPOUND4res
309 *
310 * struct CB_COMPOUND4res {
311 * nfsstat4 status;
312 * utf8str_cs tag;
313 * nfs_cb_resop4 resarray<>;
314 * };
315 */
316static int decode_cb_compound4res(struct xdr_stream *xdr,
317 struct nfs4_cb_compound_hdr *hdr)
318{
319 u32 length;
320 __be32 *p;
321
322 p = xdr_inline_decode(xdr, 4 + 4);
323 if (unlikely(p == NULL))
324 goto out_overflow;
325 hdr->status = be32_to_cpup(p++);
326 /* Ignore the tag */
327 length = be32_to_cpup(p++);
328 p = xdr_inline_decode(xdr, length + 4);
329 if (unlikely(p == NULL))
330 goto out_overflow;
331 hdr->nops = be32_to_cpup(p);
297 return 0; 332 return 0;
333out_overflow:
334 print_overflow_msg(__func__, xdr);
335 return -EIO;
298} 336}
299 337
338/*
339 * CB_RECALL4args
340 *
341 * struct CB_RECALL4args {
342 * stateid4 stateid;
343 * bool truncate;
344 * nfs_fh4 fh;
345 * };
346 */
347static void encode_cb_recall4args(struct xdr_stream *xdr,
348 const struct nfs4_delegation *dp,
349 struct nfs4_cb_compound_hdr *hdr)
350{
351 __be32 *p;
352
353 encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
354 encode_stateid4(xdr, &dp->dl_stateid);
355
356 p = xdr_reserve_space(xdr, 4);
357 *p++ = xdr_zero; /* truncate */
300 358
301static int 359 encode_nfs_fh4(xdr, &dp->dl_fh);
302decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
303 __be32 *p;
304 u32 taglen;
305 360
306 READ_BUF(8); 361 hdr->nops++;
307 READ32(hdr->status);
308 /* We've got no use for the tag; ignore it: */
309 READ32(taglen);
310 READ_BUF(taglen + 4);
311 p += XDR_QUADLEN(taglen);
312 READ32(hdr->nops);
313 return 0;
314} 362}
315 363
316static int 364/*
317decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 365 * CB_SEQUENCE4args
366 *
367 * struct CB_SEQUENCE4args {
368 * sessionid4 csa_sessionid;
369 * sequenceid4 csa_sequenceid;
370 * slotid4 csa_slotid;
371 * slotid4 csa_highest_slotid;
372 * bool csa_cachethis;
373 * referring_call_list4 csa_referring_call_lists<>;
374 * };
375 */
376static void encode_cb_sequence4args(struct xdr_stream *xdr,
377 const struct nfsd4_callback *cb,
378 struct nfs4_cb_compound_hdr *hdr)
318{ 379{
380 struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
319 __be32 *p; 381 __be32 *p;
320 u32 op; 382
321 int32_t nfserr; 383 if (hdr->minorversion == 0)
322 384 return;
323 READ_BUF(8); 385
324 READ32(op); 386 encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
325 if (op != expected) { 387 encode_sessionid4(xdr, session);
326 dprintk("NFSD: decode_cb_op_hdr: Callback server returned " 388
327 " operation %d but we issued a request for %d\n", 389 p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
328 op, expected); 390 *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
329 return -EIO; 391 *p++ = xdr_zero; /* csa_slotid */
330 } 392 *p++ = xdr_zero; /* csa_highest_slotid */
331 READ32(nfserr); 393 *p++ = xdr_zero; /* csa_cachethis */
332 if (nfserr != NFS_OK) 394 xdr_encode_empty_array(p); /* csa_referring_call_lists */
333 return -nfs_cb_stat_to_errno(nfserr); 395
334 return 0; 396 hdr->nops++;
335} 397}
336 398
337/* 399/*
400 * CB_SEQUENCE4resok
401 *
402 * struct CB_SEQUENCE4resok {
403 * sessionid4 csr_sessionid;
404 * sequenceid4 csr_sequenceid;
405 * slotid4 csr_slotid;
406 * slotid4 csr_highest_slotid;
407 * slotid4 csr_target_highest_slotid;
408 * };
409 *
410 * union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
411 * case NFS4_OK:
412 * CB_SEQUENCE4resok csr_resok4;
413 * default:
414 * void;
415 * };
416 *
338 * Our current back channel implementation supports a single backchannel 417
339 * with a single slot. 418 * with a single slot.
340 */ 419 */
341static int 420static int decode_cb_sequence4resok(struct xdr_stream *xdr,
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, 421 struct nfsd4_callback *cb)
343 struct rpc_rqst *rqstp)
344{ 422{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; 423 struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
346 struct nfs4_sessionid id; 424 struct nfs4_sessionid id;
347 int status; 425 int status;
348 u32 dummy;
349 __be32 *p; 426 __be32 *p;
427 u32 dummy;
350 428
351 if (cb->cb_minorversion == 0) 429 status = -ESERVERFAULT;
352 return 0;
353
354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
355 if (status)
356 return status;
357 430
358 /* 431 /*
359 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
360 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
361 */ 434 */
362 status = -ESERVERFAULT; 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
363 436 if (unlikely(p == NULL))
364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 437 goto out_overflow;
365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 439 if (memcmp(id.data, session->se_sessionid.data,
367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { 440 NFS4_MAX_SESSIONID_LEN) != 0) {
368 dprintk("%s Invalid session id\n", __func__); 441 dprintk("NFS: %s Invalid session id\n", __func__);
369 goto out; 442 goto out;
370 } 443 }
371 READ32(dummy); 444 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
372 if (dummy != ses->se_cb_seq_nr) { 445
373 dprintk("%s Invalid sequence number\n", __func__); 446 dummy = be32_to_cpup(p++);
447 if (dummy != session->se_cb_seq_nr) {
448 dprintk("NFS: %s Invalid sequence number\n", __func__);
374 goto out; 449 goto out;
375 } 450 }
376 READ32(dummy); /* slotid must be 0 */ 451
452 dummy = be32_to_cpup(p++);
377 if (dummy != 0) { 453 if (dummy != 0) {
378 dprintk("%s Invalid slotid\n", __func__); 454 dprintk("NFS: %s Invalid slotid\n", __func__);
379 goto out; 455 goto out;
380 } 456 }
381 /* FIXME: process highest slotid and target highest slotid */ 457
458 /*
459 * FIXME: process highest slotid and target highest slotid
460 */
382 status = 0; 461 status = 0;
383out: 462out:
384 return status; 463 return status;
464out_overflow:
465 print_overflow_msg(__func__, xdr);
466 return -EIO;
385} 467}
386 468
469static int decode_cb_sequence4res(struct xdr_stream *xdr,
470 struct nfsd4_callback *cb)
471{
472 enum nfsstat4 nfserr;
473 int status;
474
475 if (cb->cb_minorversion == 0)
476 return 0;
477
478 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
479 if (unlikely(status))
480 goto out;
481 if (unlikely(nfserr != NFS4_OK))
482 goto out_default;
483 status = decode_cb_sequence4resok(xdr, cb);
484out:
485 return status;
486out_default:
487 return nfs_cb_stat_to_errno(nfserr);
488}
387 489
388static int 490/*
389nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 491 * NFSv4.0 and NFSv4.1 XDR encode functions
492 *
493 * NFSv4.0 callback argument types are defined in section 15 of RFC
494 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
495 * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
496 * Protocol".
497 */
498
499/*
500 * NB: Without this zero space reservation, callbacks over krb5p fail
501 */
502static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
503 void *__unused)
504{
505 xdr_reserve_space(xdr, 0);
506}
507
508/*
509 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
510 */
511static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
512 const struct nfsd4_callback *cb)
513{
514 const struct nfs4_delegation *args = cb->cb_op;
515 struct nfs4_cb_compound_hdr hdr = {
516 .ident = cb->cb_clp->cl_cb_ident,
517 .minorversion = cb->cb_minorversion,
518 };
519
520 encode_cb_compound4args(xdr, &hdr);
521 encode_cb_sequence4args(xdr, cb, &hdr);
522 encode_cb_recall4args(xdr, args, &hdr);
523 encode_cb_nops(&hdr);
524}
525
526
527/*
528 * NFSv4.0 and NFSv4.1 XDR decode functions
529 *
530 * NFSv4.0 callback result types are defined in section 15 of RFC
531 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
532 * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
533 * Protocol".
534 */
535
536static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
537 void *__unused)
390{ 538{
391 return 0; 539 return 0;
392} 540}
393 541
394static int 542/*
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 543 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
396 struct nfsd4_callback *cb) 544 */
545static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
546 struct xdr_stream *xdr,
547 struct nfsd4_callback *cb)
397{ 548{
398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 549 struct nfs4_cb_compound_hdr hdr;
550 enum nfsstat4 nfserr;
400 int status; 551 int status;
401 552
402 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 553 status = decode_cb_compound4res(xdr, &hdr);
403 status = decode_cb_compound_hdr(&xdr, &hdr); 554 if (unlikely(status))
404 if (status)
405 goto out; 555 goto out;
406 if (cb) { 556
407 status = decode_cb_sequence(&xdr, cb, rqstp); 557 if (cb != NULL) {
408 if (status) 558 status = decode_cb_sequence4res(xdr, cb);
559 if (unlikely(status))
409 goto out; 560 goto out;
410 } 561 }
411 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 562
563 status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
564 if (unlikely(status))
565 goto out;
566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default;
412out: 568out:
413 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(nfserr);
414} 572}
415 573
416/* 574/*
417 * RPC procedure tables 575 * RPC procedure tables
418 */ 576 */
419#define PROC(proc, call, argtype, restype) \ 577#define PROC(proc, call, argtype, restype) \
420[NFSPROC4_CLNT_##proc] = { \ 578[NFSPROC4_CLNT_##proc] = { \
421 .p_proc = NFSPROC4_CB_##call, \ 579 .p_proc = NFSPROC4_CB_##call, \
422 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 580 .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \
423 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 581 .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \
424 .p_arglen = NFS4_##argtype##_sz, \ 582 .p_arglen = NFS4_enc_##argtype##_sz, \
425 .p_replen = NFS4_##restype##_sz, \ 583 .p_replen = NFS4_dec_##restype##_sz, \
426 .p_statidx = NFSPROC4_CB_##call, \ 584 .p_statidx = NFSPROC4_CB_##call, \
427 .p_name = #proc, \ 585 .p_name = #proc, \
428} 586}
429 587
430static struct rpc_procinfo nfs4_cb_procedures[] = { 588static struct rpc_procinfo nfs4_cb_procedures[] = {
431 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), 589 PROC(CB_NULL, NULL, cb_null, cb_null),
432 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), 590 PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
433}; 591};
434 592
435static struct rpc_version nfs_cb_version4 = { 593static struct rpc_version nfs_cb_version4 = {
436/* 594/*
437 * Note on the callback rpc program version number: despite language in rfc 595 * Note on the callback rpc program version number: despite language in rfc
438 * 5661 section 18.36.3 requiring servers to use 4 in this field, the 596 * 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -440,29 +598,29 @@ static struct rpc_version nfs_cb_version4 = {
440 * in practice that appears to be what implementations use. The section 598 * in practice that appears to be what implementations use. The section
441 * 18.36.3 language is expected to be fixed in an erratum. 599 * 18.36.3 language is expected to be fixed in an erratum.
442 */ 600 */
443 .number = 1, 601 .number = 1,
444 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), 602 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
445 .procs = nfs4_cb_procedures 603 .procs = nfs4_cb_procedures
446}; 604};
447 605
448static struct rpc_version * nfs_cb_version[] = { 606static struct rpc_version *nfs_cb_version[] = {
449 &nfs_cb_version4, 607 &nfs_cb_version4,
450}; 608};
451 609
452static struct rpc_program cb_program; 610static struct rpc_program cb_program;
453 611
454static struct rpc_stat cb_stats = { 612static struct rpc_stat cb_stats = {
455 .program = &cb_program 613 .program = &cb_program
456}; 614};
457 615
458#define NFS4_CALLBACK 0x40000000 616#define NFS4_CALLBACK 0x40000000
459static struct rpc_program cb_program = { 617static struct rpc_program cb_program = {
460 .name = "nfs4_cb", 618 .name = "nfs4_cb",
461 .number = NFS4_CALLBACK, 619 .number = NFS4_CALLBACK,
462 .nrvers = ARRAY_SIZE(nfs_cb_version), 620 .nrvers = ARRAY_SIZE(nfs_cb_version),
463 .version = nfs_cb_version, 621 .version = nfs_cb_version,
464 .stats = &cb_stats, 622 .stats = &cb_stats,
465 .pipe_dir_name = "/nfsd4_cb", 623 .pipe_dir_name = "/nfsd4_cb",
466}; 624};
467 625
468static int max_cb_time(void) 626static int max_cb_time(void)
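The rewritten XDR routines plug into the RPC layer through the PROC() table
above, now cast as kxdreproc_t/kxdrdproc_t functions that receive a
ready-made xdr_stream instead of a raw buffer pointer. A skeleton of what a
new-style decoder looks like, built only from the helpers defined above (the
function itself is hypothetical):

	static int nfs4_xdr_dec_cb_example(struct rpc_rqst *rqstp,
					   struct xdr_stream *xdr,
					   struct nfsd4_callback *cb)
	{
		struct nfs4_cb_compound_hdr hdr;
		int status;

		/* No xdr_init_decode() here: the RPC layer sets up the
		 * stream before invoking p_decode. */
		status = decode_cb_compound4res(xdr, &hdr);
		if (unlikely(status))
			return status;
		return decode_cb_sequence4res(xdr, cb);
	}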
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 8b782b062baa..3ee67c67cc52 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -35,7 +35,20 @@
35 35
36struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 36struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
37{ 37{
38 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); 38 return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
39}
40
41static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
42 const char *fname, int err)
43{
44 struct inode *inode = bmap->b_inode;
45
46 if (err == -EINVAL) {
47 nilfs_error(inode->i_sb, fname,
48 "broken bmap (inode number=%lu)\n", inode->i_ino);
49 err = -EIO;
50 }
51 return err;
39} 52}
40 53
41/** 54/**
@@ -66,8 +79,10 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
66 79
67 down_read(&bmap->b_sem); 80 down_read(&bmap->b_sem);
68 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); 81 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
69 if (ret < 0) 82 if (ret < 0) {
83 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
70 goto out; 84 goto out;
85 }
71 if (NILFS_BMAP_USE_VBN(bmap)) { 86 if (NILFS_BMAP_USE_VBN(bmap)) {
72 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, 87 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
73 &blocknr); 88 &blocknr);
@@ -88,7 +103,8 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
88 down_read(&bmap->b_sem); 103 down_read(&bmap->b_sem);
89 ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); 104 ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
90 up_read(&bmap->b_sem); 105 up_read(&bmap->b_sem);
91 return ret; 106
107 return nilfs_bmap_convert_error(bmap, __func__, ret);
92} 108}
93 109
94static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 110static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -144,7 +160,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap,
144 down_write(&bmap->b_sem); 160 down_write(&bmap->b_sem);
145 ret = nilfs_bmap_do_insert(bmap, key, rec); 161 ret = nilfs_bmap_do_insert(bmap, key, rec);
146 up_write(&bmap->b_sem); 162 up_write(&bmap->b_sem);
147 return ret; 163
164 return nilfs_bmap_convert_error(bmap, __func__, ret);
148} 165}
149 166
150static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) 167static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
@@ -180,9 +197,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
180 197
181 down_read(&bmap->b_sem); 198 down_read(&bmap->b_sem);
182 ret = bmap->b_ops->bop_last_key(bmap, &lastkey); 199 ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
183 if (!ret)
184 *key = lastkey;
185 up_read(&bmap->b_sem); 200 up_read(&bmap->b_sem);
201
202 if (ret < 0)
203 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
204 else
205 *key = lastkey;
186 return ret; 206 return ret;
187} 207}
188 208
@@ -210,7 +230,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
210 down_write(&bmap->b_sem); 230 down_write(&bmap->b_sem);
211 ret = nilfs_bmap_do_delete(bmap, key); 231 ret = nilfs_bmap_do_delete(bmap, key);
212 up_write(&bmap->b_sem); 232 up_write(&bmap->b_sem);
213 return ret; 233
234 return nilfs_bmap_convert_error(bmap, __func__, ret);
214} 235}
215 236
216static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) 237static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
@@ -261,7 +282,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
261 down_write(&bmap->b_sem); 282 down_write(&bmap->b_sem);
262 ret = nilfs_bmap_do_truncate(bmap, key); 283 ret = nilfs_bmap_do_truncate(bmap, key);
263 up_write(&bmap->b_sem); 284 up_write(&bmap->b_sem);
264 return ret; 285
286 return nilfs_bmap_convert_error(bmap, __func__, ret);
265} 287}
266 288
267/** 289/**
@@ -300,7 +322,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
300 down_write(&bmap->b_sem); 322 down_write(&bmap->b_sem);
301 ret = bmap->b_ops->bop_propagate(bmap, bh); 323 ret = bmap->b_ops->bop_propagate(bmap, bh);
302 up_write(&bmap->b_sem); 324 up_write(&bmap->b_sem);
303 return ret; 325
326 return nilfs_bmap_convert_error(bmap, __func__, ret);
304} 327}
305 328
306/** 329/**
@@ -344,7 +367,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap,
344 down_write(&bmap->b_sem); 367 down_write(&bmap->b_sem);
345 ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); 368 ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo);
346 up_write(&bmap->b_sem); 369 up_write(&bmap->b_sem);
347 return ret; 370
371 return nilfs_bmap_convert_error(bmap, __func__, ret);
348} 372}
349 373
350/** 374/**
@@ -373,7 +397,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
373 down_write(&bmap->b_sem); 397 down_write(&bmap->b_sem);
374 ret = bmap->b_ops->bop_mark(bmap, key, level); 398 ret = bmap->b_ops->bop_mark(bmap, key, level);
375 up_write(&bmap->b_sem); 399 up_write(&bmap->b_sem);
376 return ret; 400
401 return nilfs_bmap_convert_error(bmap, __func__, ret);
377} 402}
378 403
379/** 404/**
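All of the hunks above funnel bmap results through
nilfs_bmap_convert_error(), which rewrites only -EINVAL (a broken bmap) into
-EIO after reporting the corruption, and passes every other value through
unchanged. A compressed sketch of the resulting caller shape (hypothetical
wrapper, mirroring the pattern above):

	static int example_bmap_op(struct nilfs_bmap *bmap, __u64 key)
	{
		int ret;

		down_write(&bmap->b_sem);
		ret = nilfs_bmap_do_delete(bmap, key);
		up_write(&bmap->b_sem);

		/* -EINVAL becomes -EIO (plus a nilfs_error() report);
		 * 0, -ENOENT, -EIO, etc. pass through unchanged */
		return nilfs_bmap_convert_error(bmap, __func__, ret);
	}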
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 5115814cb745..388e9e8f5286 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -104,8 +104,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
104 if (pblocknr == 0) { 104 if (pblocknr == 0) {
105 pblocknr = blocknr; 105 pblocknr = blocknr;
106 if (inode->i_ino != NILFS_DAT_INO) { 106 if (inode->i_ino != NILFS_DAT_INO) {
107 struct inode *dat = 107 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
108 nilfs_dat_inode(NILFS_I_NILFS(inode));
109 108
110 /* blocknr is a virtual block number */ 109 /* blocknr is a virtual block number */
111 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 110 err = nilfs_dat_translate(dat, blocknr, &pblocknr);
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index cb003c8ee1f6..9d45773b79e6 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -91,7 +91,6 @@ static void nilfs_commit_chunk(struct page *page,
91 unsigned from, unsigned to) 91 unsigned from, unsigned to)
92{ 92{
93 struct inode *dir = mapping->host; 93 struct inode *dir = mapping->host;
94 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
95 loff_t pos = page_offset(page) + from; 94 loff_t pos = page_offset(page) + from;
96 unsigned len = to - from; 95 unsigned len = to - from;
97 unsigned nr_dirty, copied; 96 unsigned nr_dirty, copied;
@@ -103,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page,
103 i_size_write(dir, pos + copied); 102 i_size_write(dir, pos + copied);
104 if (IS_DIRSYNC(dir)) 103 if (IS_DIRSYNC(dir))
105 nilfs_set_transaction_flag(NILFS_TI_SYNC); 104 nilfs_set_transaction_flag(NILFS_TI_SYNC);
106 err = nilfs_set_file_dirty(sbi, dir, nr_dirty); 105 err = nilfs_set_file_dirty(dir, nr_dirty);
107 WARN_ON(err); /* do not happen */ 106 WARN_ON(err); /* do not happen */
108 unlock_page(page); 107 unlock_page(page);
109} 108}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index c9a30d7ff6fc..2f560c9fb808 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -155,6 +155,7 @@ const struct inode_operations nilfs_file_inode_operations = {
155 .truncate = nilfs_truncate, 155 .truncate = nilfs_truncate,
156 .setattr = nilfs_setattr, 156 .setattr = nilfs_setattr,
157 .permission = nilfs_permission, 157 .permission = nilfs_permission,
158 .fiemap = nilfs_fiemap,
158}; 159};
159 160
160/* end of file */ 161/* end of file */
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 9f8a2da67f90..bfc73d3a30ed 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -149,14 +149,9 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
149 } 149 }
150 150
151 err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); 151 err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
152 if (unlikely(err)) { 152 if (unlikely(err))
153 if (err == -EINVAL) 153 nilfs_warning(sb, __func__, "unable to read inode: %lu",
154 nilfs_error(sb, __func__, "ifile is broken"); 154 (unsigned long) ino);
155 else
156 nilfs_warning(sb, __func__,
157 "unable to read inode: %lu",
158 (unsigned long) ino);
159 }
160 return err; 155 return err;
161} 156}
162 157
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 77b48c8fab17..2fd440d8d6b8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -58,7 +58,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
58 struct nilfs_inode_info *ii = NILFS_I(inode); 58 struct nilfs_inode_info *ii = NILFS_I(inode);
59 __u64 blknum = 0; 59 __u64 blknum = 0;
60 int err = 0, ret; 60 int err = 0, ret;
61 struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); 61 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
62 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 62 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
63 63
64 down_read(&NILFS_MDT(dat)->mi_sem); 64 down_read(&NILFS_MDT(dat)->mi_sem);
@@ -96,11 +96,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
96 inode->i_ino, 96 inode->i_ino,
97 (unsigned long long)blkoff); 97 (unsigned long long)blkoff);
98 err = 0; 98 err = 0;
99 } else if (err == -EINVAL) {
100 nilfs_error(inode->i_sb, __func__,
101 "broken bmap (inode=%lu)\n",
102 inode->i_ino);
103 err = -EIO;
104 } 99 }
105 nilfs_transaction_abort(inode->i_sb); 100 nilfs_transaction_abort(inode->i_sb);
106 goto out; 101 goto out;
@@ -109,6 +104,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
109 nilfs_transaction_commit(inode->i_sb); /* never fails */ 104 nilfs_transaction_commit(inode->i_sb); /* never fails */
110 /* Error handling should be detailed */ 105 /* Error handling should be detailed */
111 set_buffer_new(bh_result); 106 set_buffer_new(bh_result);
107 set_buffer_delay(bh_result);
112 map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed 108 map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
113 to proper value */ 109 to proper value */
114 } else if (ret == -ENOENT) { 110 } else if (ret == -ENOENT) {
@@ -185,10 +181,9 @@ static int nilfs_set_page_dirty(struct page *page)
185 181
186 if (ret) { 182 if (ret) {
187 struct inode *inode = page->mapping->host; 183 struct inode *inode = page->mapping->host;
188 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
189 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); 184 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
190 185
191 nilfs_set_file_dirty(sbi, inode, nr_dirty); 186 nilfs_set_file_dirty(inode, nr_dirty);
192 } 187 }
193 return ret; 188 return ret;
194} 189}
@@ -229,7 +224,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
229 start + copied); 224 start + copied);
230 copied = generic_write_end(file, mapping, pos, len, copied, page, 225 copied = generic_write_end(file, mapping, pos, len, copied, page,
231 fsdata); 226 fsdata);
232 nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); 227 nilfs_set_file_dirty(inode, nr_dirty);
233 err = nilfs_transaction_commit(inode->i_sb); 228 err = nilfs_transaction_commit(inode->i_sb);
234 return err ? : copied; 229 return err ? : copied;
235} 230}
@@ -425,13 +420,12 @@ static int __nilfs_read_inode(struct super_block *sb,
425 struct nilfs_root *root, unsigned long ino, 420 struct nilfs_root *root, unsigned long ino,
426 struct inode *inode) 421 struct inode *inode)
427{ 422{
428 struct nilfs_sb_info *sbi = NILFS_SB(sb); 423 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
429 struct inode *dat = nilfs_dat_inode(sbi->s_nilfs);
430 struct buffer_head *bh; 424 struct buffer_head *bh;
431 struct nilfs_inode *raw_inode; 425 struct nilfs_inode *raw_inode;
432 int err; 426 int err;
433 427
434 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 428 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
435 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); 429 err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
436 if (unlikely(err)) 430 if (unlikely(err))
437 goto bad_inode; 431 goto bad_inode;
@@ -461,7 +455,7 @@ static int __nilfs_read_inode(struct super_block *sb,
461 } 455 }
462 nilfs_ifile_unmap_inode(root->ifile, ino, bh); 456 nilfs_ifile_unmap_inode(root->ifile, ino, bh);
463 brelse(bh); 457 brelse(bh);
464 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 458 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
465 nilfs_set_inode_flags(inode); 459 nilfs_set_inode_flags(inode);
466 return 0; 460 return 0;
467 461
@@ -470,7 +464,7 @@ static int __nilfs_read_inode(struct super_block *sb,
470 brelse(bh); 464 brelse(bh);
471 465
472 bad_inode: 466 bad_inode:
473 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 467 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
474 return err; 468 return err;
475} 469}
476 470
@@ -629,7 +623,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
629 623
630 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 624 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
631 return; 625 return;
632 repeat: 626repeat:
633 ret = nilfs_bmap_last_key(ii->i_bmap, &b); 627 ret = nilfs_bmap_last_key(ii->i_bmap, &b);
634 if (ret == -ENOENT) 628 if (ret == -ENOENT)
635 return; 629 return;
@@ -646,14 +640,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
646 nilfs_bmap_truncate(ii->i_bmap, b) == 0)) 640 nilfs_bmap_truncate(ii->i_bmap, b) == 0))
647 goto repeat; 641 goto repeat;
648 642
649 failed: 643failed:
650 if (ret == -EINVAL) 644 nilfs_warning(ii->vfs_inode.i_sb, __func__,
651 nilfs_error(ii->vfs_inode.i_sb, __func__, 645 "failed to truncate bmap (ino=%lu, err=%d)",
652 "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); 646 ii->vfs_inode.i_ino, ret);
653 else
654 nilfs_warning(ii->vfs_inode.i_sb, __func__,
655 "failed to truncate bmap (ino=%lu, err=%d)",
656 ii->vfs_inode.i_ino, ret);
657} 647}
658 648
659void nilfs_truncate(struct inode *inode) 649void nilfs_truncate(struct inode *inode)
@@ -682,7 +672,7 @@ void nilfs_truncate(struct inode *inode)
682 nilfs_set_transaction_flag(NILFS_TI_SYNC); 672 nilfs_set_transaction_flag(NILFS_TI_SYNC);
683 673
684 nilfs_mark_inode_dirty(inode); 674 nilfs_mark_inode_dirty(inode);
685 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); 675 nilfs_set_file_dirty(inode, 0);
686 nilfs_transaction_commit(sb); 676 nilfs_transaction_commit(sb);
687 /* May construct a logical segment and may fail in sync mode. 677 /* May construct a logical segment and may fail in sync mode.
688 But truncate has no return value. */ 678 But truncate has no return value. */
@@ -800,9 +790,9 @@ int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
800 return generic_permission(inode, mask, flags, NULL); 790 return generic_permission(inode, mask, flags, NULL);
801} 791}
802 792
803int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, 793int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
804 struct buffer_head **pbh)
805{ 794{
795 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
806 struct nilfs_inode_info *ii = NILFS_I(inode); 796 struct nilfs_inode_info *ii = NILFS_I(inode);
807 int err; 797 int err;
808 798
@@ -843,9 +833,9 @@ int nilfs_inode_dirty(struct inode *inode)
843 return ret; 833 return ret;
844} 834}
845 835
846int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, 836int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
847 unsigned nr_dirty)
848{ 837{
838 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
849 struct nilfs_inode_info *ii = NILFS_I(inode); 839 struct nilfs_inode_info *ii = NILFS_I(inode);
850 840
851 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); 841 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);
@@ -878,11 +868,10 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
878 868
879int nilfs_mark_inode_dirty(struct inode *inode) 869int nilfs_mark_inode_dirty(struct inode *inode)
880{ 870{
881 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
882 struct buffer_head *ibh; 871 struct buffer_head *ibh;
883 int err; 872 int err;
884 873
885 err = nilfs_load_inode_block(sbi, inode, &ibh); 874 err = nilfs_load_inode_block(inode, &ibh);
886 if (unlikely(err)) { 875 if (unlikely(err)) {
887 nilfs_warning(inode->i_sb, __func__, 876 nilfs_warning(inode->i_sb, __func__,
888 "failed to reget inode block.\n"); 877 "failed to reget inode block.\n");
@@ -924,3 +913,134 @@ void nilfs_dirty_inode(struct inode *inode)
924 nilfs_mark_inode_dirty(inode); 913 nilfs_mark_inode_dirty(inode);
925 nilfs_transaction_commit(inode->i_sb); /* never fails */ 914 nilfs_transaction_commit(inode->i_sb); /* never fails */
926} 915}
916
917int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
918 __u64 start, __u64 len)
919{
920 struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
921 __u64 logical = 0, phys = 0, size = 0;
922 __u32 flags = 0;
923 loff_t isize;
924 sector_t blkoff, end_blkoff;
925 sector_t delalloc_blkoff;
926 unsigned long delalloc_blklen;
927 unsigned int blkbits = inode->i_blkbits;
928 int ret, n;
929
930 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
931 if (ret)
932 return ret;
933
934 mutex_lock(&inode->i_mutex);
935
936 isize = i_size_read(inode);
937
938 blkoff = start >> blkbits;
939 end_blkoff = (start + len - 1) >> blkbits;
940
941 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
942 &delalloc_blkoff);
943
944 do {
945 __u64 blkphy;
946 unsigned int maxblocks;
947
948 if (delalloc_blklen && blkoff == delalloc_blkoff) {
949 if (size) {
950 /* End of the current extent */
951 ret = fiemap_fill_next_extent(
952 fieinfo, logical, phys, size, flags);
953 if (ret)
954 break;
955 }
956 if (blkoff > end_blkoff)
957 break;
958
959 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
960 logical = blkoff << blkbits;
961 phys = 0;
962 size = delalloc_blklen << blkbits;
963
964 blkoff = delalloc_blkoff + delalloc_blklen;
965 delalloc_blklen = nilfs_find_uncommitted_extent(
966 inode, blkoff, &delalloc_blkoff);
967 continue;
968 }
969
970 /*
971 * Limit the number of blocks that we look up so as
972 * not to get into the next delayed allocation extent.
973 */
974 maxblocks = INT_MAX;
975 if (delalloc_blklen)
976 maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
977 maxblocks);
978 blkphy = 0;
979
980 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
981 n = nilfs_bmap_lookup_contig(
982 NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
983 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
984
985 if (n < 0) {
986 int past_eof;
987
988 if (unlikely(n != -ENOENT))
989 break; /* error */
990
991 /* HOLE */
992 blkoff++;
993 past_eof = ((blkoff << blkbits) >= isize);
994
995 if (size) {
996 /* End of the current extent */
997
998 if (past_eof)
999 flags |= FIEMAP_EXTENT_LAST;
1000
1001 ret = fiemap_fill_next_extent(
1002 fieinfo, logical, phys, size, flags);
1003 if (ret)
1004 break;
1005 size = 0;
1006 }
1007 if (blkoff > end_blkoff || past_eof)
1008 break;
1009 } else {
1010 if (size) {
1011 if (phys && blkphy << blkbits == phys + size) {
1012 /* The current extent goes on */
1013 size += n << blkbits;
1014 } else {
1015 /* Terminate the current extent */
1016 ret = fiemap_fill_next_extent(
1017 fieinfo, logical, phys, size,
1018 flags);
1019 if (ret || blkoff > end_blkoff)
1020 break;
1021
1022 /* Start another extent */
1023 flags = FIEMAP_EXTENT_MERGED;
1024 logical = blkoff << blkbits;
1025 phys = blkphy << blkbits;
1026 size = n << blkbits;
1027 }
1028 } else {
1029 /* Start a new extent */
1030 flags = FIEMAP_EXTENT_MERGED;
1031 logical = blkoff << blkbits;
1032 phys = blkphy << blkbits;
1033 size = n << blkbits;
1034 }
1035 blkoff += n;
1036 }
1037 cond_resched();
1038 } while (true);
1039
1040 /* If ret is 1 then we just hit the end of the extent array */
1041 if (ret == 1)
1042 ret = 0;
1043
1044 mutex_unlock(&inode->i_mutex);
1045 return ret;
1046}
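The new nilfs_fiemap() is reached through the generic FS_IOC_FIEMAP ioctl, so
it can be exercised from userspace with no nilfs-specific interface. A
minimal, error-handling-free sketch using the standard fiemap UAPI (nothing
here is nilfs-specific):

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>
	#include <linux/fiemap.h>

	int main(int argc, char **argv)
	{
		struct fiemap *fm;
		unsigned int i;
		int fd = open(argv[1], O_RDONLY);

		/* room for up to 32 extents after the header */
		fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
		fm->fm_length = ~0ULL;			/* map the whole file */
		fm->fm_flags = FIEMAP_FLAG_SYNC;	/* the flag checked above */
		fm->fm_extent_count = 32;

		if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
			for (i = 0; i < fm->fm_mapped_extents; i++)
				printf("logical %llu phys %llu len %llu flags %#x\n",
				       (unsigned long long)fm->fm_extents[i].fe_logical,
				       (unsigned long long)fm->fm_extents[i].fe_physical,
				       (unsigned long long)fm->fm_extents[i].fe_length,
				       fm->fm_extents[i].fe_flags);
		return 0;
	}

Delayed-allocation blocks show up here with FIEMAP_EXTENT_DELALLOC set and a
zero physical address, matching the delalloc branch in nilfs_fiemap() above.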
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index b185e937a335..496738963fdb 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -233,7 +233,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
233 int ret; 233 int ret;
234 234
235 down_read(&nilfs->ns_segctor_sem); 235 down_read(&nilfs->ns_segctor_sem);
236 ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); 236 ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs);
237 up_read(&nilfs->ns_segctor_sem); 237 up_read(&nilfs->ns_segctor_sem);
238 return ret; 238 return ret;
239} 239}
@@ -242,8 +242,7 @@ static ssize_t
242nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, 242nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
243 void *buf, size_t size, size_t nmembs) 243 void *buf, size_t size, size_t nmembs)
244{ 244{
245 struct inode *dat = nilfs_dat_inode(nilfs); 245 struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
246 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
247 struct nilfs_bdesc *bdescs = buf; 246 struct nilfs_bdesc *bdescs = buf;
248 int ret, i; 247 int ret, i;
249 248
@@ -421,7 +420,7 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
421 size_t nmembs = argv->v_nmembs; 420 size_t nmembs = argv->v_nmembs;
422 int ret; 421 int ret;
423 422
424 ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); 423 ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs);
425 424
426 return (ret < 0) ? ret : nmembs; 425 return (ret < 0) ? ret : nmembs;
427} 426}
@@ -430,8 +429,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
430 struct nilfs_argv *argv, void *buf) 429 struct nilfs_argv *argv, void *buf)
431{ 430{
432 size_t nmembs = argv->v_nmembs; 431 size_t nmembs = argv->v_nmembs;
433 struct inode *dat = nilfs_dat_inode(nilfs); 432 struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap;
434 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
435 struct nilfs_bdesc *bdescs = buf; 433 struct nilfs_bdesc *bdescs = buf;
436 int ret, i; 434 int ret, i;
437 435
@@ -450,7 +448,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
450 /* skip dead block */ 448 /* skip dead block */
451 continue; 449 continue;
452 if (bdescs[i].bd_level == 0) { 450 if (bdescs[i].bd_level == 0) {
453 ret = nilfs_mdt_mark_block_dirty(dat, 451 ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat,
454 bdescs[i].bd_offset); 452 bdescs[i].bd_offset);
455 if (ret < 0) { 453 if (ret < 0) {
456 WARN_ON(ret == -ENOENT); 454 WARN_ON(ret == -ENOENT);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 39a5b84e2c9f..6a0e2a189f60 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -237,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
237 * 237 *
238 * %-ENOENT - the specified block does not exist (hole block) 238 * %-ENOENT - the specified block does not exist (hole block)
239 * 239 *
240 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
241 *
242 * %-EROFS - Read only filesystem (for create mode) 240 * %-EROFS - Read only filesystem (for create mode)
243 */ 241 */
244int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, 242int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
@@ -273,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
273 * %-ENOMEM - Insufficient memory available. 271 * %-ENOMEM - Insufficient memory available.
274 * 272 *
275 * %-EIO - I/O error 273 * %-EIO - I/O error
276 *
277 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
278 */ 274 */
279int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) 275int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
280{ 276{
@@ -350,8 +346,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
350 * %-EIO - I/O error 346 * %-EIO - I/O error
351 * 347 *
352 * %-ENOENT - the specified block does not exist (hole block) 348 * %-ENOENT - the specified block does not exist (hole block)
353 *
354 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
355 */ 349 */
356int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) 350int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
357{ 351{
@@ -499,31 +493,29 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
499 struct buffer_head *bh_frozen; 493 struct buffer_head *bh_frozen;
500 struct page *page; 494 struct page *page;
501 int blkbits = inode->i_blkbits; 495 int blkbits = inode->i_blkbits;
502 int ret = -ENOMEM;
503 496
504 page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); 497 page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
505 if (!page) 498 if (!page)
506 return ret; 499 return -ENOMEM;
507 500
508 if (!page_has_buffers(page)) 501 if (!page_has_buffers(page))
509 create_empty_buffers(page, 1 << blkbits, 0); 502 create_empty_buffers(page, 1 << blkbits, 0);
510 503
511 bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); 504 bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
512 if (bh_frozen) { 505
513 if (!buffer_uptodate(bh_frozen)) 506 if (!buffer_uptodate(bh_frozen))
514 nilfs_copy_buffer(bh_frozen, bh); 507 nilfs_copy_buffer(bh_frozen, bh);
515 if (list_empty(&bh_frozen->b_assoc_buffers)) { 508 if (list_empty(&bh_frozen->b_assoc_buffers)) {
516 list_add_tail(&bh_frozen->b_assoc_buffers, 509 list_add_tail(&bh_frozen->b_assoc_buffers,
517 &shadow->frozen_buffers); 510 &shadow->frozen_buffers);
518 set_buffer_nilfs_redirected(bh); 511 set_buffer_nilfs_redirected(bh);
519 } else { 512 } else {
520 brelse(bh_frozen); /* already frozen */ 513 brelse(bh_frozen); /* already frozen */
521 }
522 ret = 0;
523 } 514 }
515
524 unlock_page(page); 516 unlock_page(page);
525 page_cache_release(page); 517 page_cache_release(page);
526 return ret; 518 return 0;
527} 519}
528 520
529struct buffer_head * 521struct buffer_head *
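
The nilfs_mdt_freeze_buffer() rewrite above drops the ret bookkeeping because, once the page is known to have buffers, nilfs_page_get_nth_block() is treated as infallible and the only failure left is the initial -ENOMEM. The copy-on-first-freeze behaviour itself, where a shadow copy is filled once and later freezes become no-ops, can be sketched in plain C (struct buf and freeze_buffer() here are illustrative stand-ins, not kernel APIs):

#include <stdbool.h>
#include <string.h>
#include <stdio.h>

struct buf {
	char data[16];
	bool uptodate;	/* shadow already holds a frozen copy */
};

/* Freeze: copy into the shadow slot only once; later calls are no-ops. */
static void freeze_buffer(struct buf *shadow, const char *live)
{
	if (!shadow->uptodate) {
		memcpy(shadow->data, live, sizeof(shadow->data));
		shadow->uptodate = true;
	}
}

int main(void)
{
	struct buf shadow = { .uptodate = false };

	freeze_buffer(&shadow, "checkpoint-1....");
	freeze_buffer(&shadow, "checkpoint-2....");	/* ignored: frozen */
	printf("frozen: %.16s\n", shadow.data);
	return 0;
}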
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 6e9557ecf161..98034271cd02 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -577,6 +577,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
577 .rename = nilfs_rename, 577 .rename = nilfs_rename,
578 .setattr = nilfs_setattr, 578 .setattr = nilfs_setattr,
579 .permission = nilfs_permission, 579 .permission = nilfs_permission,
580 .fiemap = nilfs_fiemap,
580}; 581};
581 582
582const struct inode_operations nilfs_special_inode_operations = { 583const struct inode_operations nilfs_special_inode_operations = {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 0ca98823db59..777e8fd04304 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -190,11 +190,6 @@ static inline int nilfs_doing_construction(void)
190 return nilfs_test_transaction_flag(NILFS_TI_WRITER); 190 return nilfs_test_transaction_flag(NILFS_TI_WRITER);
191} 191}
192 192
193static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs)
194{
195 return nilfs->ns_dat;
196}
197
198/* 193/*
199 * function prototype 194 * function prototype
200 */ 195 */
@@ -257,13 +252,13 @@ extern void nilfs_truncate(struct inode *);
257extern void nilfs_evict_inode(struct inode *); 252extern void nilfs_evict_inode(struct inode *);
258extern int nilfs_setattr(struct dentry *, struct iattr *); 253extern int nilfs_setattr(struct dentry *, struct iattr *);
259int nilfs_permission(struct inode *inode, int mask, unsigned int flags); 254int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 255int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
261 struct buffer_head **);
262extern int nilfs_inode_dirty(struct inode *); 256extern int nilfs_inode_dirty(struct inode *);
263extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, 257int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
264 unsigned);
265extern int nilfs_mark_inode_dirty(struct inode *); 258extern int nilfs_mark_inode_dirty(struct inode *);
266extern void nilfs_dirty_inode(struct inode *); 259extern void nilfs_dirty_inode(struct inode *);
260int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
261 __u64 start, __u64 len);
267 262
268/* super.c */ 263/* super.c */
269extern struct inode *nilfs_alloc_inode(struct super_block *); 264extern struct inode *nilfs_alloc_inode(struct super_block *);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a6c3c2e817f8..0c432416cfef 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -491,7 +491,7 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
491 } 491 }
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping) 495void nilfs_mapping_init_once(struct address_space *mapping)
496{ 496{
497 memset(mapping, 0, sizeof(*mapping)); 497 memset(mapping, 0, sizeof(*mapping));
@@ -546,3 +546,87 @@ int __nilfs_clear_page_dirty(struct page *page)
546 } 546 }
547 return TestClearPageDirty(page); 547 return TestClearPageDirty(page);
548} 548}
549
550/**
551 * nilfs_find_uncommitted_extent - find extent of uncommitted data
552 * @inode: inode
553 * @start_blk: start block offset (in)
554 * @blkoff: start offset of the found extent (out)
555 *
556 * This function searches for an extent of buffers marked "delayed"
557 * that starts at a block offset equal to or larger than @start_blk.
558 * If such an extent is found, its start offset is stored in @blkoff
559 * and its length in blocks is returned as the function value.
560 * Otherwise, zero is returned.
561 */
562unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
563 sector_t start_blk,
564 sector_t *blkoff)
565{
566 unsigned int i;
567 pgoff_t index;
568 unsigned int nblocks_in_page;
569 unsigned long length = 0;
570 sector_t b;
571 struct pagevec pvec;
572 struct page *page;
573
574 if (inode->i_mapping->nrpages == 0)
575 return 0;
576
577 index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
578 nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
579
580 pagevec_init(&pvec, 0);
581
582repeat:
583 pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
584 pvec.pages);
585 if (pvec.nr == 0)
586 return length;
587
588 if (length > 0 && pvec.pages[0]->index > index)
589 goto out;
590
591 b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
592 i = 0;
593 do {
594 page = pvec.pages[i];
595
596 lock_page(page);
597 if (page_has_buffers(page)) {
598 struct buffer_head *bh, *head;
599
600 bh = head = page_buffers(page);
601 do {
602 if (b < start_blk)
603 continue;
604 if (buffer_delay(bh)) {
605 if (length == 0)
606 *blkoff = b;
607 length++;
608 } else if (length > 0) {
609 goto out_locked;
610 }
611 } while (++b, bh = bh->b_this_page, bh != head);
612 } else {
613 if (length > 0)
614 goto out_locked;
615
616 b += nblocks_in_page;
617 }
618 unlock_page(page);
619
620 } while (++i < pagevec_count(&pvec));
621
622 index = page->index + 1;
623 pagevec_release(&pvec);
624 cond_resched();
625 goto repeat;
626
627out_locked:
628 unlock_page(page);
629out:
630 pagevec_release(&pvec);
631 return length;
632}
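
Stripped of the pagevec and buffer-head machinery, nilfs_find_uncommitted_extent() is a linear scan for the first run of "delayed" blocks at or after start_blk. A minimal sketch over a plain flag array (delayed[] is hypothetical; the real code derives the flag from buffer_delay(bh)):

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* 1 = block has uncommitted (delayed) data, 0 = committed or hole. */
static const bool delayed[] = { 0, 0, 1, 1, 1, 0, 1 };
#define NBLK (sizeof(delayed) / sizeof(delayed[0]))

static unsigned long find_uncommitted_extent(size_t start_blk,
					     size_t *blkoff)
{
	unsigned long length = 0;

	for (size_t b = start_blk; b < NBLK; b++) {
		if (delayed[b]) {
			if (length == 0)
				*blkoff = b;	/* extent starts here */
			length++;
		} else if (length > 0) {
			break;			/* extent ended */
		}
	}
	return length;
}

int main(void)
{
	size_t off = 0;
	unsigned long len = find_uncommitted_extent(0, &off);

	printf("extent at block %zu, %lu blocks\n", off, len);
	return 0;
}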
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index fb9e8a8a2038..622df27cd891 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -66,6 +66,9 @@ void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 66 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 67 const struct address_space_operations *aops);
68unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 68unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
69unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
70 sector_t start_blk,
71 sector_t *blkoff);
69 72
70#define NILFS_PAGE_BUG(page, m, a...) \ 73#define NILFS_PAGE_BUG(page, m, a...) \
71 do { nilfs_page_bug(page); BUG(); } while (0) 74 do { nilfs_page_bug(page); BUG(); } while (0)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 5d2711c28da7..3dfcd3b7d389 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -535,7 +535,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
535 if (unlikely(err)) 535 if (unlikely(err))
536 goto failed_page; 536 goto failed_page;
537 537
538 err = nilfs_set_file_dirty(sbi, inode, 1); 538 err = nilfs_set_file_dirty(inode, 1);
539 if (unlikely(err)) 539 if (unlikely(err))
540 goto failed_page; 540 goto failed_page;
541 541
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index 35a07157b980..7a17715f215f 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -27,14 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
30/*
31 * Mount options
32 */
33struct nilfs_mount_options {
34 unsigned long mount_opt;
35 __u64 snapshot_cno;
36};
37
38struct the_nilfs; 30struct the_nilfs;
39struct nilfs_sc_info; 31struct nilfs_sc_info;
40 32
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 687d090cea34..55ebae5c7f39 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -504,17 +504,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
504 return err; 504 return err;
505} 505}
506 506
507static int nilfs_handle_bmap_error(int err, const char *fname,
508 struct inode *inode, struct super_block *sb)
509{
510 if (err == -EINVAL) {
511 nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
512 inode->i_ino);
513 err = -EIO;
514 }
515 return err;
516}
517
518/* 507/*
519 * Callback functions that enumerate, mark, and collect dirty blocks 508 * Callback functions that enumerate, mark, and collect dirty blocks
520 */ 509 */
@@ -524,9 +513,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
524 int err; 513 int err;
525 514
526 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 515 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
527 if (unlikely(err < 0)) 516 if (err < 0)
528 return nilfs_handle_bmap_error(err, __func__, inode, 517 return err;
529 sci->sc_super);
530 518
531 err = nilfs_segctor_add_file_block(sci, bh, inode, 519 err = nilfs_segctor_add_file_block(sci, bh, inode,
532 sizeof(struct nilfs_binfo_v)); 520 sizeof(struct nilfs_binfo_v));
@@ -539,13 +527,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
539 struct buffer_head *bh, 527 struct buffer_head *bh,
540 struct inode *inode) 528 struct inode *inode)
541{ 529{
542 int err; 530 return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
543
544 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
545 if (unlikely(err < 0))
546 return nilfs_handle_bmap_error(err, __func__, inode,
547 sci->sc_super);
548 return 0;
549} 531}
550 532
551static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, 533static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
@@ -588,9 +570,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
588 int err; 570 int err;
589 571
590 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); 572 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
591 if (unlikely(err < 0)) 573 if (err < 0)
592 return nilfs_handle_bmap_error(err, __func__, inode, 574 return err;
593 sci->sc_super);
594 575
595 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); 576 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
596 if (!err) 577 if (!err)
@@ -776,9 +757,8 @@ static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
776 ret++; 757 ret++;
777 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) 758 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
778 ret++; 759 ret++;
779 if (ret || nilfs_doing_gc()) 760 if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
780 if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) 761 ret++;
781 ret++;
782 return ret; 762 return ret;
783} 763}
784 764
@@ -814,7 +794,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
814 nilfs_mdt_clear_dirty(sci->sc_root->ifile); 794 nilfs_mdt_clear_dirty(sci->sc_root->ifile);
815 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 795 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
816 nilfs_mdt_clear_dirty(nilfs->ns_sufile); 796 nilfs_mdt_clear_dirty(nilfs->ns_sufile);
817 nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); 797 nilfs_mdt_clear_dirty(nilfs->ns_dat);
818} 798}
819 799
820static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) 800static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
@@ -923,7 +903,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
923 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 903 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
924 raw_sr->sr_flags = 0; 904 raw_sr->sr_flags = 0;
925 905
926 nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
927 NILFS_SR_DAT_OFFSET(isz), 1); 907 NILFS_SR_DAT_OFFSET(isz), 1);
928 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + 908 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
929 NILFS_SR_CPFILE_OFFSET(isz), 1); 909 NILFS_SR_CPFILE_OFFSET(isz), 1);
@@ -1179,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1179 sci->sc_stage.scnt++; /* Fall through */ 1159 sci->sc_stage.scnt++; /* Fall through */
1180 case NILFS_ST_DAT: 1160 case NILFS_ST_DAT:
1181 dat_stage: 1161 dat_stage:
1182 err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), 1162 err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
1183 &nilfs_sc_dat_ops); 1163 &nilfs_sc_dat_ops);
1184 if (unlikely(err)) 1164 if (unlikely(err))
1185 break; 1165 break;
@@ -1563,7 +1543,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1563 return 0; 1543 return 0;
1564 1544
1565 failed_bmap: 1545 failed_bmap:
1566 err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
1567 return err; 1546 return err;
1568} 1547}
1569 1548
@@ -1783,6 +1762,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1783 if (!err) { 1762 if (!err) {
1784 set_buffer_uptodate(bh); 1763 set_buffer_uptodate(bh);
1785 clear_buffer_dirty(bh); 1764 clear_buffer_dirty(bh);
1765 clear_buffer_delay(bh);
1786 clear_buffer_nilfs_volatile(bh); 1766 clear_buffer_nilfs_volatile(bh);
1787 } 1767 }
1788 brelse(bh); /* for b_assoc_buffers */ 1768 brelse(bh); /* for b_assoc_buffers */
@@ -1909,6 +1889,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1909 b_assoc_buffers) { 1889 b_assoc_buffers) {
1910 set_buffer_uptodate(bh); 1890 set_buffer_uptodate(bh);
1911 clear_buffer_dirty(bh); 1891 clear_buffer_dirty(bh);
1892 clear_buffer_delay(bh);
1912 clear_buffer_nilfs_volatile(bh); 1893 clear_buffer_nilfs_volatile(bh);
1913 clear_buffer_nilfs_redirected(bh); 1894 clear_buffer_nilfs_redirected(bh);
1914 if (bh == segbuf->sb_super_root) { 1895 if (bh == segbuf->sb_super_root) {
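
nilfs_handle_bmap_error() could be deleted above presumably because, in this series, the -EINVAL-to--EIO conversion and the nilfs_error() report happen inside the bmap layer itself, so callers simply propagate whatever nilfs_bmap_propagate() returns. The general shape of pushing error conversion down to the layer that detects the corruption looks like this sketch (bmap_lookup_raw() and bmap_lookup() are hypothetical):

#include <errno.h>
#include <stdio.h>

/* Hypothetical low-level lookup: -EINVAL signals a corrupted bmap. */
static int bmap_lookup_raw(unsigned long key)
{
	return key == 7 ? -EINVAL : 0;
}

/*
 * Convert the corruption code at the layer that detects it, so every
 * caller can simply propagate the result instead of special-casing
 * -EINVAL (mirroring the removal of nilfs_handle_bmap_error() above).
 */
static int bmap_lookup(unsigned long key)
{
	int err = bmap_lookup_raw(key);

	if (err == -EINVAL) {
		fprintf(stderr, "broken bmap (key=%lu)\n", key);
		err = -EIO;
	}
	return err;
}

int main(void)
{
	printf("lookup(7) = %d\n", bmap_lookup(7));	/* prints -EIO */
	return 0;
}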
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index e2dcc9c733f7..70dfdd532b83 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -47,7 +47,6 @@
47#include <linux/crc32.h> 47#include <linux/crc32.h>
48#include <linux/vfs.h> 48#include <linux/vfs.h>
49#include <linux/writeback.h> 49#include <linux/writeback.h>
50#include <linux/kobject.h>
51#include <linux/seq_file.h> 50#include <linux/seq_file.h>
52#include <linux/mount.h> 51#include <linux/mount.h>
53#include "nilfs.h" 52#include "nilfs.h"
@@ -111,12 +110,17 @@ void nilfs_error(struct super_block *sb, const char *function,
111 const char *fmt, ...) 110 const char *fmt, ...)
112{ 111{
113 struct nilfs_sb_info *sbi = NILFS_SB(sb); 112 struct nilfs_sb_info *sbi = NILFS_SB(sb);
113 struct va_format vaf;
114 va_list args; 114 va_list args;
115 115
116 va_start(args, fmt); 116 va_start(args, fmt);
117 printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); 117
118 vprintk(fmt, args); 118 vaf.fmt = fmt;
119 printk("\n"); 119 vaf.va = &args;
120
121 printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
122 sb->s_id, function, &vaf);
123
120 va_end(args); 124 va_end(args);
121 125
122 if (!(sb->s_flags & MS_RDONLY)) { 126 if (!(sb->s_flags & MS_RDONLY)) {
@@ -136,13 +140,17 @@ void nilfs_error(struct super_block *sb, const char *function,
136void nilfs_warning(struct super_block *sb, const char *function, 140void nilfs_warning(struct super_block *sb, const char *function,
137 const char *fmt, ...) 141 const char *fmt, ...)
138{ 142{
143 struct va_format vaf;
139 va_list args; 144 va_list args;
140 145
141 va_start(args, fmt); 146 va_start(args, fmt);
142 printk(KERN_WARNING "NILFS warning (device %s): %s: ", 147
143 sb->s_id, function); 148 vaf.fmt = fmt;
144 vprintk(fmt, args); 149 vaf.va = &args;
145 printk("\n"); 150
151 printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
152 sb->s_id, function, &vaf);
153
146 va_end(args); 154 va_end(args);
147} 155}
148 156
@@ -1010,11 +1018,11 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1010 struct nilfs_sb_info *sbi = NILFS_SB(sb); 1018 struct nilfs_sb_info *sbi = NILFS_SB(sb);
1011 struct the_nilfs *nilfs = sbi->s_nilfs; 1019 struct the_nilfs *nilfs = sbi->s_nilfs;
1012 unsigned long old_sb_flags; 1020 unsigned long old_sb_flags;
1013 struct nilfs_mount_options old_opts; 1021 unsigned long old_mount_opt;
1014 int err; 1022 int err;
1015 1023
1016 old_sb_flags = sb->s_flags; 1024 old_sb_flags = sb->s_flags;
1017 old_opts.mount_opt = sbi->s_mount_opt; 1025 old_mount_opt = sbi->s_mount_opt;
1018 1026
1019 if (!parse_options(data, sb, 1)) { 1027 if (!parse_options(data, sb, 1)) {
1020 err = -EINVAL; 1028 err = -EINVAL;
@@ -1083,7 +1091,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1083 1091
1084 restore_opts: 1092 restore_opts:
1085 sb->s_flags = old_sb_flags; 1093 sb->s_flags = old_sb_flags;
1086 sbi->s_mount_opt = old_opts.mount_opt; 1094 sbi->s_mount_opt = old_mount_opt;
1087 return err; 1095 return err;
1088} 1096}
1089 1097
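
The switch to struct va_format and %pV in nilfs_error()/nilfs_warning() above turns three printk calls (prefix, vprintk body, trailing newline) into a single one, so concurrent messages can no longer interleave mid-line. %pV is a kernel printk extension; the equivalent "format the caller's va_list once, emit one line" shape in portable C looks like this sketch (nilfs_warning here is a userspace stand-in, not the kernel function):

#include <stdarg.h>
#include <stdio.h>

/* Emit "prefix: message\n" as a single stream operation. */
static void nilfs_warning(const char *dev, const char *func,
			  const char *fmt, ...)
{
	char msg[256];
	va_list args;

	va_start(args, fmt);
	vsnprintf(msg, sizeof(msg), fmt, args);
	va_end(args);

	fprintf(stderr, "NILFS warning (device %s): %s: %s\n",
		dev, func, msg);
}

int main(void)
{
	nilfs_warning("sda1", "main", "broken bmap (inode=%lu)", 42UL);
	return 0;
}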
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 0254be2d73c6..ad4ac607cf57 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -329,7 +329,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
329 printk(KERN_INFO "NILFS: recovery complete.\n"); 329 printk(KERN_INFO "NILFS: recovery complete.\n");
330 330
331 skip_recovery: 331 skip_recovery:
332 set_nilfs_loaded(nilfs);
333 nilfs_clear_recovery_info(&ri); 332 nilfs_clear_recovery_info(&ri);
334 sbi->s_super->s_flags = s_flags; 333 sbi->s_super->s_flags = s_flags;
335 return 0; 334 return 0;
@@ -651,12 +650,11 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
651 650
652int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) 651int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
653{ 652{
654 struct inode *dat = nilfs_dat_inode(nilfs);
655 unsigned long ncleansegs; 653 unsigned long ncleansegs;
656 654
657 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 655 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
658 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); 656 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
659 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 657 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
660 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; 658 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
661 return 0; 659 return 0;
662} 660}
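
nilfs_count_free_blocks() computes free space as clean segments times blocks per segment; note that the (sector_t) cast widens ncleansegs before the multiply so the product cannot overflow an unsigned long on 32-bit builds. A worked example of the same arithmetic (the numbers are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long ncleansegs = 1200;	/* from the sufile */
	unsigned long blocks_per_segment = 2048;

	/* Widen before multiplying, as the (sector_t) cast does above. */
	uint64_t nblocks = (uint64_t)ncleansegs * blocks_per_segment;

	printf("free blocks: %llu\n", (unsigned long long)nblocks);
	return 0;
}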
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 69226e14b745..fd85e4c05c6b 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -36,8 +36,6 @@
36/* the_nilfs struct */ 36/* the_nilfs struct */
37enum { 37enum {
38 THE_NILFS_INIT = 0, /* Information from super_block is set */ 38 THE_NILFS_INIT = 0, /* Information from super_block is set */
39 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
40 the latest checkpoint was loaded */
41 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ 39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
42 THE_NILFS_GC_RUNNING, /* gc process is running */ 40 THE_NILFS_GC_RUNNING, /* gc process is running */
43 THE_NILFS_SB_DIRTY, /* super block is dirty */ 41 THE_NILFS_SB_DIRTY, /* super block is dirty */
@@ -178,7 +176,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
178} 176}
179 177
180THE_NILFS_FNS(INIT, init) 178THE_NILFS_FNS(INIT, init)
181THE_NILFS_FNS(LOADED, loaded)
182THE_NILFS_FNS(DISCONTINUED, discontinued) 179THE_NILFS_FNS(DISCONTINUED, discontinued)
183THE_NILFS_FNS(GC_RUNNING, gc_running) 180THE_NILFS_FNS(GC_RUNNING, gc_running)
184THE_NILFS_FNS(SB_DIRTY, sb_dirty) 181THE_NILFS_FNS(SB_DIRTY, sb_dirty)
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 0d840669698e..ab152c00cd3a 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
51 51
52config OCFS2_FS_STATS 52config OCFS2_FS_STATS
53 bool "OCFS2 statistics" 53 bool "OCFS2 statistics"
54 depends on OCFS2_FS 54 depends on OCFS2_FS && DEBUG_FS
55 default y 55 default y
56 help 56 help
57 This option allows some fs statistics to be captured. Enabling 57 This option allows some fs statistics to be captured. Enabling
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5007d1..e4984e259cb6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
565 return ret; 565 return ret;
566} 566}
567 567
568static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
569static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 568static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
570 struct ocfs2_extent_block *eb); 569 struct ocfs2_extent_block *eb);
571static void ocfs2_adjust_rightmost_records(handle_t *handle, 570static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -5858,6 +5857,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5858 5857
5859 ocfs2_journal_dirty(handle, tl_bh); 5858 ocfs2_journal_dirty(handle, tl_bh);
5860 5859
5860 osb->truncated_clusters += num_clusters;
5861bail: 5861bail:
5862 mlog_exit(status); 5862 mlog_exit(status);
5863 return status; 5863 return status;
@@ -5929,6 +5929,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5929 i--; 5929 i--;
5930 } 5930 }
5931 5931
5932 osb->truncated_clusters = 0;
5933
5932bail: 5934bail:
5933 mlog_exit(status); 5935 mlog_exit(status);
5934 return status; 5936 return status;
@@ -7139,64 +7141,6 @@ bail:
7139} 7141}
7140 7142
7141/* 7143/*
7142 * Expects the inode to already be locked.
7143 */
7144int ocfs2_prepare_truncate(struct ocfs2_super *osb,
7145 struct inode *inode,
7146 struct buffer_head *fe_bh,
7147 struct ocfs2_truncate_context **tc)
7148{
7149 int status;
7150 unsigned int new_i_clusters;
7151 struct ocfs2_dinode *fe;
7152 struct ocfs2_extent_block *eb;
7153 struct buffer_head *last_eb_bh = NULL;
7154
7155 mlog_entry_void();
7156
7157 *tc = NULL;
7158
7159 new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
7160 i_size_read(inode));
7161 fe = (struct ocfs2_dinode *) fe_bh->b_data;
7162
7163 mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
7164 "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
7165 (unsigned long long)le64_to_cpu(fe->i_size));
7166
7167 *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
7168 if (!(*tc)) {
7169 status = -ENOMEM;
7170 mlog_errno(status);
7171 goto bail;
7172 }
7173 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
7174
7175 if (fe->id2.i_list.l_tree_depth) {
7176 status = ocfs2_read_extent_block(INODE_CACHE(inode),
7177 le64_to_cpu(fe->i_last_eb_blk),
7178 &last_eb_bh);
7179 if (status < 0) {
7180 mlog_errno(status);
7181 goto bail;
7182 }
7183 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
7184 }
7185
7186 (*tc)->tc_last_eb_bh = last_eb_bh;
7187
7188 status = 0;
7189bail:
7190 if (status < 0) {
7191 if (*tc)
7192 ocfs2_free_truncate_context(*tc);
7193 *tc = NULL;
7194 }
7195 mlog_exit_void();
7196 return status;
7197}
7198
7199/*
7200 * 'start' is inclusive, 'end' is not. 7144 * 'start' is inclusive, 'end' is not.
7201 */ 7145 */
7202int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 7146int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
@@ -7270,18 +7214,3 @@ out_commit:
7270out: 7214out:
7271 return ret; 7215 return ret;
7272} 7216}
7273
7274static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
7275{
7276 /*
7277 * The caller is responsible for completing deallocation
7278 * before freeing the context.
7279 */
7280 if (tc->tc_dealloc.c_first_suballocator != NULL)
7281 mlog(ML_NOTICE,
7282 "Truncate completion has non-empty dealloc context\n");
7283
7284 brelse(tc->tc_last_eb_bh);
7285
7286 kfree(tc);
7287}
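
The two ocfs2/alloc.c hunks above keep osb->truncated_clusters in step with the truncate log: ocfs2_truncate_log_append() adds num_clusters, and a completed replay resets the counter to zero, both under the truncate-log inode's i_mutex. A small sketch of that counter discipline, using a pthread mutex in place of i_mutex (names are illustrative):

#include <pthread.h>
#include <stdio.h>

/* Cluster count held in the truncate log, guarded by a mutex. */
static pthread_mutex_t tl_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int truncated_clusters;

static void truncate_log_append(unsigned int num_clusters)
{
	pthread_mutex_lock(&tl_lock);
	truncated_clusters += num_clusters;
	pthread_mutex_unlock(&tl_lock);
}

static void truncate_log_replay(void)
{
	pthread_mutex_lock(&tl_lock);
	truncated_clusters = 0;	/* everything was freed back */
	pthread_mutex_unlock(&tl_lock);
}

int main(void)
{
	truncate_log_append(16);
	truncate_log_append(8);
	printf("pending: %u clusters\n", truncated_clusters);
	truncate_log_replay();
	printf("after replay: %u clusters\n", truncated_clusters);
	return 0;
}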
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 55762b554b99..3bd08a03251c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
228 228
229int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, 229int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
230 u64 range_start, u64 range_end); 230 u64 range_start, u64 range_end);
231int ocfs2_prepare_truncate(struct ocfs2_super *osb,
232 struct inode *inode,
233 struct buffer_head *fe_bh,
234 struct ocfs2_truncate_context **tc);
235int ocfs2_commit_truncate(struct ocfs2_super *osb, 231int ocfs2_commit_truncate(struct ocfs2_super *osb,
236 struct inode *inode, 232 struct inode *inode,
237 struct buffer_head *di_bh); 233 struct buffer_head *di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d7c5540ad66..1fbb0e20131b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1630,6 +1630,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1630 return ret; 1630 return ret;
1631} 1631}
1632 1632
1633/*
1634 * Try to flush the truncate log if doing so would free enough clusters.
1635 * Return value: "< 0" means error, "0" means no space was freed, and "1"
1636 * means enough space was freed for the caller to retry the allocation.
1637 */
1638static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
1639 unsigned int needed)
1640{
1641 tid_t target;
1642 int ret = 0;
1643 unsigned int truncated_clusters;
1644
1645 mutex_lock(&osb->osb_tl_inode->i_mutex);
1646 truncated_clusters = osb->truncated_clusters;
1647 mutex_unlock(&osb->osb_tl_inode->i_mutex);
1648
1649 /*
1650 * Check whether we can succeed in allocating if we free
1651 * the truncate log.
1652 */
1653 if (truncated_clusters < needed)
1654 goto out;
1655
1656 ret = ocfs2_flush_truncate_log(osb);
1657 if (ret) {
1658 mlog_errno(ret);
1659 goto out;
1660 }
1661
1662 if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
1663 jbd2_log_wait_commit(osb->journal->j_journal, target);
1664 ret = 1;
1665 }
1666out:
1667 return ret;
1668}
1669
1633int ocfs2_write_begin_nolock(struct file *filp, 1670int ocfs2_write_begin_nolock(struct file *filp,
1634 struct address_space *mapping, 1671 struct address_space *mapping,
1635 loff_t pos, unsigned len, unsigned flags, 1672 loff_t pos, unsigned len, unsigned flags,
@@ -1637,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1637 struct buffer_head *di_bh, struct page *mmap_page) 1674 struct buffer_head *di_bh, struct page *mmap_page)
1638{ 1675{
1639 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; 1676 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1640 unsigned int clusters_to_alloc, extents_to_split; 1677 unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
1641 struct ocfs2_write_ctxt *wc; 1678 struct ocfs2_write_ctxt *wc;
1642 struct inode *inode = mapping->host; 1679 struct inode *inode = mapping->host;
1643 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1680 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1646,7 +1683,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
1646 struct ocfs2_alloc_context *meta_ac = NULL; 1683 struct ocfs2_alloc_context *meta_ac = NULL;
1647 handle_t *handle; 1684 handle_t *handle;
1648 struct ocfs2_extent_tree et; 1685 struct ocfs2_extent_tree et;
1686 int try_free = 1, ret1;
1649 1687
1688try_again:
1650 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); 1689 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
1651 if (ret) { 1690 if (ret) {
1652 mlog_errno(ret); 1691 mlog_errno(ret);
@@ -1681,6 +1720,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1681 mlog_errno(ret); 1720 mlog_errno(ret);
1682 goto out; 1721 goto out;
1683 } else if (ret == 1) { 1722 } else if (ret == 1) {
1723 clusters_need = wc->w_clen;
1684 ret = ocfs2_refcount_cow(inode, filp, di_bh, 1724 ret = ocfs2_refcount_cow(inode, filp, di_bh,
1685 wc->w_cpos, wc->w_clen, UINT_MAX); 1725 wc->w_cpos, wc->w_clen, UINT_MAX);
1686 if (ret) { 1726 if (ret) {
@@ -1695,6 +1735,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
1695 mlog_errno(ret); 1735 mlog_errno(ret);
1696 goto out; 1736 goto out;
1697 } 1737 }
1738 clusters_need += clusters_to_alloc;
1698 1739
1699 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 1740 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1700 1741
@@ -1817,6 +1858,22 @@ out:
1817 ocfs2_free_alloc_context(data_ac); 1858 ocfs2_free_alloc_context(data_ac);
1818 if (meta_ac) 1859 if (meta_ac)
1819 ocfs2_free_alloc_context(meta_ac); 1860 ocfs2_free_alloc_context(meta_ac);
1861
1862 if (ret == -ENOSPC && try_free) {
1863 /*
1864 * Try to free some clusters from the truncate log so that
1865 * the allocation can succeed.
1866 */
1867 try_free = 0;
1868
1869 ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
1870 if (ret1 == 1)
1871 goto try_again;
1872
1873 if (ret1 < 0)
1874 mlog_errno(ret1);
1875 }
1876
1820 return ret; 1877 return ret;
1821} 1878}
1822 1879
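
ocfs2_write_begin_nolock() now records how many clusters the write needs (clusters_need) and, on -ENOSPC, makes exactly one attempt to flush the truncate log before retrying via the try_again label; the try_free flag guarantees the retry cannot loop. A compact model of that retry discipline (allocate() and try_to_free_log() are hypothetical stand-ins):

#include <errno.h>
#include <stdio.h>

static unsigned int free_clusters = 0;
static unsigned int reclaimable = 32;	/* clusters parked in the log */

static int allocate(unsigned int need)
{
	if (need > free_clusters)
		return -ENOSPC;
	free_clusters -= need;
	return 0;
}

/* Model of ocfs2_try_to_free_truncate_log(): 1 = freed enough, retry. */
static int try_to_free_log(unsigned int need)
{
	if (reclaimable < need)
		return 0;
	free_clusters += reclaimable;
	reclaimable = 0;
	return 1;
}

int main(void)
{
	unsigned int need = 10;
	int try_free = 1, ret;

try_again:
	ret = allocate(need);
	if (ret == -ENOSPC && try_free) {
		try_free = 0;		/* only one reclaim attempt */
		if (try_to_free_log(need) == 1)
			goto try_again;
	}
	printf("allocation %s\n", ret ? "failed" : "succeeded");
	return 0;
}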
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9e3d45bcb5fd..a6cc05302e9f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
82#define O2HB_DB_TYPE_REGION_LIVENODES 4 82#define O2HB_DB_TYPE_REGION_LIVENODES 4
83#define O2HB_DB_TYPE_REGION_NUMBER 5 83#define O2HB_DB_TYPE_REGION_NUMBER 5
84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
85#define O2HB_DB_TYPE_REGION_PINNED 7
85struct o2hb_debug_buf { 86struct o2hb_debug_buf {
86 int db_type; 87 int db_type;
87 int db_size; 88 int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
101#define O2HB_DEBUG_FAILEDREGIONS "failed_regions" 102#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
102#define O2HB_DEBUG_REGION_NUMBER "num" 103#define O2HB_DEBUG_REGION_NUMBER "num"
103#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" 104#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
105#define O2HB_DEBUG_REGION_PINNED "pinned"
104 106
105static struct dentry *o2hb_debug_dir; 107static struct dentry *o2hb_debug_dir;
106static struct dentry *o2hb_debug_livenodes; 108static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
132unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 134unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
133unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; 135unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
134 136
137/*
138 * o2hb_dependent_users tracks the number of registered callbacks that depend
139 * on heartbeat. Both o2net and o2dlm register this callback; however,
140 * only o2dlm actually depends on the heartbeat: the heartbeat must not
141 * stop while a dlm domain is still active.
142 */
143unsigned int o2hb_dependent_users;
144
145/*
146 * In global heartbeat mode, all regions are pinned if there are one or more
147 * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
148 * regions are unpinned if the region count exceeds the cut off or the number
149 * of dependent users falls to zero.
150 */
151#define O2HB_PIN_CUT_OFF 3
152
153/*
154 * In local heartbeat mode, we assume the dlm domain name is the same as
155 * the region uuid. This is true for domains created for the file system but not
156 * necessarily true for userdlm domains. This is a known limitation.
157 *
158 * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
159 * works for both file system and userdlm domains.
160 */
161static int o2hb_region_pin(const char *region_uuid);
162static void o2hb_region_unpin(const char *region_uuid);
163
135/* Only sets a new threshold if there are no active regions. 164/* Only sets a new threshold if there are no active regions.
136 * 165 *
137 * No locking or otherwise interesting code is required for reading 166 * No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
186 struct config_item hr_item; 215 struct config_item hr_item;
187 216
188 struct list_head hr_all_item; 217 struct list_head hr_all_item;
189 unsigned hr_unclean_stop:1; 218 unsigned hr_unclean_stop:1,
219 hr_item_pinned:1,
220 hr_item_dropped:1;
190 221
191 /* protected by the hr_callback_sem */ 222 /* protected by the hr_callback_sem */
192 struct task_struct *hr_task; 223 struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
212 struct dentry *hr_debug_livenodes; 243 struct dentry *hr_debug_livenodes;
213 struct dentry *hr_debug_regnum; 244 struct dentry *hr_debug_regnum;
214 struct dentry *hr_debug_elapsed_time; 245 struct dentry *hr_debug_elapsed_time;
246 struct dentry *hr_debug_pinned;
215 struct o2hb_debug_buf *hr_db_livenodes; 247 struct o2hb_debug_buf *hr_db_livenodes;
216 struct o2hb_debug_buf *hr_db_regnum; 248 struct o2hb_debug_buf *hr_db_regnum;
217 struct o2hb_debug_buf *hr_db_elapsed_time; 249 struct o2hb_debug_buf *hr_db_elapsed_time;
250 struct o2hb_debug_buf *hr_db_pinned;
218 251
219 /* let the person setting up hb wait for it to return until it 252 /* let the person setting up hb wait for it to return until it
220 * has reached a 'steady' state. This will be fixed when we have 253 * has reached a 'steady' state. This will be fixed when we have
@@ -701,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
701 config_item_name(&reg->hr_item)); 734 config_item_name(&reg->hr_item));
702 735
703 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); 736 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
737
738 /*
739 * If global heartbeat active, unpin all regions if the
740 * region count > CUT_OFF
741 */
742 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
743 O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
744 o2hb_region_unpin(NULL);
704} 745}
705 746
706static int o2hb_check_slot(struct o2hb_region *reg, 747static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1041,6 +1082,9 @@ static int o2hb_thread(void *data)
1041 1082
1042 set_user_nice(current, -20); 1083 set_user_nice(current, -20);
1043 1084
1085 /* Pin node */
1086 o2nm_depend_this_node();
1087
1044 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 1088 while (!kthread_should_stop() && !reg->hr_unclean_stop) {
1045 /* We track the time spent inside 1089 /* We track the time spent inside
1046 * o2hb_do_disk_heartbeat so that we avoid more than 1090 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1090,6 +1134,9 @@ static int o2hb_thread(void *data)
1090 mlog_errno(ret); 1134 mlog_errno(ret);
1091 } 1135 }
1092 1136
1137 /* Unpin node */
1138 o2nm_undepend_this_node();
1139
1093 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); 1140 mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
1094 1141
1095 return 0; 1142 return 0;
@@ -1142,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
1142 reg->hr_last_timeout_start)); 1189 reg->hr_last_timeout_start));
1143 goto done; 1190 goto done;
1144 1191
1192 case O2HB_DB_TYPE_REGION_PINNED:
1193 reg = (struct o2hb_region *)db->db_data;
1194 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
1195 !!reg->hr_item_pinned);
1196 goto done;
1197
1145 default: 1198 default:
1146 goto done; 1199 goto done;
1147 } 1200 }
@@ -1315,6 +1368,8 @@ int o2hb_init(void)
1315 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); 1368 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
1316 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); 1369 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
1317 1370
1371 o2hb_dependent_users = 0;
1372
1318 return o2hb_debug_init(); 1373 return o2hb_debug_init();
1319} 1374}
1320 1375
@@ -1384,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
1384 debugfs_remove(reg->hr_debug_livenodes); 1439 debugfs_remove(reg->hr_debug_livenodes);
1385 debugfs_remove(reg->hr_debug_regnum); 1440 debugfs_remove(reg->hr_debug_regnum);
1386 debugfs_remove(reg->hr_debug_elapsed_time); 1441 debugfs_remove(reg->hr_debug_elapsed_time);
1442 debugfs_remove(reg->hr_debug_pinned);
1387 debugfs_remove(reg->hr_debug_dir); 1443 debugfs_remove(reg->hr_debug_dir);
1388 1444
1389 spin_lock(&o2hb_live_lock); 1445 spin_lock(&o2hb_live_lock);
@@ -1948,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
1948 goto bail; 2004 goto bail;
1949 } 2005 }
1950 2006
2007 reg->hr_debug_pinned =
2008 o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
2009 reg->hr_debug_dir,
2010 &(reg->hr_db_pinned),
2011 sizeof(*(reg->hr_db_pinned)),
2012 O2HB_DB_TYPE_REGION_PINNED,
2013 0, 0, reg);
2014 if (!reg->hr_debug_pinned) {
2015 mlog_errno(ret);
2016 goto bail;
2017 }
2018
1951 ret = 0; 2019 ret = 0;
1952bail: 2020bail:
1953 return ret; 2021 return ret;
@@ -2002,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2002{ 2070{
2003 struct task_struct *hb_task; 2071 struct task_struct *hb_task;
2004 struct o2hb_region *reg = to_o2hb_region(item); 2072 struct o2hb_region *reg = to_o2hb_region(item);
2073 int quorum_region = 0;
2005 2074
2006 /* stop the thread when the user removes the region dir */ 2075 /* stop the thread when the user removes the region dir */
2007 spin_lock(&o2hb_live_lock); 2076 spin_lock(&o2hb_live_lock);
2008 if (o2hb_global_heartbeat_active()) { 2077 if (o2hb_global_heartbeat_active()) {
2009 clear_bit(reg->hr_region_num, o2hb_region_bitmap); 2078 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2010 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); 2079 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2080 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
2081 quorum_region = 1;
2082 clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
2011 } 2083 }
2012 hb_task = reg->hr_task; 2084 hb_task = reg->hr_task;
2013 reg->hr_task = NULL; 2085 reg->hr_task = NULL;
2086 reg->hr_item_dropped = 1;
2014 spin_unlock(&o2hb_live_lock); 2087 spin_unlock(&o2hb_live_lock);
2015 2088
2016 if (hb_task) 2089 if (hb_task)
@@ -2028,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
2028 if (o2hb_global_heartbeat_active()) 2101 if (o2hb_global_heartbeat_active())
2029 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", 2102 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2030 config_item_name(&reg->hr_item)); 2103 config_item_name(&reg->hr_item));
2104
2031 config_item_put(item); 2105 config_item_put(item);
2106
2107 if (!o2hb_global_heartbeat_active() || !quorum_region)
2108 return;
2109
2110 /*
2111 * If global heartbeat active and there are dependent users,
2112 * pin all regions if quorum region count <= CUT_OFF
2113 */
2114 spin_lock(&o2hb_live_lock);
2115
2116 if (!o2hb_dependent_users)
2117 goto unlock;
2118
2119 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2120 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2121 o2hb_region_pin(NULL);
2122
2123unlock:
2124 spin_unlock(&o2hb_live_lock);
2032} 2125}
2033 2126
2034struct o2hb_heartbeat_group_attribute { 2127struct o2hb_heartbeat_group_attribute {
@@ -2214,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
2214} 2307}
2215EXPORT_SYMBOL_GPL(o2hb_setup_callback); 2308EXPORT_SYMBOL_GPL(o2hb_setup_callback);
2216 2309
2217static struct o2hb_region *o2hb_find_region(const char *region_uuid) 2310/*
2311 * In local heartbeat mode, the region_uuid passed in matches the dlm domain name.
2312 * In global heartbeat mode, the region_uuid passed in is NULL.
2313 *
2314 * In local, we only pin the matching region. In global we pin all the active
2315 * regions.
2316 */
2317static int o2hb_region_pin(const char *region_uuid)
2218{ 2318{
2219 struct o2hb_region *p, *reg = NULL; 2319 int ret = 0, found = 0;
2320 struct o2hb_region *reg;
2321 char *uuid;
2220 2322
2221 assert_spin_locked(&o2hb_live_lock); 2323 assert_spin_locked(&o2hb_live_lock);
2222 2324
2223 list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { 2325 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2224 if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { 2326 uuid = config_item_name(&reg->hr_item);
2225 reg = p; 2327
2226 break; 2328 /* local heartbeat */
2329 if (region_uuid) {
2330 if (strcmp(region_uuid, uuid))
2331 continue;
2332 found = 1;
2333 }
2334
2335 if (reg->hr_item_pinned || reg->hr_item_dropped)
2336 goto skip_pin;
2337
2338 /* Ignore ENOENT only for local hb (userdlm domain) */
2339 ret = o2nm_depend_item(&reg->hr_item);
2340 if (!ret) {
2341 mlog(ML_CLUSTER, "Pin region %s\n", uuid);
2342 reg->hr_item_pinned = 1;
2343 } else {
2344 if (ret == -ENOENT && found)
2345 ret = 0;
2346 else {
2347 mlog(ML_ERROR, "Pin region %s fails with %d\n",
2348 uuid, ret);
2349 break;
2350 }
2227 } 2351 }
2352skip_pin:
2353 if (found)
2354 break;
2228 } 2355 }
2229 2356
2230 return reg; 2357 return ret;
2231} 2358}
2232 2359
2233static int o2hb_region_get(const char *region_uuid) 2360/*
2361 * In local heartbeat mode, the region_uuid passed in matches the dlm domain name.
2362 * In global heartbeat mode, the region_uuid passed in is NULL.
2363 *
2364 * In local, we only unpin the matching region. In global we unpin all the
2365 * active regions.
2366 */
2367static void o2hb_region_unpin(const char *region_uuid)
2234{ 2368{
2235 int ret = 0;
2236 struct o2hb_region *reg; 2369 struct o2hb_region *reg;
2370 char *uuid;
2371 int found = 0;
2237 2372
2238 spin_lock(&o2hb_live_lock); 2373 assert_spin_locked(&o2hb_live_lock);
2239 2374
2240 reg = o2hb_find_region(region_uuid); 2375 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2241 if (!reg) 2376 uuid = config_item_name(&reg->hr_item);
2242 ret = -ENOENT; 2377 if (region_uuid) {
2243 spin_unlock(&o2hb_live_lock); 2378 if (strcmp(region_uuid, uuid))
2379 continue;
2380 found = 1;
2381 }
2244 2382
2245 if (ret) 2383 if (reg->hr_item_pinned) {
2246 goto out; 2384 mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
2385 o2nm_undepend_item(&reg->hr_item);
2386 reg->hr_item_pinned = 0;
2387 }
2388 if (found)
2389 break;
2390 }
2391}
2247 2392
2248 ret = o2nm_depend_this_node(); 2393static int o2hb_region_inc_user(const char *region_uuid)
2249 if (ret) 2394{
2250 goto out; 2395 int ret = 0;
2251 2396
2252 ret = o2nm_depend_item(&reg->hr_item); 2397 spin_lock(&o2hb_live_lock);
2253 if (ret)
2254 o2nm_undepend_this_node();
2255 2398
2256out: 2399 /* local heartbeat */
2400 if (!o2hb_global_heartbeat_active()) {
2401 ret = o2hb_region_pin(region_uuid);
2402 goto unlock;
2403 }
2404
2405 /*
2406 * if global heartbeat active and this is the first dependent user,
2407 * pin all regions if quorum region count <= CUT_OFF
2408 */
2409 o2hb_dependent_users++;
2410 if (o2hb_dependent_users > 1)
2411 goto unlock;
2412
2413 if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
2414 O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
2415 ret = o2hb_region_pin(NULL);
2416
2417unlock:
2418 spin_unlock(&o2hb_live_lock);
2257 return ret; 2419 return ret;
2258} 2420}
2259 2421
2260static void o2hb_region_put(const char *region_uuid) 2422void o2hb_region_dec_user(const char *region_uuid)
2261{ 2423{
2262 struct o2hb_region *reg;
2263
2264 spin_lock(&o2hb_live_lock); 2424 spin_lock(&o2hb_live_lock);
2265 2425
2266 reg = o2hb_find_region(region_uuid); 2426 /* local heartbeat */
2427 if (!o2hb_global_heartbeat_active()) {
2428 o2hb_region_unpin(region_uuid);
2429 goto unlock;
2430 }
2267 2431
2268 spin_unlock(&o2hb_live_lock); 2432 /*
2433 * if global heartbeat active and there are no dependent users,
2434 * unpin all quorum regions
2435 */
2436 o2hb_dependent_users--;
2437 if (!o2hb_dependent_users)
2438 o2hb_region_unpin(NULL);
2269 2439
2270 if (reg) { 2440unlock:
2271 o2nm_undepend_item(&reg->hr_item); 2441 spin_unlock(&o2hb_live_lock);
2272 o2nm_undepend_this_node();
2273 }
2274} 2442}
2275 2443
2276int o2hb_register_callback(const char *region_uuid, 2444int o2hb_register_callback(const char *region_uuid,
@@ -2291,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
2291 } 2459 }
2292 2460
2293 if (region_uuid) { 2461 if (region_uuid) {
2294 ret = o2hb_region_get(region_uuid); 2462 ret = o2hb_region_inc_user(region_uuid);
2295 if (ret) 2463 if (ret) {
2464 mlog_errno(ret);
2296 goto out; 2465 goto out;
2466 }
2297 } 2467 }
2298 2468
2299 down_write(&o2hb_callback_sem); 2469 down_write(&o2hb_callback_sem);
@@ -2311,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
2311 up_write(&o2hb_callback_sem); 2481 up_write(&o2hb_callback_sem);
2312 ret = 0; 2482 ret = 0;
2313out: 2483out:
2314 mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n", 2484 mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
2315 ret, __builtin_return_address(0), hc); 2485 ret, __builtin_return_address(0), hc);
2316 return ret; 2486 return ret;
2317} 2487}
@@ -2322,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2322{ 2492{
2323 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); 2493 BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
2324 2494
2325 mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", 2495 mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
2326 __builtin_return_address(0), hc); 2496 __builtin_return_address(0), hc);
2327 2497
2328 /* XXX Can this happen _with_ a region reference? */ 2498 /* XXX Can this happen _with_ a region reference? */
@@ -2330,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
2330 return; 2500 return;
2331 2501
2332 if (region_uuid) 2502 if (region_uuid)
2333 o2hb_region_put(region_uuid); 2503 o2hb_region_dec_user(region_uuid);
2334 2504
2335 down_write(&o2hb_callback_sem); 2505 down_write(&o2hb_callback_sem);
2336 2506
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e52b02..3a5835904b3d 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
46#define O2NET_DEBUG_DIR "o2net" 46#define O2NET_DEBUG_DIR "o2net"
47#define SC_DEBUG_NAME "sock_containers" 47#define SC_DEBUG_NAME "sock_containers"
48#define NST_DEBUG_NAME "send_tracking" 48#define NST_DEBUG_NAME "send_tracking"
49#define STATS_DEBUG_NAME "stats"
50
51#define SHOW_SOCK_CONTAINERS 0
52#define SHOW_SOCK_STATS 1
49 53
50static struct dentry *o2net_dentry; 54static struct dentry *o2net_dentry;
51static struct dentry *sc_dentry; 55static struct dentry *sc_dentry;
52static struct dentry *nst_dentry; 56static struct dentry *nst_dentry;
57static struct dentry *stats_dentry;
53 58
54static DEFINE_SPINLOCK(o2net_debug_lock); 59static DEFINE_SPINLOCK(o2net_debug_lock);
55 60
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
123static int nst_seq_show(struct seq_file *seq, void *v) 128static int nst_seq_show(struct seq_file *seq, void *v)
124{ 129{
125 struct o2net_send_tracking *nst, *dummy_nst = seq->private; 130 struct o2net_send_tracking *nst, *dummy_nst = seq->private;
131 ktime_t now;
132 s64 sock, send, status;
126 133
127 spin_lock(&o2net_debug_lock); 134 spin_lock(&o2net_debug_lock);
128 nst = next_nst(dummy_nst); 135 nst = next_nst(dummy_nst);
136 if (!nst)
137 goto out;
129 138
130 if (nst != NULL) { 139 now = ktime_get();
131 /* get_task_comm isn't exported. oh well. */ 140 sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
132 seq_printf(seq, "%p:\n" 141 send = ktime_to_us(ktime_sub(now, nst->st_send_time));
133 " pid: %lu\n" 142 status = ktime_to_us(ktime_sub(now, nst->st_status_time));
134 " tgid: %lu\n" 143
135 " process name: %s\n" 144 /* get_task_comm isn't exported. oh well. */
136 " node: %u\n" 145 seq_printf(seq, "%p:\n"
137 " sc: %p\n" 146 " pid: %lu\n"
138 " message id: %d\n" 147 " tgid: %lu\n"
139 " message type: %u\n" 148 " process name: %s\n"
140 " message key: 0x%08x\n" 149 " node: %u\n"
141 " sock acquiry: %lu.%ld\n" 150 " sc: %p\n"
142 " send start: %lu.%ld\n" 151 " message id: %d\n"
143 " wait start: %lu.%ld\n", 152 " message type: %u\n"
144 nst, (unsigned long)nst->st_task->pid, 153 " message key: 0x%08x\n"
145 (unsigned long)nst->st_task->tgid, 154 " sock acquiry: %lld usecs ago\n"
146 nst->st_task->comm, nst->st_node, 155 " send start: %lld usecs ago\n"
147 nst->st_sc, nst->st_id, nst->st_msg_type, 156 " wait start: %lld usecs ago\n",
148 nst->st_msg_key, 157 nst, (unsigned long)task_pid_nr(nst->st_task),
149 nst->st_sock_time.tv_sec, 158 (unsigned long)nst->st_task->tgid,
150 (long)nst->st_sock_time.tv_usec, 159 nst->st_task->comm, nst->st_node,
151 nst->st_send_time.tv_sec, 160 nst->st_sc, nst->st_id, nst->st_msg_type,
152 (long)nst->st_send_time.tv_usec, 161 nst->st_msg_key,
153 nst->st_status_time.tv_sec, 162 (long long)sock,
154 (long)nst->st_status_time.tv_usec); 163 (long long)send,
155 } 164 (long long)status);
156 165
166out:
157 spin_unlock(&o2net_debug_lock); 167 spin_unlock(&o2net_debug_lock);
158 168
159 return 0; 169 return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
228 spin_unlock(&o2net_debug_lock); 238 spin_unlock(&o2net_debug_lock);
229} 239}
230 240
241struct o2net_sock_debug {
242 int dbg_ctxt;
243 struct o2net_sock_container *dbg_sock;
244};
245
231static struct o2net_sock_container 246static struct o2net_sock_container
232 *next_sc(struct o2net_sock_container *sc_start) 247 *next_sc(struct o2net_sock_container *sc_start)
233{ 248{
@@ -253,7 +268,8 @@ static struct o2net_sock_container
253 268
254static void *sc_seq_start(struct seq_file *seq, loff_t *pos) 269static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
255{ 270{
256 struct o2net_sock_container *sc, *dummy_sc = seq->private; 271 struct o2net_sock_debug *sd = seq->private;
272 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
257 273
258 spin_lock(&o2net_debug_lock); 274 spin_lock(&o2net_debug_lock);
259 sc = next_sc(dummy_sc); 275 sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
264 280
265static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) 281static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
266{ 282{
267 struct o2net_sock_container *sc, *dummy_sc = seq->private; 283 struct o2net_sock_debug *sd = seq->private;
284 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
268 285
269 spin_lock(&o2net_debug_lock); 286 spin_lock(&o2net_debug_lock);
270 sc = next_sc(dummy_sc); 287 sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 293 return sc; /* unused, just needs to be null when done */
277} 294}
278 295
279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec 296#ifdef CONFIG_OCFS2_FS_STATS
297# define sc_send_count(_s) ((_s)->sc_send_count)
298# define sc_recv_count(_s) ((_s)->sc_recv_count)
299# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total))
300# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total))
301# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total))
302# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total))
303#else
304# define sc_send_count(_s) (0U)
305# define sc_recv_count(_s) (0U)
306# define sc_tv_acquiry_total_ns(_s) (0LL)
307# define sc_tv_send_total_ns(_s) (0LL)
308# define sc_tv_status_total_ns(_s) (0LL)
309# define sc_tv_process_total_ns(_s) (0LL)
310#endif
311
312/* So that debugfs.ocfs2 can determine which format is being used */
313#define O2NET_STATS_STR_VERSION 1
314static void sc_show_sock_stats(struct seq_file *seq,
315 struct o2net_sock_container *sc)
316{
317 if (!sc)
318 return;
319
320 seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
321 sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
322 (long long)sc_tv_acquiry_total_ns(sc),
323 (long long)sc_tv_send_total_ns(sc),
324 (long long)sc_tv_status_total_ns(sc),
325 (unsigned long)sc_recv_count(sc),
326 (long long)sc_tv_process_total_ns(sc));
327}
328
329static void sc_show_sock_container(struct seq_file *seq,
330 struct o2net_sock_container *sc)
331{
332 struct inet_sock *inet = NULL;
333 __be32 saddr = 0, daddr = 0;
334 __be16 sport = 0, dport = 0;
335
336 if (!sc)
337 return;
338
339 if (sc->sc_sock) {
340 inet = inet_sk(sc->sc_sock->sk);
341 /* the stack's structs aren't sparse endian clean */
342 saddr = (__force __be32)inet->inet_saddr;
343 daddr = (__force __be32)inet->inet_daddr;
344 sport = (__force __be16)inet->inet_sport;
345 dport = (__force __be16)inet->inet_dport;
346 }
347
348 /* XXX sigh, inet-> doesn't have sparse annotation so any
349 * use of it here generates a warning with -Wbitwise */
350 seq_printf(seq, "%p:\n"
351 " krefs: %d\n"
352 " sock: %pI4:%u -> "
353 "%pI4:%u\n"
354 " remote node: %s\n"
355 " page off: %zu\n"
356 " handshake ok: %u\n"
357 " timer: %lld usecs\n"
358 " data ready: %lld usecs\n"
359 " advance start: %lld usecs\n"
360 " advance stop: %lld usecs\n"
361 " func start: %lld usecs\n"
362 " func stop: %lld usecs\n"
363 " func key: 0x%08x\n"
364 " func type: %u\n",
365 sc,
366 atomic_read(&sc->sc_kref.refcount),
367 &saddr, inet ? ntohs(sport) : 0,
368 &daddr, inet ? ntohs(dport) : 0,
369 sc->sc_node->nd_name,
370 sc->sc_page_off,
371 sc->sc_handshake_ok,
372 (long long)ktime_to_us(sc->sc_tv_timer),
373 (long long)ktime_to_us(sc->sc_tv_data_ready),
374 (long long)ktime_to_us(sc->sc_tv_advance_start),
375 (long long)ktime_to_us(sc->sc_tv_advance_stop),
376 (long long)ktime_to_us(sc->sc_tv_func_start),
377 (long long)ktime_to_us(sc->sc_tv_func_stop),
378 sc->sc_msg_key,
379 sc->sc_msg_type);
380}
280 381
281static int sc_seq_show(struct seq_file *seq, void *v) 382static int sc_seq_show(struct seq_file *seq, void *v)
282{ 383{
283 struct o2net_sock_container *sc, *dummy_sc = seq->private; 384 struct o2net_sock_debug *sd = seq->private;
385 struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
284 386
285 spin_lock(&o2net_debug_lock); 387 spin_lock(&o2net_debug_lock);
286 sc = next_sc(dummy_sc); 388 sc = next_sc(dummy_sc);
287 389
288 if (sc != NULL) { 390 if (sc) {
289 struct inet_sock *inet = NULL; 391 if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
290 392 sc_show_sock_container(seq, sc);
291 __be32 saddr = 0, daddr = 0; 393 else
292 __be16 sport = 0, dport = 0; 394 sc_show_sock_stats(seq, sc);
293
294 if (sc->sc_sock) {
295 inet = inet_sk(sc->sc_sock->sk);
296 /* the stack's structs aren't sparse endian clean */
297 saddr = (__force __be32)inet->inet_saddr;
298 daddr = (__force __be32)inet->inet_daddr;
299 sport = (__force __be16)inet->inet_sport;
300 dport = (__force __be16)inet->inet_dport;
301 }
302
303 /* XXX sigh, inet-> doesn't have sparse annotation so any
304 * use of it here generates a warning with -Wbitwise */
305 seq_printf(seq, "%p:\n"
306 " krefs: %d\n"
307 " sock: %pI4:%u -> "
308 "%pI4:%u\n"
309 " remote node: %s\n"
310 " page off: %zu\n"
311 " handshake ok: %u\n"
312 " timer: %lu.%ld\n"
313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%ld\n"
317 " func stop: %lu.%ld\n"
318 " func key: %u\n"
319 " func type: %u\n",
320 sc,
321 atomic_read(&sc->sc_kref.refcount),
322 &saddr, inet ? ntohs(sport) : 0,
323 &daddr, inet ? ntohs(dport) : 0,
324 sc->sc_node->nd_name,
325 sc->sc_page_off,
326 sc->sc_handshake_ok,
327 TV_SEC_USEC(sc->sc_tv_timer),
328 TV_SEC_USEC(sc->sc_tv_data_ready),
329 TV_SEC_USEC(sc->sc_tv_advance_start),
330 TV_SEC_USEC(sc->sc_tv_advance_stop),
331 TV_SEC_USEC(sc->sc_tv_func_start),
332 TV_SEC_USEC(sc->sc_tv_func_stop),
333 sc->sc_msg_key,
334 sc->sc_msg_type);
335 } 395 }
336 396
337
338 spin_unlock(&o2net_debug_lock); 397 spin_unlock(&o2net_debug_lock);
339 398
340 return 0; 399 return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
351 .show = sc_seq_show, 410 .show = sc_seq_show,
352}; 411};
353 412
354static int sc_fop_open(struct inode *inode, struct file *file) 413static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
355{ 414{
356 struct o2net_sock_container *dummy_sc; 415 struct o2net_sock_container *dummy_sc;
357 struct seq_file *seq; 416 struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
369 goto out; 428 goto out;
370 429
371 seq = file->private_data; 430 seq = file->private_data;
372 seq->private = dummy_sc; 431 seq->private = sd;
432 sd->dbg_sock = dummy_sc;
373 o2net_debug_add_sc(dummy_sc); 433 o2net_debug_add_sc(dummy_sc);
374 434
375 dummy_sc = NULL; 435 dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
382static int sc_fop_release(struct inode *inode, struct file *file) 442static int sc_fop_release(struct inode *inode, struct file *file)
383{ 443{
384 struct seq_file *seq = file->private_data; 444 struct seq_file *seq = file->private_data;
385 struct o2net_sock_container *dummy_sc = seq->private; 445 struct o2net_sock_debug *sd = seq->private;
446 struct o2net_sock_container *dummy_sc = sd->dbg_sock;
386 447
387 o2net_debug_del_sc(dummy_sc); 448 o2net_debug_del_sc(dummy_sc);
388 return seq_release_private(inode, file); 449 return seq_release_private(inode, file);
389} 450}
390 451
452static int stats_fop_open(struct inode *inode, struct file *file)
453{
454 struct o2net_sock_debug *sd;
455
456 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
457 if (sd == NULL)
458 return -ENOMEM;
459
460 sd->dbg_ctxt = SHOW_SOCK_STATS;
461 sd->dbg_sock = NULL;
462
463 return sc_common_open(file, sd);
464}
465
466static const struct file_operations stats_seq_fops = {
467 .open = stats_fop_open,
468 .read = seq_read,
469 .llseek = seq_lseek,
470 .release = sc_fop_release,
471};
472
473static int sc_fop_open(struct inode *inode, struct file *file)
474{
475 struct o2net_sock_debug *sd;
476
477 sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
478 if (sd == NULL)
479 return -ENOMEM;
480
481 sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
482 sd->dbg_sock = NULL;
483
484 return sc_common_open(file, sd);
485}
486
391static const struct file_operations sc_seq_fops = { 487static const struct file_operations sc_seq_fops = {
392 .open = sc_fop_open, 488 .open = sc_fop_open,
393 .read = seq_read, 489 .read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
419 goto bail; 515 goto bail;
420 } 516 }
421 517
518 stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
519 o2net_dentry, NULL,
520 &stats_seq_fops);
521 if (!stats_dentry) {
522 mlog_errno(-ENOMEM);
523 goto bail;
524 }
525
422 return 0; 526 return 0;
423bail: 527bail:
424 if (sc_dentry) 528 debugfs_remove(stats_dentry);
425 debugfs_remove(sc_dentry); 529 debugfs_remove(sc_dentry);
426 if (nst_dentry) 530 debugfs_remove(nst_dentry);
427 debugfs_remove(nst_dentry); 531 debugfs_remove(o2net_dentry);
428 if (o2net_dentry)
429 debugfs_remove(o2net_dentry);
430 return -ENOMEM; 532 return -ENOMEM;
431} 533}
432 534
433void o2net_debugfs_exit(void) 535void o2net_debugfs_exit(void)
434{ 536{
435 if (sc_dentry) 537 debugfs_remove(stats_dentry);
436 debugfs_remove(sc_dentry); 538 debugfs_remove(sc_dentry);
437 if (nst_dentry) 539 debugfs_remove(nst_dentry);
438 debugfs_remove(nst_dentry); 540 debugfs_remove(o2net_dentry);
439 if (o2net_dentry)
440 debugfs_remove(o2net_dentry);
441} 541}
442 542
443#endif /* CONFIG_DEBUG_FS */ 543#endif /* CONFIG_DEBUG_FS */
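
A note on the netdebug.c changes above: hanging a small o2net_sock_debug context off seq->private lets one set of seq_operations back both the existing sock_containers file and the new stats file, with sc_seq_show() dispatching on sd->dbg_ctxt. The dropped NULL checks around debugfs_remove() are safe because debugfs_remove() is a no-op on a NULL dentry. A minimal sketch of the same context-through-seq->private idea, with hypothetical names (my_ctx, my_show, my_open_common are illustrative, not part of the patch):

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

struct my_ctx {
        int mode;                       /* which view this file renders */
};

static int my_show(struct seq_file *seq, void *v)
{
        struct my_ctx *ctx = seq->private;

        /* dispatch on the per-file context, as sc_seq_show() does */
        seq_printf(seq, "mode %d\n", ctx->mode);
        return 0;
}

static int my_open_common(struct file *file, int mode)
{
        struct my_ctx *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        int ret;

        if (!ctx)
                return -ENOMEM;
        ctx->mode = mode;
        ret = single_open(file, my_show, ctx);  /* stores ctx in seq->private */
        if (ret)
                kfree(ctx);
        return ret;
}
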
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e42123..3b11cb1e38fc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 nst->st_sock_time = ktime_get();
159} 159}
160 160
161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 nst->st_send_time = ktime_get();
164} 164}
165 165
166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 nst->st_status_time = ktime_get();
169} 169}
170 170
171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
178 u32 msg_id)
178{ 179{
179 nst->st_id = msg_id; 180 nst->st_id = msg_id;
180} 181}
181 182
182#else /* CONFIG_DEBUG_FS */ 183static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{ 184{
185 sc->sc_tv_timer = ktime_get();
187} 186}
188 187
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 188static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
190{ 189{
190 sc->sc_tv_data_ready = ktime_get();
191} 191}
192 192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 193static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
194{ 194{
195 sc->sc_tv_advance_start = ktime_get();
195} 196}
196 197
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 198static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
198{ 199{
200 sc->sc_tv_advance_stop = ktime_get();
199} 201}
200 202
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 203static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
202 struct o2net_sock_container *sc)
203{ 204{
205 sc->sc_tv_func_start = ktime_get();
204} 206}
205 207
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, 208static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
207 u32 msg_id)
208{ 209{
210 sc->sc_tv_func_stop = ktime_get();
209} 211}
210 212
213static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
214{
215 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
216}
217#else /* CONFIG_DEBUG_FS */
218# define o2net_init_nst(a, b, c, d, e)
219# define o2net_set_nst_sock_time(a)
220# define o2net_set_nst_send_time(a)
221# define o2net_set_nst_status_time(a)
222# define o2net_set_nst_sock_container(a, b)
223# define o2net_set_nst_msg_id(a, b)
224# define o2net_set_sock_timer(a)
225# define o2net_set_data_ready_time(a)
226# define o2net_set_advance_start_time(a)
227# define o2net_set_advance_stop_time(a)
228# define o2net_set_func_start_time(a)
229# define o2net_set_func_stop_time(a)
230# define o2net_get_func_run_time(a) (ktime_t)0
211#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
212 232
233#ifdef CONFIG_OCFS2_FS_STATS
234static void o2net_update_send_stats(struct o2net_send_tracking *nst,
235 struct o2net_sock_container *sc)
236{
237 sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
238 ktime_sub(ktime_get(),
239 nst->st_status_time));
240 sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
241 ktime_sub(nst->st_status_time,
242 nst->st_send_time));
243 sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
244 ktime_sub(nst->st_send_time,
245 nst->st_sock_time));
246 sc->sc_send_count++;
247}
248
249static void o2net_update_recv_stats(struct o2net_sock_container *sc)
250{
251 sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
252 o2net_get_func_run_time(sc));
253 sc->sc_recv_count++;
254}
255
256#else
257
258# define o2net_update_send_stats(a, b)
259
260# define o2net_update_recv_stats(sc)
261
262#endif /* CONFIG_OCFS2_FS_STATS */
263
213static inline int o2net_reconnect_delay(void) 264static inline int o2net_reconnect_delay(void)
214{ 265{
215 return o2nm_single_cluster->cl_reconnect_delay_ms; 266 return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
355 sc->sc_sock = NULL; 406 sc->sc_sock = NULL;
356 } 407 }
357 408
409 o2nm_undepend_item(&sc->sc_node->nd_item);
358 o2nm_node_put(sc->sc_node); 410 o2nm_node_put(sc->sc_node);
359 sc->sc_node = NULL; 411 sc->sc_node = NULL;
360 412
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
376{ 428{
377 struct o2net_sock_container *sc, *ret = NULL; 429 struct o2net_sock_container *sc, *ret = NULL;
378 struct page *page = NULL; 430 struct page *page = NULL;
431 int status = 0;
379 432
380 page = alloc_page(GFP_NOFS); 433 page = alloc_page(GFP_NOFS);
381 sc = kzalloc(sizeof(*sc), GFP_NOFS); 434 sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
386 o2nm_node_get(node); 439 o2nm_node_get(node);
387 sc->sc_node = node; 440 sc->sc_node = node;
388 441
442 /* pin the node item of the remote node */
443 status = o2nm_depend_item(&node->nd_item);
444 if (status) {
445 mlog_errno(status);
446 o2nm_node_put(node);
447 goto out;
448 }
389 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); 449 INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
390 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); 450 INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
391 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); 451 INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
546 if (sk->sk_user_data) { 606 if (sk->sk_user_data) {
547 struct o2net_sock_container *sc = sk->sk_user_data; 607 struct o2net_sock_container *sc = sk->sk_user_data;
548 sclog(sc, "data_ready hit\n"); 608 sclog(sc, "data_ready hit\n");
549 do_gettimeofday(&sc->sc_tv_data_ready); 609 o2net_set_data_ready_time(sc);
550 o2net_sc_queue_work(sc, &sc->sc_rx_work); 610 o2net_sc_queue_work(sc, &sc->sc_rx_work);
551 ready = sc->sc_data_ready; 611 ready = sc->sc_data_ready;
552 } else { 612 } else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
1070 o2net_set_nst_status_time(&nst); 1130 o2net_set_nst_status_time(&nst);
1071 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); 1131 wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
1072 1132
1133 o2net_update_send_stats(&nst, sc);
1134
1073 /* Note that we avoid overwriting the caller's status return 1135 /* Note that we avoid overwriting the caller's status return
1074 * variable if a system error was reported on the other 1136 * variable if a system error was reported on the other
1075 * side. Callers beware. */ 1137 * side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
1183 if (syserr != O2NET_ERR_NONE) 1245 if (syserr != O2NET_ERR_NONE)
1184 goto out_respond; 1246 goto out_respond;
1185 1247
1186 do_gettimeofday(&sc->sc_tv_func_start); 1248 o2net_set_func_start_time(sc);
1187 sc->sc_msg_key = be32_to_cpu(hdr->key); 1249 sc->sc_msg_key = be32_to_cpu(hdr->key);
1188 sc->sc_msg_type = be16_to_cpu(hdr->msg_type); 1250 sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
1189 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + 1251 handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
1190 be16_to_cpu(hdr->data_len), 1252 be16_to_cpu(hdr->data_len),
1191 nmh->nh_func_data, &ret_data); 1253 nmh->nh_func_data, &ret_data);
1192 do_gettimeofday(&sc->sc_tv_func_stop); 1254 o2net_set_func_stop_time(sc);
1255
1256 o2net_update_recv_stats(sc);
1193 1257
1194out_respond: 1258out_respond:
1195 /* this destroys the hdr, so don't use it after this */ 1259 /* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1300 size_t datalen; 1364 size_t datalen;
1301 1365
1302 sclog(sc, "receiving\n"); 1366 sclog(sc, "receiving\n");
1303 do_gettimeofday(&sc->sc_tv_advance_start); 1367 o2net_set_advance_start_time(sc);
1304 1368
1305 if (unlikely(sc->sc_handshake_ok == 0)) { 1369 if (unlikely(sc->sc_handshake_ok == 0)) {
1306 if(sc->sc_page_off < sizeof(struct o2net_handshake)) { 1370 if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1375 1439
1376out: 1440out:
1377 sclog(sc, "ret = %d\n", ret); 1441 sclog(sc, "ret = %d\n", ret);
1378 do_gettimeofday(&sc->sc_tv_advance_stop); 1442 o2net_set_advance_stop_time(sc);
1379 return ret; 1443 return ret;
1380} 1444}
1381 1445
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
1475{ 1539{
1476 struct o2net_sock_container *sc = (struct o2net_sock_container *)data; 1540 struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
1477 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); 1541 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1478 struct timeval now;
1479 1542
1480 do_gettimeofday(&now); 1543#ifdef CONFIG_DEBUG_FS
1544 ktime_t now = ktime_get();
1545#endif
1481 1546
1482 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " 1547 printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1483 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), 1548 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1484 o2net_idle_timeout() / 1000, 1549 o2net_idle_timeout() / 1000,
1485 o2net_idle_timeout() % 1000); 1550 o2net_idle_timeout() % 1000);
1486 mlog(ML_NOTICE, "here are some times that might help debug the " 1551
1487 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1552#ifdef CONFIG_DEBUG_FS
1488 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1553 mlog(ML_NOTICE, "Here are some times that might help debug the "
1489 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 1554 "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
1490 now.tv_sec, (long) now.tv_usec, 1555 "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
1491 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, 1556 (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
1492 sc->sc_tv_advance_start.tv_sec, 1557 (long long)ktime_to_us(sc->sc_tv_data_ready),
1493 (long) sc->sc_tv_advance_start.tv_usec, 1558 (long long)ktime_to_us(sc->sc_tv_advance_start),
1494 sc->sc_tv_advance_stop.tv_sec, 1559 (long long)ktime_to_us(sc->sc_tv_advance_stop),
1495 (long) sc->sc_tv_advance_stop.tv_usec,
1496 sc->sc_msg_key, sc->sc_msg_type, 1560 sc->sc_msg_key, sc->sc_msg_type,
1497 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, 1561 (long long)ktime_to_us(sc->sc_tv_func_start),
1498 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); 1562 (long long)ktime_to_us(sc->sc_tv_func_stop));
1563#endif
1499 1564
1500 /* 1565 /*
1501 * Initialize the nn_timeout so that the next connection attempt 1566 * Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1511 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1576 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1512 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1577 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1513 msecs_to_jiffies(o2net_keepalive_delay())); 1578 msecs_to_jiffies(o2net_keepalive_delay()));
1514 do_gettimeofday(&sc->sc_tv_timer); 1579 o2net_set_sock_timer(sc);
1515 mod_timer(&sc->sc_idle_timeout, 1580 mod_timer(&sc->sc_idle_timeout,
1516 jiffies + msecs_to_jiffies(o2net_idle_timeout())); 1581 jiffies + msecs_to_jiffies(o2net_idle_timeout()));
1517} 1582}
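
The tcp.c conversion above swaps do_gettimeofday() and struct timeval for ktime_get(), which reads the monotonic clock, so the recorded intervals can no longer be skewed by wall-clock adjustments; the CONFIG_OCFS2_FS_STATS accumulators then reduce to plain ktime_add()/ktime_sub() arithmetic. A sketch of the measurement idiom (names here are illustrative, not from the patch):

#include <linux/ktime.h>

static ktime_t op_start, op_total;

static void op_begin(void)
{
        op_start = ktime_get();         /* monotonic, not wall clock */
}

static void op_end(void)
{
        /* total += now - start, as o2net_update_send_stats() does */
        op_total = ktime_add(op_total, ktime_sub(ktime_get(), op_start));
}

static s64 op_total_us(void)
{
        return ktime_to_us(op_total);   /* report in microseconds */
}
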
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf9eb4b..4cbcb65784a3 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,27 @@ struct o2net_sock_container {
166 /* original handlers for the sockets */ 166 /* original handlers for the sockets */
167 void (*sc_state_change)(struct sock *sk); 167 void (*sc_state_change)(struct sock *sk);
168 void (*sc_data_ready)(struct sock *sk, int bytes); 168 void (*sc_data_ready)(struct sock *sk, int bytes);
169#ifdef CONFIG_DEBUG_FS 169
170 struct list_head sc_net_debug_item;
171#endif
172 struct timeval sc_tv_timer;
173 struct timeval sc_tv_data_ready;
174 struct timeval sc_tv_advance_start;
175 struct timeval sc_tv_advance_stop;
176 struct timeval sc_tv_func_start;
177 struct timeval sc_tv_func_stop;
178 u32 sc_msg_key; 170 u32 sc_msg_key;
179 u16 sc_msg_type; 171 u16 sc_msg_type;
180 172
173#ifdef CONFIG_DEBUG_FS
174 struct list_head sc_net_debug_item;
175 ktime_t sc_tv_timer;
176 ktime_t sc_tv_data_ready;
177 ktime_t sc_tv_advance_start;
178 ktime_t sc_tv_advance_stop;
179 ktime_t sc_tv_func_start;
180 ktime_t sc_tv_func_stop;
181#endif
182#ifdef CONFIG_OCFS2_FS_STATS
183 ktime_t sc_tv_acquiry_total;
184 ktime_t sc_tv_send_total;
185 ktime_t sc_tv_status_total;
186 u32 sc_send_count;
187 u32 sc_recv_count;
188 ktime_t sc_tv_process_total;
189#endif
181 struct mutex sc_send_lock; 190 struct mutex sc_send_lock;
182}; 191};
183 192
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
220 u32 st_msg_type; 229 u32 st_msg_type;
221 u32 st_msg_key; 230 u32 st_msg_key;
222 u8 st_node; 231 u8 st_node;
223 struct timeval st_sock_time; 232 ktime_t st_sock_time;
224 struct timeval st_send_time; 233 ktime_t st_send_time;
225 struct timeval st_status_time; 234 ktime_t st_status_time;
226}; 235};
227#else 236#else
228struct o2net_send_tracking { 237struct o2net_send_tracking {
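
In tcp_internal.h the per-event timestamps now exist only under CONFIG_DEBUG_FS (debugfs is their only reader) and the running totals only under CONFIG_OCFS2_FS_STATS, so non-debug builds carry a smaller o2net_sock_container. The other half of the idiom is the sc_*_ns() accessor block added to netdebug.c above; a compressed sketch of the pattern, with illustrative names:

#include <linux/ktime.h>
#include <linux/types.h>

struct conn {
#ifdef CONFIG_OCFS2_FS_STATS
        ktime_t send_total;     /* fields exist only when someone reads them */
        u32 send_count;
#endif
};

#ifdef CONFIG_OCFS2_FS_STATS
# define conn_send_count(c)     ((c)->send_count)
#else
# define conn_send_count(c)     (0U)    /* stats compiled out: constant 0 */
#endif
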
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f44999156839..3a3ed4bb794b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
90 90
91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
92{ 92{
93 mlog_entry_void(); 93 struct dlm_lock_resource *res;
94 94
95 BUG_ON(!dlm); 95 BUG_ON(!dlm);
96 BUG_ON(!lock); 96 BUG_ON(!lock);
97 97
98 res = lock->lockres;
99
98 assert_spin_locked(&dlm->ast_lock); 100 assert_spin_locked(&dlm->ast_lock);
101
99 if (!list_empty(&lock->ast_list)) { 102 if (!list_empty(&lock->ast_list)) {
100 mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n", 103 mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
104 "AST list not empty, pending %d, newlevel %d\n",
105 dlm->name, res->lockname.len, res->lockname.name,
106 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
107 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
101 lock->ast_pending, lock->ml.type); 108 lock->ast_pending, lock->ml.type);
102 BUG(); 109 BUG();
103 } 110 }
104 if (lock->ast_pending) 111 if (lock->ast_pending)
105 mlog(0, "lock has an ast getting flushed right now\n"); 112 mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
113 dlm->name, res->lockname.len, res->lockname.name,
114 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
115 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
106 116
107 /* putting lock on list, add a ref */ 117 /* putting lock on list, add a ref */
108 dlm_lock_get(lock); 118 dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
110 120
111 /* check to see if this ast obsoletes the bast */ 121 /* check to see if this ast obsoletes the bast */
112 if (dlm_should_cancel_bast(dlm, lock)) { 122 if (dlm_should_cancel_bast(dlm, lock)) {
113 struct dlm_lock_resource *res = lock->lockres; 123 mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
114 mlog(0, "%s: cancelling bast for %.*s\n", 124 dlm->name, res->lockname.len, res->lockname.name,
115 dlm->name, res->lockname.len, res->lockname.name); 125 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
126 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
116 lock->bast_pending = 0; 127 lock->bast_pending = 0;
117 list_del_init(&lock->bast_list); 128 list_del_init(&lock->bast_list);
118 lock->ml.highest_blocked = LKM_IVMODE; 129 lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
134 145
135void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 146void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
136{ 147{
137 mlog_entry_void();
138
139 BUG_ON(!dlm); 148 BUG_ON(!dlm);
140 BUG_ON(!lock); 149 BUG_ON(!lock);
141 150
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
147 156
148void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 157void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
149{ 158{
150 mlog_entry_void(); 159 struct dlm_lock_resource *res;
151 160
152 BUG_ON(!dlm); 161 BUG_ON(!dlm);
153 BUG_ON(!lock); 162 BUG_ON(!lock);
163
154 assert_spin_locked(&dlm->ast_lock); 164 assert_spin_locked(&dlm->ast_lock);
155 165
166 res = lock->lockres;
167
156 BUG_ON(!list_empty(&lock->bast_list)); 168 BUG_ON(!list_empty(&lock->bast_list));
157 if (lock->bast_pending) 169 if (lock->bast_pending)
158 mlog(0, "lock has a bast getting flushed right now\n"); 170 mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
171 dlm->name, res->lockname.len, res->lockname.name,
172 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
173 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
159 174
160 /* putting lock on list, add a ref */ 175 /* putting lock on list, add a ref */
161 dlm_lock_get(lock); 176 dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
167 182
168void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 183void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
169{ 184{
170 mlog_entry_void();
171
172 BUG_ON(!dlm); 185 BUG_ON(!dlm);
173 BUG_ON(!lock); 186 BUG_ON(!lock);
174 187
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
213 dlm_astlockfunc_t *fn; 226 dlm_astlockfunc_t *fn;
214 struct dlm_lockstatus *lksb; 227 struct dlm_lockstatus *lksb;
215 228
216 mlog_entry_void(); 229 mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
230 res->lockname.len, res->lockname.name,
231 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
232 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
217 233
218 lksb = lock->lksb; 234 lksb = lock->lksb;
219 fn = lock->ast; 235 fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
231 struct dlm_lockstatus *lksb; 247 struct dlm_lockstatus *lksb;
232 int lksbflags; 248 int lksbflags;
233 249
234 mlog_entry_void(); 250 mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
251 res->lockname.len, res->lockname.name,
252 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
253 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
235 254
236 lksb = lock->lksb; 255 lksb = lock->lksb;
237 BUG_ON(lock->ml.node == dlm->node_num); 256 BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
250{ 269{
251 dlm_bastlockfunc_t *fn = lock->bast; 270 dlm_bastlockfunc_t *fn = lock->bast;
252 271
253 mlog_entry_void();
254 BUG_ON(lock->ml.node != dlm->node_num); 272 BUG_ON(lock->ml.node != dlm->node_num);
255 273
274 mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
275 dlm->name, res->lockname.len, res->lockname.name,
276 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
277 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
278 blocked_type);
279
256 (*fn)(lock->astdata, blocked_type); 280 (*fn)(lock->astdata, blocked_type);
257} 281}
258 282
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
332 /* cannot get a proxy ast message if this node owns it */ 356 /* cannot get a proxy ast message if this node owns it */
333 BUG_ON(res->owner == dlm->node_num); 357 BUG_ON(res->owner == dlm->node_num);
334 358
335 mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name); 359 mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
360 res->lockname.name);
336 361
337 spin_lock(&res->spinlock); 362 spin_lock(&res->spinlock);
338 if (res->state & DLM_LOCK_RES_RECOVERING) { 363 if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
382 if (past->type == DLM_AST) { 407 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 408 /* do not alter lock refcount. switching lists. */
384 list_move_tail(&lock->list, &res->granted); 409 list_move_tail(&lock->list, &res->granted);
385 mlog(0, "ast: Adding to granted list... type=%d, " 410 mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
386 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 411 dlm->name, res->lockname.len, res->lockname.name,
412 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
413 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
414 lock->ml.type, lock->ml.convert_type);
415
387 if (lock->ml.convert_type != LKM_IVMODE) { 416 if (lock->ml.convert_type != LKM_IVMODE) {
388 lock->ml.type = lock->ml.convert_type; 417 lock->ml.type = lock->ml.convert_type;
389 lock->ml.convert_type = LKM_IVMODE; 418 lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
426 size_t veclen = 1; 455 size_t veclen = 1;
427 int status; 456 int status;
428 457
429 mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n", 458 mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
430 res->lockname.len, res->lockname.name, lock->ml.node, 459 res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
431 msg_type, blocked_type); 460 blocked_type);
432 461
433 memset(&past, 0, sizeof(struct dlm_proxy_ast)); 462 memset(&past, 0, sizeof(struct dlm_proxy_ast));
434 past.node_idx = dlm->node_num; 463 past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
441 vec[0].iov_len = sizeof(struct dlm_proxy_ast); 470 vec[0].iov_len = sizeof(struct dlm_proxy_ast);
442 vec[0].iov_base = &past; 471 vec[0].iov_base = &past;
443 if (flags & DLM_LKSB_GET_LVB) { 472 if (flags & DLM_LKSB_GET_LVB) {
444 mlog(0, "returning requested LVB data\n");
445 be32_add_cpu(&past.flags, LKM_GET_LVB); 473 be32_add_cpu(&past.flags, LKM_GET_LVB);
446 vec[1].iov_len = DLM_LVB_LEN; 474 vec[1].iov_len = DLM_LVB_LEN;
447 vec[1].iov_base = lock->lksb->lvb; 475 vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, 479 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
452 lock->ml.node, &status); 480 lock->ml.node, &status);
453 if (ret < 0) 481 if (ret < 0)
454 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 482 mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
455 "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key, 483 dlm->name, res->lockname.len, res->lockname.name, ret,
456 lock->ml.node); 484 lock->ml.node);
457 else { 485 else {
458 if (status == DLM_RECOVERING) { 486 if (status == DLM_RECOVERING) {
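
The reworked dlmast.c messages identify every lock as "res %.*s, lock %u:%llu", where the %u:%llu pair is unpacked from the 64-bit lock cookie by the dlm_get_lock_cookie_node/seq helpers. A simplified sketch of that decomposition, assuming the layout those helpers read (node number in the top byte, per-node sequence in the low 56 bits):

#include <linux/types.h>

static inline u8 cookie_node(u64 cookie)
{
        return (u8)(cookie >> 56);              /* owning node number */
}

static inline unsigned long long cookie_seq(u64 cookie)
{
        return cookie & 0x00ffffffffffffffULL;  /* per-node sequence */
}
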
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index b36d0bf77a5a..4bdf7baee344 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -50,10 +50,10 @@
50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) 50#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
51 51
52enum dlm_mle_type { 52enum dlm_mle_type {
53 DLM_MLE_BLOCK, 53 DLM_MLE_BLOCK = 0,
54 DLM_MLE_MASTER, 54 DLM_MLE_MASTER = 1,
55 DLM_MLE_MIGRATION, 55 DLM_MLE_MIGRATION = 2,
56 DLM_MLE_NUM_TYPES 56 DLM_MLE_NUM_TYPES = 3,
57}; 57};
58 58
59struct dlm_master_list_entry { 59struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
82 82
83enum dlm_ast_type { 83enum dlm_ast_type {
84 DLM_AST = 0, 84 DLM_AST = 0,
85 DLM_BAST, 85 DLM_BAST = 1,
86 DLM_ASTUNLOCK 86 DLM_ASTUNLOCK = 2,
87}; 87};
88 88
89 89
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
119 119
120enum dlm_ctxt_state { 120enum dlm_ctxt_state {
121 DLM_CTXT_NEW = 0, 121 DLM_CTXT_NEW = 0,
122 DLM_CTXT_JOINED, 122 DLM_CTXT_JOINED = 1,
123 DLM_CTXT_IN_SHUTDOWN, 123 DLM_CTXT_IN_SHUTDOWN = 2,
124 DLM_CTXT_LEAVING, 124 DLM_CTXT_LEAVING = 3,
125}; 125};
126 126
127struct dlm_ctxt 127struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
388 388
389enum dlm_lockres_list { 389enum dlm_lockres_list {
390 DLM_GRANTED_LIST = 0, 390 DLM_GRANTED_LIST = 0,
391 DLM_CONVERTING_LIST, 391 DLM_CONVERTING_LIST = 1,
392 DLM_BLOCKED_LIST 392 DLM_BLOCKED_LIST = 2,
393}; 393};
394 394
395static inline int dlm_lvb_is_empty(char *lvb) 395static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
427 427
428 428
429enum { 429enum {
430 DLM_MASTER_REQUEST_MSG = 500, 430 DLM_MASTER_REQUEST_MSG = 500,
431 DLM_UNUSED_MSG1, /* 501 */ 431 DLM_UNUSED_MSG1 = 501,
432 DLM_ASSERT_MASTER_MSG, /* 502 */ 432 DLM_ASSERT_MASTER_MSG = 502,
433 DLM_CREATE_LOCK_MSG, /* 503 */ 433 DLM_CREATE_LOCK_MSG = 503,
434 DLM_CONVERT_LOCK_MSG, /* 504 */ 434 DLM_CONVERT_LOCK_MSG = 504,
435 DLM_PROXY_AST_MSG, /* 505 */ 435 DLM_PROXY_AST_MSG = 505,
436 DLM_UNLOCK_LOCK_MSG, /* 506 */ 436 DLM_UNLOCK_LOCK_MSG = 506,
437 DLM_DEREF_LOCKRES_MSG, /* 507 */ 437 DLM_DEREF_LOCKRES_MSG = 507,
438 DLM_MIGRATE_REQUEST_MSG, /* 508 */ 438 DLM_MIGRATE_REQUEST_MSG = 508,
439 DLM_MIG_LOCKRES_MSG, /* 509 */ 439 DLM_MIG_LOCKRES_MSG = 509,
440 DLM_QUERY_JOIN_MSG, /* 510 */ 440 DLM_QUERY_JOIN_MSG = 510,
441 DLM_ASSERT_JOINED_MSG, /* 511 */ 441 DLM_ASSERT_JOINED_MSG = 511,
442 DLM_CANCEL_JOIN_MSG, /* 512 */ 442 DLM_CANCEL_JOIN_MSG = 512,
443 DLM_EXIT_DOMAIN_MSG, /* 513 */ 443 DLM_EXIT_DOMAIN_MSG = 513,
444 DLM_MASTER_REQUERY_MSG, /* 514 */ 444 DLM_MASTER_REQUERY_MSG = 514,
445 DLM_LOCK_REQUEST_MSG, /* 515 */ 445 DLM_LOCK_REQUEST_MSG = 515,
446 DLM_RECO_DATA_DONE_MSG, /* 516 */ 446 DLM_RECO_DATA_DONE_MSG = 516,
447 DLM_BEGIN_RECO_MSG, /* 517 */ 447 DLM_BEGIN_RECO_MSG = 517,
448 DLM_FINALIZE_RECO_MSG, /* 518 */ 448 DLM_FINALIZE_RECO_MSG = 518,
449 DLM_QUERY_REGION, /* 519 */ 449 DLM_QUERY_REGION = 519,
450 DLM_QUERY_NODEINFO, /* 520 */ 450 DLM_QUERY_NODEINFO = 520,
451}; 451};
452 452
453struct dlm_reco_node_data 453struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
460enum { 460enum {
461 DLM_RECO_NODE_DATA_DEAD = -1, 461 DLM_RECO_NODE_DATA_DEAD = -1,
462 DLM_RECO_NODE_DATA_INIT = 0, 462 DLM_RECO_NODE_DATA_INIT = 0,
463 DLM_RECO_NODE_DATA_REQUESTING, 463 DLM_RECO_NODE_DATA_REQUESTING = 1,
464 DLM_RECO_NODE_DATA_REQUESTED, 464 DLM_RECO_NODE_DATA_REQUESTED = 2,
465 DLM_RECO_NODE_DATA_RECEIVING, 465 DLM_RECO_NODE_DATA_RECEIVING = 3,
466 DLM_RECO_NODE_DATA_DONE, 466 DLM_RECO_NODE_DATA_DONE = 4,
467 DLM_RECO_NODE_DATA_FINALIZE_SENT, 467 DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
468}; 468};
469 469
470 470
471enum { 471enum {
472 DLM_MASTER_RESP_NO = 0, 472 DLM_MASTER_RESP_NO = 0,
473 DLM_MASTER_RESP_YES, 473 DLM_MASTER_RESP_YES = 1,
474 DLM_MASTER_RESP_MAYBE, 474 DLM_MASTER_RESP_MAYBE = 2,
475 DLM_MASTER_RESP_ERROR 475 DLM_MASTER_RESP_ERROR = 3,
476}; 476};
477 477
478 478
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
649#define DLM_MOD_KEY (0x666c6172) 649#define DLM_MOD_KEY (0x666c6172)
650enum dlm_query_join_response_code { 650enum dlm_query_join_response_code {
651 JOIN_DISALLOW = 0, 651 JOIN_DISALLOW = 0,
652 JOIN_OK, 652 JOIN_OK = 1,
653 JOIN_OK_NO_MAP, 653 JOIN_OK_NO_MAP = 2,
654 JOIN_PROTOCOL_MISMATCH, 654 JOIN_PROTOCOL_MISMATCH = 3,
655}; 655};
656 656
657struct dlm_query_join_packet { 657struct dlm_query_join_packet {
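
Several of these dlmcommon.h enums are wire-visible (message numbers, join response codes) or used as array indices, so the hunks pin every enumerator to an explicit value: the numbering is documented at the definition and cannot drift if an entry is later inserted. An illustrative example of the failure mode this guards against:

/* Implicit numbering: inserting one entry renumbers everything after it. */
enum msg_implicit {
        MSG_FIRST = 500,
        MSG_INSERTED,   /* takes 501 ... */
        MSG_SECOND,     /* ... silently moving this from 501 to 502 */
};

/* Pinned values keep the on-wire numbers stable. */
enum msg_explicit {
        MSG_FIRST_V2 = 500,
        MSG_SECOND_V2 = 501,    /* cannot move, however entries are added */
};
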
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 272ec8631a51..04a32be0aeb9 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
370 kref_get(&dc->debug_refcnt); 370 kref_get(&dc->debug_refcnt);
371} 371}
372 372
373static struct debug_buffer *debug_buffer_allocate(void) 373static int debug_release(struct inode *inode, struct file *file)
374{ 374{
375 struct debug_buffer *db = NULL; 375 free_page((unsigned long)file->private_data);
376 376 return 0;
377 db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
378 if (!db)
379 goto bail;
380
381 db->len = PAGE_SIZE;
382 db->buf = kmalloc(db->len, GFP_KERNEL);
383 if (!db->buf)
384 goto bail;
385
386 return db;
387bail:
388 kfree(db);
389 return NULL;
390}
391
392static ssize_t debug_buffer_read(struct file *file, char __user *buf,
393 size_t nbytes, loff_t *ppos)
394{
395 struct debug_buffer *db = file->private_data;
396
397 return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
398}
399
400static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
401{
402 struct debug_buffer *db = file->private_data;
403 loff_t new = -1;
404
405 switch (whence) {
406 case 0:
407 new = off;
408 break;
409 case 1:
410 new = file->f_pos + off;
411 break;
412 }
413
414 if (new < 0 || new > db->len)
415 return -EINVAL;
416
417 return (file->f_pos = new);
418} 377}
419 378
420static int debug_buffer_release(struct inode *inode, struct file *file) 379static ssize_t debug_read(struct file *file, char __user *buf,
380 size_t nbytes, loff_t *ppos)
421{ 381{
422 struct debug_buffer *db = file->private_data; 382 return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
423 383 i_size_read(file->f_mapping->host));
424 if (db)
425 kfree(db->buf);
426 kfree(db);
427
428 return 0;
429} 384}
430/* end - util funcs */ 385/* end - util funcs */
431 386
432/* begin - purge list funcs */ 387/* begin - purge list funcs */
433static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 388static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
434{ 389{
435 struct dlm_lock_resource *res; 390 struct dlm_lock_resource *res;
436 int out = 0; 391 int out = 0;
437 unsigned long total = 0; 392 unsigned long total = 0;
438 393
439 out += snprintf(db->buf + out, db->len - out, 394 out += snprintf(buf + out, len - out,
440 "Dumping Purgelist for Domain: %s\n", dlm->name); 395 "Dumping Purgelist for Domain: %s\n", dlm->name);
441 396
442 spin_lock(&dlm->spinlock); 397 spin_lock(&dlm->spinlock);
443 list_for_each_entry(res, &dlm->purge_list, purge) { 398 list_for_each_entry(res, &dlm->purge_list, purge) {
444 ++total; 399 ++total;
445 if (db->len - out < 100) 400 if (len - out < 100)
446 continue; 401 continue;
447 spin_lock(&res->spinlock); 402 spin_lock(&res->spinlock);
448 out += stringify_lockname(res->lockname.name, 403 out += stringify_lockname(res->lockname.name,
449 res->lockname.len, 404 res->lockname.len,
450 db->buf + out, db->len - out); 405 buf + out, len - out);
451 out += snprintf(db->buf + out, db->len - out, "\t%ld\n", 406 out += snprintf(buf + out, len - out, "\t%ld\n",
452 (jiffies - res->last_used)/HZ); 407 (jiffies - res->last_used)/HZ);
453 spin_unlock(&res->spinlock); 408 spin_unlock(&res->spinlock);
454 } 409 }
455 spin_unlock(&dlm->spinlock); 410 spin_unlock(&dlm->spinlock);
456 411
457 out += snprintf(db->buf + out, db->len - out, 412 out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
458 "Total on list: %ld\n", total);
459 413
460 return out; 414 return out;
461} 415}
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
463static int debug_purgelist_open(struct inode *inode, struct file *file) 417static int debug_purgelist_open(struct inode *inode, struct file *file)
464{ 418{
465 struct dlm_ctxt *dlm = inode->i_private; 419 struct dlm_ctxt *dlm = inode->i_private;
466 struct debug_buffer *db; 420 char *buf = NULL;
467 421
468 db = debug_buffer_allocate(); 422 buf = (char *) get_zeroed_page(GFP_NOFS);
469 if (!db) 423 if (!buf)
470 goto bail; 424 goto bail;
471 425
472 db->len = debug_purgelist_print(dlm, db); 426 i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
473 427
474 file->private_data = db; 428 file->private_data = buf;
475 429
476 return 0; 430 return 0;
477bail: 431bail:
@@ -480,14 +434,14 @@ bail:
480 434
481static const struct file_operations debug_purgelist_fops = { 435static const struct file_operations debug_purgelist_fops = {
482 .open = debug_purgelist_open, 436 .open = debug_purgelist_open,
483 .release = debug_buffer_release, 437 .release = debug_release,
484 .read = debug_buffer_read, 438 .read = debug_read,
485 .llseek = debug_buffer_llseek, 439 .llseek = generic_file_llseek,
486}; 440};
487/* end - purge list funcs */ 441/* end - purge list funcs */
488 442
489/* begin - debug mle funcs */ 443/* begin - debug mle funcs */
490static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 444static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
491{ 445{
492 struct dlm_master_list_entry *mle; 446 struct dlm_master_list_entry *mle;
493 struct hlist_head *bucket; 447 struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
495 int i, out = 0; 449 int i, out = 0;
496 unsigned long total = 0, longest = 0, bucket_count = 0; 450 unsigned long total = 0, longest = 0, bucket_count = 0;
497 451
498 out += snprintf(db->buf + out, db->len - out, 452 out += snprintf(buf + out, len - out,
499 "Dumping MLEs for Domain: %s\n", dlm->name); 453 "Dumping MLEs for Domain: %s\n", dlm->name);
500 454
501 spin_lock(&dlm->master_lock); 455 spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
506 master_hash_node); 460 master_hash_node);
507 ++total; 461 ++total;
508 ++bucket_count; 462 ++bucket_count;
509 if (db->len - out < 200) 463 if (len - out < 200)
510 continue; 464 continue;
511 out += dump_mle(mle, db->buf + out, db->len - out); 465 out += dump_mle(mle, buf + out, len - out);
512 } 466 }
513 longest = max(longest, bucket_count); 467 longest = max(longest, bucket_count);
514 bucket_count = 0; 468 bucket_count = 0;
515 } 469 }
516 spin_unlock(&dlm->master_lock); 470 spin_unlock(&dlm->master_lock);
517 471
518 out += snprintf(db->buf + out, db->len - out, 472 out += snprintf(buf + out, len - out,
519 "Total: %ld, Longest: %ld\n", total, longest); 473 "Total: %ld, Longest: %ld\n", total, longest);
520 return out; 474 return out;
521} 475}
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
523static int debug_mle_open(struct inode *inode, struct file *file) 477static int debug_mle_open(struct inode *inode, struct file *file)
524{ 478{
525 struct dlm_ctxt *dlm = inode->i_private; 479 struct dlm_ctxt *dlm = inode->i_private;
526 struct debug_buffer *db; 480 char *buf = NULL;
527 481
528 db = debug_buffer_allocate(); 482 buf = (char *) get_zeroed_page(GFP_NOFS);
529 if (!db) 483 if (!buf)
530 goto bail; 484 goto bail;
531 485
532 db->len = debug_mle_print(dlm, db); 486 i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
533 487
534 file->private_data = db; 488 file->private_data = buf;
535 489
536 return 0; 490 return 0;
537bail: 491bail:
@@ -540,9 +494,9 @@ bail:
540 494
541static const struct file_operations debug_mle_fops = { 495static const struct file_operations debug_mle_fops = {
542 .open = debug_mle_open, 496 .open = debug_mle_open,
543 .release = debug_buffer_release, 497 .release = debug_release,
544 .read = debug_buffer_read, 498 .read = debug_read,
545 .llseek = debug_buffer_llseek, 499 .llseek = generic_file_llseek,
546}; 500};
547 501
548/* end - debug mle funcs */ 502/* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
757/* end - debug lockres funcs */ 711/* end - debug lockres funcs */
758 712
759/* begin - debug state funcs */ 713/* begin - debug state funcs */
760static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) 714static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
761{ 715{
762 int out = 0; 716 int out = 0;
763 struct dlm_reco_node_data *node; 717 struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
781 } 735 }
782 736
783 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ 737 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */
784 out += snprintf(db->buf + out, db->len - out, 738 out += snprintf(buf + out, len - out,
785 "Domain: %s Key: 0x%08x Protocol: %d.%d\n", 739 "Domain: %s Key: 0x%08x Protocol: %d.%d\n",
786 dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, 740 dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
787 dlm->dlm_locking_proto.pv_minor); 741 dlm->dlm_locking_proto.pv_minor);
788 742
789 /* Thread Pid: xxx Node: xxx State: xxxxx */ 743 /* Thread Pid: xxx Node: xxx State: xxxxx */
790 out += snprintf(db->buf + out, db->len - out, 744 out += snprintf(buf + out, len - out,
791 "Thread Pid: %d Node: %d State: %s\n", 745 "Thread Pid: %d Node: %d State: %s\n",
792 dlm->dlm_thread_task->pid, dlm->node_num, state); 746 task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
793 747
794 /* Number of Joins: xxx Joining Node: xxx */ 748 /* Number of Joins: xxx Joining Node: xxx */
795 out += snprintf(db->buf + out, db->len - out, 749 out += snprintf(buf + out, len - out,
796 "Number of Joins: %d Joining Node: %d\n", 750 "Number of Joins: %d Joining Node: %d\n",
797 dlm->num_joins, dlm->joining_node); 751 dlm->num_joins, dlm->joining_node);
798 752
799 /* Domain Map: xx xx xx */ 753 /* Domain Map: xx xx xx */
800 out += snprintf(db->buf + out, db->len - out, "Domain Map: "); 754 out += snprintf(buf + out, len - out, "Domain Map: ");
801 out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, 755 out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
802 db->buf + out, db->len - out); 756 buf + out, len - out);
803 out += snprintf(db->buf + out, db->len - out, "\n"); 757 out += snprintf(buf + out, len - out, "\n");
804 758
805 /* Live Map: xx xx xx */ 759 /* Live Map: xx xx xx */
806 out += snprintf(db->buf + out, db->len - out, "Live Map: "); 760 out += snprintf(buf + out, len - out, "Live Map: ");
807 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, 761 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
808 db->buf + out, db->len - out); 762 buf + out, len - out);
809 out += snprintf(db->buf + out, db->len - out, "\n"); 763 out += snprintf(buf + out, len - out, "\n");
810 764
811 /* Lock Resources: xxx (xxx) */ 765 /* Lock Resources: xxx (xxx) */
812 out += snprintf(db->buf + out, db->len - out, 766 out += snprintf(buf + out, len - out,
813 "Lock Resources: %d (%d)\n", 767 "Lock Resources: %d (%d)\n",
814 atomic_read(&dlm->res_cur_count), 768 atomic_read(&dlm->res_cur_count),
815 atomic_read(&dlm->res_tot_count)); 769 atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
821 cur_mles += atomic_read(&dlm->mle_cur_count[i]); 775 cur_mles += atomic_read(&dlm->mle_cur_count[i]);
822 776
823 /* MLEs: xxx (xxx) */ 777 /* MLEs: xxx (xxx) */
824 out += snprintf(db->buf + out, db->len - out, 778 out += snprintf(buf + out, len - out,
825 "MLEs: %d (%d)\n", cur_mles, tot_mles); 779 "MLEs: %d (%d)\n", cur_mles, tot_mles);
826 780
827 /* Blocking: xxx (xxx) */ 781 /* Blocking: xxx (xxx) */
828 out += snprintf(db->buf + out, db->len - out, 782 out += snprintf(buf + out, len - out,
829 " Blocking: %d (%d)\n", 783 " Blocking: %d (%d)\n",
830 atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), 784 atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
831 atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); 785 atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
832 786
833 /* Mastery: xxx (xxx) */ 787 /* Mastery: xxx (xxx) */
834 out += snprintf(db->buf + out, db->len - out, 788 out += snprintf(buf + out, len - out,
835 " Mastery: %d (%d)\n", 789 " Mastery: %d (%d)\n",
836 atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), 790 atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
837 atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); 791 atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
838 792
839 /* Migration: xxx (xxx) */ 793 /* Migration: xxx (xxx) */
840 out += snprintf(db->buf + out, db->len - out, 794 out += snprintf(buf + out, len - out,
841 " Migration: %d (%d)\n", 795 " Migration: %d (%d)\n",
842 atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), 796 atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
843 atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); 797 atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
844 798
845 /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ 799 /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
846 out += snprintf(db->buf + out, db->len - out, 800 out += snprintf(buf + out, len - out,
847 "Lists: Dirty=%s Purge=%s PendingASTs=%s " 801 "Lists: Dirty=%s Purge=%s PendingASTs=%s "
848 "PendingBASTs=%s\n", 802 "PendingBASTs=%s\n",
849 (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), 803 (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
852 (list_empty(&dlm->pending_basts) ? "Empty" : "InUse")); 806 (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
853 807
854 /* Purge Count: xxx Refs: xxx */ 808 /* Purge Count: xxx Refs: xxx */
855 out += snprintf(db->buf + out, db->len - out, 809 out += snprintf(buf + out, len - out,
856 "Purge Count: %d Refs: %d\n", dlm->purge_count, 810 "Purge Count: %d Refs: %d\n", dlm->purge_count,
857 atomic_read(&dlm->dlm_refs.refcount)); 811 atomic_read(&dlm->dlm_refs.refcount));
858 812
859 /* Dead Node: xxx */ 813 /* Dead Node: xxx */
860 out += snprintf(db->buf + out, db->len - out, 814 out += snprintf(buf + out, len - out,
861 "Dead Node: %d\n", dlm->reco.dead_node); 815 "Dead Node: %d\n", dlm->reco.dead_node);
862 816
863 /* What about DLM_RECO_STATE_FINALIZE? */ 817 /* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
867 state = "INACTIVE"; 821 state = "INACTIVE";
868 822
869 /* Recovery Pid: xxxx Master: xxx State: xxxx */ 823 /* Recovery Pid: xxxx Master: xxx State: xxxx */
870 out += snprintf(db->buf + out, db->len - out, 824 out += snprintf(buf + out, len - out,
871 "Recovery Pid: %d Master: %d State: %s\n", 825 "Recovery Pid: %d Master: %d State: %s\n",
872 dlm->dlm_reco_thread_task->pid, 826 task_pid_nr(dlm->dlm_reco_thread_task),
873 dlm->reco.new_master, state); 827 dlm->reco.new_master, state);
874 828
875 /* Recovery Map: xx xx */ 829 /* Recovery Map: xx xx */
876 out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); 830 out += snprintf(buf + out, len - out, "Recovery Map: ");
877 out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, 831 out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
878 db->buf + out, db->len - out); 832 buf + out, len - out);
879 out += snprintf(db->buf + out, db->len - out, "\n"); 833 out += snprintf(buf + out, len - out, "\n");
880 834
881 /* Recovery Node State: */ 835 /* Recovery Node State: */
882 out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); 836 out += snprintf(buf + out, len - out, "Recovery Node State:\n");
883 list_for_each_entry(node, &dlm->reco.node_data, list) { 837 list_for_each_entry(node, &dlm->reco.node_data, list) {
884 switch (node->state) { 838 switch (node->state) {
885 case DLM_RECO_NODE_DATA_INIT: 839 case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
907 state = "BAD"; 861 state = "BAD";
908 break; 862 break;
909 } 863 }
910 out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", 864 out += snprintf(buf + out, len - out, "\t%u - %s\n",
911 node->node_num, state); 865 node->node_num, state);
912 } 866 }
913 867
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
919static int debug_state_open(struct inode *inode, struct file *file) 873static int debug_state_open(struct inode *inode, struct file *file)
920{ 874{
921 struct dlm_ctxt *dlm = inode->i_private; 875 struct dlm_ctxt *dlm = inode->i_private;
922 struct debug_buffer *db = NULL; 876 char *buf = NULL;
923 877
924 db = debug_buffer_allocate(); 878 buf = (char *) get_zeroed_page(GFP_NOFS);
925 if (!db) 879 if (!buf)
926 goto bail; 880 goto bail;
927 881
928 db->len = debug_state_print(dlm, db); 882 i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
929 883
930 file->private_data = db; 884 file->private_data = buf;
931 885
932 return 0; 886 return 0;
933bail: 887bail:
@@ -936,9 +890,9 @@ bail:
936 890
937static const struct file_operations debug_state_fops = { 891static const struct file_operations debug_state_fops = {
938 .open = debug_state_open, 892 .open = debug_state_open,
939 .release = debug_buffer_release, 893 .release = debug_release,
940 .read = debug_buffer_read, 894 .read = debug_read,
941 .llseek = debug_buffer_llseek, 895 .llseek = generic_file_llseek,
942}; 896};
943/* end - debug state funcs */ 897/* end - debug state funcs */
944 898
@@ -1002,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
1002 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; 956 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
1003 957
1004 if (dc) { 958 if (dc) {
1005 if (dc->debug_purgelist_dentry) 959 debugfs_remove(dc->debug_purgelist_dentry);
1006 debugfs_remove(dc->debug_purgelist_dentry); 960 debugfs_remove(dc->debug_mle_dentry);
1007 if (dc->debug_mle_dentry) 961 debugfs_remove(dc->debug_lockres_dentry);
1008 debugfs_remove(dc->debug_mle_dentry); 962 debugfs_remove(dc->debug_state_dentry);
1009 if (dc->debug_lockres_dentry)
1010 debugfs_remove(dc->debug_lockres_dentry);
1011 if (dc->debug_state_dentry)
1012 debugfs_remove(dc->debug_state_dentry);
1013 dlm_debug_put(dc); 963 dlm_debug_put(dc);
1014 } 964 }
1015} 965}
@@ -1040,8 +990,7 @@ bail:
1040 990
1041void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) 991void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
1042{ 992{
1043 if (dlm->dlm_debugfs_subroot) 993 debugfs_remove(dlm->dlm_debugfs_subroot);
1044 debugfs_remove(dlm->dlm_debugfs_subroot);
1045} 994}
1046 995
1047/* debugfs root */ 996/* debugfs root */
@@ -1057,7 +1006,6 @@ int dlm_create_debugfs_root(void)
1057 1006
1058void dlm_destroy_debugfs_root(void) 1007void dlm_destroy_debugfs_root(void)
1059{ 1008{
1060 if (dlm_debugfs_root) 1009 debugfs_remove(dlm_debugfs_root);
1061 debugfs_remove(dlm_debugfs_root);
1062} 1010}
1063#endif /* CONFIG_DEBUG_FS */ 1011#endif /* CONFIG_DEBUG_FS */
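
dlmdebug.c drops the struct debug_buffer wrapper in favor of a bare page: each open routine renders its report once into a zeroed page, records the rendered length as the inode size, and read/llseek fall through to simple_read_from_buffer() and generic_file_llseek(). A condensed sketch of the trio, where my_print() is a hypothetical stand-in for the real debug_*_print() renderers:

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static int my_print(void *priv, char *buf, int len)
{
        /* stand-in for debug_state_print() and friends */
        return scnprintf(buf, len, "state of %p\n", priv);
}

static int my_open(struct inode *inode, struct file *file)
{
        char *buf = (char *)get_zeroed_page(GFP_NOFS);

        if (!buf)
                return -ENOMEM;
        /* render once; the inode size doubles as the buffer length */
        i_size_write(inode, my_print(inode->i_private, buf, PAGE_SIZE - 1));
        file->private_data = buf;
        return 0;
}

static ssize_t my_read(struct file *file, char __user *ubuf,
                       size_t nbytes, loff_t *ppos)
{
        return simple_read_from_buffer(ubuf, nbytes, ppos,
                                       file->private_data,
                                       i_size_read(file->f_mapping->host));
}

static int my_release(struct inode *inode, struct file *file)
{
        free_page((unsigned long)file->private_data);
        return 0;
}
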
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 8c686d22f9c7..1f27c4812d1a 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
37 struct dentry *debug_purgelist_dentry; 37 struct dentry *debug_purgelist_dentry;
38}; 38};
39 39
40struct debug_buffer {
41 int len;
42 char *buf;
43};
44
45struct debug_lockres { 40struct debug_lockres {
46 int dl_len; 41 int dl_len;
47 char *dl_buf; 42 char *dl_buf;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index cc2aaa96cfe5..7e38a072d720 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,8 +460,6 @@ redo_bucket:
460 } 460 }
461 cond_resched_lock(&dlm->spinlock); 461 cond_resched_lock(&dlm->spinlock);
462 num += n; 462 num += n;
463 mlog(0, "%s: touched %d lockreses in bucket %d "
464 "(tot=%d)\n", dlm->name, n, i, num);
465 } 463 }
466 spin_unlock(&dlm->spinlock); 464 spin_unlock(&dlm->spinlock);
467 wake_up(&dlm->dlm_thread_wq); 465 wake_up(&dlm->dlm_thread_wq);
@@ -1661,8 +1659,8 @@ bail:
1661 1659
1662static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) 1660static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
1663{ 1661{
1664 o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); 1662 o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
1665 o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); 1663 o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
1666 o2net_unregister_handler_list(&dlm->dlm_domain_handlers); 1664 o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
1667} 1665}
1668 1666
@@ -1674,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
1674 1672
1675 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, 1673 o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
1676 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); 1674 dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
1677 status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); 1675 status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
1678 if (status) 1676 if (status)
1679 goto bail; 1677 goto bail;
1680 1678
1681 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, 1679 o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
1682 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); 1680 dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
1683 status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); 1681 status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
1684 if (status) 1682 if (status)
1685 goto bail; 1683 goto bail;
1686 1684
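
The dlmdomain.c change passes the domain name instead of NULL when
registering and unregistering heartbeat callbacks, scoping them to the
heartbeat region backing that domain. A hedged sketch of the paired
setup/teardown, assuming the in-tree o2hb signatures (region name
first, callback descriptor second); the static descriptor is
illustrative only:

    /* Sketch, not the patch itself: attach node-down notification to
     * one domain's region and tear it down symmetrically. */
    static struct o2hb_callback_func hb_down;   /* hypothetical */

    static int example_hb_attach(struct dlm_ctxt *dlm)
    {
            o2hb_setup_callback(&hb_down, O2HB_NODE_DOWN_CB,
                                dlm_hb_node_down_cb, dlm,
                                DLM_HB_NODE_DOWN_PRI);
            return o2hb_register_callback(dlm->name, &hb_down);
    }

    static void example_hb_detach(struct dlm_ctxt *dlm)
    {
            o2hb_unregister_callback(dlm->name, &hb_down);
    }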
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 69cf369961c4..7009292aac5a 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
106 106
107 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) 107 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
108 return 0; 108 return 0;
109 if (!dlm_lock_compatible(tmplock->ml.convert_type,
110 lock->ml.type))
111 return 0;
109 } 112 }
110 113
111 return 1; 114 return 1;
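
The dlmlock.c hunk makes a new lock request also prove compatibility
against the mode a queued convert is asking for, not just the mode
currently granted. For reference, a sketch of the standard three-mode
compatibility rule (NL/PR/EX) that a dlm_lock_compatible()-style helper
implements; the exact in-tree helper may differ in detail:

    /* Sketch: NL is compatible with everything, EX with nothing
     * else, PR only with PR. */
    static int example_lock_compatible(int existing, int request)
    {
            if (existing == LKM_NLMODE || request == LKM_NLMODE)
                    return 1;
            if (existing == LKM_EXMODE || request == LKM_EXMODE)
                    return 0;
            return existing == LKM_PRMODE && request == LKM_PRMODE;
    }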
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2211acf33d9b..1d6d1d22c471 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
122void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 122void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
123 struct dlm_lock_resource *res) 123 struct dlm_lock_resource *res)
124{ 124{
125 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
126
127 assert_spin_locked(&dlm->spinlock); 125 assert_spin_locked(&dlm->spinlock);
128 assert_spin_locked(&res->spinlock); 126 assert_spin_locked(&res->spinlock);
129 127
130 if (__dlm_lockres_unused(res)){ 128 if (__dlm_lockres_unused(res)){
131 if (list_empty(&res->purge)) { 129 if (list_empty(&res->purge)) {
132 mlog(0, "putting lockres %.*s:%p onto purge list\n", 130 mlog(0, "%s: Adding res %.*s to purge list\n",
133 res->lockname.len, res->lockname.name, res); 131 dlm->name, res->lockname.len, res->lockname.name);
134 132
135 res->last_used = jiffies; 133 res->last_used = jiffies;
136 dlm_lockres_get(res); 134 dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
138 dlm->purge_count++; 136 dlm->purge_count++;
139 } 137 }
140 } else if (!list_empty(&res->purge)) { 138 } else if (!list_empty(&res->purge)) {
141 mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", 139 mlog(0, "%s: Removing res %.*s from purge list\n",
142 res->lockname.len, res->lockname.name, res, res->owner); 140 dlm->name, res->lockname.len, res->lockname.name);
143 141
144 list_del_init(&res->purge); 142 list_del_init(&res->purge);
145 dlm_lockres_put(res); 143 dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
150void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 148void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
151 struct dlm_lock_resource *res) 149 struct dlm_lock_resource *res)
152{ 150{
153 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
154 spin_lock(&dlm->spinlock); 151 spin_lock(&dlm->spinlock);
155 spin_lock(&res->spinlock); 152 spin_lock(&res->spinlock);
156 153
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
171 168
172 master = (res->owner == dlm->node_num); 169 master = (res->owner == dlm->node_num);
173 170
174 171 mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
175 mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, 172 res->lockname.len, res->lockname.name, master);
176 res->lockname.name, master);
177 173
178 if (!master) { 174 if (!master) {
179 res->state |= DLM_LOCK_RES_DROPPING_REF; 175 res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
189 /* clear our bit from the master's refmap, ignore errors */ 185 /* clear our bit from the master's refmap, ignore errors */
190 ret = dlm_drop_lockres_ref(dlm, res); 186 ret = dlm_drop_lockres_ref(dlm, res);
191 if (ret < 0) { 187 if (ret < 0) {
192 mlog_errno(ret); 188 mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
189 res->lockname.len, res->lockname.name, ret);
193 if (!dlm_is_host_down(ret)) 190 if (!dlm_is_host_down(ret))
194 BUG(); 191 BUG();
195 } 192 }
196 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
197 dlm->name, res->lockname.len, res->lockname.name, ret);
198 spin_lock(&dlm->spinlock); 193 spin_lock(&dlm->spinlock);
199 spin_lock(&res->spinlock); 194 spin_lock(&res->spinlock);
200 } 195 }
201 196
202 if (!list_empty(&res->purge)) { 197 if (!list_empty(&res->purge)) {
203 mlog(0, "removing lockres %.*s:%p from purgelist, " 198 mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
204 "master = %d\n", res->lockname.len, res->lockname.name, 199 dlm->name, res->lockname.len, res->lockname.name, master);
205 res, master);
206 list_del_init(&res->purge); 200 list_del_init(&res->purge);
207 dlm_lockres_put(res); 201 dlm_lockres_put(res);
208 dlm->purge_count--; 202 dlm->purge_count--;
209 } 203 }
210 204
211 if (!__dlm_lockres_unused(res)) { 205 if (!__dlm_lockres_unused(res)) {
212 mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", 206 mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
213 dlm->name, res->lockname.len, res->lockname.name); 207 dlm->name, res->lockname.len, res->lockname.name);
214 __dlm_print_one_lock_resource(res); 208 __dlm_print_one_lock_resource(res);
215 BUG(); 209 BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
266 unused = __dlm_lockres_unused(lockres); 260 unused = __dlm_lockres_unused(lockres);
267 if (!unused || 261 if (!unused ||
268 (lockres->state & DLM_LOCK_RES_MIGRATING)) { 262 (lockres->state & DLM_LOCK_RES_MIGRATING)) {
269 mlog(0, "lockres %s:%.*s: is in use or " 263 mlog(0, "%s: res %.*s is in use or being remastered, "
270 "being remastered, used %d, state %d\n", 264 "used %d, state %d\n", dlm->name,
271 dlm->name, lockres->lockname.len, 265 lockres->lockname.len, lockres->lockname.name,
272 lockres->lockname.name, !unused, lockres->state); 266 !unused, lockres->state);
273 list_move_tail(&dlm->purge_list, &lockres->purge); 267 list_move_tail(&dlm->purge_list, &lockres->purge);
274 spin_unlock(&lockres->spinlock); 268 spin_unlock(&lockres->spinlock);
275 continue; 269 continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
296 struct list_head *head; 290 struct list_head *head;
297 int can_grant = 1; 291 int can_grant = 1;
298 292
299 //mlog(0, "res->lockname.len=%d\n", res->lockname.len); 293 /*
300 //mlog(0, "res->lockname.name=%p\n", res->lockname.name); 294 * Because this function is called with the lockres
301 //mlog(0, "shuffle res %.*s\n", res->lockname.len,
302 // res->lockname.name);
303
304 /* because this function is called with the lockres
305 * spinlock, and because we know that it is not migrating/ 295 * spinlock, and because we know that it is not migrating/
306 * recovering/in-progress, it is fine to reserve asts and 296 * recovering/in-progress, it is fine to reserve asts and
307 * basts right before queueing them all throughout */ 297 * basts right before queueing them all throughout
298 */
308 assert_spin_locked(&dlm->ast_lock); 299 assert_spin_locked(&dlm->ast_lock);
309 assert_spin_locked(&res->spinlock); 300 assert_spin_locked(&res->spinlock);
310 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 301 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
314converting: 305converting:
315 if (list_empty(&res->converting)) 306 if (list_empty(&res->converting))
316 goto blocked; 307 goto blocked;
317 mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len, 308 mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
318 res->lockname.name); 309 res->lockname.len, res->lockname.name);
319 310
320 target = list_entry(res->converting.next, struct dlm_lock, list); 311 target = list_entry(res->converting.next, struct dlm_lock, list);
321 if (target->ml.convert_type == LKM_IVMODE) { 312 if (target->ml.convert_type == LKM_IVMODE) {
322 mlog(ML_ERROR, "%.*s: converting a lock with no " 313 mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
323 "convert_type!\n", res->lockname.len, res->lockname.name); 314 dlm->name, res->lockname.len, res->lockname.name);
324 BUG(); 315 BUG();
325 } 316 }
326 head = &res->granted; 317 head = &res->granted;
@@ -365,9 +356,12 @@ converting:
365 spin_lock(&target->spinlock); 356 spin_lock(&target->spinlock);
366 BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 357 BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
367 358
368 mlog(0, "calling ast for converting lock: %.*s, have: %d, " 359 mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
369 "granting: %d, node: %u\n", res->lockname.len, 360 "%d => %d, node %u\n", dlm->name, res->lockname.len,
370 res->lockname.name, target->ml.type, 361 res->lockname.name,
362 dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
363 dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
364 target->ml.type,
371 target->ml.convert_type, target->ml.node); 365 target->ml.convert_type, target->ml.node);
372 366
373 target->ml.type = target->ml.convert_type; 367 target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
428 spin_lock(&target->spinlock); 422 spin_lock(&target->spinlock);
429 BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 423 BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
430 424
431 mlog(0, "calling ast for blocked lock: %.*s, granting: %d, " 425 mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
432 "node: %u\n", res->lockname.len, res->lockname.name, 426 "node %u\n", dlm->name, res->lockname.len,
427 res->lockname.name,
428 dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
429 dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
433 target->ml.type, target->ml.node); 430 target->ml.type, target->ml.node);
434 431
435 // target->ml.type is already correct 432 /* target->ml.type is already correct */
436 list_move_tail(&target->list, &res->granted); 433 list_move_tail(&target->list, &res->granted);
437 434
438 BUG_ON(!target->lksb); 435 BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
453/* must have NO locks when calling this with res !=NULL * */ 450/* must have NO locks when calling this with res !=NULL * */
454void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 451void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
455{ 452{
456 mlog_entry("dlm=%p, res=%p\n", dlm, res);
457 if (res) { 453 if (res) {
458 spin_lock(&dlm->spinlock); 454 spin_lock(&dlm->spinlock);
459 spin_lock(&res->spinlock); 455 spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
466 462
467void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 463void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
468{ 464{
469 mlog_entry("dlm=%p, res=%p\n", dlm, res);
470
471 assert_spin_locked(&dlm->spinlock); 465 assert_spin_locked(&dlm->spinlock);
472 assert_spin_locked(&res->spinlock); 466 assert_spin_locked(&res->spinlock);
473 467
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
484 res->state |= DLM_LOCK_RES_DIRTY; 478 res->state |= DLM_LOCK_RES_DIRTY;
485 } 479 }
486 } 480 }
481
482 mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
483 res->lockname.name);
487} 484}
488 485
489 486
490/* Launch the NM thread for the mounted volume */ 487/* Launch the NM thread for the mounted volume */
491int dlm_launch_thread(struct dlm_ctxt *dlm) 488int dlm_launch_thread(struct dlm_ctxt *dlm)
492{ 489{
493 mlog(0, "starting dlm thread...\n"); 490 mlog(0, "Starting dlm_thread...\n");
494 491
495 dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread"); 492 dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
496 if (IS_ERR(dlm->dlm_thread_task)) { 493 if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
505void dlm_complete_thread(struct dlm_ctxt *dlm) 502void dlm_complete_thread(struct dlm_ctxt *dlm)
506{ 503{
507 if (dlm->dlm_thread_task) { 504 if (dlm->dlm_thread_task) {
508 mlog(ML_KTHREAD, "waiting for dlm thread to exit\n"); 505 mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
509 kthread_stop(dlm->dlm_thread_task); 506 kthread_stop(dlm->dlm_thread_task);
510 dlm->dlm_thread_task = NULL; 507 dlm->dlm_thread_task = NULL;
511 } 508 }
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
536 /* get an extra ref on lock */ 533 /* get an extra ref on lock */
537 dlm_lock_get(lock); 534 dlm_lock_get(lock);
538 res = lock->lockres; 535 res = lock->lockres;
539 mlog(0, "delivering an ast for this lockres\n"); 536 mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
537 "node %u\n", dlm->name, res->lockname.len,
538 res->lockname.name,
539 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
540 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
541 lock->ml.type, lock->ml.node);
540 542
541 BUG_ON(!lock->ast_pending); 543 BUG_ON(!lock->ast_pending);
542 544
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
557 /* possible that another ast was queued while 559 /* possible that another ast was queued while
558 * we were delivering the last one */ 560 * we were delivering the last one */
559 if (!list_empty(&lock->ast_list)) { 561 if (!list_empty(&lock->ast_list)) {
560 mlog(0, "aha another ast got queued while " 562 mlog(0, "%s: res %.*s, AST queued while flushing last "
561 "we were finishing the last one. will " 563 "one\n", dlm->name, res->lockname.len,
562 "keep the ast_pending flag set.\n"); 564 res->lockname.name);
563 } else 565 } else
564 lock->ast_pending = 0; 566 lock->ast_pending = 0;
565 567
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
590 dlm_lock_put(lock); 592 dlm_lock_put(lock);
591 spin_unlock(&dlm->ast_lock); 593 spin_unlock(&dlm->ast_lock);
592 594
593 mlog(0, "delivering a bast for this lockres " 595 mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
594 "(blocked = %d\n", hi); 596 "blocked %d, node %u\n",
597 dlm->name, res->lockname.len, res->lockname.name,
598 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
599 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
600 hi, lock->ml.node);
595 601
596 if (lock->ml.node != dlm->node_num) { 602 if (lock->ml.node != dlm->node_num) {
597 ret = dlm_send_proxy_bast(dlm, res, lock, hi); 603 ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
605 /* possible that another bast was queued while 611 /* possible that another bast was queued while
606 * we were delivering the last one */ 612 * we were delivering the last one */
607 if (!list_empty(&lock->bast_list)) { 613 if (!list_empty(&lock->bast_list)) {
608 mlog(0, "aha another bast got queued while " 614 mlog(0, "%s: res %.*s, BAST queued while flushing last "
609 "we were finishing the last one. will " 615 "one\n", dlm->name, res->lockname.len,
610 "keep the bast_pending flag set.\n"); 616 res->lockname.name);
611 } else 617 } else
612 lock->bast_pending = 0; 618 lock->bast_pending = 0;
613 619
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
675 spin_lock(&res->spinlock); 681 spin_lock(&res->spinlock);
676 if (res->owner != dlm->node_num) { 682 if (res->owner != dlm->node_num) {
677 __dlm_print_one_lock_resource(res); 683 __dlm_print_one_lock_resource(res);
678 mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n", 684 mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
679 res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no", 685 " dirty %d\n", dlm->name,
680 res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no", 686 !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
681 res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no", 687 !!(res->state & DLM_LOCK_RES_MIGRATING),
682 res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no"); 688 !!(res->state & DLM_LOCK_RES_RECOVERING),
689 !!(res->state & DLM_LOCK_RES_DIRTY));
683 } 690 }
684 BUG_ON(res->owner != dlm->node_num); 691 BUG_ON(res->owner != dlm->node_num);
685 692
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
693 res->state &= ~DLM_LOCK_RES_DIRTY; 700 res->state &= ~DLM_LOCK_RES_DIRTY;
694 spin_unlock(&res->spinlock); 701 spin_unlock(&res->spinlock);
695 spin_unlock(&dlm->ast_lock); 702 spin_unlock(&dlm->ast_lock);
696 mlog(0, "delaying list shuffling for in-" 703 mlog(0, "%s: res %.*s, inprogress, delay list "
697 "progress lockres %.*s, state=%d\n", 704 "shuffle, state %d\n", dlm->name,
698 res->lockname.len, res->lockname.name, 705 res->lockname.len, res->lockname.name,
699 res->state); 706 res->state);
700 delay = 1; 707 delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
706 * spinlock and do NOT have the dlm lock. 713 * spinlock and do NOT have the dlm lock.
707 * safe to reserve/queue asts and run the lists. */ 714 * safe to reserve/queue asts and run the lists. */
708 715
709 mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
710 "res=%.*s\n", dlm->name,
711 res->lockname.len, res->lockname.name);
712
713 /* called while holding lockres lock */ 716 /* called while holding lockres lock */
714 dlm_shuffle_lists(dlm, res); 717 dlm_shuffle_lists(dlm, res);
715 res->state &= ~DLM_LOCK_RES_DIRTY; 718 res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
733 /* unlikely, but we may need to give time to 736 /* unlikely, but we may need to give time to
734 * other tasks */ 737 * other tasks */
735 if (!--n) { 738 if (!--n) {
736 mlog(0, "throttling dlm_thread\n"); 739 mlog(0, "%s: Throttling dlm thread\n",
740 dlm->name);
737 break; 741 break;
738 } 742 }
739 } 743 }
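
Several of the reworked dlmthread.c messages identify a lock as
"node:sequence" by unpacking its 64-bit cookie with the
dlm_get_lock_cookie_node/seq helpers. A hedged sketch of that decoding,
assuming the usual layout of the owning node in the top byte:

    #include <linux/types.h>

    /* Sketch: top 8 bits carry the owning node, the remaining 56 bits
     * a per-node sequence number. */
    static inline unsigned int example_cookie_node(u64 cookie)
    {
            return (unsigned int)((cookie >> 56) & 0xffULL);
    }

    static inline unsigned long long example_cookie_seq(u64 cookie)
    {
            return cookie & 0x00ffffffffffffffULL;
    }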
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d14cad6e2e41..30c523144452 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1017 * An error return must mean that no cluster locks 1017 * An error return must mean that no cluster locks
1018 * were held on function exit. 1018 * were held on function exit.
1019 */ 1019 */
1020 if (oi1->ip_blkno != oi2->ip_blkno) 1020 if (oi1->ip_blkno != oi2->ip_blkno) {
1021 ocfs2_inode_unlock(inode2, 1); 1021 ocfs2_inode_unlock(inode2, 1);
1022 brelse(*bh2);
1023 *bh2 = NULL;
1024 }
1022 1025
1023 if (status != -ENOENT) 1026 if (status != -ENOENT)
1024 mlog_errno(status); 1027 mlog_errno(status);
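
The ocfs2_double_lock() fix pairs the unlock of the second inode with
releasing the buffer head that was read under that lock; otherwise the
bh leaks on the error path. A sketch of the unwind, relying on
brelse(NULL) being a safe no-op (the helper name is hypothetical):

    /* Sketch of the error-path unwind: drop the cluster lock, release
     * the buffer head read under it, and clear the caller's pointer
     * so it cannot be released twice. */
    static void example_unwind(struct inode *inode2,
                               struct buffer_head **bh2)
    {
            ocfs2_inode_unlock(inode2, 1);
            brelse(*bh2);           /* brelse(NULL) is a no-op */
            *bh2 = NULL;
    }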
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 70dd3b1798f1..51cd6898e7f1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -420,6 +420,11 @@ struct ocfs2_super
420 struct inode *osb_tl_inode; 420 struct inode *osb_tl_inode;
421 struct buffer_head *osb_tl_bh; 421 struct buffer_head *osb_tl_bh;
422 struct delayed_work osb_truncate_log_wq; 422 struct delayed_work osb_truncate_log_wq;
423 /*
424 * How many clusters in our truncate log.
425 * It must be protected by osb_tl_inode->i_mutex.
426 */
427 unsigned int truncated_clusters;
423 428
424 struct ocfs2_node_map osb_recovering_orphan_dirs; 429 struct ocfs2_node_map osb_recovering_orphan_dirs;
425 unsigned int *osb_orphan_wipes; 430 unsigned int *osb_orphan_wipes;
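
The new truncated_clusters field documents its own locking rule: it is
only touched under the truncate-log inode's i_mutex. A sketch of the
discipline the comment implies; the helper name is hypothetical:

    /* Sketch: all reads and writes of the counter happen with
     * osb_tl_inode->i_mutex held. */
    static void example_tl_account(struct ocfs2_super *osb,
                                   unsigned int clusters)
    {
            mutex_lock(&osb->osb_tl_inode->i_mutex);
            osb->truncated_clusters += clusters;
            mutex_unlock(&osb->osb_tl_inode->i_mutex);
    }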
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0fed41e6efcd..84becd3e4772 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -133,16 +133,20 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 133EXPORT_SYMBOL(dq_data_lock);
134 134
135void __quota_error(struct super_block *sb, const char *func, 135void __quota_error(struct super_block *sb, const char *func,
136 const char *fmt, ...) 136 const char *fmt, ...)
137{ 137{
138 va_list args;
139
140 if (printk_ratelimit()) { 138 if (printk_ratelimit()) {
139 va_list args;
140 struct va_format vaf;
141
141 va_start(args, fmt); 142 va_start(args, fmt);
142 printk(KERN_ERR "Quota error (device %s): %s: ", 143
143 sb->s_id, func); 144 vaf.fmt = fmt;
144 vprintk(fmt, args); 145 vaf.va = &args;
145 printk("\n"); 146
147 printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
148 sb->s_id, func, &vaf);
149
146 va_end(args); 150 va_end(args);
147 } 151 }
148} 152}
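
The rewritten __quota_error() wraps the caller's format string and
va_list in a struct va_format and prints it via the %pV extension, so
the prefix and the message are emitted by a single printk() and cannot
be interleaved with concurrent console output. A self-contained sketch
of the idiom (the function name is illustrative):

    #include <linux/kernel.h>

    /* Sketch of the %pV idiom: one printk() expands the wrapped
     * format together with its prefix. */
    static void example_error(const char *prefix, const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;
            printk(KERN_ERR "%s: %pV\n", prefix, &vaf);
            va_end(args);
    }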
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874eabcc..e41c1becf096 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -468,8 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
468 return -ENOMEM; 468 return -ENOMEM;
469 ret = read_blk(info, *blk, buf); 469 ret = read_blk(info, *blk, buf);
470 if (ret < 0) { 470 if (ret < 0) {
471 quota_error(dquot->dq_sb, "Can't read quota data " 471 quota_error(dquot->dq_sb, "Can't read quota data block %u",
472 "block %u", blk); 472 *blk);
473 goto out_buf; 473 goto out_buf;
474 } 474 }
475 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); 475 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -493,8 +493,9 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
493 } else { 493 } else {
494 ret = write_blk(info, *blk, buf); 494 ret = write_blk(info, *blk, buf);
495 if (ret < 0) 495 if (ret < 0)
496 quota_error(dquot->dq_sb, "Can't write quota " 496 quota_error(dquot->dq_sb,
497 "tree block %u", blk); 497 "Can't write quota tree block %u",
498 *blk);
498 } 499 }
499 } 500 }
500out_buf: 501out_buf:
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 442f34ff1af8..c8769dc222d8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -165,10 +165,7 @@ int sysfs_merge_group(struct kobject *kobj,
165 struct attribute *const *attr; 165 struct attribute *const *attr;
166 int i; 166 int i;
167 167
168 if (grp) 168 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
169 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
170 else
171 dir_sd = sysfs_get(kobj->sd);
172 if (!dir_sd) 169 if (!dir_sd)
173 return -ENOENT; 170 return -ENOENT;
174 171
@@ -195,10 +192,7 @@ void sysfs_unmerge_group(struct kobject *kobj,
195 struct sysfs_dirent *dir_sd; 192 struct sysfs_dirent *dir_sd;
196 struct attribute *const *attr; 193 struct attribute *const *attr;
197 194
198 if (grp) 195 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
199 dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name);
200 else
201 dir_sd = sysfs_get(kobj->sd);
202 if (dir_sd) { 196 if (dir_sd) {
203 for (attr = grp->attrs; *attr; ++attr) 197 for (attr = grp->attrs; *attr; ++attr)
204 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); 198 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
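
After the group.c change, sysfs_merge_group() and sysfs_unmerge_group()
no longer accept a NULL group: the target directory is always resolved
from grp->name. A hedged sketch of the resulting caller contract; the
wrapper and its WARN_ON are assumptions, not part of the patch:

    /* Sketch: callers must pass a named attribute group; the old
     * NULL-group "use the kobject's own directory" case is gone. */
    static int example_merge(struct kobject *kobj,
                             const struct attribute_group *grp)
    {
            if (WARN_ON(!grp || !grp->name))
                    return -EINVAL;
            return sysfs_merge_group(kobj, grp);
    }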
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 30ac27345586..0a12eb89cd32 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -19,6 +19,7 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/sysfs.h>
22#include <linux/xattr.h> 23#include <linux/xattr.h>
23#include <linux/security.h> 24#include <linux/security.h>
24#include "sysfs.h" 25#include "sysfs.h"
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ffaaa816bfba..3d28af31d863 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/lockdep.h> 11#include <linux/lockdep.h>
12#include <linux/kobject_ns.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14struct sysfs_open_dirent; 15struct sysfs_open_dirent;
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index f8def3c8ea4c..0e0e99bd6bce 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -1,6 +1,5 @@
1config UDF_FS 1config UDF_FS
2 tristate "UDF file system support" 2 tristate "UDF file system support"
3 depends on BKL # needs serious work to remove
4 select CRC_ITU_T 3 select CRC_ITU_T
5 help 4 help
6 This is the new file system used on some CD-ROMs and DVDs. Say Y if 5 This is the new file system used on some CD-ROMs and DVDs. Say Y if
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index b608efaa4cee..306ee39ef2c3 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -157,10 +157,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
157 udf_debug("bit %ld already set\n", bit + i); 157 udf_debug("bit %ld already set\n", bit + i);
158 udf_debug("byte=%2x\n", 158 udf_debug("byte=%2x\n",
159 ((char *)bh->b_data)[(bit + i) >> 3]); 159 ((char *)bh->b_data)[(bit + i) >> 3]);
160 } else {
161 udf_add_free_space(sb, sbi->s_partition, 1);
162 } 160 }
163 } 161 }
162 udf_add_free_space(sb, sbi->s_partition, count);
164 mark_buffer_dirty(bh); 163 mark_buffer_dirty(bh);
165 if (overflow) { 164 if (overflow) {
166 block += count; 165 block += count;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 51552bf50225..eb8bfe2b89a5 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/smp_lock.h>
34#include <linux/buffer_head.h> 33#include <linux/buffer_head.h>
35 34
36#include "udf_i.h" 35#include "udf_i.h"
@@ -190,18 +189,14 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
190 struct inode *dir = filp->f_path.dentry->d_inode; 189 struct inode *dir = filp->f_path.dentry->d_inode;
191 int result; 190 int result;
192 191
193 lock_kernel();
194
195 if (filp->f_pos == 0) { 192 if (filp->f_pos == 0) {
196 if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { 193 if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) {
197 unlock_kernel();
198 return 0; 194 return 0;
199 } 195 }
200 filp->f_pos++; 196 filp->f_pos++;
201 } 197 }
202 198
203 result = do_udf_readdir(dir, filp, filldir, dirent); 199 result = do_udf_readdir(dir, filp, filldir, dirent);
204 unlock_kernel();
205 return result; 200 return result;
206} 201}
207 202
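
No replacement lock appears in udf_readdir(), presumably because the
VFS of this era already serializes ->readdir() against directory
updates by holding i_mutex around the call. A simplified sketch of the
caller-side pattern the filesystem relies on (error and permission
checks from the real vfs_readdir() are omitted):

    /* Sketch of vfs_readdir()-style serialization. */
    static int example_vfs_readdir(struct file *file, void *buf,
                                   filldir_t filler)
    {
            struct inode *inode = file->f_path.dentry->d_inode;
            int res;

            mutex_lock(&inode->i_mutex);
            res = file->f_op->readdir(file, buf, filler);
            mutex_unlock(&inode->i_mutex);
            return res;
    }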
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 66b9e7e7e4c5..89c78486cbbe 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -32,7 +32,6 @@
32#include <linux/string.h> /* memset */ 32#include <linux/string.h> /* memset */
33#include <linux/capability.h> 33#include <linux/capability.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/smp_lock.h>
36#include <linux/pagemap.h> 35#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
38#include <linux/aio.h> 37#include <linux/aio.h>
@@ -114,6 +113,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
114 size_t count = iocb->ki_left; 113 size_t count = iocb->ki_left;
115 struct udf_inode_info *iinfo = UDF_I(inode); 114 struct udf_inode_info *iinfo = UDF_I(inode);
116 115
116 down_write(&iinfo->i_data_sem);
117 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 117 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
118 if (file->f_flags & O_APPEND) 118 if (file->f_flags & O_APPEND)
119 pos = inode->i_size; 119 pos = inode->i_size;
@@ -126,6 +126,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
126 udf_expand_file_adinicb(inode, pos + count, &err); 126 udf_expand_file_adinicb(inode, pos + count, &err);
127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
128 udf_debug("udf_expand_adinicb: err=%d\n", err); 128 udf_debug("udf_expand_adinicb: err=%d\n", err);
129 up_write(&iinfo->i_data_sem);
129 return err; 130 return err;
130 } 131 }
131 } else { 132 } else {
@@ -135,6 +136,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
135 iinfo->i_lenAlloc = inode->i_size; 136 iinfo->i_lenAlloc = inode->i_size;
136 } 137 }
137 } 138 }
139 up_write(&iinfo->i_data_sem);
138 140
139 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); 141 retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
140 if (retval > 0) 142 if (retval > 0)
@@ -149,8 +151,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
149 long old_block, new_block; 151 long old_block, new_block;
150 int result = -EINVAL; 152 int result = -EINVAL;
151 153
152 lock_kernel();
153
154 if (file_permission(filp, MAY_READ) != 0) { 154 if (file_permission(filp, MAY_READ) != 0) {
155 udf_debug("no permission to access inode %lu\n", inode->i_ino); 155 udf_debug("no permission to access inode %lu\n", inode->i_ino);
156 result = -EPERM; 156 result = -EPERM;
@@ -196,7 +196,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
196 } 196 }
197 197
198out: 198out:
199 unlock_kernel();
200 return result; 199 return result;
201} 200}
202 201
@@ -204,10 +203,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
204{ 203{
205 if (filp->f_mode & FMODE_WRITE) { 204 if (filp->f_mode & FMODE_WRITE) {
206 mutex_lock(&inode->i_mutex); 205 mutex_lock(&inode->i_mutex);
207 lock_kernel(); 206 down_write(&UDF_I(inode)->i_data_sem);
208 udf_discard_prealloc(inode); 207 udf_discard_prealloc(inode);
209 udf_truncate_tail_extent(inode); 208 udf_truncate_tail_extent(inode);
210 unlock_kernel(); 209 up_write(&UDF_I(inode)->i_data_sem);
211 mutex_unlock(&inode->i_mutex); 210 mutex_unlock(&inode->i_mutex);
212 } 211 }
213 return 0; 212 return 0;
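
The file.c hunks belong to the wider BKL removal in this series: state
that lock_kernel() used to cover (the in-ICB file data and the
allocation type) is now guarded by a per-inode rw_semaphore,
i_data_sem. A sketch of the writer side, with a minimal stand-in for
udf_inode_info:

    #include <linux/rwsem.h>

    struct example_inode_info {
            struct rw_semaphore i_data_sem; /* guards alloc type / ICB data */
    };

    static void example_expand(struct example_inode_info *ei)
    {
            down_write(&ei->i_data_sem); /* exclusive: may change alloc type */
            /* ... expand in-ICB data into real blocks here ... */
            up_write(&ei->i_data_sem);
    }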
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 75d9304d0dc3..6fb7e0adcda0 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -92,28 +92,19 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
92 return NULL; 92 return NULL;
93 } 93 }
94 94
95 mutex_lock(&sbi->s_alloc_mutex);
96 if (sbi->s_lvid_bh) { 95 if (sbi->s_lvid_bh) {
97 struct logicalVolIntegrityDesc *lvid = 96 struct logicalVolIntegrityDescImpUse *lvidiu;
98 (struct logicalVolIntegrityDesc *) 97
99 sbi->s_lvid_bh->b_data; 98 iinfo->i_unique = lvid_get_unique_id(sb);
100 struct logicalVolIntegrityDescImpUse *lvidiu = 99 mutex_lock(&sbi->s_alloc_mutex);
101 udf_sb_lvidiu(sbi); 100 lvidiu = udf_sb_lvidiu(sbi);
102 struct logicalVolHeaderDesc *lvhd;
103 uint64_t uniqueID;
104 lvhd = (struct logicalVolHeaderDesc *)
105 (lvid->logicalVolContentsUse);
106 if (S_ISDIR(mode)) 101 if (S_ISDIR(mode))
107 le32_add_cpu(&lvidiu->numDirs, 1); 102 le32_add_cpu(&lvidiu->numDirs, 1);
108 else 103 else
109 le32_add_cpu(&lvidiu->numFiles, 1); 104 le32_add_cpu(&lvidiu->numFiles, 1);
110 iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID);
111 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
112 uniqueID += 16;
113 lvhd->uniqueID = cpu_to_le64(uniqueID);
114 udf_updated_lvid(sb); 105 udf_updated_lvid(sb);
106 mutex_unlock(&sbi->s_alloc_mutex);
115 } 107 }
116 mutex_unlock(&sbi->s_alloc_mutex);
117 108
118 inode_init_owner(inode, dir, mode); 109 inode_init_owner(inode, dir, mode);
119 110
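
udf_new_inode() now takes its uniqueID from an lvid_get_unique_id()
helper instead of open-coding the LVID header update; the helper's
definition presumably lands in the super.c hunks of this series. A
sketch reconstructed from the open-coded logic being removed (field
names as in the old hunk):

    /* Sketch: fetch the next uniqueID from the logical volume
     * integrity descriptor under s_alloc_mutex, skipping values whose
     * low 32 bits wrap to the reserved range 0..15. */
    static u64 example_lvid_get_unique_id(struct super_block *sb)
    {
            struct udf_sb_info *sbi = UDF_SB(sb);
            struct logicalVolIntegrityDesc *lvid;
            struct logicalVolHeaderDesc *lvhd;
            u64 uniqueID, ret;

            if (!sbi->s_lvid_bh)
                    return 0;
            lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
            lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse;

            mutex_lock(&sbi->s_alloc_mutex);
            ret = uniqueID = le64_to_cpu(lvhd->uniqueID);
            if (!(++uniqueID & 0x00000000FFFFFFFFULL))
                    uniqueID += 16;
            lvhd->uniqueID = cpu_to_le64(uniqueID);
            mutex_unlock(&sbi->s_alloc_mutex);
            mark_buffer_dirty(sbi->s_lvid_bh);

            return ret;
    }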
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fc48f37aa2dd..c6a2e782b97b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -31,7 +31,6 @@
31 31
32#include "udfdecl.h" 32#include "udfdecl.h"
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/smp_lock.h>
35#include <linux/module.h> 34#include <linux/module.h>
36#include <linux/pagemap.h> 35#include <linux/pagemap.h>
37#include <linux/buffer_head.h> 36#include <linux/buffer_head.h>
@@ -51,6 +50,7 @@ MODULE_LICENSE("GPL");
51static mode_t udf_convert_permissions(struct fileEntry *); 50static mode_t udf_convert_permissions(struct fileEntry *);
52static int udf_update_inode(struct inode *, int); 51static int udf_update_inode(struct inode *, int);
53static void udf_fill_inode(struct inode *, struct buffer_head *); 52static void udf_fill_inode(struct inode *, struct buffer_head *);
53static int udf_sync_inode(struct inode *inode);
54static int udf_alloc_i_data(struct inode *inode, size_t size); 54static int udf_alloc_i_data(struct inode *inode, size_t size);
55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, 55static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
56 sector_t *, int *); 56 sector_t *, int *);
@@ -79,9 +79,7 @@ void udf_evict_inode(struct inode *inode)
79 want_delete = 1; 79 want_delete = 1;
80 inode->i_size = 0; 80 inode->i_size = 0;
81 udf_truncate(inode); 81 udf_truncate(inode);
82 lock_kernel();
83 udf_update_inode(inode, IS_SYNC(inode)); 82 udf_update_inode(inode, IS_SYNC(inode));
84 unlock_kernel();
85 } 83 }
86 invalidate_inode_buffers(inode); 84 invalidate_inode_buffers(inode);
87 end_writeback(inode); 85 end_writeback(inode);
@@ -97,9 +95,7 @@ void udf_evict_inode(struct inode *inode)
97 kfree(iinfo->i_ext.i_data); 95 kfree(iinfo->i_ext.i_data);
98 iinfo->i_ext.i_data = NULL; 96 iinfo->i_ext.i_data = NULL;
99 if (want_delete) { 97 if (want_delete) {
100 lock_kernel();
101 udf_free_inode(inode); 98 udf_free_inode(inode);
102 unlock_kernel();
103 } 99 }
104} 100}
105 101
@@ -302,10 +298,9 @@ static int udf_get_block(struct inode *inode, sector_t block,
302 err = -EIO; 298 err = -EIO;
303 new = 0; 299 new = 0;
304 bh = NULL; 300 bh = NULL;
305
306 lock_kernel();
307
308 iinfo = UDF_I(inode); 301 iinfo = UDF_I(inode);
302
303 down_write(&iinfo->i_data_sem);
309 if (block == iinfo->i_next_alloc_block + 1) { 304 if (block == iinfo->i_next_alloc_block + 1) {
310 iinfo->i_next_alloc_block++; 305 iinfo->i_next_alloc_block++;
311 iinfo->i_next_alloc_goal++; 306 iinfo->i_next_alloc_goal++;
@@ -324,7 +319,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
324 map_bh(bh_result, inode->i_sb, phys); 319 map_bh(bh_result, inode->i_sb, phys);
325 320
326abort: 321abort:
327 unlock_kernel(); 322 up_write(&iinfo->i_data_sem);
328 return err; 323 return err;
329} 324}
330 325
@@ -1022,16 +1017,16 @@ void udf_truncate(struct inode *inode)
1022 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1017 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1023 return; 1018 return;
1024 1019
1025 lock_kernel();
1026 iinfo = UDF_I(inode); 1020 iinfo = UDF_I(inode);
1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1021 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1022 down_write(&iinfo->i_data_sem);
1028 if (inode->i_sb->s_blocksize < 1023 if (inode->i_sb->s_blocksize <
1029 (udf_file_entry_alloc_offset(inode) + 1024 (udf_file_entry_alloc_offset(inode) +
1030 inode->i_size)) { 1025 inode->i_size)) {
1031 udf_expand_file_adinicb(inode, inode->i_size, &err); 1026 udf_expand_file_adinicb(inode, inode->i_size, &err);
1032 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1033 inode->i_size = iinfo->i_lenAlloc; 1028 inode->i_size = iinfo->i_lenAlloc;
1034 unlock_kernel(); 1029 up_write(&iinfo->i_data_sem);
1035 return; 1030 return;
1036 } else 1031 } else
1037 udf_truncate_extents(inode); 1032 udf_truncate_extents(inode);
@@ -1042,10 +1037,13 @@ void udf_truncate(struct inode *inode)
1042 offset - udf_file_entry_alloc_offset(inode)); 1037 offset - udf_file_entry_alloc_offset(inode));
1043 iinfo->i_lenAlloc = inode->i_size; 1038 iinfo->i_lenAlloc = inode->i_size;
1044 } 1039 }
1040 up_write(&iinfo->i_data_sem);
1045 } else { 1041 } else {
1046 block_truncate_page(inode->i_mapping, inode->i_size, 1042 block_truncate_page(inode->i_mapping, inode->i_size,
1047 udf_get_block); 1043 udf_get_block);
1044 down_write(&iinfo->i_data_sem);
1048 udf_truncate_extents(inode); 1045 udf_truncate_extents(inode);
1046 up_write(&iinfo->i_data_sem);
1049 } 1047 }
1050 1048
1051 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); 1049 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
@@ -1053,7 +1051,6 @@ void udf_truncate(struct inode *inode)
1053 udf_sync_inode(inode); 1051 udf_sync_inode(inode);
1054 else 1052 else
1055 mark_inode_dirty(inode); 1053 mark_inode_dirty(inode);
1056 unlock_kernel();
1057} 1054}
1058 1055
1059static void __udf_read_inode(struct inode *inode) 1056static void __udf_read_inode(struct inode *inode)
@@ -1202,6 +1199,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1202 return; 1199 return;
1203 } 1200 }
1204 1201
1202 read_lock(&sbi->s_cred_lock);
1205 inode->i_uid = le32_to_cpu(fe->uid); 1203 inode->i_uid = le32_to_cpu(fe->uid);
1206 if (inode->i_uid == -1 || 1204 if (inode->i_uid == -1 ||
1207 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || 1205 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
@@ -1214,13 +1212,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1214 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) 1212 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
1215 inode->i_gid = UDF_SB(inode->i_sb)->s_gid; 1213 inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
1216 1214
1217 inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
1218 if (!inode->i_nlink)
1219 inode->i_nlink = 1;
1220
1221 inode->i_size = le64_to_cpu(fe->informationLength);
1222 iinfo->i_lenExtents = inode->i_size;
1223
1224 if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY && 1215 if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
1225 sbi->s_fmode != UDF_INVALID_MODE) 1216 sbi->s_fmode != UDF_INVALID_MODE)
1226 inode->i_mode = sbi->s_fmode; 1217 inode->i_mode = sbi->s_fmode;
@@ -1230,6 +1221,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1230 else 1221 else
1231 inode->i_mode = udf_convert_permissions(fe); 1222 inode->i_mode = udf_convert_permissions(fe);
1232 inode->i_mode &= ~sbi->s_umask; 1223 inode->i_mode &= ~sbi->s_umask;
1224 read_unlock(&sbi->s_cred_lock);
1225
1226 inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
1227 if (!inode->i_nlink)
1228 inode->i_nlink = 1;
1229
1230 inode->i_size = le64_to_cpu(fe->informationLength);
1231 iinfo->i_lenExtents = inode->i_size;
1233 1232
1234 if (iinfo->i_efe == 0) { 1233 if (iinfo->i_efe == 0) {
1235 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << 1234 inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1373,16 +1372,10 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
1373 1372
1374int udf_write_inode(struct inode *inode, struct writeback_control *wbc) 1373int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
1375{ 1374{
1376 int ret; 1375 return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1377
1378 lock_kernel();
1379 ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1380 unlock_kernel();
1381
1382 return ret;
1383} 1376}
1384 1377
1385int udf_sync_inode(struct inode *inode) 1378static int udf_sync_inode(struct inode *inode)
1386{ 1379{
1387 return udf_update_inode(inode, 1); 1380 return udf_update_inode(inode, 1);
1388} 1381}
@@ -2048,7 +2041,7 @@ long udf_block_map(struct inode *inode, sector_t block)
2048 struct extent_position epos = {}; 2041 struct extent_position epos = {};
2049 int ret; 2042 int ret;
2050 2043
2051 lock_kernel(); 2044 down_read(&UDF_I(inode)->i_data_sem);
2052 2045
2053 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == 2046 if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
2054 (EXT_RECORDED_ALLOCATED >> 30)) 2047 (EXT_RECORDED_ALLOCATED >> 30))
@@ -2056,7 +2049,7 @@ long udf_block_map(struct inode *inode, sector_t block)
2056 else 2049 else
2057 ret = 0; 2050 ret = 0;
2058 2051
2059 unlock_kernel(); 2052 up_read(&UDF_I(inode)->i_data_sem);
2060 brelse(epos.bh); 2053 brelse(epos.bh);
2061 2054
2062 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV)) 2055 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
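
inode.c shows the other half of the i_data_sem conversion: pure lookups
such as udf_block_map() only need the shared side of the semaphore,
while the uid/gid/mode mount options gain their own s_cred_lock rwlock.
A sketch of the read path, reusing the minimal inode-info stand-in from
the file.c note above:

    /* Sketch: an extent lookup only reads the mapping, so a shared
     * hold is enough and concurrent readers do not serialize. */
    static long example_block_map(struct example_inode_info *ei)
    {
            long phys = 0;

            down_read(&ei->i_data_sem);
            /* ... inode_bmap()-style extent walk fills phys ... */
            up_read(&ei->i_data_sem);
            return phys;
    }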
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6d8dc02baebb..2be0f9eb86d2 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include <linux/sched.h> 31#include <linux/sched.h>
33#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
@@ -228,10 +227,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
228 } 227 }
229 228
230 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) && 229 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
231 isdotdot) { 230 isdotdot)
232 brelse(epos.bh); 231 goto out_ok;
233 return fi;
234 }
235 232
236 if (!lfi) 233 if (!lfi)
237 continue; 234 continue;
@@ -263,7 +260,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
263 if (dentry->d_name.len > UDF_NAME_LEN - 2) 260 if (dentry->d_name.len > UDF_NAME_LEN - 2)
264 return ERR_PTR(-ENAMETOOLONG); 261 return ERR_PTR(-ENAMETOOLONG);
265 262
266 lock_kernel();
267#ifdef UDF_RECOVERY 263#ifdef UDF_RECOVERY
268 /* temporary shorthand for specifying files by inode number */ 264 /* temporary shorthand for specifying files by inode number */
269 if (!strncmp(dentry->d_name.name, ".B=", 3)) { 265 if (!strncmp(dentry->d_name.name, ".B=", 3)) {
@@ -275,7 +271,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
275 }; 271 };
276 inode = udf_iget(dir->i_sb, lb); 272 inode = udf_iget(dir->i_sb, lb);
277 if (!inode) { 273 if (!inode) {
278 unlock_kernel();
279 return ERR_PTR(-EACCES); 274 return ERR_PTR(-EACCES);
280 } 275 }
281 } else 276 } else
@@ -291,11 +286,9 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
291 loc = lelb_to_cpu(cfi.icb.extLocation); 286 loc = lelb_to_cpu(cfi.icb.extLocation);
292 inode = udf_iget(dir->i_sb, &loc); 287 inode = udf_iget(dir->i_sb, &loc);
293 if (!inode) { 288 if (!inode) {
294 unlock_kernel();
295 return ERR_PTR(-EACCES); 289 return ERR_PTR(-EACCES);
296 } 290 }
297 } 291 }
298 unlock_kernel();
299 292
300 return d_splice_alias(inode, dentry); 293 return d_splice_alias(inode, dentry);
301} 294}
@@ -476,15 +469,19 @@ add:
476 f_pos >> dir->i_sb->s_blocksize_bits, 1, err); 469 f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
477 if (!fibh->ebh) 470 if (!fibh->ebh)
478 goto out_err; 471 goto out_err;
472 /* Extents could have been merged, invalidate our position */
473 brelse(epos.bh);
474 epos.bh = NULL;
475 epos.block = dinfo->i_location;
476 epos.offset = udf_file_entry_alloc_offset(dir);
479 477
480 if (!fibh->soffset) { 478 if (!fibh->soffset) {
481 if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == 479 /* Find the freshly allocated block */
482 (EXT_RECORDED_ALLOCATED >> 30)) { 480 while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
483 block = eloc.logicalBlockNum + ((elen - 1) >> 481 (EXT_RECORDED_ALLOCATED >> 30))
482 ;
483 block = eloc.logicalBlockNum + ((elen - 1) >>
484 dir->i_sb->s_blocksize_bits); 484 dir->i_sb->s_blocksize_bits);
485 } else
486 block++;
487
488 brelse(fibh->sbh); 485 brelse(fibh->sbh);
489 fibh->sbh = fibh->ebh; 486 fibh->sbh = fibh->ebh;
490 fi = (struct fileIdentDesc *)(fibh->sbh->b_data); 487 fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
@@ -562,10 +559,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
562 int err; 559 int err;
563 struct udf_inode_info *iinfo; 560 struct udf_inode_info *iinfo;
564 561
565 lock_kernel();
566 inode = udf_new_inode(dir, mode, &err); 562 inode = udf_new_inode(dir, mode, &err);
567 if (!inode) { 563 if (!inode) {
568 unlock_kernel();
569 return err; 564 return err;
570 } 565 }
571 566
@@ -583,7 +578,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
583 inode->i_nlink--; 578 inode->i_nlink--;
584 mark_inode_dirty(inode); 579 mark_inode_dirty(inode);
585 iput(inode); 580 iput(inode);
586 unlock_kernel();
587 return err; 581 return err;
588 } 582 }
589 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 583 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -596,7 +590,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
596 if (fibh.sbh != fibh.ebh) 590 if (fibh.sbh != fibh.ebh)
597 brelse(fibh.ebh); 591 brelse(fibh.ebh);
598 brelse(fibh.sbh); 592 brelse(fibh.sbh);
599 unlock_kernel();
600 d_instantiate(dentry, inode); 593 d_instantiate(dentry, inode);
601 594
602 return 0; 595 return 0;
@@ -614,7 +607,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
614 if (!old_valid_dev(rdev)) 607 if (!old_valid_dev(rdev))
615 return -EINVAL; 608 return -EINVAL;
616 609
617 lock_kernel();
618 err = -EIO; 610 err = -EIO;
619 inode = udf_new_inode(dir, mode, &err); 611 inode = udf_new_inode(dir, mode, &err);
620 if (!inode) 612 if (!inode)
@@ -627,7 +619,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
627 inode->i_nlink--; 619 inode->i_nlink--;
628 mark_inode_dirty(inode); 620 mark_inode_dirty(inode);
629 iput(inode); 621 iput(inode);
630 unlock_kernel();
631 return err; 622 return err;
632 } 623 }
633 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 624 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -646,7 +637,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
646 err = 0; 637 err = 0;
647 638
648out: 639out:
649 unlock_kernel();
650 return err; 640 return err;
651} 641}
652 642
@@ -659,7 +649,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
659 struct udf_inode_info *dinfo = UDF_I(dir); 649 struct udf_inode_info *dinfo = UDF_I(dir);
660 struct udf_inode_info *iinfo; 650 struct udf_inode_info *iinfo;
661 651
662 lock_kernel();
663 err = -EMLINK; 652 err = -EMLINK;
664 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
665 goto out; 654 goto out;
@@ -712,7 +701,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
712 err = 0; 701 err = 0;
713 702
714out: 703out:
715 unlock_kernel();
716 return err; 704 return err;
717} 705}
718 706
@@ -794,7 +782,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
794 struct kernel_lb_addr tloc; 782 struct kernel_lb_addr tloc;
795 783
796 retval = -ENOENT; 784 retval = -ENOENT;
797 lock_kernel();
798 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 785 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
799 if (!fi) 786 if (!fi)
800 goto out; 787 goto out;
@@ -826,7 +813,6 @@ end_rmdir:
826 brelse(fibh.sbh); 813 brelse(fibh.sbh);
827 814
828out: 815out:
829 unlock_kernel();
830 return retval; 816 return retval;
831} 817}
832 818
@@ -840,7 +826,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
840 struct kernel_lb_addr tloc; 826 struct kernel_lb_addr tloc;
841 827
842 retval = -ENOENT; 828 retval = -ENOENT;
843 lock_kernel();
844 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 829 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
845 if (!fi) 830 if (!fi)
846 goto out; 831 goto out;
@@ -870,7 +855,6 @@ end_unlink:
870 brelse(fibh.sbh); 855 brelse(fibh.sbh);
871 856
872out: 857out:
873 unlock_kernel();
874 return retval; 858 return retval;
875} 859}
876 860
@@ -890,21 +874,21 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
890 int block; 874 int block;
891 unsigned char *name = NULL; 875 unsigned char *name = NULL;
892 int namelen; 876 int namelen;
893 struct buffer_head *bh;
894 struct udf_inode_info *iinfo; 877 struct udf_inode_info *iinfo;
878 struct super_block *sb = dir->i_sb;
895 879
896 lock_kernel();
897 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); 880 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
898 if (!inode) 881 if (!inode)
899 goto out; 882 goto out;
900 883
884 iinfo = UDF_I(inode);
885 down_write(&iinfo->i_data_sem);
901 name = kmalloc(UDF_NAME_LEN, GFP_NOFS); 886 name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
902 if (!name) { 887 if (!name) {
903 err = -ENOMEM; 888 err = -ENOMEM;
904 goto out_no_entry; 889 goto out_no_entry;
905 } 890 }
906 891
907 iinfo = UDF_I(inode);
908 inode->i_data.a_ops = &udf_symlink_aops; 892 inode->i_data.a_ops = &udf_symlink_aops;
909 inode->i_op = &udf_symlink_inode_operations; 893 inode->i_op = &udf_symlink_inode_operations;
910 894
@@ -912,7 +896,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
912 struct kernel_lb_addr eloc; 896 struct kernel_lb_addr eloc;
913 uint32_t bsize; 897 uint32_t bsize;
914 898
915 block = udf_new_block(inode->i_sb, inode, 899 block = udf_new_block(sb, inode,
916 iinfo->i_location.partitionReferenceNum, 900 iinfo->i_location.partitionReferenceNum,
917 iinfo->i_location.logicalBlockNum, &err); 901 iinfo->i_location.logicalBlockNum, &err);
918 if (!block) 902 if (!block)
@@ -923,17 +907,17 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
923 eloc.logicalBlockNum = block; 907 eloc.logicalBlockNum = block;
924 eloc.partitionReferenceNum = 908 eloc.partitionReferenceNum =
925 iinfo->i_location.partitionReferenceNum; 909 iinfo->i_location.partitionReferenceNum;
926 bsize = inode->i_sb->s_blocksize; 910 bsize = sb->s_blocksize;
927 iinfo->i_lenExtents = bsize; 911 iinfo->i_lenExtents = bsize;
928 udf_add_aext(inode, &epos, &eloc, bsize, 0); 912 udf_add_aext(inode, &epos, &eloc, bsize, 0);
929 brelse(epos.bh); 913 brelse(epos.bh);
930 914
931 block = udf_get_pblock(inode->i_sb, block, 915 block = udf_get_pblock(sb, block,
932 iinfo->i_location.partitionReferenceNum, 916 iinfo->i_location.partitionReferenceNum,
933 0); 917 0);
934 epos.bh = udf_tgetblk(inode->i_sb, block); 918 epos.bh = udf_tgetblk(sb, block);
935 lock_buffer(epos.bh); 919 lock_buffer(epos.bh);
936 memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); 920 memset(epos.bh->b_data, 0x00, bsize);
937 set_buffer_uptodate(epos.bh); 921 set_buffer_uptodate(epos.bh);
938 unlock_buffer(epos.bh); 922 unlock_buffer(epos.bh);
939 mark_buffer_dirty_inode(epos.bh, inode); 923 mark_buffer_dirty_inode(epos.bh, inode);
@@ -941,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
941 } else 925 } else
942 ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr; 926 ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
943 927
944 eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode); 928 eoffset = sb->s_blocksize - udf_ext0_offset(inode);
945 pc = (struct pathComponent *)ea; 929 pc = (struct pathComponent *)ea;
946 930
947 if (*symname == '/') { 931 if (*symname == '/') {
@@ -981,7 +965,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
981 } 965 }
982 966
983 if (pc->componentType == 5) { 967 if (pc->componentType == 5) {
984 namelen = udf_put_filename(inode->i_sb, compstart, name, 968 namelen = udf_put_filename(sb, compstart, name,
985 symname - compstart); 969 symname - compstart);
986 if (!namelen) 970 if (!namelen)
987 goto out_no_entry; 971 goto out_no_entry;
@@ -1015,27 +999,16 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
1015 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 999 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1016 if (!fi) 1000 if (!fi)
1017 goto out_no_entry; 1001 goto out_no_entry;
1018 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 1002 cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
1019 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); 1003 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
1020 bh = UDF_SB(inode->i_sb)->s_lvid_bh; 1004 if (UDF_SB(inode->i_sb)->s_lvid_bh) {
1021 if (bh) {
1022 struct logicalVolIntegrityDesc *lvid =
1023 (struct logicalVolIntegrityDesc *)bh->b_data;
1024 struct logicalVolHeaderDesc *lvhd;
1025 uint64_t uniqueID;
1026 lvhd = (struct logicalVolHeaderDesc *)
1027 lvid->logicalVolContentsUse;
1028 uniqueID = le64_to_cpu(lvhd->uniqueID);
1029 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 1005 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
1030 cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); 1006 cpu_to_le32(lvid_get_unique_id(sb));
1031 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
1032 uniqueID += 16;
1033 lvhd->uniqueID = cpu_to_le64(uniqueID);
1034 mark_buffer_dirty(bh);
1035 } 1007 }
1036 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 1008 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
1037 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 1009 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
1038 mark_inode_dirty(dir); 1010 mark_inode_dirty(dir);
1011 up_write(&iinfo->i_data_sem);
1039 if (fibh.sbh != fibh.ebh) 1012 if (fibh.sbh != fibh.ebh)
1040 brelse(fibh.ebh); 1013 brelse(fibh.ebh);
1041 brelse(fibh.sbh); 1014 brelse(fibh.sbh);
@@ -1044,10 +1017,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
1044 1017
1045out: 1018out:
1046 kfree(name); 1019 kfree(name);
1047 unlock_kernel();
1048 return err; 1020 return err;
1049 1021
1050out_no_entry: 1022out_no_entry:
1023 up_write(&iinfo->i_data_sem);
1051 inode_dec_link_count(inode); 1024 inode_dec_link_count(inode);
1052 iput(inode); 1025 iput(inode);
1053 goto out; 1026 goto out;
@@ -1060,36 +1033,20 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1060 struct udf_fileident_bh fibh; 1033 struct udf_fileident_bh fibh;
1061 struct fileIdentDesc cfi, *fi; 1034 struct fileIdentDesc cfi, *fi;
1062 int err; 1035 int err;
1063 struct buffer_head *bh;
1064 1036
1065 lock_kernel();
1066 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
1067 unlock_kernel();
1068 return -EMLINK; 1038 return -EMLINK;
1069 } 1039 }
1070 1040
1071 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1072 if (!fi) { 1042 if (!fi) {
1073 unlock_kernel();
1074 return err; 1043 return err;
1075 } 1044 }
1076 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 1045 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
1077 cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location); 1046 cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
1078 bh = UDF_SB(inode->i_sb)->s_lvid_bh; 1047 if (UDF_SB(inode->i_sb)->s_lvid_bh) {
1079 if (bh) {
1080 struct logicalVolIntegrityDesc *lvid =
1081 (struct logicalVolIntegrityDesc *)bh->b_data;
1082 struct logicalVolHeaderDesc *lvhd;
1083 uint64_t uniqueID;
1084 lvhd = (struct logicalVolHeaderDesc *)
1085 (lvid->logicalVolContentsUse);
1086 uniqueID = le64_to_cpu(lvhd->uniqueID);
1087 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 1048 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
1088 cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); 1049 cpu_to_le32(lvid_get_unique_id(inode->i_sb));
1089 if (!(++uniqueID & 0x00000000FFFFFFFFUL))
1090 uniqueID += 16;
1091 lvhd->uniqueID = cpu_to_le64(uniqueID);
1092 mark_buffer_dirty(bh);
1093 } 1050 }
1094 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 1051 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
1095 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 1052 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
@@ -1103,7 +1060,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1103 mark_inode_dirty(inode); 1060 mark_inode_dirty(inode);
1104 ihold(inode); 1061 ihold(inode);
1105 d_instantiate(dentry, inode); 1062 d_instantiate(dentry, inode);
1106 unlock_kernel();
1107 1063
1108 return 0; 1064 return 0;
1109} 1065}
@@ -1124,7 +1080,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1124 struct kernel_lb_addr tloc; 1080 struct kernel_lb_addr tloc;
1125 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1081 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1126 1082
1127 lock_kernel();
1128 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); 1083 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1129 if (ofi) { 1084 if (ofi) {
1130 if (ofibh.sbh != ofibh.ebh) 1085 if (ofibh.sbh != ofibh.ebh)
@@ -1248,7 +1203,6 @@ end_rename:
1248 brelse(nfibh.ebh); 1203 brelse(nfibh.ebh);
1249 brelse(nfibh.sbh); 1204 brelse(nfibh.sbh);
1250 } 1205 }
1251 unlock_kernel();
1252 1206
1253 return retval; 1207 return retval;
1254} 1208}
@@ -1261,7 +1215,6 @@ static struct dentry *udf_get_parent(struct dentry *child)
1261 struct fileIdentDesc cfi; 1215 struct fileIdentDesc cfi;
1262 struct udf_fileident_bh fibh; 1216 struct udf_fileident_bh fibh;
1263 1217
1264 lock_kernel();
1265 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) 1218 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1266 goto out_unlock; 1219 goto out_unlock;
1267 1220
@@ -1273,11 +1226,9 @@ static struct dentry *udf_get_parent(struct dentry *child)
1273 inode = udf_iget(child->d_inode->i_sb, &tloc); 1226 inode = udf_iget(child->d_inode->i_sb, &tloc);
1274 if (!inode) 1227 if (!inode)
1275 goto out_unlock; 1228 goto out_unlock;
1276 unlock_kernel();
1277 1229
1278 return d_obtain_alias(inode); 1230 return d_obtain_alias(inode);
1279out_unlock: 1231out_unlock:
1280 unlock_kernel();
1281 return ERR_PTR(-EACCES); 1232 return ERR_PTR(-EACCES);
1282} 1233}
1283 1234
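The hunks above replace udf_link()'s open-coded uniqueID bumping with a call to lvid_get_unique_id() (added in the super.c hunk further below). The allocation rule itself is unchanged: only the low 32 bits of the ID are handed out, and when they wrap to zero the reserved values 1..15 are skipped. A minimal, runnable user-space sketch of just that arithmetic (next_unique_id is an illustrative name, not a kernel function):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the rule visible in the hunk: after incrementing, if the low
 * 32 bits wrapped to 0, skip the reserved IDs 1..15 by adding 16. */
static uint64_t next_unique_id(uint64_t id)
{
	if (!(++id & 0xFFFFFFFFULL))
		id += 16;
	return id;
}

int main(void)
{
	uint64_t id = 0xFFFFFFFFULL;	/* low 32 bits about to wrap */

	/* prints 0x100000010: the wrap skipped the reserved IDs 1..15 */
	printf("next id: 0x%llx\n", (unsigned long long)next_unique_id(id));
	return 0;
}

Note that lvid_get_unique_id() itself returns the pre-increment value; the sketch only demonstrates the wraparound step.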
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 745eb209be0c..a71090ea0e07 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/mutex.h>
28 29
29uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
30 uint16_t partition, uint32_t offset) 31 uint16_t partition, uint32_t offset)
@@ -159,7 +160,9 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
159 struct udf_sb_info *sbi = UDF_SB(sb); 160 struct udf_sb_info *sbi = UDF_SB(sb);
160 u16 reallocationTableLen; 161 u16 reallocationTableLen;
161 struct buffer_head *bh; 162 struct buffer_head *bh;
163 int ret = 0;
162 164
165 mutex_lock(&sbi->s_alloc_mutex);
163 for (i = 0; i < sbi->s_partitions; i++) { 166 for (i = 0; i < sbi->s_partitions; i++) {
164 struct udf_part_map *map = &sbi->s_partmaps[i]; 167 struct udf_part_map *map = &sbi->s_partmaps[i];
165 if (old_block > map->s_partition_root && 168 if (old_block > map->s_partition_root &&
@@ -175,8 +178,10 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
175 break; 178 break;
176 } 179 }
177 180
178 if (!st) 181 if (!st) {
179 return 1; 182 ret = 1;
183 goto out;
184 }
180 185
181 reallocationTableLen = 186 reallocationTableLen =
182 le16_to_cpu(st->reallocationTableLen); 187 le16_to_cpu(st->reallocationTableLen);
@@ -207,14 +212,16 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
207 ((old_block - 212 ((old_block -
208 map->s_partition_root) & 213 map->s_partition_root) &
209 (sdata->s_packet_len - 1)); 214 (sdata->s_packet_len - 1));
210 return 0; 215 ret = 0;
216 goto out;
211 } else if (origLoc == packet) { 217 } else if (origLoc == packet) {
212 *new_block = le32_to_cpu( 218 *new_block = le32_to_cpu(
213 entry->mappedLocation) + 219 entry->mappedLocation) +
214 ((old_block - 220 ((old_block -
215 map->s_partition_root) & 221 map->s_partition_root) &
216 (sdata->s_packet_len - 1)); 222 (sdata->s_packet_len - 1));
217 return 0; 223 ret = 0;
224 goto out;
218 } else if (origLoc > packet) 225 } else if (origLoc > packet)
219 break; 226 break;
220 } 227 }
@@ -251,20 +258,24 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
251 st->mapEntry[k].mappedLocation) + 258 st->mapEntry[k].mappedLocation) +
252 ((old_block - map->s_partition_root) & 259 ((old_block - map->s_partition_root) &
253 (sdata->s_packet_len - 1)); 260 (sdata->s_packet_len - 1));
254 return 0; 261 ret = 0;
262 goto out;
255 } 263 }
256 264
257 return 1; 265 ret = 1;
266 goto out;
258 } /* if old_block */ 267 } /* if old_block */
259 } 268 }
260 269
261 if (i == sbi->s_partitions) { 270 if (i == sbi->s_partitions) {
262 /* outside of partitions */ 271 /* outside of partitions */
263 /* for now, fail =) */ 272 /* for now, fail =) */
264 return 1; 273 ret = 1;
265 } 274 }
266 275
267 return 0; 276out:
277 mutex_unlock(&sbi->s_alloc_mutex);
278 return ret;
268} 279}
269 280
270static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block, 281static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
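udf_relocate_blocks() previously returned from half a dozen places inside the sparing-table walk; once that walk runs under s_alloc_mutex, every early return has to be funneled through a single unlock site. The pattern the hunks above adopt, reduced to a sketch (the table-walk details are elided):

	int ret = 0;

	mutex_lock(&sbi->s_alloc_mutex);
	/* ... walk the sparing tables ... */
	if (!st) {			/* no table found: was "return 1;" */
		ret = 1;
		goto out;
	}
	/* ... remap old_block, setting ret = 0 on success ... */
out:
	mutex_unlock(&sbi->s_alloc_mutex);
	return ret;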
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b539d53320fb..7b27b063ff6d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
48#include <linux/stat.h> 48#include <linux/stat.h>
49#include <linux/cdrom.h> 49#include <linux/cdrom.h>
50#include <linux/nls.h> 50#include <linux/nls.h>
51#include <linux/smp_lock.h>
52#include <linux/buffer_head.h> 51#include <linux/buffer_head.h>
53#include <linux/vfs.h> 52#include <linux/vfs.h>
54#include <linux/vmalloc.h> 53#include <linux/vmalloc.h>
@@ -135,6 +134,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
135 ei->i_next_alloc_block = 0; 134 ei->i_next_alloc_block = 0;
136 ei->i_next_alloc_goal = 0; 135 ei->i_next_alloc_goal = 0;
137 ei->i_strat4096 = 0; 136 ei->i_strat4096 = 0;
137 init_rwsem(&ei->i_data_sem);
138 138
139 return &ei->vfs_inode; 139 return &ei->vfs_inode;
140} 140}
@@ -574,13 +574,14 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
574 if (!udf_parse_options(options, &uopt, true)) 574 if (!udf_parse_options(options, &uopt, true))
575 return -EINVAL; 575 return -EINVAL;
576 576
577 lock_kernel(); 577 write_lock(&sbi->s_cred_lock);
578 sbi->s_flags = uopt.flags; 578 sbi->s_flags = uopt.flags;
579 sbi->s_uid = uopt.uid; 579 sbi->s_uid = uopt.uid;
580 sbi->s_gid = uopt.gid; 580 sbi->s_gid = uopt.gid;
581 sbi->s_umask = uopt.umask; 581 sbi->s_umask = uopt.umask;
582 sbi->s_fmode = uopt.fmode; 582 sbi->s_fmode = uopt.fmode;
583 sbi->s_dmode = uopt.dmode; 583 sbi->s_dmode = uopt.dmode;
584 write_unlock(&sbi->s_cred_lock);
584 585
585 if (sbi->s_lvid_bh) { 586 if (sbi->s_lvid_bh) {
586 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); 587 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -597,7 +598,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
597 udf_open_lvid(sb); 598 udf_open_lvid(sb);
598 599
599out_unlock: 600out_unlock:
600 unlock_kernel();
601 return error; 601 return error;
602} 602}
603 603
@@ -966,9 +966,9 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
966 (sizeof(struct buffer_head *) * nr_groups); 966 (sizeof(struct buffer_head *) * nr_groups);
967 967
968 if (size <= PAGE_SIZE) 968 if (size <= PAGE_SIZE)
969 bitmap = kmalloc(size, GFP_KERNEL); 969 bitmap = kzalloc(size, GFP_KERNEL);
970 else 970 else
971 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ 971 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
972 972
973 if (bitmap == NULL) { 973 if (bitmap == NULL) {
974 udf_error(sb, __func__, 974 udf_error(sb, __func__,
@@ -977,7 +977,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
977 return NULL; 977 return NULL;
978 } 978 }
979 979
980 memset(bitmap, 0x00, size);
981 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1); 980 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
982 bitmap->s_nr_groups = nr_groups; 981 bitmap->s_nr_groups = nr_groups;
983 return bitmap; 982 return bitmap;
@@ -1781,6 +1780,8 @@ static void udf_open_lvid(struct super_block *sb)
1781 1780
1782 if (!bh) 1781 if (!bh)
1783 return; 1782 return;
1783
1784 mutex_lock(&sbi->s_alloc_mutex);
1784 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1785 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1785 lvidiu = udf_sb_lvidiu(sbi); 1786 lvidiu = udf_sb_lvidiu(sbi);
1786 1787
@@ -1797,6 +1798,7 @@ static void udf_open_lvid(struct super_block *sb)
1797 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1798 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1798 mark_buffer_dirty(bh); 1799 mark_buffer_dirty(bh);
1799 sbi->s_lvid_dirty = 0; 1800 sbi->s_lvid_dirty = 0;
1801 mutex_unlock(&sbi->s_alloc_mutex);
1800} 1802}
1801 1803
1802static void udf_close_lvid(struct super_block *sb) 1804static void udf_close_lvid(struct super_block *sb)
@@ -1809,6 +1811,7 @@ static void udf_close_lvid(struct super_block *sb)
1809 if (!bh) 1811 if (!bh)
1810 return; 1812 return;
1811 1813
1814 mutex_lock(&sbi->s_alloc_mutex);
1812 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1815 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1813 lvidiu = udf_sb_lvidiu(sbi); 1816 lvidiu = udf_sb_lvidiu(sbi);
1814 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1817 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1829,6 +1832,34 @@ static void udf_close_lvid(struct super_block *sb)
1829 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); 1832 lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
1830 mark_buffer_dirty(bh); 1833 mark_buffer_dirty(bh);
1831 sbi->s_lvid_dirty = 0; 1834 sbi->s_lvid_dirty = 0;
1835 mutex_unlock(&sbi->s_alloc_mutex);
1836}
1837
1838u64 lvid_get_unique_id(struct super_block *sb)
1839{
1840 struct buffer_head *bh;
1841 struct udf_sb_info *sbi = UDF_SB(sb);
1842 struct logicalVolIntegrityDesc *lvid;
1843 struct logicalVolHeaderDesc *lvhd;
1844 u64 uniqueID;
1845 u64 ret;
1846
1847 bh = sbi->s_lvid_bh;
1848 if (!bh)
1849 return 0;
1850
1851 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1852 lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse;
1853
1854 mutex_lock(&sbi->s_alloc_mutex);
1855 ret = uniqueID = le64_to_cpu(lvhd->uniqueID);
1856 if (!(++uniqueID & 0xFFFFFFFF))
1857 uniqueID += 16;
1858 lvhd->uniqueID = cpu_to_le64(uniqueID);
1859 mutex_unlock(&sbi->s_alloc_mutex);
1860 mark_buffer_dirty(bh);
1861
1862 return ret;
1832} 1863}
1833 1864
1834static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) 1865static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1886,8 +1917,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1886 struct kernel_lb_addr rootdir, fileset; 1917 struct kernel_lb_addr rootdir, fileset;
1887 struct udf_sb_info *sbi; 1918 struct udf_sb_info *sbi;
1888 1919
1889 lock_kernel();
1890
1891 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); 1920 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
1892 uopt.uid = -1; 1921 uopt.uid = -1;
1893 uopt.gid = -1; 1922 uopt.gid = -1;
@@ -1896,10 +1925,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1896 uopt.dmode = UDF_INVALID_MODE; 1925 uopt.dmode = UDF_INVALID_MODE;
1897 1926
1898 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); 1927 sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
1899 if (!sbi) { 1928 if (!sbi)
1900 unlock_kernel();
1901 return -ENOMEM; 1929 return -ENOMEM;
1902 }
1903 1930
1904 sb->s_fs_info = sbi; 1931 sb->s_fs_info = sbi;
1905 1932
@@ -1936,6 +1963,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1936 sbi->s_fmode = uopt.fmode; 1963 sbi->s_fmode = uopt.fmode;
1937 sbi->s_dmode = uopt.dmode; 1964 sbi->s_dmode = uopt.dmode;
1938 sbi->s_nls_map = uopt.nls_map; 1965 sbi->s_nls_map = uopt.nls_map;
1966 rwlock_init(&sbi->s_cred_lock);
1939 1967
1940 if (uopt.session == 0xFFFFFFFF) 1968 if (uopt.session == 0xFFFFFFFF)
1941 sbi->s_session = udf_get_last_session(sb); 1969 sbi->s_session = udf_get_last_session(sb);
@@ -2045,7 +2073,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2045 goto error_out; 2073 goto error_out;
2046 } 2074 }
2047 sb->s_maxbytes = MAX_LFS_FILESIZE; 2075 sb->s_maxbytes = MAX_LFS_FILESIZE;
2048 unlock_kernel();
2049 return 0; 2076 return 0;
2050 2077
2051error_out: 2078error_out:
@@ -2066,7 +2093,6 @@ error_out:
2066 kfree(sbi); 2093 kfree(sbi);
2067 sb->s_fs_info = NULL; 2094 sb->s_fs_info = NULL;
2068 2095
2069 unlock_kernel();
2070 return -EINVAL; 2096 return -EINVAL;
2071} 2097}
2072 2098
@@ -2105,8 +2131,6 @@ static void udf_put_super(struct super_block *sb)
2105 2131
2106 sbi = UDF_SB(sb); 2132 sbi = UDF_SB(sb);
2107 2133
2108 lock_kernel();
2109
2110 if (sbi->s_vat_inode) 2134 if (sbi->s_vat_inode)
2111 iput(sbi->s_vat_inode); 2135 iput(sbi->s_vat_inode);
2112 if (sbi->s_partitions) 2136 if (sbi->s_partitions)
@@ -2122,8 +2146,6 @@ static void udf_put_super(struct super_block *sb)
2122 kfree(sbi->s_partmaps); 2146 kfree(sbi->s_partmaps);
2123 kfree(sb->s_fs_info); 2147 kfree(sb->s_fs_info);
2124 sb->s_fs_info = NULL; 2148 sb->s_fs_info = NULL;
2125
2126 unlock_kernel();
2127} 2149}
2128 2150
2129static int udf_sync_fs(struct super_block *sb, int wait) 2151static int udf_sync_fs(struct super_block *sb, int wait)
@@ -2186,8 +2208,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2186 uint16_t ident; 2208 uint16_t ident;
2187 struct spaceBitmapDesc *bm; 2209 struct spaceBitmapDesc *bm;
2188 2210
2189 lock_kernel();
2190
2191 loc.logicalBlockNum = bitmap->s_extPosition; 2211 loc.logicalBlockNum = bitmap->s_extPosition;
2192 loc.partitionReferenceNum = UDF_SB(sb)->s_partition; 2212 loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
2193 bh = udf_read_ptagged(sb, &loc, 0, &ident); 2213 bh = udf_read_ptagged(sb, &loc, 0, &ident);
@@ -2224,10 +2244,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2224 } 2244 }
2225 } 2245 }
2226 brelse(bh); 2246 brelse(bh);
2227
2228out: 2247out:
2229 unlock_kernel();
2230
2231 return accum; 2248 return accum;
2232} 2249}
2233 2250
@@ -2240,8 +2257,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
2240 int8_t etype; 2257 int8_t etype;
2241 struct extent_position epos; 2258 struct extent_position epos;
2242 2259
2243 lock_kernel(); 2260 mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
2244
2245 epos.block = UDF_I(table)->i_location; 2261 epos.block = UDF_I(table)->i_location;
2246 epos.offset = sizeof(struct unallocSpaceEntry); 2262 epos.offset = sizeof(struct unallocSpaceEntry);
2247 epos.bh = NULL; 2263 epos.bh = NULL;
@@ -2250,8 +2266,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
2250 accum += (elen >> table->i_sb->s_blocksize_bits); 2266 accum += (elen >> table->i_sb->s_blocksize_bits);
2251 2267
2252 brelse(epos.bh); 2268 brelse(epos.bh);
2253 2269 mutex_unlock(&UDF_SB(sb)->s_alloc_mutex);
2254 unlock_kernel();
2255 2270
2256 return accum; 2271 return accum;
2257} 2272}
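Two finer-grained locks replace the BKL in super.c: s_alloc_mutex serializes LVID and allocation updates, while the new s_cred_lock rwlock protects the default uid/gid/mode settings that remount can change. Remount takes the write side (see the hunk above); a reader applying the defaults would pair it with the read side. A hedged sketch, assuming an illustrative helper name:

static mode_t udf_default_fmode(struct super_block *sb)
{
	struct udf_sb_info *sbi = UDF_SB(sb);
	mode_t mode;

	read_lock(&sbi->s_cred_lock);	/* pairs with write_lock in remount */
	mode = sbi->s_fmode;
	read_unlock(&sbi->s_cred_lock);

	return mode;
}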
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 16064787d2b7..b1d4488b0f14 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include "udf_i.h" 31#include "udf_i.h"
33 32
@@ -78,13 +77,16 @@ static int udf_symlink_filler(struct file *file, struct page *page)
78 int err = -EIO; 77 int err = -EIO;
79 unsigned char *p = kmap(page); 78 unsigned char *p = kmap(page);
80 struct udf_inode_info *iinfo; 79 struct udf_inode_info *iinfo;
80 uint32_t pos;
81 81
82 lock_kernel();
83 iinfo = UDF_I(inode); 82 iinfo = UDF_I(inode);
83 pos = udf_block_map(inode, 0);
84
85 down_read(&iinfo->i_data_sem);
84 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 86 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
85 symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr; 87 symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
86 } else { 88 } else {
87 bh = sb_bread(inode->i_sb, udf_block_map(inode, 0)); 89 bh = sb_bread(inode->i_sb, pos);
88 90
89 if (!bh) 91 if (!bh)
90 goto out; 92 goto out;
@@ -95,14 +97,14 @@ static int udf_symlink_filler(struct file *file, struct page *page)
95 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); 97 udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
96 brelse(bh); 98 brelse(bh);
97 99
98 unlock_kernel(); 100 up_read(&iinfo->i_data_sem);
99 SetPageUptodate(page); 101 SetPageUptodate(page);
100 kunmap(page); 102 kunmap(page);
101 unlock_page(page); 103 unlock_page(page);
102 return 0; 104 return 0;
103 105
104out: 106out:
105 unlock_kernel(); 107 up_read(&iinfo->i_data_sem);
106 SetPageError(page); 108 SetPageError(page);
107 kunmap(page); 109 kunmap(page);
108 unlock_page(page); 110 unlock_page(page);
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index e58d1de41073..d1bd31ea724e 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,18 @@
1#ifndef _UDF_I_H 1#ifndef _UDF_I_H
2#define _UDF_I_H 2#define _UDF_I_H
3 3
4/*
5 * The i_data_sem and i_mutex serve for protection of allocation information
6 * of regular files and symlinks. This includes all extents belonging to
7 * the file/symlink, whether the data is stored in the inode or in external
8 * data blocks, preallocation, goal block information... When extents are
9 * read, i_mutex or i_data_sem must be held (holding i_data_sem for reading
10 * is enough). When extents are changed, i_data_sem must be held for
11 * writing and i_mutex must be held as well.
12 *
13 * For directories, i_mutex provides all the necessary protection.
14 */
15
4struct udf_inode_info { 16struct udf_inode_info {
5 struct timespec i_crtime; 17 struct timespec i_crtime;
6 /* Physical address of inode */ 18 /* Physical address of inode */
@@ -21,6 +33,7 @@ struct udf_inode_info {
21 struct long_ad *i_lad; 33 struct long_ad *i_lad;
22 __u8 *i_data; 34 __u8 *i_data;
23 } i_ext; 35 } i_ext;
36 struct rw_semaphore i_data_sem;
24 struct inode vfs_inode; 37 struct inode vfs_inode;
25}; 38};
26 39
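The comment added above fixes the locking rules; in code they come out as two distinct patterns. A sketch under those rules (both function names are illustrative):

/* Reading extents: i_data_sem held for reading is sufficient. */
static void udf_inspect_extents(struct inode *inode)
{
	struct udf_inode_info *iinfo = UDF_I(inode);

	down_read(&iinfo->i_data_sem);
	/* ... walk iinfo->i_ext safely against concurrent changes ... */
	up_read(&iinfo->i_data_sem);
}

/* Changing extents: i_mutex *and* i_data_sem held for writing. */
static void udf_modify_extents(struct inode *inode)
{
	struct udf_inode_info *iinfo = UDF_I(inode);

	mutex_lock(&inode->i_mutex);
	down_write(&iinfo->i_data_sem);
	/* ... change allocation information ... */
	up_write(&iinfo->i_data_sem);
	mutex_unlock(&inode->i_mutex);
}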
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index d113b72c2768..4858c191242b 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -2,6 +2,7 @@
2#define __LINUX_UDF_SB_H 2#define __LINUX_UDF_SB_H
3 3
4#include <linux/mutex.h> 4#include <linux/mutex.h>
5#include <linux/bitops.h>
5 6
6/* Since UDF 2.01 is ISO 13346 based... */ 7/* Since UDF 2.01 is ISO 13346 based... */
7#define UDF_SUPER_MAGIC 0x15013346 8#define UDF_SUPER_MAGIC 0x15013346
@@ -128,6 +129,8 @@ struct udf_sb_info {
128 uid_t s_uid; 129 uid_t s_uid;
129 mode_t s_fmode; 130 mode_t s_fmode;
130 mode_t s_dmode; 131 mode_t s_dmode;
132 /* Lock protecting consistency of above permission settings */
133 rwlock_t s_cred_lock;
131 134
132 /* Root Info */ 135 /* Root Info */
133 struct timespec s_record_time; 136 struct timespec s_record_time;
@@ -139,7 +142,7 @@ struct udf_sb_info {
139 __u16 s_udfrev; 142 __u16 s_udfrev;
140 143
141 /* Miscellaneous flags */ 144 /* Miscellaneous flags */
142 __u32 s_flags; 145 unsigned long s_flags;
143 146
144 /* Encoding info */ 147 /* Encoding info */
145 struct nls_table *s_nls_map; 148 struct nls_table *s_nls_map;
@@ -161,8 +164,19 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
161 164
162int udf_compute_nr_groups(struct super_block *sb, u32 partition); 165int udf_compute_nr_groups(struct super_block *sb, u32 partition);
163 166
164#define UDF_QUERY_FLAG(X,Y) ( UDF_SB(X)->s_flags & ( 1 << (Y) ) ) 167static inline int UDF_QUERY_FLAG(struct super_block *sb, int flag)
165#define UDF_SET_FLAG(X,Y) ( UDF_SB(X)->s_flags |= ( 1 << (Y) ) ) 168{
166#define UDF_CLEAR_FLAG(X,Y) ( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) ) 169 return test_bit(flag, &UDF_SB(sb)->s_flags);
170}
171
172static inline void UDF_SET_FLAG(struct super_block *sb, int flag)
173{
174 set_bit(flag, &UDF_SB(sb)->s_flags);
175}
176
177static inline void UDF_CLEAR_FLAG(struct super_block *sb, int flag)
178{
179 clear_bit(flag, &UDF_SB(sb)->s_flags);
180}
167 181
168#endif /* __LINUX_UDF_SB_H */ 182#endif /* __LINUX_UDF_SB_H */
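The switch from macro bit-twiddling to test_bit()/set_bit()/clear_bit() is why s_flags changes type to unsigned long above: the kernel's atomic bitops operate on unsigned long words, so individual flags can now be flipped concurrently without an extra lock. A representative call site keeps its old shape (the exact lines are illustrative, not from this patch):

	UDF_SET_FLAG(sb, UDF_FLAG_USE_AD_IN_ICB);
	if (UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
		/* strict conformance checking stays enabled */;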
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 6995ab1f4305..eba48209f9f3 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -111,6 +111,8 @@ struct extent_position {
111}; 111};
112 112
113/* super.c */ 113/* super.c */
114
115__attribute__((format(printf, 3, 4)))
114extern void udf_warning(struct super_block *, const char *, const char *, ...); 116extern void udf_warning(struct super_block *, const char *, const char *, ...);
115static inline void udf_updated_lvid(struct super_block *sb) 117static inline void udf_updated_lvid(struct super_block *sb)
116{ 118{
@@ -123,6 +125,7 @@ static inline void udf_updated_lvid(struct super_block *sb)
123 sb->s_dirt = 1; 125 sb->s_dirt = 1;
124 UDF_SB(sb)->s_lvid_dirty = 1; 126 UDF_SB(sb)->s_lvid_dirty = 1;
125} 127}
128extern u64 lvid_get_unique_id(struct super_block *sb);
126 129
127/* namei.c */ 130/* namei.c */
128extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 131extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -133,7 +136,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
133extern long udf_ioctl(struct file *, unsigned int, unsigned long); 136extern long udf_ioctl(struct file *, unsigned int, unsigned long);
134/* inode.c */ 137/* inode.c */
135extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
136extern int udf_sync_inode(struct inode *);
137extern void udf_expand_file_adinicb(struct inode *, int, int *); 139extern void udf_expand_file_adinicb(struct inode *, int, int *);
138extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
139extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 141extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c370819..000000000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_SV_H__
19#define __XFS_SUPPORT_SV_H__
20
21#include <linux/wait.h>
22#include <linux/sched.h>
23#include <linux/spinlock.h>
24
25/*
26 * Synchronisation variables.
27 *
28 * (Parameters "pri", "svf" and "rts" are not implemented)
29 */
30
31typedef struct sv_s {
32 wait_queue_head_t waiters;
33} sv_t;
34
35static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
36{
37 DECLARE_WAITQUEUE(wait, current);
38
39 add_wait_queue_exclusive(&sv->waiters, &wait);
40 __set_current_state(TASK_UNINTERRUPTIBLE);
41 spin_unlock(lock);
42
43 schedule();
44
45 remove_wait_queue(&sv->waiters, &wait);
46}
47
48#define sv_init(sv,flag,name) \
49 init_waitqueue_head(&(sv)->waiters)
50#define sv_destroy(sv) \
51 /*NOTHING*/
52#define sv_wait(sv, pri, lock, s) \
53 _sv_wait(sv, lock)
54#define sv_signal(sv) \
55 wake_up(&(sv)->waiters)
56#define sv_broadcast(sv) \
57 wake_up_all(&(sv)->waiters)
58
59#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 691f61223ed6..ec7bbb5645b6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
38#include <linux/pagevec.h> 38#include <linux/pagevec.h>
39#include <linux/writeback.h> 39#include <linux/writeback.h>
40 40
41/*
42 * Types of I/O for bmap clustering and I/O completion tracking.
43 */
44enum {
45 IO_READ, /* mapping for a read */
46 IO_DELAY, /* mapping covers delalloc region */
47 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
48 IO_NEW /* just allocated */
49};
50 41
51/* 42/*
52 * Prime number of hash buckets since address is used as the key. 43 * Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
182 xfs_inode_t *ip = XFS_I(ioend->io_inode); 173 xfs_inode_t *ip = XFS_I(ioend->io_inode);
183 xfs_fsize_t isize; 174 xfs_fsize_t isize;
184 175
185 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
186 ASSERT(ioend->io_type != IO_READ);
187
188 if (unlikely(ioend->io_error)) 176 if (unlikely(ioend->io_error))
189 return 0; 177 return 0;
190 178
@@ -244,10 +232,8 @@ xfs_end_io(
244 * We might have to update the on-disk file size after extending 232 * We might have to update the on-disk file size after extending
245 * writes. 233 * writes.
246 */ 234 */
247 if (ioend->io_type != IO_READ) { 235 error = xfs_setfilesize(ioend);
248 error = xfs_setfilesize(ioend); 236 ASSERT(!error || error == EAGAIN);
249 ASSERT(!error || error == EAGAIN);
250 }
251 237
252 /* 238 /*
253 * If we didn't complete processing of the ioend, requeue it to the 239 * If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
318xfs_map_blocks( 304xfs_map_blocks(
319 struct inode *inode, 305 struct inode *inode,
320 loff_t offset, 306 loff_t offset,
321 ssize_t count,
322 struct xfs_bmbt_irec *imap, 307 struct xfs_bmbt_irec *imap,
323 int flags) 308 int type,
309 int nonblocking)
324{ 310{
325 int nmaps = 1; 311 struct xfs_inode *ip = XFS_I(inode);
326 int new = 0; 312 struct xfs_mount *mp = ip->i_mount;
313 ssize_t count = 1 << inode->i_blkbits;
314 xfs_fileoff_t offset_fsb, end_fsb;
315 int error = 0;
316 int bmapi_flags = XFS_BMAPI_ENTIRE;
317 int nimaps = 1;
318
319 if (XFS_FORCED_SHUTDOWN(mp))
320 return -XFS_ERROR(EIO);
321
322 if (type == IO_UNWRITTEN)
323 bmapi_flags |= XFS_BMAPI_IGSTATE;
324
325 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
326 if (nonblocking)
327 return -XFS_ERROR(EAGAIN);
328 xfs_ilock(ip, XFS_ILOCK_SHARED);
329 }
327 330
328 return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); 331 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
332 (ip->i_df.if_flags & XFS_IFEXTENTS));
333 ASSERT(offset <= mp->m_maxioffset);
334
335 if (offset + count > mp->m_maxioffset)
336 count = mp->m_maxioffset - offset;
337 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
338 offset_fsb = XFS_B_TO_FSBT(mp, offset);
339 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
340 bmapi_flags, NULL, 0, imap, &nimaps, NULL);
341 xfs_iunlock(ip, XFS_ILOCK_SHARED);
342
343 if (error)
344 return -XFS_ERROR(error);
345
346 if (type == IO_DELALLOC &&
347 (!nimaps || isnullstartblock(imap->br_startblock))) {
348 error = xfs_iomap_write_allocate(ip, offset, count, imap);
349 if (!error)
350 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
351 return -XFS_ERROR(error);
352 }
353
354#ifdef DEBUG
355 if (type == IO_UNWRITTEN) {
356 ASSERT(nimaps);
357 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
358 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
359 }
360#endif
361 if (nimaps)
362 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
363 return 0;
329} 364}
330 365
331STATIC int 366STATIC int
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
380 415
381 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
382 WRITE_SYNC_PLUG : WRITE, bio); 417 WRITE_SYNC_PLUG : WRITE, bio);
383 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
384 bio_put(bio);
385} 418}
386 419
387STATIC struct bio * 420STATIC struct bio *
388xfs_alloc_ioend_bio( 421xfs_alloc_ioend_bio(
389 struct buffer_head *bh) 422 struct buffer_head *bh)
390{ 423{
391 struct bio *bio;
392 int nvecs = bio_get_nr_vecs(bh->b_bdev); 424 int nvecs = bio_get_nr_vecs(bh->b_bdev);
393 425 struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
394 do {
395 bio = bio_alloc(GFP_NOIO, nvecs);
396 nvecs >>= 1;
397 } while (!bio);
398 426
399 ASSERT(bio->bi_private == NULL); 427 ASSERT(bio->bi_private == NULL);
400 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 428 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
401 bio->bi_bdev = bh->b_bdev; 429 bio->bi_bdev = bh->b_bdev;
402 bio_get(bio);
403 return bio; 430 return bio;
404} 431}
405 432
@@ -470,9 +497,8 @@ xfs_submit_ioend(
470 /* Pass 1 - start writeback */ 497 /* Pass 1 - start writeback */
471 do { 498 do {
472 next = ioend->io_list; 499 next = ioend->io_list;
473 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 500 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
474 xfs_start_buffer_writeback(bh); 501 xfs_start_buffer_writeback(bh);
475 }
476 } while ((ioend = next) != NULL); 502 } while ((ioend = next) != NULL);
477 503
478 /* Pass 2 - submit I/O */ 504 /* Pass 2 - submit I/O */
@@ -600,117 +626,13 @@ xfs_map_at_offset(
600 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 626 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
601 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 627 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
602 628
603 lock_buffer(bh);
604 xfs_map_buffer(inode, bh, imap, offset); 629 xfs_map_buffer(inode, bh, imap, offset);
605 bh->b_bdev = xfs_find_bdev_for_inode(inode);
606 set_buffer_mapped(bh); 630 set_buffer_mapped(bh);
607 clear_buffer_delay(bh); 631 clear_buffer_delay(bh);
608 clear_buffer_unwritten(bh); 632 clear_buffer_unwritten(bh);
609} 633}
610 634
611/* 635/*
612 * Look for a page at index that is suitable for clustering.
613 */
614STATIC unsigned int
615xfs_probe_page(
616 struct page *page,
617 unsigned int pg_offset)
618{
619 struct buffer_head *bh, *head;
620 int ret = 0;
621
622 if (PageWriteback(page))
623 return 0;
624 if (!PageDirty(page))
625 return 0;
626 if (!page->mapping)
627 return 0;
628 if (!page_has_buffers(page))
629 return 0;
630
631 bh = head = page_buffers(page);
632 do {
633 if (!buffer_uptodate(bh))
634 break;
635 if (!buffer_mapped(bh))
636 break;
637 ret += bh->b_size;
638 if (ret >= pg_offset)
639 break;
640 } while ((bh = bh->b_this_page) != head);
641
642 return ret;
643}
644
645STATIC size_t
646xfs_probe_cluster(
647 struct inode *inode,
648 struct page *startpage,
649 struct buffer_head *bh,
650 struct buffer_head *head)
651{
652 struct pagevec pvec;
653 pgoff_t tindex, tlast, tloff;
654 size_t total = 0;
655 int done = 0, i;
656
657 /* First sum forwards in this page */
658 do {
659 if (!buffer_uptodate(bh) || !buffer_mapped(bh))
660 return total;
661 total += bh->b_size;
662 } while ((bh = bh->b_this_page) != head);
663
664 /* if we reached the end of the page, sum forwards in following pages */
665 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
666 tindex = startpage->index + 1;
667
668 /* Prune this back to avoid pathological behavior */
669 tloff = min(tlast, startpage->index + 64);
670
671 pagevec_init(&pvec, 0);
672 while (!done && tindex <= tloff) {
673 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
674
675 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
676 break;
677
678 for (i = 0; i < pagevec_count(&pvec); i++) {
679 struct page *page = pvec.pages[i];
680 size_t pg_offset, pg_len = 0;
681
682 if (tindex == tlast) {
683 pg_offset =
684 i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
685 if (!pg_offset) {
686 done = 1;
687 break;
688 }
689 } else
690 pg_offset = PAGE_CACHE_SIZE;
691
692 if (page->index == tindex && trylock_page(page)) {
693 pg_len = xfs_probe_page(page, pg_offset);
694 unlock_page(page);
695 }
696
697 if (!pg_len) {
698 done = 1;
699 break;
700 }
701
702 total += pg_len;
703 tindex++;
704 }
705
706 pagevec_release(&pvec);
707 cond_resched();
708 }
709
710 return total;
711}
712
713/*
714 * Test if a given page is suitable for writing as part of an unwritten 636 * Test if a given page is suitable for writing as part of an unwritten
715 * or delayed allocate extent. 637 * or delayed allocate extent.
716 */ 638 */
@@ -731,9 +653,9 @@ xfs_is_delayed_page(
731 if (buffer_unwritten(bh)) 653 if (buffer_unwritten(bh))
732 acceptable = (type == IO_UNWRITTEN); 654 acceptable = (type == IO_UNWRITTEN);
733 else if (buffer_delay(bh)) 655 else if (buffer_delay(bh))
734 acceptable = (type == IO_DELAY); 656 acceptable = (type == IO_DELALLOC);
735 else if (buffer_dirty(bh) && buffer_mapped(bh)) 657 else if (buffer_dirty(bh) && buffer_mapped(bh))
736 acceptable = (type == IO_NEW); 658 acceptable = (type == IO_OVERWRITE);
737 else 659 else
738 break; 660 break;
739 } while ((bh = bh->b_this_page) != head); 661 } while ((bh = bh->b_this_page) != head);
@@ -758,8 +680,7 @@ xfs_convert_page(
758 loff_t tindex, 680 loff_t tindex,
759 struct xfs_bmbt_irec *imap, 681 struct xfs_bmbt_irec *imap,
760 xfs_ioend_t **ioendp, 682 xfs_ioend_t **ioendp,
761 struct writeback_control *wbc, 683 struct writeback_control *wbc)
762 int all_bh)
763{ 684{
764 struct buffer_head *bh, *head; 685 struct buffer_head *bh, *head;
765 xfs_off_t end_offset; 686 xfs_off_t end_offset;
@@ -814,37 +735,30 @@ xfs_convert_page(
814 continue; 735 continue;
815 } 736 }
816 737
817 if (buffer_unwritten(bh) || buffer_delay(bh)) { 738 if (buffer_unwritten(bh) || buffer_delay(bh) ||
739 buffer_mapped(bh)) {
818 if (buffer_unwritten(bh)) 740 if (buffer_unwritten(bh))
819 type = IO_UNWRITTEN; 741 type = IO_UNWRITTEN;
742 else if (buffer_delay(bh))
743 type = IO_DELALLOC;
820 else 744 else
821 type = IO_DELAY; 745 type = IO_OVERWRITE;
822 746
823 if (!xfs_imap_valid(inode, imap, offset)) { 747 if (!xfs_imap_valid(inode, imap, offset)) {
824 done = 1; 748 done = 1;
825 continue; 749 continue;
826 } 750 }
827 751
828 ASSERT(imap->br_startblock != HOLESTARTBLOCK); 752 lock_buffer(bh);
829 ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 753 if (type != IO_OVERWRITE)
830 754 xfs_map_at_offset(inode, bh, imap, offset);
831 xfs_map_at_offset(inode, bh, imap, offset);
832 xfs_add_to_ioend(inode, bh, offset, type, 755 xfs_add_to_ioend(inode, bh, offset, type,
833 ioendp, done); 756 ioendp, done);
834 757
835 page_dirty--; 758 page_dirty--;
836 count++; 759 count++;
837 } else { 760 } else {
838 type = IO_NEW; 761 done = 1;
839 if (buffer_mapped(bh) && all_bh) {
840 lock_buffer(bh);
841 xfs_add_to_ioend(inode, bh, offset,
842 type, ioendp, done);
843 count++;
844 page_dirty--;
845 } else {
846 done = 1;
847 }
848 } 762 }
849 } while (offset += len, (bh = bh->b_this_page) != head); 763 } while (offset += len, (bh = bh->b_this_page) != head);
850 764
@@ -876,7 +790,6 @@ xfs_cluster_write(
876 struct xfs_bmbt_irec *imap, 790 struct xfs_bmbt_irec *imap,
877 xfs_ioend_t **ioendp, 791 xfs_ioend_t **ioendp,
878 struct writeback_control *wbc, 792 struct writeback_control *wbc,
879 int all_bh,
880 pgoff_t tlast) 793 pgoff_t tlast)
881{ 794{
882 struct pagevec pvec; 795 struct pagevec pvec;
@@ -891,7 +804,7 @@ xfs_cluster_write(
891 804
892 for (i = 0; i < pagevec_count(&pvec); i++) { 805 for (i = 0; i < pagevec_count(&pvec); i++) {
893 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 806 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
894 imap, ioendp, wbc, all_bh); 807 imap, ioendp, wbc);
895 if (done) 808 if (done)
896 break; 809 break;
897 } 810 }
@@ -935,7 +848,7 @@ xfs_aops_discard_page(
935 struct buffer_head *bh, *head; 848 struct buffer_head *bh, *head;
936 loff_t offset = page_offset(page); 849 loff_t offset = page_offset(page);
937 850
938 if (!xfs_is_delayed_page(page, IO_DELAY)) 851 if (!xfs_is_delayed_page(page, IO_DELALLOC))
939 goto out_invalidate; 852 goto out_invalidate;
940 853
941 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 854 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1002,10 +915,10 @@ xfs_vm_writepage(
1002 unsigned int type; 915 unsigned int type;
1003 __uint64_t end_offset; 916 __uint64_t end_offset;
1004 pgoff_t end_index, last_index; 917 pgoff_t end_index, last_index;
1005 ssize_t size, len; 918 ssize_t len;
1006 int flags, err, imap_valid = 0, uptodate = 1; 919 int err, imap_valid = 0, uptodate = 1;
1007 int count = 0; 920 int count = 0;
1008 int all_bh = 0; 921 int nonblocking = 0;
1009 922
1010 trace_xfs_writepage(inode, page, 0); 923 trace_xfs_writepage(inode, page, 0);
1011 924
@@ -1056,10 +969,14 @@ xfs_vm_writepage(
1056 969
1057 bh = head = page_buffers(page); 970 bh = head = page_buffers(page);
1058 offset = page_offset(page); 971 offset = page_offset(page);
1059 flags = BMAPI_READ; 972 type = IO_OVERWRITE;
1060 type = IO_NEW; 973
974 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
975 nonblocking = 1;
1061 976
1062 do { 977 do {
978 int new_ioend = 0;
979
1063 if (offset >= end_offset) 980 if (offset >= end_offset)
1064 break; 981 break;
1065 if (!buffer_uptodate(bh)) 982 if (!buffer_uptodate(bh))
@@ -1076,90 +993,54 @@ xfs_vm_writepage(
1076 continue; 993 continue;
1077 } 994 }
1078 995
1079 if (imap_valid) 996 if (buffer_unwritten(bh)) {
1080 imap_valid = xfs_imap_valid(inode, &imap, offset); 997 if (type != IO_UNWRITTEN) {
1081
1082 if (buffer_unwritten(bh) || buffer_delay(bh)) {
1083 int new_ioend = 0;
1084
1085 /*
1086 * Make sure we don't use a read-only iomap
1087 */
1088 if (flags == BMAPI_READ)
1089 imap_valid = 0;
1090
1091 if (buffer_unwritten(bh)) {
1092 type = IO_UNWRITTEN; 998 type = IO_UNWRITTEN;
1093 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 999 imap_valid = 0;
1094 } else if (buffer_delay(bh)) {
1095 type = IO_DELAY;
1096 flags = BMAPI_ALLOCATE;
1097
1098 if (wbc->sync_mode == WB_SYNC_NONE)
1099 flags |= BMAPI_TRYLOCK;
1100 }
1101
1102 if (!imap_valid) {
1103 /*
1104 * If we didn't have a valid mapping then we
1105 * need to ensure that we put the new mapping
1106 * in a new ioend structure. This needs to be
1107 * done to ensure that the ioends correctly
1108 * reflect the block mappings at io completion
1109 * for unwritten extent conversion.
1110 */
1111 new_ioend = 1;
1112 err = xfs_map_blocks(inode, offset, len,
1113 &imap, flags);
1114 if (err)
1115 goto error;
1116 imap_valid = xfs_imap_valid(inode, &imap,
1117 offset);
1118 } 1000 }
1119 if (imap_valid) { 1001 } else if (buffer_delay(bh)) {
1120 xfs_map_at_offset(inode, bh, &imap, offset); 1002 if (type != IO_DELALLOC) {
1121 xfs_add_to_ioend(inode, bh, offset, type, 1003 type = IO_DELALLOC;
1122 &ioend, new_ioend); 1004 imap_valid = 0;
1123 count++;
1124 } 1005 }
1125 } else if (buffer_uptodate(bh)) { 1006 } else if (buffer_uptodate(bh)) {
1126 /* 1007 if (type != IO_OVERWRITE) {
1127 * we got here because the buffer is already mapped. 1008 type = IO_OVERWRITE;
1128 * That means it must already have extents allocated 1009 imap_valid = 0;
1129 * underneath it. Map the extent by reading it.
1130 */
1131 if (!imap_valid || flags != BMAPI_READ) {
1132 flags = BMAPI_READ;
1133 size = xfs_probe_cluster(inode, page, bh, head);
1134 err = xfs_map_blocks(inode, offset, size,
1135 &imap, flags);
1136 if (err)
1137 goto error;
1138 imap_valid = xfs_imap_valid(inode, &imap,
1139 offset);
1140 } 1010 }
1011 } else {
1012 if (PageUptodate(page)) {
1013 ASSERT(buffer_mapped(bh));
1014 imap_valid = 0;
1015 }
1016 continue;
1017 }
1141 1018
1019 if (imap_valid)
1020 imap_valid = xfs_imap_valid(inode, &imap, offset);
1021 if (!imap_valid) {
1142 /* 1022 /*
1143 * We set the type to IO_NEW in case we are doing a 1023 * If we didn't have a valid mapping then we need to
1144 * small write at EOF that is extending the file but 1024 * put the new mapping into a separate ioend structure.
1145 * without needing an allocation. We need to update the 1025 * This ensures non-contiguous extents always have
1146 * file size on I/O completion in this case so it is 1026 * separate ioends, which is particularly important
1147 * the same case as having just allocated a new extent 1027 * for unwritten extent conversion at I/O completion
1148 * that we are writing into for the first time. 1028 * time.
1149 */ 1029 */
1150 type = IO_NEW; 1030 new_ioend = 1;
1151 if (trylock_buffer(bh)) { 1031 err = xfs_map_blocks(inode, offset, &imap, type,
1152 if (imap_valid) 1032 nonblocking);
1153 all_bh = 1; 1033 if (err)
1154 xfs_add_to_ioend(inode, bh, offset, type, 1034 goto error;
1155 &ioend, !imap_valid); 1035 imap_valid = xfs_imap_valid(inode, &imap, offset);
1156 count++; 1036 }
1157 } else { 1037 if (imap_valid) {
1158 imap_valid = 0; 1038 lock_buffer(bh);
1159 } 1039 if (type != IO_OVERWRITE)
1160 } else if (PageUptodate(page)) { 1040 xfs_map_at_offset(inode, bh, &imap, offset);
1161 ASSERT(buffer_mapped(bh)); 1041 xfs_add_to_ioend(inode, bh, offset, type, &ioend,
1162 imap_valid = 0; 1042 new_ioend);
1043 count++;
1163 } 1044 }
1164 1045
1165 if (!iohead) 1046 if (!iohead)
@@ -1188,7 +1069,7 @@ xfs_vm_writepage(
1188 end_index = last_index; 1069 end_index = last_index;
1189 1070
1190 xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1071 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1191 wbc, all_bh, end_index); 1072 wbc, end_index);
1192 } 1073 }
1193 1074
1194 if (iohead) 1075 if (iohead)
@@ -1257,13 +1138,19 @@ __xfs_get_blocks(
1257 int create, 1138 int create,
1258 int direct) 1139 int direct)
1259{ 1140{
1260 int flags = create ? BMAPI_WRITE : BMAPI_READ; 1141 struct xfs_inode *ip = XFS_I(inode);
1142 struct xfs_mount *mp = ip->i_mount;
1143 xfs_fileoff_t offset_fsb, end_fsb;
1144 int error = 0;
1145 int lockmode = 0;
1261 struct xfs_bmbt_irec imap; 1146 struct xfs_bmbt_irec imap;
1147 int nimaps = 1;
1262 xfs_off_t offset; 1148 xfs_off_t offset;
1263 ssize_t size; 1149 ssize_t size;
1264 int nimap = 1;
1265 int new = 0; 1150 int new = 0;
1266 int error; 1151
1152 if (XFS_FORCED_SHUTDOWN(mp))
1153 return -XFS_ERROR(EIO);
1267 1154
1268 offset = (xfs_off_t)iblock << inode->i_blkbits; 1155 offset = (xfs_off_t)iblock << inode->i_blkbits;
1269 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1156 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1272,15 +1159,45 @@ __xfs_get_blocks(
1272 if (!create && direct && offset >= i_size_read(inode)) 1159 if (!create && direct && offset >= i_size_read(inode))
1273 return 0; 1160 return 0;
1274 1161
1275 if (direct && create) 1162 if (create) {
1276 flags |= BMAPI_DIRECT; 1163 lockmode = XFS_ILOCK_EXCL;
1164 xfs_ilock(ip, lockmode);
1165 } else {
1166 lockmode = xfs_ilock_map_shared(ip);
1167 }
1168
1169 ASSERT(offset <= mp->m_maxioffset);
1170 if (offset + size > mp->m_maxioffset)
1171 size = mp->m_maxioffset - offset;
1172 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1173 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1277 1174
1278 error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, 1175 error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
1279 &new); 1176 XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
1280 if (error) 1177 if (error)
1281 return -error; 1178 goto out_unlock;
1282 if (nimap == 0) 1179
1283 return 0; 1180 if (create &&
1181 (!nimaps ||
1182 (imap.br_startblock == HOLESTARTBLOCK ||
1183 imap.br_startblock == DELAYSTARTBLOCK))) {
1184 if (direct) {
1185 error = xfs_iomap_write_direct(ip, offset, size,
1186 &imap, nimaps);
1187 } else {
1188 error = xfs_iomap_write_delay(ip, offset, size, &imap);
1189 }
1190 if (error)
1191 goto out_unlock;
1192
1193 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1194 } else if (nimaps) {
1195 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1196 } else {
1197 trace_xfs_get_blocks_notfound(ip, offset, size);
1198 goto out_unlock;
1199 }
1200 xfs_iunlock(ip, lockmode);
1284 1201
1285 if (imap.br_startblock != HOLESTARTBLOCK && 1202 if (imap.br_startblock != HOLESTARTBLOCK &&
1286 imap.br_startblock != DELAYSTARTBLOCK) { 1203 imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1347,6 +1264,10 @@ __xfs_get_blocks(
1347 } 1264 }
1348 1265
1349 return 0; 1266 return 0;
1267
1268out_unlock:
1269 xfs_iunlock(ip, lockmode);
1270 return -error;
1350} 1271}
1351 1272
1352int 1273int
@@ -1434,7 +1355,7 @@ xfs_vm_direct_IO(
1434 ssize_t ret; 1355 ssize_t ret;
1435 1356
1436 if (rw & WRITE) { 1357 if (rw & WRITE) {
1437 iocb->private = xfs_alloc_ioend(inode, IO_NEW); 1358 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
1438 1359
1439 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1360 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1440 offset, nr_segs, 1361 offset, nr_segs,
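The rewritten xfs_vm_writepage() loop drives a small state machine: each buffer is classified as IO_UNWRITTEN, IO_DELALLOC, or IO_OVERWRITE, and any change of type invalidates the cached mapping so that one ioend never mixes extent kinds. The classification step from the hunk above, reduced to a sketch:

	/* per buffer_head, inside the writepage loop */
	if (buffer_unwritten(bh)) {
		if (type != IO_UNWRITTEN) {
			type = IO_UNWRITTEN;
			imap_valid = 0;	/* force a fresh xfs_map_blocks() */
		}
	} else if (buffer_delay(bh)) {
		if (type != IO_DELALLOC) {
			type = IO_DELALLOC;
			imap_valid = 0;
		}
	} else if (buffer_uptodate(bh)) {
		if (type != IO_OVERWRITE) {
			type = IO_OVERWRITE;
			imap_valid = 0;
		}
	} else {
		continue;	/* buffer not part of this writeback */
	}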
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb6237a..71f721e1a71f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
23extern mempool_t *xfs_ioend_pool; 23extern mempool_t *xfs_ioend_pool;
24 24
25/* 25/*
26 * Types of I/O for bmap clustering and I/O completion tracking.
27 */
28enum {
29 IO_DIRECT = 0, /* special case for direct I/O ioends */
30 IO_DELALLOC, /* mapping covers delalloc region */
31 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
32 IO_OVERWRITE, /* mapping covers already allocated extent */
33};
34
35#define XFS_IO_TYPES \
36 { 0, "" }, \
37 { IO_DELALLOC, "delalloc" }, \
38 { IO_UNWRITTEN, "unwritten" }, \
39 { IO_OVERWRITE, "overwrite" }
40
41/*
26 * xfs_ioend struct manages large extent writes for XFS. 42 * xfs_ioend struct manages large extent writes for XFS.
27 * It can manage several multi-page bio's at once. 43 * It can manage several multi-page bio's at once.
28 */ 44 */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6e9e31..92f1f2acc6ab 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
44 44
45static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
46STATIC int xfsbufd(void *); 46STATIC int xfsbufd(void *);
47STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
48STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 47STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
49static struct shrinker xfs_buf_shake = {
50 .shrink = xfsbufd_wakeup,
51 .seeks = DEFAULT_SEEKS,
52};
53 48
54static struct workqueue_struct *xfslogd_workqueue; 49static struct workqueue_struct *xfslogd_workqueue;
55struct workqueue_struct *xfsdatad_workqueue; 50struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
168} 163}
169 164
170/* 165/*
171 * Internal xfs_buf_t object manipulation 166 * xfs_buf_lru_add - add a buffer to the LRU.
167 *
168 * The LRU takes a new reference to the buffer so that it will only be freed
169 * once the shrinker takes the buffer off the LRU.
172 */ 170 */
171STATIC void
172xfs_buf_lru_add(
173 struct xfs_buf *bp)
174{
175 struct xfs_buftarg *btp = bp->b_target;
176
177 spin_lock(&btp->bt_lru_lock);
178 if (list_empty(&bp->b_lru)) {
179 atomic_inc(&bp->b_hold);
180 list_add_tail(&bp->b_lru, &btp->bt_lru);
181 btp->bt_lru_nr++;
182 }
183 spin_unlock(&btp->bt_lru_lock);
184}
185
186/*
187 * xfs_buf_lru_del - remove a buffer from the LRU
188 *
189 * The unlocked check is safe here because it only occurs when there are not
190 * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is there
191 * to optimise the shrinker removing the buffer from the LRU and calling
192 * xfs_buf_free(), i.e. it removes an unnecessary round trip on the
193 * bt_lru_lock.
194 */
195STATIC void
196xfs_buf_lru_del(
197 struct xfs_buf *bp)
198{
199 struct xfs_buftarg *btp = bp->b_target;
200
201 if (list_empty(&bp->b_lru))
202 return;
203
204 spin_lock(&btp->bt_lru_lock);
205 if (!list_empty(&bp->b_lru)) {
206 list_del_init(&bp->b_lru);
207 btp->bt_lru_nr--;
208 }
209 spin_unlock(&btp->bt_lru_lock);
210}
211
212/*
213 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
214 * b_lru_ref count so that the buffer is freed immediately when the buffer
215 * reference count falls to zero. If the buffer is already on the LRU, we need
216 * to remove the reference that LRU holds on the buffer.
217 *
218 * This prevents build-up of stale buffers on the LRU.
219 */
220void
221xfs_buf_stale(
222 struct xfs_buf *bp)
223{
224 bp->b_flags |= XBF_STALE;
225 atomic_set(&(bp)->b_lru_ref, 0);
226 if (!list_empty(&bp->b_lru)) {
227 struct xfs_buftarg *btp = bp->b_target;
228
229 spin_lock(&btp->bt_lru_lock);
230 if (!list_empty(&bp->b_lru)) {
231 list_del_init(&bp->b_lru);
232 btp->bt_lru_nr--;
233 atomic_dec(&bp->b_hold);
234 }
235 spin_unlock(&btp->bt_lru_lock);
236 }
237 ASSERT(atomic_read(&bp->b_hold) >= 1);
238}
173 239
174STATIC void 240STATIC void
175_xfs_buf_initialize( 241_xfs_buf_initialize(
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
186 252
187 memset(bp, 0, sizeof(xfs_buf_t)); 253 memset(bp, 0, sizeof(xfs_buf_t));
188 atomic_set(&bp->b_hold, 1); 254 atomic_set(&bp->b_hold, 1);
255 atomic_set(&bp->b_lru_ref, 1);
189 init_completion(&bp->b_iowait); 256 init_completion(&bp->b_iowait);
257 INIT_LIST_HEAD(&bp->b_lru);
190 INIT_LIST_HEAD(&bp->b_list); 258 INIT_LIST_HEAD(&bp->b_list);
191 RB_CLEAR_NODE(&bp->b_rbnode); 259 RB_CLEAR_NODE(&bp->b_rbnode);
192 sema_init(&bp->b_sema, 0); /* held, no waiters */ 260 sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
262{ 330{
263 trace_xfs_buf_free(bp, _RET_IP_); 331 trace_xfs_buf_free(bp, _RET_IP_);
264 332
333 ASSERT(list_empty(&bp->b_lru));
334
265 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
266 uint i; 336 uint i;
267 337
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
337 __func__, gfp_mask); 407 __func__, gfp_mask);
338 408
339 XFS_STATS_INC(xb_page_retries); 409 XFS_STATS_INC(xb_page_retries);
340 xfsbufd_wakeup(NULL, 0, gfp_mask);
341 congestion_wait(BLK_RW_ASYNC, HZ/50); 410 congestion_wait(BLK_RW_ASYNC, HZ/50);
342 goto retry; 411 goto retry;
343 } 412 }
@@ -828,6 +897,7 @@ xfs_buf_rele(
828 897
829 if (!pag) { 898 if (!pag) {
830 ASSERT(!bp->b_relse); 899 ASSERT(!bp->b_relse);
900 ASSERT(list_empty(&bp->b_lru));
831 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); 901 ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
832 if (atomic_dec_and_test(&bp->b_hold)) 902 if (atomic_dec_and_test(&bp->b_hold))
833 xfs_buf_free(bp); 903 xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
835 } 905 }
836 906
837 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); 907 ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
908
838 ASSERT(atomic_read(&bp->b_hold) > 0); 909 ASSERT(atomic_read(&bp->b_hold) > 0);
839 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { 910 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
840 if (bp->b_relse) { 911 if (bp->b_relse) {
841 atomic_inc(&bp->b_hold); 912 atomic_inc(&bp->b_hold);
842 spin_unlock(&pag->pag_buf_lock); 913 spin_unlock(&pag->pag_buf_lock);
843 bp->b_relse(bp); 914 bp->b_relse(bp);
915 } else if (!(bp->b_flags & XBF_STALE) &&
916 atomic_read(&bp->b_lru_ref)) {
917 xfs_buf_lru_add(bp);
918 spin_unlock(&pag->pag_buf_lock);
844 } else { 919 } else {
920 xfs_buf_lru_del(bp);
845 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); 921 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
846 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 922 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
847 spin_unlock(&pag->pag_buf_lock); 923 spin_unlock(&pag->pag_buf_lock);
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
1438 */ 1514 */
1439 1515
1440/* 1516/*
1441 * Wait for any bufs with callbacks that have been submitted but 1517 * Wait for any bufs with callbacks that have been submitted but have not yet
1442 * have not yet returned... walk the hash list for the target. 1518 * returned. These buffers will have an elevated hold count, so wait on those
1519 * while freeing all the buffers only held by the LRU.
1443 */ 1520 */
1444void 1521void
1445xfs_wait_buftarg( 1522xfs_wait_buftarg(
1446 struct xfs_buftarg *btp) 1523 struct xfs_buftarg *btp)
1447{ 1524{
1448 struct xfs_perag *pag; 1525 struct xfs_buf *bp;
1449 uint i;
1450 1526
1451 for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { 1527restart:
1452 pag = xfs_perag_get(btp->bt_mount, i); 1528 spin_lock(&btp->bt_lru_lock);
1453 spin_lock(&pag->pag_buf_lock); 1529 while (!list_empty(&btp->bt_lru)) {
1454 while (rb_first(&pag->pag_buf_tree)) { 1530 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1455 spin_unlock(&pag->pag_buf_lock); 1531 if (atomic_read(&bp->b_hold) > 1) {
1532 spin_unlock(&btp->bt_lru_lock);
1456 delay(100); 1533 delay(100);
1457 spin_lock(&pag->pag_buf_lock); 1534 goto restart;
1458 } 1535 }
1459 spin_unlock(&pag->pag_buf_lock); 1536 /*
1460 xfs_perag_put(pag); 1537 * clear the LRU reference count so the buffer doesn't get
1538 * ignored in xfs_buf_rele().
1539 */
1540 atomic_set(&bp->b_lru_ref, 0);
1541 spin_unlock(&btp->bt_lru_lock);
1542 xfs_buf_rele(bp);
1543 spin_lock(&btp->bt_lru_lock);
1461 } 1544 }
1545 spin_unlock(&btp->bt_lru_lock);
1462} 1546}
1463 1547
1464/* 1548int
1465 * buftarg list for delwrite queue processing 1549xfs_buftarg_shrink(
1466 */ 1550 struct shrinker *shrink,
1467static LIST_HEAD(xfs_buftarg_list); 1551 int nr_to_scan,
1468static DEFINE_SPINLOCK(xfs_buftarg_lock); 1552 gfp_t mask)
1469
1470STATIC void
1471xfs_register_buftarg(
1472 xfs_buftarg_t *btp)
1473{ 1553{
1474 spin_lock(&xfs_buftarg_lock); 1554 struct xfs_buftarg *btp = container_of(shrink,
1475 list_add(&btp->bt_list, &xfs_buftarg_list); 1555 struct xfs_buftarg, bt_shrinker);
1476 spin_unlock(&xfs_buftarg_lock); 1556 struct xfs_buf *bp;
1477} 1557 LIST_HEAD(dispose);
1478 1558
1479STATIC void 1559 if (!nr_to_scan)
1480xfs_unregister_buftarg( 1560 return btp->bt_lru_nr;
1481 xfs_buftarg_t *btp) 1561
1482{ 1562 spin_lock(&btp->bt_lru_lock);
1483 spin_lock(&xfs_buftarg_lock); 1563 while (!list_empty(&btp->bt_lru)) {
1484 list_del(&btp->bt_list); 1564 if (nr_to_scan-- <= 0)
1485 spin_unlock(&xfs_buftarg_lock); 1565 break;
1566
1567 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1568
1569 /*
1570 * Decrement the b_lru_ref count unless the value is already
1571 * zero. If the value is already zero, we need to reclaim the
1572 * buffer, otherwise it gets another trip through the LRU.
1573 */
1574 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1575 list_move_tail(&bp->b_lru, &btp->bt_lru);
1576 continue;
1577 }
1578
1579 /*
1580 * remove the buffer from the LRU now to avoid needing another
1581 * lock round trip inside xfs_buf_rele().
1582 */
1583 list_move(&bp->b_lru, &dispose);
1584 btp->bt_lru_nr--;
1585 }
1586 spin_unlock(&btp->bt_lru_lock);
1587
1588 while (!list_empty(&dispose)) {
1589 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1590 list_del_init(&bp->b_lru);
1591 xfs_buf_rele(bp);
1592 }
1593
1594 return btp->bt_lru_nr;
1486} 1595}
1487 1596
1488void 1597void
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
1490 struct xfs_mount *mp, 1599 struct xfs_mount *mp,
1491 struct xfs_buftarg *btp) 1600 struct xfs_buftarg *btp)
1492{ 1601{
1602 unregister_shrinker(&btp->bt_shrinker);
1603
1493 xfs_flush_buftarg(btp, 1); 1604 xfs_flush_buftarg(btp, 1);
1494 if (mp->m_flags & XFS_MOUNT_BARRIER) 1605 if (mp->m_flags & XFS_MOUNT_BARRIER)
1495 xfs_blkdev_issue_flush(btp); 1606 xfs_blkdev_issue_flush(btp);
1496 iput(btp->bt_mapping->host); 1607 iput(btp->bt_mapping->host);
1497 1608
1498 /* Unregister the buftarg first so that we don't get a
1499 * wakeup finding a non-existent task
1500 */
1501 xfs_unregister_buftarg(btp);
1502 kthread_stop(btp->bt_task); 1609 kthread_stop(btp->bt_task);
1503
1504 kmem_free(btp); 1610 kmem_free(btp);
1505} 1611}
1506 1612
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
1597 xfs_buftarg_t *btp, 1703 xfs_buftarg_t *btp,
1598 const char *fsname) 1704 const char *fsname)
1599{ 1705{
1600 int error = 0;
1601
1602 INIT_LIST_HEAD(&btp->bt_list);
1603 INIT_LIST_HEAD(&btp->bt_delwrite_queue); 1706 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1604 spin_lock_init(&btp->bt_delwrite_lock); 1707 spin_lock_init(&btp->bt_delwrite_lock);
1605 btp->bt_flags = 0; 1708 btp->bt_flags = 0;
1606 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); 1709 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
1607 if (IS_ERR(btp->bt_task)) { 1710 if (IS_ERR(btp->bt_task))
1608 error = PTR_ERR(btp->bt_task); 1711 return PTR_ERR(btp->bt_task);
1609 goto out_error; 1712 return 0;
1610 }
1611 xfs_register_buftarg(btp);
1612out_error:
1613 return error;
1614} 1713}
1615 1714
1616xfs_buftarg_t * 1715xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
1627 btp->bt_mount = mp; 1726 btp->bt_mount = mp;
1628 btp->bt_dev = bdev->bd_dev; 1727 btp->bt_dev = bdev->bd_dev;
1629 btp->bt_bdev = bdev; 1728 btp->bt_bdev = bdev;
1729 INIT_LIST_HEAD(&btp->bt_lru);
1730 spin_lock_init(&btp->bt_lru_lock);
1630 if (xfs_setsize_buftarg_early(btp, bdev)) 1731 if (xfs_setsize_buftarg_early(btp, bdev))
1631 goto error; 1732 goto error;
1632 if (xfs_mapping_buftarg(btp, bdev)) 1733 if (xfs_mapping_buftarg(btp, bdev))
1633 goto error; 1734 goto error;
1634 if (xfs_alloc_delwrite_queue(btp, fsname)) 1735 if (xfs_alloc_delwrite_queue(btp, fsname))
1635 goto error; 1736 goto error;
1737 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
1738 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1739 register_shrinker(&btp->bt_shrinker);
1636 return btp; 1740 return btp;
1637 1741
1638error: 1742error:
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
1737 flush_workqueue(queue); 1841 flush_workqueue(queue);
1738} 1842}
1739 1843
1740STATIC int
1741xfsbufd_wakeup(
1742 struct shrinker *shrink,
1743 int priority,
1744 gfp_t mask)
1745{
1746 xfs_buftarg_t *btp;
1747
1748 spin_lock(&xfs_buftarg_lock);
1749 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1750 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1751 continue;
1752 if (list_empty(&btp->bt_delwrite_queue))
1753 continue;
1754 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1755 wake_up_process(btp->bt_task);
1756 }
1757 spin_unlock(&xfs_buftarg_lock);
1758 return 0;
1759}
1760
1761/* 1844/*
1762 * Move as many buffers as specified to the supplied list 1845 * Move as many buffers as specified to the supplied list
1763 * indicating if we skipped any buffers to prevent deadlocks. 1846 * indicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
1952 if (!xfsconvertd_workqueue) 2035 if (!xfsconvertd_workqueue)
1953 goto out_destroy_xfsdatad_workqueue; 2036 goto out_destroy_xfsdatad_workqueue;
1954 2037
1955 register_shrinker(&xfs_buf_shake);
1956 return 0; 2038 return 0;
1957 2039
1958 out_destroy_xfsdatad_workqueue: 2040 out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
1968void 2050void
1969xfs_buf_terminate(void) 2051xfs_buf_terminate(void)
1970{ 2052{
1971 unregister_shrinker(&xfs_buf_shake);
1972 destroy_workqueue(xfsconvertd_workqueue); 2053 destroy_workqueue(xfsconvertd_workqueue);
1973 destroy_workqueue(xfsdatad_workqueue); 2054 destroy_workqueue(xfsdatad_workqueue);
1974 destroy_workqueue(xfslogd_workqueue); 2055 destroy_workqueue(xfslogd_workqueue);
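The hunks above replace the global xfsbufd_wakeup shrinker (and the xfs_buftarg_list it had to walk under xfs_buftarg_lock) with a shrinker embedded in each buftarg. A minimal sketch of how such a per-target callback can recover its buftarg, assuming the 2.6.37-era shrink() signature used by the removed xfsbufd_wakeup (the real xfs_buftarg_shrink is registered above, but its body is outside these hunks):

	STATIC int
	xfs_buftarg_shrink_sketch(
		struct shrinker	*shrink,
		int		nr_to_scan,
		gfp_t		mask)
	{
		/* container_of() recovers the buftarg: no global list or lock */
		struct xfs_buftarg *btp = container_of(shrink,
						struct xfs_buftarg, bt_shrinker);

		if (!nr_to_scan)
			return btp->bt_lru_nr;	/* just report the LRU population */

		/* walk btp->bt_lru under bt_lru_lock, disposing of buffers
		 * whose b_lru_ref has decayed to zero (see the next sketch) */
		return btp->bt_lru_nr;
	}

Embedding the shrinker in the buftarg also removes the wakeup-after-free window the old code had to guard against by unregistering the buftarg before stopping bt_task.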
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 383a3f37cf98..a76c2428faff 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
128 128
129 /* per device delwri queue */ 129 /* per device delwri queue */
130 struct task_struct *bt_task; 130 struct task_struct *bt_task;
131 struct list_head bt_list;
132 struct list_head bt_delwrite_queue; 131 struct list_head bt_delwrite_queue;
133 spinlock_t bt_delwrite_lock; 132 spinlock_t bt_delwrite_lock;
134 unsigned long bt_flags; 133 unsigned long bt_flags;
134
135 /* LRU control structures */
136 struct shrinker bt_shrinker;
137 struct list_head bt_lru;
138 spinlock_t bt_lru_lock;
139 unsigned int bt_lru_nr;
135} xfs_buftarg_t; 140} xfs_buftarg_t;
136 141
137/* 142/*
@@ -164,9 +169,11 @@ typedef struct xfs_buf {
164 xfs_off_t b_file_offset; /* offset in file */ 169 xfs_off_t b_file_offset; /* offset in file */
165 size_t b_buffer_length;/* size of buffer in bytes */ 170 size_t b_buffer_length;/* size of buffer in bytes */
166 atomic_t b_hold; /* reference count */ 171 atomic_t b_hold; /* reference count */
172 atomic_t b_lru_ref; /* lru reclaim ref count */
167 xfs_buf_flags_t b_flags; /* status flags */ 173 xfs_buf_flags_t b_flags; /* status flags */
168 struct semaphore b_sema; /* semaphore for lockables */ 174 struct semaphore b_sema; /* semaphore for lockables */
169 175
176 struct list_head b_lru; /* lru list */
170 wait_queue_head_t b_waiters; /* unpin waiters */ 177 wait_queue_head_t b_waiters; /* unpin waiters */
171 struct list_head b_list; 178 struct list_head b_list;
172 struct xfs_perag *b_pag; /* contains rbtree root */ 179 struct xfs_perag *b_pag; /* contains rbtree root */
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
264#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 271#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
265 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 272 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
266 273
267#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) 274void xfs_buf_stale(struct xfs_buf *bp);
275#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
268#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) 276#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
269#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) 277#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
270#define XFS_BUF_SUPER_STALE(bp) do { \ 278#define XFS_BUF_SUPER_STALE(bp) do { \
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
328#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) 336#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
329#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) 337#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
330 338
331#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0) 339static inline void
340xfs_buf_set_ref(
341 struct xfs_buf *bp,
342 int lru_ref)
343{
344 atomic_set(&bp->b_lru_ref, lru_ref);
345}
346#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
332#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) 347#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
333#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
334 348
335#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) 349#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
336 350
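The per-buffer b_lru_ref added above lets hot metadata buffers survive several shrinker passes: XFS_BUF_SET_VTYPE_REF seeds the count (e.g. XFS_ALLOC_BTREE_REF) and the shrinker decays it. A plausible decay step, assuming the shrinker only frees a buffer once the count reaches zero (the shrinker body is not part of these hunks):

	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
		/* still holds lru references: give it another pass */
		list_move_tail(&bp->b_lru, &btp->bt_lru);
	} else {
		/* reference count has decayed to zero: dispose of it */
		list_del_init(&bp->b_lru);
		btp->bt_lru_nr--;
	}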
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 3764d74790ec..fc0114da7fdd 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -70,8 +70,16 @@ xfs_fs_encode_fh(
70 else 70 else
71 fileid_type = FILEID_INO32_GEN_PARENT; 71 fileid_type = FILEID_INO32_GEN_PARENT;
72 72
73 /* filesystem may contain 64bit inode numbers */ 73 /*
74 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS)) 74 * If the filesystem may contain 64bit inode numbers, we need
75 * to use larger file handles that can represent them.
76 *
77 * While we may only allocate inodes that fit into 32 bits, any
78 * large enough filesystem may already contain 64-bit ones, thus the slightly
79 * confusing looking conditional below.
80 */
81 if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
82 (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
75 fileid_type |= XFS_FILEID_TYPE_64FLAG; 83 fileid_type |= XFS_FILEID_TYPE_64FLAG;
76 84
77 /* 85 /*
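Reduced to a truth table, the fixed conditional selects the larger handle format whenever 64-bit inode numbers are either allocatable now or may already exist on disk. A sketch using the flag names from this hunk (need64 is an illustrative local):

	/* need 64-bit capable handles unless the fs can only ever
	 * hold 32-bit inode numbers */
	bool need64 =
		!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
		 (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES);

	if (need64)
		fileid_type |= XFS_FILEID_TYPE_64FLAG;

XFS_MOUNT_32BITINODES is only set once a filesystem is large enough that inode numbers could exceed 32 bits, which is why its presence implies 64-bit handles even though new allocations are being restricted.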
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214ddd71ff79..096494997747 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
37 37
38#include <kmem.h> 38#include <kmem.h>
39#include <mrlock.h> 39#include <mrlock.h>
40#include <sv.h>
41#include <time.h> 40#include <time.h>
42 41
43#include <support/debug.h> 42#include <support/debug.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 064f964d4f3c..c51faaa5e291 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -834,8 +834,11 @@ xfsaild_wakeup(
834 struct xfs_ail *ailp, 834 struct xfs_ail *ailp,
835 xfs_lsn_t threshold_lsn) 835 xfs_lsn_t threshold_lsn)
836{ 836{
837 ailp->xa_target = threshold_lsn; 837 /* only ever move the target forwards */
838 wake_up_process(ailp->xa_task); 838 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
839 ailp->xa_target = threshold_lsn;
840 wake_up_process(ailp->xa_task);
841 }
839} 842}
840 843
841STATIC int 844STATIC int
@@ -847,8 +850,17 @@ xfsaild(
847 long tout = 0; /* milliseconds */ 850 long tout = 0; /* milliseconds */
848 851
849 while (!kthread_should_stop()) { 852 while (!kthread_should_stop()) {
850 schedule_timeout_interruptible(tout ? 853 /*
851 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); 854 * for short sleeps indicating congestion, don't allow us to
855 * get woken early. Otherwise all we do is bang on the AIL lock
856 * without making progress.
857 */
858 if (tout && tout <= 20)
859 __set_current_state(TASK_KILLABLE);
860 else
861 __set_current_state(TASK_INTERRUPTIBLE);
862 schedule_timeout(tout ?
863 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
852 864
853 /* swsusp */ 865 /* swsusp */
854 try_to_freeze(); 866 try_to_freeze();
@@ -1118,6 +1130,8 @@ xfs_fs_evict_inode(
1118 */ 1130 */
1119 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 1131 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1120 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 1132 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1133 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
1134 &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
1121 1135
1122 xfs_inactive(ip); 1136 xfs_inactive(ip);
1123} 1137}
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index afb0d7cfad1c..a02480de9759 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
53{ 53{
54 struct inode *inode = VFS_I(ip); 54 struct inode *inode = VFS_I(ip);
55 55
56 ASSERT(rcu_read_lock_held());
57
58 /*
59 * check for stale RCU freed inode
60 *
61 * If the inode has been reallocated, it doesn't matter if it's not in
62 * the AG we are walking - we are walking for writeback, so if it
63 * passes all the "valid inode" checks and is dirty, then we'll write
64 * it back anyway. If it has been reallocated and is still being
65 * initialised, the XFS_INEW check below will catch it.
66 */
67 spin_lock(&ip->i_flags_lock);
68 if (!ip->i_ino)
69 goto out_unlock_noent;
70
71 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
72 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
73 goto out_unlock_noent;
74 spin_unlock(&ip->i_flags_lock);
75
56 /* nothing to sync during shutdown */ 76 /* nothing to sync during shutdown */
57 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 77 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
58 return EFSCORRUPTED; 78 return EFSCORRUPTED;
59 79
60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
62 return ENOENT;
63
64 /* If we can't grab the inode, it must be on its way to reclaim. */ 80
65 if (!igrab(inode)) 81 if (!igrab(inode))
66 return ENOENT; 82 return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
72 88
73 /* inode is valid */ 89 /* inode is valid */
74 return 0; 90 return 0;
91
92out_unlock_noent:
93 spin_unlock(&ip->i_flags_lock);
94 return ENOENT;
75} 95}
76 96
77STATIC int 97STATIC int
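Together with the rcu_read_lock() conversion in the next hunk, the grab helper above yields a lookup pattern along these lines (a simplified sketch: batching, error codes and the per-inode work are elided):

	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (ip && xfs_inode_ag_walk_grab(ip) == 0) {
		/*
		 * The inode was validated under ip->i_flags_lock (non-zero
		 * i_ino, not INEW/RECLAIMABLE/RECLAIM) and pinned via
		 * igrab(), so it stays usable after the read lock drops.
		 */
	}
	rcu_read_unlock();	/* inode frees are RCU-deferred */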
@@ -98,12 +118,12 @@ restart:
98 int error = 0; 118 int error = 0;
99 int i; 119 int i;
100 120
101 read_lock(&pag->pag_ici_lock); 121 rcu_read_lock();
102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 122 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
103 (void **)batch, first_index, 123 (void **)batch, first_index,
104 XFS_LOOKUP_BATCH); 124 XFS_LOOKUP_BATCH);
105 if (!nr_found) { 125 if (!nr_found) {
106 read_unlock(&pag->pag_ici_lock); 126 rcu_read_unlock();
107 break; 127 break;
108 } 128 }
109 129
@@ -118,18 +138,26 @@ restart:
118 batch[i] = NULL; 138 batch[i] = NULL;
119 139
120 /* 140 /*
121 * Update the index for the next lookup. Catch overflows 141 * Update the index for the next lookup. Catch
122 * into the next AG range which can occur if we have inodes 142 * overflows into the next AG range which can occur if
123 * in the last block of the AG and we are currently 143 * we have inodes in the last block of the AG and we
124 * pointing to the last inode. 144 * are currently pointing to the last inode.
145 *
146 * Because we may see inodes that are from the wrong AG
147 * due to RCU freeing and reallocation, only update the
148 * index if it lies in this AG. It was a race that led
149 * us to see this inode, so another lookup from the
150 * same index will not find it again.
125 */ 151 */
152 if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
153 continue;
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 154 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 155 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1; 156 done = 1;
129 } 157 }
130 158
131 /* unlock now we've grabbed the inodes. */ 159 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock); 160 rcu_read_unlock();
133 161
134 for (i = 0; i < nr_found; i++) { 162 for (i = 0; i < nr_found; i++) {
135 if (!batch[i]) 163 if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
592 struct xfs_perag *pag; 620 struct xfs_perag *pag;
593 621
594 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 622 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
595 write_lock(&pag->pag_ici_lock); 623 spin_lock(&pag->pag_ici_lock);
596 spin_lock(&ip->i_flags_lock); 624 spin_lock(&ip->i_flags_lock);
597 __xfs_inode_set_reclaim_tag(pag, ip); 625 __xfs_inode_set_reclaim_tag(pag, ip);
598 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 626 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
599 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
600 write_unlock(&pag->pag_ici_lock); 628 spin_unlock(&pag->pag_ici_lock);
601 xfs_perag_put(pag); 629 xfs_perag_put(pag);
602} 630}
603 631
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
639 struct xfs_inode *ip, 667 struct xfs_inode *ip,
640 int flags) 668 int flags)
641{ 669{
670 ASSERT(rcu_read_lock_held());
671
672 /* quick check for stale RCU freed inode */
673 if (!ip->i_ino)
674 return 1;
642 675
643 /* 676 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic. 677 * do some unlocked checks first to avoid unnecessary lock traffic.
645 * The first is a flush lock check, the second is an already in reclaim 678
646 * check. Only do these checks if we are not going to block on locks. 679 * check. Only do these checks if we are not going to block on locks.
647 */ 680 */
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
654 * The radix tree lock here protects a thread in xfs_iget from racing 687 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the 688 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us. 689 * XFS_IRECLAIM flag set it will not touch us.
690 *
691 * Due to RCU lookup, we may find inodes that have been freed and only
692 * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
693 * aren't candidates for reclaim at all, so we must check that
694 * XFS_IRECLAIMABLE is set before proceeding to reclaim.
657 */ 695 */
658 spin_lock(&ip->i_flags_lock); 696 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); 697 if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { 698 __xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */ 699 /* not a reclaim candidate. */
662 spin_unlock(&ip->i_flags_lock); 700 spin_unlock(&ip->i_flags_lock);
663 return 1; 701 return 1;
664 } 702 }
@@ -795,12 +833,12 @@ reclaim:
795 * added to the tree assert that it's been there before to catch 833 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on. 834 * problems with the inode life time early on.
797 */ 835 */
798 write_lock(&pag->pag_ici_lock); 836 spin_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root, 837 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 838 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0); 839 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip); 840 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock); 841 spin_unlock(&pag->pag_ici_lock);
804 842
805 /* 843 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate 844 * Here we do an (almost) spurious inode lock in order to coordinate
@@ -864,14 +902,14 @@ restart:
864 struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 902 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
865 int i; 903 int i;
866 904
867 write_lock(&pag->pag_ici_lock); 905 rcu_read_lock();
868 nr_found = radix_tree_gang_lookup_tag( 906 nr_found = radix_tree_gang_lookup_tag(
869 &pag->pag_ici_root, 907 &pag->pag_ici_root,
870 (void **)batch, first_index, 908 (void **)batch, first_index,
871 XFS_LOOKUP_BATCH, 909 XFS_LOOKUP_BATCH,
872 XFS_ICI_RECLAIM_TAG); 910 XFS_ICI_RECLAIM_TAG);
873 if (!nr_found) { 911 if (!nr_found) {
874 write_unlock(&pag->pag_ici_lock); 912 rcu_read_unlock();
875 break; 913 break;
876 } 914 }
877 915
@@ -891,14 +929,24 @@ restart:
891 * occur if we have inodes in the last block of 929 * occur if we have inodes in the last block of
892 * the AG and we are currently pointing to the 930 * the AG and we are currently pointing to the
893 * last inode. 931 * last inode.
932 *
933 * Because we may see inodes that are from the
934 * wrong AG due to RCU freeing and
935 * reallocation, only update the index if it
936 * lies in this AG. It was a race that led us
937 * to see this inode, so another lookup from
938 * the same index will not find it again.
894 */ 939 */
940 if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
941 pag->pag_agno)
942 continue;
895 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 943 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
896 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 944 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
897 done = 1; 945 done = 1;
898 } 946 }
899 947
900 /* unlock now we've grabbed the inodes. */ 948 /* unlock now we've grabbed the inodes. */
901 write_unlock(&pag->pag_ici_lock); 949 rcu_read_unlock();
902 950
903 for (i = 0; i < nr_found; i++) { 951 for (i = 0; i < nr_found; i++) {
904 if (!batch[i]) 952 if (!batch[i])
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e98c594..647af2a2e7aa 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
766 __field(int, curr_res) 766 __field(int, curr_res)
767 __field(int, unit_res) 767 __field(int, unit_res)
768 __field(unsigned int, flags) 768 __field(unsigned int, flags)
769 __field(void *, reserve_headq) 769 __field(int, reserveq)
770 __field(void *, write_headq) 770 __field(int, writeq)
771 __field(int, grant_reserve_cycle) 771 __field(int, grant_reserve_cycle)
772 __field(int, grant_reserve_bytes) 772 __field(int, grant_reserve_bytes)
773 __field(int, grant_write_cycle) 773 __field(int, grant_write_cycle)
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
784 __entry->curr_res = tic->t_curr_res; 784 __entry->curr_res = tic->t_curr_res;
785 __entry->unit_res = tic->t_unit_res; 785 __entry->unit_res = tic->t_unit_res;
786 __entry->flags = tic->t_flags; 786 __entry->flags = tic->t_flags;
787 __entry->reserve_headq = log->l_reserve_headq; 787 __entry->reserveq = list_empty(&log->l_reserveq);
788 __entry->write_headq = log->l_write_headq; 788 __entry->writeq = list_empty(&log->l_writeq);
789 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; 789 xlog_crack_grant_head(&log->l_grant_reserve_head,
790 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; 790 &__entry->grant_reserve_cycle,
791 __entry->grant_write_cycle = log->l_grant_write_cycle; 791 &__entry->grant_reserve_bytes);
792 __entry->grant_write_bytes = log->l_grant_write_bytes; 792 xlog_crack_grant_head(&log->l_grant_write_head,
793 &__entry->grant_write_cycle,
794 &__entry->grant_write_bytes);
793 __entry->curr_cycle = log->l_curr_cycle; 795 __entry->curr_cycle = log->l_curr_cycle;
794 __entry->curr_block = log->l_curr_block; 796 __entry->curr_block = log->l_curr_block;
795 __entry->tail_lsn = log->l_tail_lsn; 797 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
796 ), 798 ),
797 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " 799 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
798 "t_unit_res %u t_flags %s reserve_headq 0x%p " 800 "t_unit_res %u t_flags %s reserveq %s "
799 "write_headq 0x%p grant_reserve_cycle %d " 801 "writeq %s grant_reserve_cycle %d "
800 "grant_reserve_bytes %d grant_write_cycle %d " 802 "grant_reserve_bytes %d grant_write_cycle %d "
801 "grant_write_bytes %d curr_cycle %d curr_block %d " 803 "grant_write_bytes %d curr_cycle %d curr_block %d "
802 "tail_cycle %d tail_block %d", 804 "tail_cycle %d tail_block %d",
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
807 __entry->curr_res, 809 __entry->curr_res,
808 __entry->unit_res, 810 __entry->unit_res,
809 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), 811 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
810 __entry->reserve_headq, 812 __entry->reserveq ? "empty" : "active",
811 __entry->write_headq, 813 __entry->writeq ? "empty" : "active",
812 __entry->grant_reserve_cycle, 814 __entry->grant_reserve_cycle,
813 __entry->grant_reserve_bytes, 815 __entry->grant_reserve_bytes,
814 __entry->grant_write_cycle, 816 __entry->grant_write_cycle,
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); 837DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); 838DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
837DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); 839DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
840DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
838DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
839DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); 845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); 846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); 847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
848DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 849DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 850DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); 851DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
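The xlog_crack_grant_head() accessor used in the event class above reads a grant head that now packs the cycle number and byte offset into a single atomic64_t, so it can be sampled without taking the grant lock. A sketch of the presumed encoding (the authoritative helpers live in xfs_log_priv.h):

	static inline void
	xlog_crack_grant_head_sketch(atomic64_t *head, int *cycle, int *space)
	{
		int64_t	val = atomic64_read(head);

		*cycle = val >> 32;		/* high 32 bits: cycle number */
		*space = val & 0xffffffff;	/* low 32 bits: bytes into cycle */
	}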
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
935DEFINE_PAGE_EVENT(xfs_releasepage); 939DEFINE_PAGE_EVENT(xfs_releasepage);
936DEFINE_PAGE_EVENT(xfs_invalidatepage); 940DEFINE_PAGE_EVENT(xfs_invalidatepage);
937 941
938DECLARE_EVENT_CLASS(xfs_iomap_class, 942DECLARE_EVENT_CLASS(xfs_imap_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 943 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
940 int flags, struct xfs_bmbt_irec *irec), 944 int type, struct xfs_bmbt_irec *irec),
941 TP_ARGS(ip, offset, count, flags, irec), 945 TP_ARGS(ip, offset, count, type, irec),
942 TP_STRUCT__entry( 946 TP_STRUCT__entry(
943 __field(dev_t, dev) 947 __field(dev_t, dev)
944 __field(xfs_ino_t, ino) 948 __field(xfs_ino_t, ino)
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
946 __field(loff_t, new_size) 950 __field(loff_t, new_size)
947 __field(loff_t, offset) 951 __field(loff_t, offset)
948 __field(size_t, count) 952 __field(size_t, count)
949 __field(int, flags) 953 __field(int, type)
950 __field(xfs_fileoff_t, startoff) 954 __field(xfs_fileoff_t, startoff)
951 __field(xfs_fsblock_t, startblock) 955 __field(xfs_fsblock_t, startblock)
952 __field(xfs_filblks_t, blockcount) 956 __field(xfs_filblks_t, blockcount)
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
958 __entry->new_size = ip->i_new_size; 962 __entry->new_size = ip->i_new_size;
959 __entry->offset = offset; 963 __entry->offset = offset;
960 __entry->count = count; 964 __entry->count = count;
961 __entry->flags = flags; 965 __entry->type = type;
962 __entry->startoff = irec ? irec->br_startoff : 0; 966 __entry->startoff = irec ? irec->br_startoff : 0;
963 __entry->startblock = irec ? irec->br_startblock : 0; 967 __entry->startblock = irec ? irec->br_startblock : 0;
964 __entry->blockcount = irec ? irec->br_blockcount : 0; 968 __entry->blockcount = irec ? irec->br_blockcount : 0;
965 ), 969 ),
966 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 970 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
967 "offset 0x%llx count %zd flags %s " 971 "offset 0x%llx count %zd type %s "
968 "startoff 0x%llx startblock %lld blockcount 0x%llx", 972 "startoff 0x%llx startblock %lld blockcount 0x%llx",
969 MAJOR(__entry->dev), MINOR(__entry->dev), 973 MAJOR(__entry->dev), MINOR(__entry->dev),
970 __entry->ino, 974 __entry->ino,
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
972 __entry->new_size, 976 __entry->new_size,
973 __entry->offset, 977 __entry->offset,
974 __entry->count, 978 __entry->count,
975 __print_flags(__entry->flags, "|", BMAPI_FLAGS), 979 __print_symbolic(__entry->type, XFS_IO_TYPES),
976 __entry->startoff, 980 __entry->startoff,
977 (__int64_t)__entry->startblock, 981 (__int64_t)__entry->startblock,
978 __entry->blockcount) 982 __entry->blockcount)
979) 983)
980 984
981#define DEFINE_IOMAP_EVENT(name) \ 985#define DEFINE_IOMAP_EVENT(name) \
982DEFINE_EVENT(xfs_iomap_class, name, \ 986DEFINE_EVENT(xfs_imap_class, name, \
983 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ 987 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
984 int flags, struct xfs_bmbt_irec *irec), \ 988 int type, struct xfs_bmbt_irec *irec), \
985 TP_ARGS(ip, offset, count, flags, irec)) 989 TP_ARGS(ip, offset, count, type, irec))
986DEFINE_IOMAP_EVENT(xfs_iomap_enter); 990DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
987DEFINE_IOMAP_EVENT(xfs_iomap_found); 991DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
988DEFINE_IOMAP_EVENT(xfs_iomap_alloc); 992DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
993DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
989 994
990DECLARE_EVENT_CLASS(xfs_simple_io_class, 995DECLARE_EVENT_CLASS(xfs_simple_io_class,
991 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 996 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
1022 TP_ARGS(ip, offset, count)) 1027 TP_ARGS(ip, offset, count))
1023DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 1028DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1024DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1029DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1030DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1025 1031
1026 1032
1027TRACE_EVENT(xfs_itruncate_start, 1033TRACE_EVENT(xfs_itruncate_start,
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
1420 TP_PROTO(struct xfs_alloc_arg *args), \ 1426 TP_PROTO(struct xfs_alloc_arg *args), \
1421 TP_ARGS(args)) 1427 TP_ARGS(args))
1422DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1428DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1429DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
1423DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1430DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1424DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1431DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1425DEFINE_ALLOC_EVENT(xfs_alloc_near_first); 1432DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index faf8e1a83a12..d22aa3103106 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
149 ASSERT(list_empty(&dqp->q_freelist)); 149 ASSERT(list_empty(&dqp->q_freelist));
150 150
151 mutex_destroy(&dqp->q_qlock); 151 mutex_destroy(&dqp->q_qlock);
152 sv_destroy(&dqp->q_pinwait);
153 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 152 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
154 153
155 atomic_dec(&xfs_Gqm->qm_totaldquots); 154 atomic_dec(&xfs_Gqm->qm_totaldquots);
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 63c7a1a6c022..58632cc17f2d 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,7 @@ typedef struct xfs_perag {
227 227
228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 228 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
229 229
230 rwlock_t pag_ici_lock; /* incore inode lock */ 230 spinlock_t pag_ici_lock; /* incore inode cache lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
233 struct mutex pag_ici_reclaim_lock; /* serialisation point */ 233 struct mutex pag_ici_reclaim_lock; /* serialisation point */
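The rwlock can become a plain spinlock because, after the xfs_sync.c changes above, nothing takes it for reading any more: lookups run under rcu_read_lock() and revalidate each inode under ip->i_flags_lock, while all modifiers serialise on the spinlock. A condensed sketch of the resulting discipline:

	/* lookup side: no pag_ici_lock at all */
	rcu_read_lock();
	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)batch,
					  first_index, XFS_LOOKUP_BATCH);
	/* validate and igrab each batch entry before dropping the lock */
	rcu_read_unlock();

	/* modification side: a plain spinlock suffices */
	spin_lock(&pag->pag_ici_lock);
	radix_tree_delete(&pag->pag_ici_root, agino);
	spin_unlock(&pag->pag_ici_lock);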
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 112abc439ca5..fa8723f5870a 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact(
577 xfs_extlen_t rlen; /* length of returned extent */ 577 xfs_extlen_t rlen; /* length of returned extent */
578 578
579 ASSERT(args->alignment == 1); 579 ASSERT(args->alignment == 1);
580
580 /* 581 /*
581 * Allocate/initialize a cursor for the by-number freespace btree. 582 * Allocate/initialize a cursor for the by-number freespace btree.
582 */ 583 */
583 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 584 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
584 args->agno, XFS_BTNUM_BNO); 585 args->agno, XFS_BTNUM_BNO);
586
585 /* 587 /*
586 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 588 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
587 * Look for the closest free block <= bno, it must contain bno 589 * Look for the closest free block <= bno, it must contain bno
588 * if any free block does. 590 * if any free block does.
589 */ 591 */
590 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) 592 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
593 if (error)
591 goto error0; 594 goto error0;
592 if (!i) { 595 if (!i)
593 /* 596 goto not_found;
594 * Didn't find it, return null. 597
595 */
596 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
597 args->agbno = NULLAGBLOCK;
598 return 0;
599 }
600 /* 598 /*
601 * Grab the freespace record. 599 * Grab the freespace record.
602 */ 600 */
603 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) 601 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
602 if (error)
604 goto error0; 603 goto error0;
605 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 604 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
606 ASSERT(fbno <= args->agbno); 605 ASSERT(fbno <= args->agbno);
607 minend = args->agbno + args->minlen; 606 minend = args->agbno + args->minlen;
608 maxend = args->agbno + args->maxlen; 607 maxend = args->agbno + args->maxlen;
609 fend = fbno + flen; 608 fend = fbno + flen;
609
610 /* 610 /*
611 * Give up if the freespace isn't long enough for the minimum request. 611 * Give up if the freespace isn't long enough for the minimum request.
612 */ 612 */
613 if (fend < minend) { 613 if (fend < minend)
614 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 614 goto not_found;
615 args->agbno = NULLAGBLOCK; 615
616 return 0;
617 }
618 /* 616 /*
619 * End of extent will be smaller of the freespace end and the 617 * End of extent will be smaller of the freespace end and the
620 * maximal requested end. 618 * maximal requested end.
621 */ 619 *
622 end = XFS_AGBLOCK_MIN(fend, maxend);
623 /*
624 * Fix the length according to mod and prod if given. 620 * Fix the length according to mod and prod if given.
625 */ 621 */
622 end = XFS_AGBLOCK_MIN(fend, maxend);
626 args->len = end - args->agbno; 623 args->len = end - args->agbno;
627 xfs_alloc_fix_len(args); 624 xfs_alloc_fix_len(args);
628 if (!xfs_alloc_fix_minleft(args)) { 625 if (!xfs_alloc_fix_minleft(args))
629 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 626 goto not_found;
630 return 0; 627
631 }
632 rlen = args->len; 628 rlen = args->len;
633 ASSERT(args->agbno + rlen <= fend); 629 ASSERT(args->agbno + rlen <= fend);
634 end = args->agbno + rlen; 630 end = args->agbno + rlen;
631
635 /* 632 /*
636 * We are allocating agbno for rlen [agbno .. end] 633 * We are allocating agbno for rlen [agbno .. end]
637 * Allocate/initialize a cursor for the by-size btree. 634 * Allocate/initialize a cursor for the by-size btree.
@@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact(
640 args->agno, XFS_BTNUM_CNT); 637 args->agno, XFS_BTNUM_CNT);
641 ASSERT(args->agbno + args->len <= 638 ASSERT(args->agbno + args->len <=
642 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 639 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
643 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 640 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
644 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { 641 args->len, XFSA_FIXUP_BNO_OK);
642 if (error) {
645 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 643 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
646 goto error0; 644 goto error0;
647 } 645 }
646
648 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 647 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
649 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 648 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
650 649
651 trace_xfs_alloc_exact_done(args);
652 args->wasfromfl = 0; 650 args->wasfromfl = 0;
651 trace_xfs_alloc_exact_done(args);
652 return 0;
653
654not_found:
655 /* Didn't find it, return null. */
656 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
657 args->agbno = NULLAGBLOCK;
658 trace_xfs_alloc_exact_notfound(args);
653 return 0; 659 return 0;
654 660
655error0: 661error0:
@@ -659,6 +665,95 @@ error0:
659} 665}
660 666
661/* 667/*
668 * Search the btree in a given direction via the search cursor and compare
669 * the records found against the good extent we've already found.
670 */
671STATIC int
672xfs_alloc_find_best_extent(
673 struct xfs_alloc_arg *args, /* allocation argument structure */
674 struct xfs_btree_cur **gcur, /* good cursor */
675 struct xfs_btree_cur **scur, /* searching cursor */
676 xfs_agblock_t gdiff, /* difference for search comparison */
677 xfs_agblock_t *sbno, /* extent found by search */
678 xfs_extlen_t *slen,
679 xfs_extlen_t *slena, /* aligned length */
680 int dir) /* 0 = search right, 1 = search left */
681{
682 xfs_agblock_t bno;
683 xfs_agblock_t new;
684 xfs_agblock_t sdiff;
685 int error;
686 int i;
687
688 /* The good extent is perfect, no need to search. */
689 if (!gdiff)
690 goto out_use_good;
691
692 /*
693 * Look until we find a better one, run out of space or run off the end.
694 */
695 do {
696 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
697 if (error)
698 goto error0;
699 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
700 xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
701 args->minlen, &bno, slena);
702
703 /*
704 * The good extent is closer than this one.
705 */
706 if (!dir) {
707 if (bno >= args->agbno + gdiff)
708 goto out_use_good;
709 } else {
710 if (bno <= args->agbno - gdiff)
711 goto out_use_good;
712 }
713
714 /*
715 * Same distance, compare length and pick the best.
716 */
717 if (*slena >= args->minlen) {
718 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
719 xfs_alloc_fix_len(args);
720
721 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
722 args->alignment, *sbno,
723 *slen, &new);
724
725 /*
726 * Choose closer size and invalidate other cursor.
727 */
728 if (sdiff < gdiff)
729 goto out_use_search;
730 goto out_use_good;
731 }
732
733 if (!dir)
734 error = xfs_btree_increment(*scur, 0, &i);
735 else
736 error = xfs_btree_decrement(*scur, 0, &i);
737 if (error)
738 goto error0;
739 } while (i);
740
741out_use_good:
742 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
743 *scur = NULL;
744 return 0;
745
746out_use_search:
747 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
748 *gcur = NULL;
749 return 0;
750
751error0:
752 /* caller invalidates cursors */
753 return error;
754}
755
756/*
662 * Allocate a variable extent near bno in the allocation group agno. 757 * Allocate a variable extent near bno in the allocation group agno.
663 * Extent's length (returned in len) will be between minlen and maxlen, 758 * Extent's length (returned in len) will be between minlen and maxlen,
664 * and of the form k * prod + mod unless there's nothing that large. 759 * and of the form k * prod + mod unless there's nothing that large.
@@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near(
925 } 1020 }
926 } 1021 }
927 } while (bno_cur_lt || bno_cur_gt); 1022 } while (bno_cur_lt || bno_cur_gt);
1023
928 /* 1024 /*
929 * Got both cursors still active, need to find better entry. 1025 * Got both cursors still active, need to find better entry.
930 */ 1026 */
931 if (bno_cur_lt && bno_cur_gt) { 1027 if (bno_cur_lt && bno_cur_gt) {
932 /*
933 * Left side is long enough, look for a right side entry.
934 */
935 if (ltlena >= args->minlen) { 1028 if (ltlena >= args->minlen) {
936 /* 1029 /*
937 * Fix up the length. 1030 * Left side is good, look for a right side entry.
938 */ 1031 */
939 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1032 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
940 xfs_alloc_fix_len(args); 1033 xfs_alloc_fix_len(args);
941 rlen = args->len; 1034 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
942 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
943 args->alignment, ltbno, ltlen, &ltnew); 1035 args->alignment, ltbno, ltlen, &ltnew);
1036
1037 error = xfs_alloc_find_best_extent(args,
1038 &bno_cur_lt, &bno_cur_gt,
1039 ltdiff, &gtbno, &gtlen, &gtlena,
1040 0 /* search right */);
1041 } else {
1042 ASSERT(gtlena >= args->minlen);
1043
944 /* 1044 /*
945 * Not perfect. 1045 * Right side is good, look for a left side entry.
946 */
947 if (ltdiff) {
948 /*
949 * Look until we find a better one, run out of
950 * space, or run off the end.
951 */
952 while (bno_cur_lt && bno_cur_gt) {
953 if ((error = xfs_alloc_get_rec(
954 bno_cur_gt, &gtbno,
955 &gtlen, &i)))
956 goto error0;
957 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
958 xfs_alloc_compute_aligned(gtbno, gtlen,
959 args->alignment, args->minlen,
960 &gtbnoa, &gtlena);
961 /*
962 * The left one is clearly better.
963 */
964 if (gtbnoa >= args->agbno + ltdiff) {
965 xfs_btree_del_cursor(
966 bno_cur_gt,
967 XFS_BTREE_NOERROR);
968 bno_cur_gt = NULL;
969 break;
970 }
971 /*
972 * If we reach a big enough entry,
973 * compare the two and pick the best.
974 */
975 if (gtlena >= args->minlen) {
976 args->len =
977 XFS_EXTLEN_MIN(gtlena,
978 args->maxlen);
979 xfs_alloc_fix_len(args);
980 rlen = args->len;
981 gtdiff = xfs_alloc_compute_diff(
982 args->agbno, rlen,
983 args->alignment,
984 gtbno, gtlen, &gtnew);
985 /*
986 * Right side is better.
987 */
988 if (gtdiff < ltdiff) {
989 xfs_btree_del_cursor(
990 bno_cur_lt,
991 XFS_BTREE_NOERROR);
992 bno_cur_lt = NULL;
993 }
994 /*
995 * Left side is better.
996 */
997 else {
998 xfs_btree_del_cursor(
999 bno_cur_gt,
1000 XFS_BTREE_NOERROR);
1001 bno_cur_gt = NULL;
1002 }
1003 break;
1004 }
1005 /*
1006 * Fell off the right end.
1007 */
1008 if ((error = xfs_btree_increment(
1009 bno_cur_gt, 0, &i)))
1010 goto error0;
1011 if (!i) {
1012 xfs_btree_del_cursor(
1013 bno_cur_gt,
1014 XFS_BTREE_NOERROR);
1015 bno_cur_gt = NULL;
1016 break;
1017 }
1018 }
1019 }
1020 /*
1021 * The left side is perfect, trash the right side.
1022 */
1023 else {
1024 xfs_btree_del_cursor(bno_cur_gt,
1025 XFS_BTREE_NOERROR);
1026 bno_cur_gt = NULL;
1027 }
1028 }
1029 /*
1030 * It's the right side that was found first, look left.
1031 */
1032 else {
1033 /*
1034 * Fix up the length.
1035 */ 1046 */
1036 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1037 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1038 rlen = args->len; 1049 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1039 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1040 args->alignment, gtbno, gtlen, &gtnew); 1050 args->alignment, gtbno, gtlen, &gtnew);
1041 /* 1051
1042 * Right side entry isn't perfect. 1052 error = xfs_alloc_find_best_extent(args,
1043 */ 1053 &bno_cur_gt, &bno_cur_lt,
1044 if (gtdiff) { 1054 gtdiff, &ltbno, &ltlen, &ltlena,
1045 /* 1055 1 /* search left */);
1046 * Look until we find a better one, run out of
1047 * space, or run off the end.
1048 */
1049 while (bno_cur_lt && bno_cur_gt) {
1050 if ((error = xfs_alloc_get_rec(
1051 bno_cur_lt, &ltbno,
1052 &ltlen, &i)))
1053 goto error0;
1054 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1055 xfs_alloc_compute_aligned(ltbno, ltlen,
1056 args->alignment, args->minlen,
1057 &ltbnoa, &ltlena);
1058 /*
1059 * The right one is clearly better.
1060 */
1061 if (ltbnoa <= args->agbno - gtdiff) {
1062 xfs_btree_del_cursor(
1063 bno_cur_lt,
1064 XFS_BTREE_NOERROR);
1065 bno_cur_lt = NULL;
1066 break;
1067 }
1068 /*
1069 * If we reach a big enough entry,
1070 * compare the two and pick the best.
1071 */
1072 if (ltlena >= args->minlen) {
1073 args->len = XFS_EXTLEN_MIN(
1074 ltlena, args->maxlen);
1075 xfs_alloc_fix_len(args);
1076 rlen = args->len;
1077 ltdiff = xfs_alloc_compute_diff(
1078 args->agbno, rlen,
1079 args->alignment,
1080 ltbno, ltlen, &ltnew);
1081 /*
1082 * Left side is better.
1083 */
1084 if (ltdiff < gtdiff) {
1085 xfs_btree_del_cursor(
1086 bno_cur_gt,
1087 XFS_BTREE_NOERROR);
1088 bno_cur_gt = NULL;
1089 }
1090 /*
1091 * Right side is better.
1092 */
1093 else {
1094 xfs_btree_del_cursor(
1095 bno_cur_lt,
1096 XFS_BTREE_NOERROR);
1097 bno_cur_lt = NULL;
1098 }
1099 break;
1100 }
1101 /*
1102 * Fell off the left end.
1103 */
1104 if ((error = xfs_btree_decrement(
1105 bno_cur_lt, 0, &i)))
1106 goto error0;
1107 if (!i) {
1108 xfs_btree_del_cursor(bno_cur_lt,
1109 XFS_BTREE_NOERROR);
1110 bno_cur_lt = NULL;
1111 break;
1112 }
1113 }
1114 }
1115 /*
1116 * The right side is perfect, trash the left side.
1117 */
1118 else {
1119 xfs_btree_del_cursor(bno_cur_lt,
1120 XFS_BTREE_NOERROR);
1121 bno_cur_lt = NULL;
1122 }
1123 } 1056 }
1057
1058 if (error)
1059 goto error0;
1124 } 1060 }
1061
1125 /* 1062 /*
1126 * If we couldn't get anything, give up. 1063 * If we couldn't get anything, give up.
1127 */ 1064 */
@@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near(
1130 args->agbno = NULLAGBLOCK; 1067 args->agbno = NULLAGBLOCK;
1131 return 0; 1068 return 0;
1132 } 1069 }
1070
1133 /* 1071 /*
1134 * At this point we have selected a freespace entry, either to the 1072 * At this point we have selected a freespace entry, either to the
1135 * left or to the right. If it's on the right, copy all the 1073 * left or to the right. If it's on the right, copy all the
@@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near(
1146 j = 1; 1084 j = 1;
1147 } else 1085 } else
1148 j = 0; 1086 j = 0;
1087
1149 /* 1088 /*
1150 * Fix up the length and compute the useful address. 1089 * Fix up the length and compute the useful address.
1151 */ 1090 */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a6cff8edcdb6..71e90dc2aeb1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
637 * It didn't all fit, so we have to sort everything on hashval. 637 * It didn't all fit, so we have to sort everything on hashval.
638 */ 638 */
639 sbsize = sf->hdr.count * sizeof(*sbuf); 639 sbsize = sf->hdr.count * sizeof(*sbuf);
640 sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP); 640 sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
641 641
642 /* 642 /*
643 * Scan the attribute list for the rest of the entries, storing 643 * Scan the attribute list for the rest of the entries, storing
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2386 args.dp = context->dp; 2386 args.dp = context->dp;
2387 args.whichfork = XFS_ATTR_FORK; 2387 args.whichfork = XFS_ATTR_FORK;
2388 args.valuelen = valuelen; 2388 args.valuelen = valuelen;
2389 args.value = kmem_alloc(valuelen, KM_SLEEP); 2389 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
2390 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 2390 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2391 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); 2391 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
2392 retval = xfs_attr_rmtval_get(&args); 2392 retval = xfs_attr_rmtval_get(&args);
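KM_NOFS matters in both hunks because the allocations happen while attribute buffers are held: a plain KM_SLEEP allocation could enter direct reclaim, re-enter the filesystem and deadlock on those same buffers. A sketch of the presumed flag translation (an assumption; kmem_flags_convert() in kmem.h is the authoritative mapping):

	static inline gfp_t
	kmem_flags_convert_sketch(unsigned int kmflags)
	{
		gfp_t	lflags = GFP_KERNEL;	/* KM_SLEEP behaviour */

		if (kmflags & KM_NOFS)
			lflags &= ~__GFP_FS;	/* reclaim must not re-enter the fs */
		return lflags;
	}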
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 04f9cca8da7e..2f9e97c128a0 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -634,9 +634,8 @@ xfs_btree_read_bufl(
634 return error; 634 return error;
635 } 635 }
636 ASSERT(!bp || !XFS_BUF_GETERROR(bp)); 636 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
637 if (bp != NULL) { 637 if (bp)
638 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); 638 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
639 }
640 *bpp = bp; 639 *bpp = bp;
641 return 0; 640 return 0;
642} 641}
@@ -944,13 +943,13 @@ xfs_btree_set_refs(
944 switch (cur->bc_btnum) { 943 switch (cur->bc_btnum) {
945 case XFS_BTNUM_BNO: 944 case XFS_BTNUM_BNO:
946 case XFS_BTNUM_CNT: 945 case XFS_BTNUM_CNT:
947 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); 946 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
948 break; 947 break;
949 case XFS_BTNUM_INO: 948 case XFS_BTNUM_INO:
950 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); 949 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
951 break; 950 break;
952 case XFS_BTNUM_BMAP: 951 case XFS_BTNUM_BMAP:
953 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); 952 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
954 break; 953 break;
955 default: 954 default:
956 ASSERT(0); 955 ASSERT(0);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2686d0d54c5b..ed2b65f3f8b9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -142,7 +142,7 @@ xfs_buf_item_log_check(
142#endif 142#endif
143 143
144STATIC void xfs_buf_error_relse(xfs_buf_t *bp); 144STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
145STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); 145STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
146 146
147/* 147/*
148 * This returns the number of log iovecs needed to log the 148 * This returns the number of log iovecs needed to log the
@@ -450,7 +450,7 @@ xfs_buf_item_unpin(
450 * xfs_trans_ail_delete() drops the AIL lock. 450 * xfs_trans_ail_delete() drops the AIL lock.
451 */ 451 */
452 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 452 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
453 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); 453 xfs_buf_do_callbacks(bp);
454 XFS_BUF_SET_FSPRIVATE(bp, NULL); 454 XFS_BUF_SET_FSPRIVATE(bp, NULL);
455 XFS_BUF_CLR_IODONE_FUNC(bp); 455 XFS_BUF_CLR_IODONE_FUNC(bp);
456 } else { 456 } else {
@@ -918,15 +918,26 @@ xfs_buf_attach_iodone(
918 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 918 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
919} 919}
920 920
921/*
922 * We can have many callbacks on a buffer. Running the callbacks individually
923 * can cause a lot of contention on the AIL lock, so we allow for a single
924 * callback to scan the remaining lip->li_bio_list for other items
925 * of the same type and callback, and process them in the first call.
926 *
927 * As a result, the loop walking the callback list below will also modify the
928 * list. It removes the first item from the list and then runs the callback.
929 * The loop then restarts from the new head of the list. This allows the
930 * callback to scan and modify the list attached to the buffer and we don't
931 * have to care about maintaining a next item pointer.
932 */
921STATIC void 933STATIC void
922xfs_buf_do_callbacks( 934xfs_buf_do_callbacks(
923 xfs_buf_t *bp, 935 struct xfs_buf *bp)
924 xfs_log_item_t *lip)
925{ 936{
926 xfs_log_item_t *nlip; 937 struct xfs_log_item *lip;
927 938
928 while (lip != NULL) { 939 while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
929 nlip = lip->li_bio_list; 940 XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
930 ASSERT(lip->li_cb != NULL); 941 ASSERT(lip->li_cb != NULL);
931 /* 942 /*
932 * Clear the next pointer so we don't have any 943 * Clear the next pointer so we don't have any
@@ -936,7 +947,6 @@ xfs_buf_do_callbacks(
936 */ 947 */
937 lip->li_bio_list = NULL; 948 lip->li_bio_list = NULL;
938 lip->li_cb(bp, lip); 949 lip->li_cb(bp, lip);
939 lip = nlip;
940 } 950 }
941} 951}
942 952
@@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks(
970 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); 980 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
971 XFS_BUF_SUPER_STALE(bp); 981 XFS_BUF_SUPER_STALE(bp);
972 trace_xfs_buf_item_iodone(bp, _RET_IP_); 982 trace_xfs_buf_item_iodone(bp, _RET_IP_);
973 xfs_buf_do_callbacks(bp, lip); 983 xfs_buf_do_callbacks(bp);
974 XFS_BUF_SET_FSPRIVATE(bp, NULL); 984 XFS_BUF_SET_FSPRIVATE(bp, NULL);
975 XFS_BUF_CLR_IODONE_FUNC(bp); 985 XFS_BUF_CLR_IODONE_FUNC(bp);
976 xfs_buf_ioend(bp, 0); 986 xfs_buf_ioend(bp, 0);
@@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks(
1029 return; 1039 return;
1030 } 1040 }
1031 1041
1032 xfs_buf_do_callbacks(bp, lip); 1042 xfs_buf_do_callbacks(bp);
1033 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1043 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1034 XFS_BUF_CLR_IODONE_FUNC(bp); 1044 XFS_BUF_CLR_IODONE_FUNC(bp);
1035 xfs_buf_ioend(bp, 0); 1045 xfs_buf_ioend(bp, 0);
@@ -1063,7 +1073,7 @@ xfs_buf_error_relse(
1063 * We have to unpin the pinned buffers so do the 1073 * We have to unpin the pinned buffers so do the
1064 * callbacks. 1074 * callbacks.
1065 */ 1075 */
1066 xfs_buf_do_callbacks(bp, lip); 1076 xfs_buf_do_callbacks(bp);
1067 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1077 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1068 XFS_BUF_CLR_IODONE_FUNC(bp); 1078 XFS_BUF_CLR_IODONE_FUNC(bp);
1069 XFS_BUF_SET_BRELSE_FUNC(bp,NULL); 1079 XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 0e2ed43f16c7..b6ecd2061e7c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item {
105 xfs_buf_log_format_t bli_format; /* in-log header */ 105 xfs_buf_log_format_t bli_format; /* in-log header */
106} xfs_buf_log_item_t; 106} xfs_buf_log_item_t;
107 107
108/*
109 * This structure is used during recovery to record the buf log
110 * items which have been canceled and should not be replayed.
111 */
112typedef struct xfs_buf_cancel {
113 xfs_daddr_t bc_blkno;
114 uint bc_len;
115 int bc_refcount;
116 struct xfs_buf_cancel *bc_next;
117} xfs_buf_cancel_t;
118
119void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 108void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
120void xfs_buf_item_relse(struct xfs_buf *); 109void xfs_buf_item_relse(struct xfs_buf *);
121void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); 110void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index a55e687bf562..75f2ef60e579 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -48,6 +48,28 @@ xfs_efi_item_free(
48} 48}
49 49
50/* 50/*
51 * Freeing the efi requires that we remove it from the AIL if it has already
52 * been placed there. However, the EFI may not yet have been placed in the AIL
53 * when called by xfs_efi_release() from EFD processing due to the ordering of
54 * committed vs unpin operations in bulk insert operations. Hence the
55 * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
56 * the EFI.
57 */
58STATIC void
59__xfs_efi_release(
60 struct xfs_efi_log_item *efip)
61{
62 struct xfs_ail *ailp = efip->efi_item.li_ailp;
63
64 if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
65 spin_lock(&ailp->xa_lock);
66 /* xfs_trans_ail_delete() drops the AIL lock. */
67 xfs_trans_ail_delete(ailp, &efip->efi_item);
68 xfs_efi_item_free(efip);
69 }
70}
71
72/*
51 * This returns the number of iovecs needed to log the given efi item. 73 * This returns the number of iovecs needed to log the given efi item.
52 * We only need 1 iovec for an efi item. It just logs the efi_log_format 74 * We only need 1 iovec for an efi item. It just logs the efi_log_format
53 * structure. 75 * structure.
@@ -74,7 +96,8 @@ xfs_efi_item_format(
74 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 96 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
75 uint size; 97 uint size;
76 98
77 ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents); 99 ASSERT(atomic_read(&efip->efi_next_extent) ==
100 efip->efi_format.efi_nextents);
78 101
79 efip->efi_format.efi_type = XFS_LI_EFI; 102 efip->efi_format.efi_type = XFS_LI_EFI;
80 103
@@ -99,10 +122,12 @@ xfs_efi_item_pin(
99} 122}
100 123
101/* 124/*
102 * While EFIs cannot really be pinned, the unpin operation is the 125 * While EFIs cannot really be pinned, the unpin operation is the last place at
103 * last place at which the EFI is manipulated during a transaction. 126 * which the EFI is manipulated during a transaction. If we are being asked to
104 * Here we coordinate with xfs_efi_cancel() to determine who gets to 127 * remove the EFI it's because the transaction has been cancelled and by
105 * free the EFI. 128 * definition that means the EFI cannot be in the AIL so remove it from the
129 * transaction and free it. Otherwise coordinate with xfs_efi_release() (via
130 * XFS_EFI_COMMITTED) to determine who gets to free the EFI.
106 */ 131 */
107STATIC void 132STATIC void
108xfs_efi_item_unpin( 133xfs_efi_item_unpin(
@@ -110,20 +135,14 @@ xfs_efi_item_unpin(
110 int remove) 135 int remove)
111{ 136{
112 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 137 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
113 struct xfs_ail *ailp = lip->li_ailp;
114
115 spin_lock(&ailp->xa_lock);
116 if (efip->efi_flags & XFS_EFI_CANCELED) {
117 if (remove)
118 xfs_trans_del_item(lip);
119 138
120 /* xfs_trans_ail_delete() drops the AIL lock. */ 139 if (remove) {
121 xfs_trans_ail_delete(ailp, lip); 140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
141 xfs_trans_del_item(lip);
122 xfs_efi_item_free(efip); 142 xfs_efi_item_free(efip);
123 } else { 143 return;
124 efip->efi_flags |= XFS_EFI_COMMITTED;
125 spin_unlock(&ailp->xa_lock);
126 } 144 }
145 __xfs_efi_release(efip);
127} 146}
128 147
129/* 148/*
@@ -152,16 +171,20 @@ xfs_efi_item_unlock(
152} 171}
153 172
154/* 173/*
155 * The EFI is logged only once and cannot be moved in the log, so 174 * The EFI is logged only once and cannot be moved in the log, so simply return
156 * simply return the lsn at which it's been logged. The canceled 175 * the lsn at which it's been logged. For bulk transaction committed
157 * flag is not paid any attention here. Checking for that is delayed 176 * processing, the EFI may be processed but not yet unpinned prior to the EFD
158 * until the EFI is unpinned. 177 * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
178 * when processing the EFD.
159 */ 179 */
160STATIC xfs_lsn_t 180STATIC xfs_lsn_t
161xfs_efi_item_committed( 181xfs_efi_item_committed(
162 struct xfs_log_item *lip, 182 struct xfs_log_item *lip,
163 xfs_lsn_t lsn) 183 xfs_lsn_t lsn)
164{ 184{
185 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
186
187 set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
165 return lsn; 188 return lsn;
166} 189}
167 190
@@ -230,6 +253,7 @@ xfs_efi_init(
230 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 253 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
231 efip->efi_format.efi_nextents = nextents; 254 efip->efi_format.efi_nextents = nextents;
232 efip->efi_format.efi_id = (__psint_t)(void*)efip; 255 efip->efi_format.efi_id = (__psint_t)(void*)efip;
256 atomic_set(&efip->efi_next_extent, 0);
233 257
234 return efip; 258 return efip;
235} 259}
@@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
289} 313}
290 314
291/* 315/*
292 * This is called by the efd item code below to release references to 316 * This is called by the efd item code below to release references to the given
293 * the given efi item. Each efd calls this with the number of 317 * efi item. Each efd calls this with the number of extents that it has
294 * extents that it has logged, and when the sum of these reaches 318 * logged, and when the sum of these reaches the total number of extents logged
295 * the total number of extents logged by this efi item we can free 319 * by this efi item we can free the efi item.
296 * the efi item.
297 *
298 * Freeing the efi item requires that we remove it from the AIL.
299 * We'll use the AIL lock to protect our counters as well as
300 * the removal from the AIL.
301 */ 320 */
302void 321void
303xfs_efi_release(xfs_efi_log_item_t *efip, 322xfs_efi_release(xfs_efi_log_item_t *efip,
304 uint nextents) 323 uint nextents)
305{ 324{
306 struct xfs_ail *ailp = efip->efi_item.li_ailp; 325 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
307 int extents_left; 326 if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
308 327 __xfs_efi_release(efip);
309 ASSERT(efip->efi_next_extent > 0);
310 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
311
312 spin_lock(&ailp->xa_lock);
313 ASSERT(efip->efi_next_extent >= nextents);
314 efip->efi_next_extent -= nextents;
315 extents_left = efip->efi_next_extent;
316 if (extents_left == 0) {
317 /* xfs_trans_ail_delete() drops the AIL lock. */
318 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
319 xfs_efi_item_free(efip);
320 } else {
321 spin_unlock(&ailp->xa_lock);
322 }
323} 328}
324 329
325static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) 330static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
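The lifetime rules established above compress to three steps; the snippet restates them with the names from this patch (a summary sketch, not additional patch code):

	/* 1. ->iop_committed arms one extra "release token" */
	set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);

	/* 2. the EFD side drops its reference once all extents are in */
	if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
		__xfs_efi_release(efip);

	/* 3. unpin and EFD release can run in either order: inside
	 * __xfs_efi_release() the first caller merely consumes the
	 * token, the second finds it clear and removes the EFI from
	 * the AIL and frees it, so exactly one party tears it down. */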
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0d22c56fdf64..375f68e42531 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
111#define XFS_EFI_MAX_FAST_EXTENTS 16 111#define XFS_EFI_MAX_FAST_EXTENTS 16
112 112
113/* 113/*
114 * Define EFI flags. 114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
115 */ 115 */
116#define XFS_EFI_RECOVERED 0x1 116#define XFS_EFI_RECOVERED 1
117#define XFS_EFI_COMMITTED 0x2 117#define XFS_EFI_COMMITTED 2
118#define XFS_EFI_CANCELED 0x4
119 118
120/* 119/*
121 * This is the "extent free intention" log item. It is used 120 * This is the "extent free intention" log item. It is used
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
125 */ 124 */
126typedef struct xfs_efi_log_item { 125typedef struct xfs_efi_log_item {
127 xfs_log_item_t efi_item; 126 xfs_log_item_t efi_item;
128 uint efi_flags; /* misc flags */ 127 atomic_t efi_next_extent;
129 uint efi_next_extent; 128 unsigned long efi_flags; /* misc flags */
130 xfs_efi_log_format_t efi_format; 129 xfs_efi_log_format_t efi_format;
131} xfs_efi_log_item_t; 130} xfs_efi_log_item_t;
132 131
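
The switch from mask values (0x1/0x2) to bit numbers (1/2) above is what allows the lockless flag updates elsewhere in this patch. A minimal sketch, assuming only the generic kernel bitops API:

	#include <linux/bitops.h>

	/* sketch: bit numbers let the atomic bitops replace locked read-modify-write */
	static void efi_flag_demo(unsigned long *flags)
	{
		set_bit(XFS_EFI_COMMITTED, flags);	/* atomic set of bit 2 */
		if (test_bit(XFS_EFI_COMMITTED, flags))	/* atomic single-bit read */
			clear_bit(XFS_EFI_COMMITTED, flags);
	}
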
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7c116e814af..f56d30e8040c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
374 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 374 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
375 } else 375 } else
376 mp->m_maxicount = 0; 376 mp->m_maxicount = 0;
377 xfs_set_low_space_thresholds(mp);
377 378
378 /* update secondary superblocks. */ 379 /* update secondary superblocks. */
379 for (agno = 1; agno < nagcount; agno++) { 380 for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d7de5a3f7867..cb9b6d1469f7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,6 +43,17 @@
43 43
44 44
45/* 45/*
46 * Define xfs inode iolock lockdep classes. We need to ensure that all active
47 * inodes are considered the same for lockdep purposes, including inodes that
 48 * are recycled through the XFS_IRECLAIMABLE state. This is the only way to
 49 * guarantee the locks are considered the same when there are multiple lock
 50 * initialisation sites. Also, define a reclaimable inode class so it is
51 * obvious in lockdep reports which class the report is against.
52 */
53static struct lock_class_key xfs_iolock_active;
54struct lock_class_key xfs_iolock_reclaimable;
55
56/*
46 * Allocate and initialise an xfs_inode. 57 * Allocate and initialise an xfs_inode.
47 */ 58 */
48STATIC struct xfs_inode * 59STATIC struct xfs_inode *
@@ -69,8 +80,11 @@ xfs_inode_alloc(
69 ASSERT(atomic_read(&ip->i_pincount) == 0); 80 ASSERT(atomic_read(&ip->i_pincount) == 0);
70 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 81 ASSERT(!spin_is_locked(&ip->i_flags_lock));
71 ASSERT(completion_done(&ip->i_flush)); 82 ASSERT(completion_done(&ip->i_flush));
83 ASSERT(ip->i_ino == 0);
72 84
73 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 85 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
86 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
87 &xfs_iolock_active, "xfs_iolock_active");
74 88
75 /* initialise the xfs inode */ 89 /* initialise the xfs inode */
76 ip->i_ino = ino; 90 ip->i_ino = ino;
@@ -85,9 +99,6 @@ xfs_inode_alloc(
85 ip->i_size = 0; 99 ip->i_size = 0;
86 ip->i_new_size = 0; 100 ip->i_new_size = 0;
87 101
88 /* prevent anyone from using this yet */
89 VFS_I(ip)->i_state = I_NEW;
90
91 return ip; 102 return ip;
92} 103}
93 104
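
The lockdep pattern above generalises: a single static key shared by every initialisation site makes all instances one lockdep class. A sketch with generic names, assuming only the stock lockdep API:

	static struct lock_class_key demo_lock_class;

	static void demo_lock_init(struct rw_semaphore *sem)
	{
		init_rwsem(sem);
		/* all locks set up through here compare equal for lockdep */
		lockdep_set_class_and_name(sem, &demo_lock_class,
					   "demo_lock_class");
	}
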
@@ -145,7 +156,18 @@ xfs_inode_free(
145 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 156 ASSERT(!spin_is_locked(&ip->i_flags_lock));
146 ASSERT(completion_done(&ip->i_flush)); 157 ASSERT(completion_done(&ip->i_flush));
147 158
148 call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); 159 /*
160 * Because we use RCU freeing we need to ensure the inode always
161 * appears to be reclaimed with an invalid inode number when in the
162 * free state. The ip->i_flags_lock provides the barrier against lookup
163 * races.
164 */
165 spin_lock(&ip->i_flags_lock);
166 ip->i_flags = XFS_IRECLAIM;
167 ip->i_ino = 0;
168 spin_unlock(&ip->i_flags_lock);
169
170 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
149} 171}
150 172
151/* 173/*
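
The free-side protocol above can be sketched generically (hypothetical structure and names, not the patch itself): zero the identity under the object's spinlock before call_rcu(), so an RCU-walking lookup that still sees the stale radix tree entry can detect the freed object.

	struct demo_obj {
		spinlock_t	lock;
		unsigned long	id;
		struct rcu_head	rcu_head;
	};

	static void demo_free_cb(struct rcu_head *head);	/* hypothetical */

	static void demo_free(struct demo_obj *obj)
	{
		spin_lock(&obj->lock);
		obj->id = 0;		/* lookups revalidate id under obj->lock */
		spin_unlock(&obj->lock);
		call_rcu(&obj->rcu_head, demo_free_cb);
	}
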
@@ -155,14 +177,29 @@ static int
155xfs_iget_cache_hit( 177xfs_iget_cache_hit(
156 struct xfs_perag *pag, 178 struct xfs_perag *pag,
157 struct xfs_inode *ip, 179 struct xfs_inode *ip,
180 xfs_ino_t ino,
158 int flags, 181 int flags,
159 int lock_flags) __releases(pag->pag_ici_lock) 182 int lock_flags) __releases(RCU)
160{ 183{
161 struct inode *inode = VFS_I(ip); 184 struct inode *inode = VFS_I(ip);
162 struct xfs_mount *mp = ip->i_mount; 185 struct xfs_mount *mp = ip->i_mount;
163 int error; 186 int error;
164 187
188 /*
189 * check for re-use of an inode within an RCU grace period due to the
190 * radix tree nodes not being updated yet. We monitor for this by
191 * setting the inode number to zero before freeing the inode structure.
192 * If the inode has been reallocated and set up, then the inode number
193 * will not match, so check for that, too.
194 */
165 spin_lock(&ip->i_flags_lock); 195 spin_lock(&ip->i_flags_lock);
196 if (ip->i_ino != ino) {
197 trace_xfs_iget_skip(ip);
198 XFS_STATS_INC(xs_ig_frecycle);
199 error = EAGAIN;
200 goto out_error;
201 }
202
166 203
167 /* 204 /*
168 * If we are racing with another cache hit that is currently 205 * If we are racing with another cache hit that is currently
@@ -205,7 +242,7 @@ xfs_iget_cache_hit(
205 ip->i_flags |= XFS_IRECLAIM; 242 ip->i_flags |= XFS_IRECLAIM;
206 243
207 spin_unlock(&ip->i_flags_lock); 244 spin_unlock(&ip->i_flags_lock);
208 read_unlock(&pag->pag_ici_lock); 245 rcu_read_unlock();
209 246
210 error = -inode_init_always(mp->m_super, inode); 247 error = -inode_init_always(mp->m_super, inode);
211 if (error) { 248 if (error) {
@@ -213,7 +250,7 @@ xfs_iget_cache_hit(
213 * Re-initializing the inode failed, and we are in deep 250 * Re-initializing the inode failed, and we are in deep
214 * trouble. Try to re-add it to the reclaim list. 251 * trouble. Try to re-add it to the reclaim list.
215 */ 252 */
216 read_lock(&pag->pag_ici_lock); 253 rcu_read_lock();
217 spin_lock(&ip->i_flags_lock); 254 spin_lock(&ip->i_flags_lock);
218 255
219 ip->i_flags &= ~XFS_INEW; 256 ip->i_flags &= ~XFS_INEW;
@@ -223,14 +260,20 @@ xfs_iget_cache_hit(
223 goto out_error; 260 goto out_error;
224 } 261 }
225 262
226 write_lock(&pag->pag_ici_lock); 263 spin_lock(&pag->pag_ici_lock);
227 spin_lock(&ip->i_flags_lock); 264 spin_lock(&ip->i_flags_lock);
228 ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); 265 ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
229 ip->i_flags |= XFS_INEW; 266 ip->i_flags |= XFS_INEW;
230 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 267 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
231 inode->i_state = I_NEW; 268 inode->i_state = I_NEW;
269
270 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
271 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
272 lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
273 &xfs_iolock_active, "xfs_iolock_active");
274
232 spin_unlock(&ip->i_flags_lock); 275 spin_unlock(&ip->i_flags_lock);
233 write_unlock(&pag->pag_ici_lock); 276 spin_unlock(&pag->pag_ici_lock);
234 } else { 277 } else {
235 /* If the VFS inode is being torn down, pause and try again. */ 278 /* If the VFS inode is being torn down, pause and try again. */
236 if (!igrab(inode)) { 279 if (!igrab(inode)) {
@@ -241,7 +284,7 @@ xfs_iget_cache_hit(
241 284
242 /* We've got a live one. */ 285 /* We've got a live one. */
243 spin_unlock(&ip->i_flags_lock); 286 spin_unlock(&ip->i_flags_lock);
244 read_unlock(&pag->pag_ici_lock); 287 rcu_read_unlock();
245 trace_xfs_iget_hit(ip); 288 trace_xfs_iget_hit(ip);
246 } 289 }
247 290
@@ -255,7 +298,7 @@ xfs_iget_cache_hit(
255 298
256out_error: 299out_error:
257 spin_unlock(&ip->i_flags_lock); 300 spin_unlock(&ip->i_flags_lock);
258 read_unlock(&pag->pag_ici_lock); 301 rcu_read_unlock();
259 return error; 302 return error;
260} 303}
261 304
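
The matching lookup side of that protocol, again as a sketch with the same hypothetical names: revalidate the identity under the spinlock after the RCU lookup and retry on a mismatch, which is exactly the shape of the EAGAIN path added above.

	static struct demo_obj *demo_lookup(struct radix_tree_root *tree,
					    unsigned long key)
	{
		struct demo_obj *obj;
	again:
		rcu_read_lock();
		obj = radix_tree_lookup(tree, key);
		if (obj) {
			spin_lock(&obj->lock);
			if (obj->id != key) {	/* freed or recycled: retry */
				spin_unlock(&obj->lock);
				rcu_read_unlock();
				goto again;
			}
			spin_unlock(&obj->lock); /* valid: caller takes a ref */
		}
		rcu_read_unlock();
		return obj;
	}
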
@@ -308,7 +351,7 @@ xfs_iget_cache_miss(
308 BUG(); 351 BUG();
309 } 352 }
310 353
311 write_lock(&pag->pag_ici_lock); 354 spin_lock(&pag->pag_ici_lock);
312 355
313 /* insert the new inode */ 356 /* insert the new inode */
314 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 357 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
@@ -323,14 +366,14 @@ xfs_iget_cache_miss(
323 ip->i_udquot = ip->i_gdquot = NULL; 366 ip->i_udquot = ip->i_gdquot = NULL;
324 xfs_iflags_set(ip, XFS_INEW); 367 xfs_iflags_set(ip, XFS_INEW);
325 368
326 write_unlock(&pag->pag_ici_lock); 369 spin_unlock(&pag->pag_ici_lock);
327 radix_tree_preload_end(); 370 radix_tree_preload_end();
328 371
329 *ipp = ip; 372 *ipp = ip;
330 return 0; 373 return 0;
331 374
332out_preload_end: 375out_preload_end:
333 write_unlock(&pag->pag_ici_lock); 376 spin_unlock(&pag->pag_ici_lock);
334 radix_tree_preload_end(); 377 radix_tree_preload_end();
335 if (lock_flags) 378 if (lock_flags)
336 xfs_iunlock(ip, lock_flags); 379 xfs_iunlock(ip, lock_flags);
@@ -377,7 +420,7 @@ xfs_iget(
377 xfs_agino_t agino; 420 xfs_agino_t agino;
378 421
379 /* reject inode numbers outside existing AGs */ 422 /* reject inode numbers outside existing AGs */
380 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 423 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
381 return EINVAL; 424 return EINVAL;
382 425
383 /* get the perag structure and ensure that it's inode capable */ 426 /* get the perag structure and ensure that it's inode capable */
@@ -386,15 +429,15 @@ xfs_iget(
386 429
387again: 430again:
388 error = 0; 431 error = 0;
389 read_lock(&pag->pag_ici_lock); 432 rcu_read_lock();
390 ip = radix_tree_lookup(&pag->pag_ici_root, agino); 433 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
391 434
392 if (ip) { 435 if (ip) {
393 error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); 436 error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
394 if (error) 437 if (error)
395 goto out_error_or_again; 438 goto out_error_or_again;
396 } else { 439 } else {
397 read_unlock(&pag->pag_ici_lock); 440 rcu_read_unlock();
398 XFS_STATS_INC(xs_ig_missed); 441 XFS_STATS_INC(xs_ig_missed);
399 442
400 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 443 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 108c7a085f94..be7cf625421f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -887,7 +887,7 @@ xfs_iread(
887 * around for a while. This helps to keep recently accessed 887 * around for a while. This helps to keep recently accessed
888 * meta-data in-core longer. 888 * meta-data in-core longer.
889 */ 889 */
890 XFS_BUF_SET_REF(bp, XFS_INO_REF); 890 xfs_buf_set_ref(bp, XFS_INO_REF);
891 891
892 /* 892 /*
893 * Use xfs_trans_brelse() to release the buffer containing the 893 * Use xfs_trans_brelse() to release the buffer containing the
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster(
2000 */ 2000 */
2001 for (i = 0; i < ninodes; i++) { 2001 for (i = 0; i < ninodes; i++) {
2002retry: 2002retry:
2003 read_lock(&pag->pag_ici_lock); 2003 rcu_read_lock();
2004 ip = radix_tree_lookup(&pag->pag_ici_root, 2004 ip = radix_tree_lookup(&pag->pag_ici_root,
2005 XFS_INO_TO_AGINO(mp, (inum + i))); 2005 XFS_INO_TO_AGINO(mp, (inum + i)));
2006 2006
2007 /* Inode not in memory or stale, nothing to do */ 2007 /* Inode not in memory, nothing to do */
2008 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2008 if (!ip) {
2009 read_unlock(&pag->pag_ici_lock); 2009 rcu_read_unlock();
2010 continue; 2010 continue;
2011 } 2011 }
2012 2012
2013 /* 2013 /*
2014 * because this is an RCU protected lookup, we could
2015 * find a recently freed or even reallocated inode
2016 * during the lookup. We need to check under the
2017 * i_flags_lock for a valid inode here. Skip it if it
2018 * is not valid, the wrong inode or stale.
2019 */
2020 spin_lock(&ip->i_flags_lock);
2021 if (ip->i_ino != inum + i ||
2022 __xfs_iflags_test(ip, XFS_ISTALE)) {
2023 spin_unlock(&ip->i_flags_lock);
2024 rcu_read_unlock();
2025 continue;
2026 }
2027 spin_unlock(&ip->i_flags_lock);
2028
2029 /*
2014 * Don't try to lock/unlock the current inode, but we 2030 * Don't try to lock/unlock the current inode, but we
2015 * _cannot_ skip the other inodes that we did not find 2031 * _cannot_ skip the other inodes that we did not find
2016 * in the list attached to the buffer and are not 2032 * in the list attached to the buffer and are not
@@ -2019,11 +2035,11 @@ retry:
2019 */ 2035 */
2020 if (ip != free_ip && 2036 if (ip != free_ip &&
2021 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2037 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2022 read_unlock(&pag->pag_ici_lock); 2038 rcu_read_unlock();
2023 delay(1); 2039 delay(1);
2024 goto retry; 2040 goto retry;
2025 } 2041 }
2026 read_unlock(&pag->pag_ici_lock); 2042 rcu_read_unlock();
2027 2043
2028 xfs_iflock(ip); 2044 xfs_iflock(ip);
2029 xfs_iflags_set(ip, XFS_ISTALE); 2045 xfs_iflags_set(ip, XFS_ISTALE);
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
2629 2645
2630 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2646 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2631 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2647 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
2632 read_lock(&pag->pag_ici_lock); 2648 rcu_read_lock();
2633 /* really need a gang lookup range call here */ 2649 /* really need a gang lookup range call here */
2634 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2650 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2635 first_index, inodes_per_cluster); 2651 first_index, inodes_per_cluster);
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
2640 iq = ilist[i]; 2656 iq = ilist[i];
2641 if (iq == ip) 2657 if (iq == ip)
2642 continue; 2658 continue;
2643 /* if the inode lies outside this cluster, we're done. */ 2659
2644 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 2660 /*
2645 break; 2661 * because this is an RCU protected lookup, we could find a
2662 * recently freed or even reallocated inode during the lookup.
2663 * We need to check under the i_flags_lock for a valid inode
2664 * here. Skip it if it is not valid or the wrong inode.
2665 */
 2666 	spin_lock(&iq->i_flags_lock);
 2667 	if (!iq->i_ino ||
 2668 	    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
 2669 		spin_unlock(&iq->i_flags_lock);
 2670 		continue;
 2671 	}
 2672 	spin_unlock(&iq->i_flags_lock);
2673
2646 /* 2674 /*
2647 * Do an un-protected check to see if the inode is dirty and 2675 * Do an un-protected check to see if the inode is dirty and
2648 * is a candidate for flushing. These checks will be repeated 2676 * is a candidate for flushing. These checks will be repeated
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
2692 } 2720 }
2693 2721
2694out_free: 2722out_free:
2695 read_unlock(&pag->pag_ici_lock); 2723 rcu_read_unlock();
2696 kmem_free(ilist); 2724 kmem_free(ilist);
2697out_put: 2725out_put:
2698 xfs_perag_put(pag); 2726 xfs_perag_put(pag);
@@ -2704,7 +2732,7 @@ cluster_corrupt_out:
2704 * Corruption detected in the clustering loop. Invalidate the 2732 * Corruption detected in the clustering loop. Invalidate the
2705 * inode buffer and shut down the filesystem. 2733 * inode buffer and shut down the filesystem.
2706 */ 2734 */
2707 read_unlock(&pag->pag_ici_lock); 2735 rcu_read_unlock();
2708 /* 2736 /*
2709 * Clean up the buffer. If it was B_DELWRI, just release it -- 2737 * Clean up the buffer. If it was B_DELWRI, just release it --
2710 * brelse can handle it with no problems. If not, shut down the 2738 * brelse can handle it with no problems. If not, shut down the
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fb2ca2e4cdc9..5c95fa8ec11d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
376/* 376/*
377 * In-core inode flags. 377 * In-core inode flags.
378 */ 378 */
379#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ 379#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
380#define XFS_ISTALE 0x0002 /* inode has been staled */ 380#define XFS_ISTALE 0x0002 /* inode has been staled */
381#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ 381#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
382#define XFS_INEW 0x0008 /* inode has just been allocated */ 382#define XFS_INEW 0x0008 /* inode has just been allocated */
383#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ 383#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
384#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ 384#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
385#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
385 386
386/* 387/*
387 * Flags for inode locking. 388 * Flags for inode locking.
@@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
438#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 439#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
439#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 440#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
440 441
442extern struct lock_class_key xfs_iolock_reclaimable;
443
441/* 444/*
442 * Flags for xfs_itruncate_start(). 445 * Flags for xfs_itruncate_start().
443 */ 446 */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7c8d30c453c3..fd4f398bd6f1 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -842,15 +842,64 @@ xfs_inode_item_destroy(
842 * flushed to disk. It is responsible for removing the inode item 842 * flushed to disk. It is responsible for removing the inode item
843 * from the AIL if it has not been re-logged, and unlocking the inode's 843 * from the AIL if it has not been re-logged, and unlocking the inode's
844 * flush lock. 844 * flush lock.
845 *
846 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
 847 * list for other inode items that will run this function. We remove them from the
848 * buffer list so we can process all the inode IO completions in one AIL lock
849 * traversal.
845 */ 850 */
846void 851void
847xfs_iflush_done( 852xfs_iflush_done(
848 struct xfs_buf *bp, 853 struct xfs_buf *bp,
849 struct xfs_log_item *lip) 854 struct xfs_log_item *lip)
850{ 855{
851 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 856 struct xfs_inode_log_item *iip;
852 xfs_inode_t *ip = iip->ili_inode; 857 struct xfs_log_item *blip;
858 struct xfs_log_item *next;
859 struct xfs_log_item *prev;
853 struct xfs_ail *ailp = lip->li_ailp; 860 struct xfs_ail *ailp = lip->li_ailp;
861 int need_ail = 0;
862
863 /*
864 * Scan the buffer IO completions for other inodes being completed and
865 * attach them to the current inode log item.
866 */
867 blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
868 prev = NULL;
869 while (blip != NULL) {
 870 		if (blip->li_cb != xfs_iflush_done) {
871 prev = blip;
872 blip = blip->li_bio_list;
873 continue;
874 }
875
876 /* remove from list */
877 next = blip->li_bio_list;
878 if (!prev) {
879 XFS_BUF_SET_FSPRIVATE(bp, next);
880 } else {
881 prev->li_bio_list = next;
882 }
883
884 /* add to current list */
885 blip->li_bio_list = lip->li_bio_list;
886 lip->li_bio_list = blip;
887
888 /*
889 * while we have the item, do the unlocked check for needing
890 * the AIL lock.
891 */
892 iip = INODE_ITEM(blip);
893 if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
894 need_ail++;
895
896 blip = next;
897 }
898
899 /* make sure we capture the state of the initial inode. */
900 iip = INODE_ITEM(lip);
901 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
902 need_ail++;
854 903
855 /* 904 /*
856 * We only want to pull the item from the AIL if it is 905 * We only want to pull the item from the AIL if it is
@@ -861,28 +910,37 @@ xfs_iflush_done(
861 * the lock since it's cheaper, and then we recheck while 910 * the lock since it's cheaper, and then we recheck while
862 * holding the lock before removing the inode from the AIL. 911 * holding the lock before removing the inode from the AIL.
863 */ 912 */
864 if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) { 913 if (need_ail) {
914 struct xfs_log_item *log_items[need_ail];
915 int i = 0;
865 spin_lock(&ailp->xa_lock); 916 spin_lock(&ailp->xa_lock);
866 if (lip->li_lsn == iip->ili_flush_lsn) { 917 for (blip = lip; blip; blip = blip->li_bio_list) {
867 /* xfs_trans_ail_delete() drops the AIL lock. */ 918 iip = INODE_ITEM(blip);
868 xfs_trans_ail_delete(ailp, lip); 919 if (iip->ili_logged &&
869 } else { 920 blip->li_lsn == iip->ili_flush_lsn) {
870 spin_unlock(&ailp->xa_lock); 921 log_items[i++] = blip;
922 }
923 ASSERT(i <= need_ail);
871 } 924 }
925 /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
926 xfs_trans_ail_delete_bulk(ailp, log_items, i);
872 } 927 }
873 928
874 iip->ili_logged = 0;
875 929
876 /* 930 /*
 877 	 * Clear the ili_last_fields bits now that we know that the 931 	 * Clean up and unlock the flush lock now that we are done. We can clear the
878 * data corresponding to them is safely on disk. 932 * ili_last_fields bits now that we know that the data corresponding to
933 * them is safely on disk.
879 */ 934 */
880 iip->ili_last_fields = 0; 935 for (blip = lip; blip; blip = next) {
936 next = blip->li_bio_list;
937 blip->li_bio_list = NULL;
881 938
882 /* 939 iip = INODE_ITEM(blip);
883 * Release the inode's flush lock since we're done with it. 940 iip->ili_logged = 0;
884 */ 941 iip->ili_last_fields = 0;
885 xfs_ifunlock(ip); 942 xfs_ifunlock(iip->ili_inode);
943 }
886} 944}
887 945
888/* 946/*
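
Structurally, xfs_iflush_done() now follows a two-pass batching idiom; a condensed sketch with hypothetical types (demo_item, demo_bulk_remove) rather than the real log item lists:

	struct demo_item {
		struct demo_item	*next;
		int			needs_removal;
	};

	/* hypothetical: removes all items, dropping *lock before it returns */
	void demo_bulk_remove(struct demo_item **items, int count);

	static void demo_bulk_complete(struct demo_item *head, spinlock_t *lock)
	{
		struct demo_item *it;
		int need = 0, i = 0;

		for (it = head; it; it = it->next)
			if (it->needs_removal)		/* cheap unlocked pre-check */
				need++;
		if (!need)
			return;

		spin_lock(lock);
		{
			struct demo_item *batch[need];	/* VLA, as the patch uses */

			for (it = head; it; it = it->next)
				if (it->needs_removal)	/* recheck under the lock */
					batch[i++] = it;
			demo_bulk_remove(batch, i);
		}
	}
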
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 20576146369f..55582bd66659 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,127 +47,8 @@
47 47
48#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 48#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
49 << mp->m_writeio_log) 49 << mp->m_writeio_log)
50#define XFS_STRAT_WRITE_IMAPS 2
51#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP 50#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
52 51
53STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
54 int, struct xfs_bmbt_irec *, int *);
55STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
56 struct xfs_bmbt_irec *, int *);
57STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
58 struct xfs_bmbt_irec *, int *);
59
60int
61xfs_iomap(
62 struct xfs_inode *ip,
63 xfs_off_t offset,
64 ssize_t count,
65 int flags,
66 struct xfs_bmbt_irec *imap,
67 int *nimaps,
68 int *new)
69{
70 struct xfs_mount *mp = ip->i_mount;
71 xfs_fileoff_t offset_fsb, end_fsb;
72 int error = 0;
73 int lockmode = 0;
74 int bmapi_flags = 0;
75
76 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
77
78 *new = 0;
79
80 if (XFS_FORCED_SHUTDOWN(mp))
81 return XFS_ERROR(EIO);
82
83 trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
84
85 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
86 case BMAPI_READ:
87 lockmode = xfs_ilock_map_shared(ip);
88 bmapi_flags = XFS_BMAPI_ENTIRE;
89 break;
90 case BMAPI_WRITE:
91 lockmode = XFS_ILOCK_EXCL;
92 if (flags & BMAPI_IGNSTATE)
93 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
94 xfs_ilock(ip, lockmode);
95 break;
96 case BMAPI_ALLOCATE:
97 lockmode = XFS_ILOCK_SHARED;
98 bmapi_flags = XFS_BMAPI_ENTIRE;
99
100 /* Attempt non-blocking lock */
101 if (flags & BMAPI_TRYLOCK) {
102 if (!xfs_ilock_nowait(ip, lockmode))
103 return XFS_ERROR(EAGAIN);
104 } else {
105 xfs_ilock(ip, lockmode);
106 }
107 break;
108 default:
109 BUG();
110 }
111
112 ASSERT(offset <= mp->m_maxioffset);
113 if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
114 count = mp->m_maxioffset - offset;
115 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
116 offset_fsb = XFS_B_TO_FSBT(mp, offset);
117
118 error = xfs_bmapi(NULL, ip, offset_fsb,
119 (xfs_filblks_t)(end_fsb - offset_fsb),
120 bmapi_flags, NULL, 0, imap,
121 nimaps, NULL);
122
123 if (error)
124 goto out;
125
126 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
127 case BMAPI_WRITE:
128 /* If we found an extent, return it */
129 if (*nimaps &&
130 (imap->br_startblock != HOLESTARTBLOCK) &&
131 (imap->br_startblock != DELAYSTARTBLOCK)) {
132 trace_xfs_iomap_found(ip, offset, count, flags, imap);
133 break;
134 }
135
136 if (flags & BMAPI_DIRECT) {
137 error = xfs_iomap_write_direct(ip, offset, count, flags,
138 imap, nimaps);
139 } else {
140 error = xfs_iomap_write_delay(ip, offset, count, flags,
141 imap, nimaps);
142 }
143 if (!error) {
144 trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
145 }
146 *new = 1;
147 break;
148 case BMAPI_ALLOCATE:
149 /* If we found an extent, return it */
150 xfs_iunlock(ip, lockmode);
151 lockmode = 0;
152
153 if (*nimaps && !isnullstartblock(imap->br_startblock)) {
154 trace_xfs_iomap_found(ip, offset, count, flags, imap);
155 break;
156 }
157
158 error = xfs_iomap_write_allocate(ip, offset, count,
159 imap, nimaps);
160 break;
161 }
162
163 ASSERT(*nimaps <= 1);
164
165out:
166 if (lockmode)
167 xfs_iunlock(ip, lockmode);
168 return XFS_ERROR(error);
169}
170
171STATIC int 52STATIC int
172xfs_iomap_eof_align_last_fsb( 53xfs_iomap_eof_align_last_fsb(
173 xfs_mount_t *mp, 54 xfs_mount_t *mp,
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero(
236 return EFSCORRUPTED; 117 return EFSCORRUPTED;
237} 118}
238 119
239STATIC int 120int
240xfs_iomap_write_direct( 121xfs_iomap_write_direct(
241 xfs_inode_t *ip, 122 xfs_inode_t *ip,
242 xfs_off_t offset, 123 xfs_off_t offset,
243 size_t count, 124 size_t count,
244 int flags,
245 xfs_bmbt_irec_t *imap, 125 xfs_bmbt_irec_t *imap,
246 int *nmaps) 126 int nmaps)
247{ 127{
248 xfs_mount_t *mp = ip->i_mount; 128 xfs_mount_t *mp = ip->i_mount;
249 xfs_fileoff_t offset_fsb; 129 xfs_fileoff_t offset_fsb;
@@ -279,7 +159,7 @@ xfs_iomap_write_direct(
279 if (error) 159 if (error)
280 goto error_out; 160 goto error_out;
281 } else { 161 } else {
282 if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 162 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
283 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 163 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
284 imap->br_blockcount + 164 imap->br_blockcount +
285 imap->br_startoff); 165 imap->br_startoff);
@@ -331,7 +211,7 @@ xfs_iomap_write_direct(
331 xfs_trans_ijoin(tp, ip); 211 xfs_trans_ijoin(tp, ip);
332 212
333 bmapi_flag = XFS_BMAPI_WRITE; 213 bmapi_flag = XFS_BMAPI_WRITE;
334 if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) 214 if (offset < ip->i_size || extsz)
335 bmapi_flag |= XFS_BMAPI_PREALLOC; 215 bmapi_flag |= XFS_BMAPI_PREALLOC;
336 216
337 /* 217 /*
@@ -370,7 +250,6 @@ xfs_iomap_write_direct(
370 goto error_out; 250 goto error_out;
371 } 251 }
372 252
373 *nmaps = 1;
374 return 0; 253 return 0;
375 254
376error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 255error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
379 258
380error1: /* Just cancel transaction */ 259error1: /* Just cancel transaction */
381 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 260 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
382 *nmaps = 0; /* nothing set-up here */
383 261
384error_out: 262error_out:
385 return XFS_ERROR(error); 263 return XFS_ERROR(error);
@@ -389,6 +267,9 @@ error_out:
389 * If the caller is doing a write at the end of the file, then extend the 267 * If the caller is doing a write at the end of the file, then extend the
390 * allocation out to the file system's write iosize. We clean up any extra 268 * allocation out to the file system's write iosize. We clean up any extra
391 * space left over when the file is closed in xfs_inactive(). 269 * space left over when the file is closed in xfs_inactive().
270 *
271 * If we find we already have delalloc preallocation beyond EOF, don't do more
 272 * preallocation as it is not needed.
392 */ 273 */
393STATIC int 274STATIC int
394xfs_iomap_eof_want_preallocate( 275xfs_iomap_eof_want_preallocate(
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate(
396 xfs_inode_t *ip, 277 xfs_inode_t *ip,
397 xfs_off_t offset, 278 xfs_off_t offset,
398 size_t count, 279 size_t count,
399 int ioflag,
400 xfs_bmbt_irec_t *imap, 280 xfs_bmbt_irec_t *imap,
401 int nimaps, 281 int nimaps,
402 int *prealloc) 282 int *prealloc)
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate(
405 xfs_filblks_t count_fsb; 285 xfs_filblks_t count_fsb;
406 xfs_fsblock_t firstblock; 286 xfs_fsblock_t firstblock;
407 int n, error, imaps; 287 int n, error, imaps;
288 int found_delalloc = 0;
408 289
409 *prealloc = 0; 290 *prealloc = 0;
410 if ((offset + count) <= ip->i_size) 291 if ((offset + count) <= ip->i_size)
@@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate(
429 return 0; 310 return 0;
430 start_fsb += imap[n].br_blockcount; 311 start_fsb += imap[n].br_blockcount;
431 count_fsb -= imap[n].br_blockcount; 312 count_fsb -= imap[n].br_blockcount;
313
314 if (imap[n].br_startblock == DELAYSTARTBLOCK)
315 found_delalloc = 1;
432 } 316 }
433 } 317 }
434 *prealloc = 1; 318 if (!found_delalloc)
319 *prealloc = 1;
435 return 0; 320 return 0;
436} 321}
437 322
438STATIC int 323/*
 324 * If we don't have a user-specified preallocation size, dynamically increase
 325 * the preallocation size as the size of the file grows. Cap the maximum size
 326 * at a single extent or less if the filesystem is near full. The closer the
 327 * filesystem is to full, the smaller the maximum preallocation.
328 */
329STATIC xfs_fsblock_t
330xfs_iomap_prealloc_size(
331 struct xfs_mount *mp,
332 struct xfs_inode *ip)
333{
334 xfs_fsblock_t alloc_blocks = 0;
335
336 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
337 int shift = 0;
338 int64_t freesp;
339
340 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
341 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
342 rounddown_pow_of_two(alloc_blocks));
343
344 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
345 freesp = mp->m_sb.sb_fdblocks;
346 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
347 shift = 2;
348 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
349 shift++;
350 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
351 shift++;
352 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
353 shift++;
354 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
355 shift++;
356 }
357 if (shift)
358 alloc_blocks >>= shift;
359 }
360
361 if (alloc_blocks < mp->m_writeio_blocks)
362 alloc_blocks = mp->m_writeio_blocks;
363
364 return alloc_blocks;
365}
366
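
The low-space throttling in xfs_iomap_prealloc_size() is easiest to see in isolation. A plain-C arithmetic sketch, assuming a hypothetical five-entry threshold array ordered from the 5% mark down to 1% (thresh[] stands in for mp->m_low_space[]; the MAXEXTLEN and m_writeio_blocks clamps are omitted):

	/* each crossed threshold halves the preallocation again */
	static long demo_prealloc_blocks(long size_blocks, long freesp,
					 const long thresh[5])
	{
		long alloc = rounddown_pow_of_two(size_blocks);
		int shift = 0, i;

		for (i = 0; i < 5; i++)
			if (freesp < thresh[i])
				shift++;
		if (shift)
			shift++;	/* first crossing starts at shift = 2 */
		return alloc >> shift;
	}

So below 5% free space the preallocation is quartered, and below 1% it is divided by 64.
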
367int
439xfs_iomap_write_delay( 368xfs_iomap_write_delay(
440 xfs_inode_t *ip, 369 xfs_inode_t *ip,
441 xfs_off_t offset, 370 xfs_off_t offset,
442 size_t count, 371 size_t count,
443 int ioflag, 372 xfs_bmbt_irec_t *ret_imap)
444 xfs_bmbt_irec_t *ret_imap,
445 int *nmaps)
446{ 373{
447 xfs_mount_t *mp = ip->i_mount; 374 xfs_mount_t *mp = ip->i_mount;
448 xfs_fileoff_t offset_fsb; 375 xfs_fileoff_t offset_fsb;
@@ -469,16 +396,19 @@ xfs_iomap_write_delay(
469 extsz = xfs_get_extsz_hint(ip); 396 extsz = xfs_get_extsz_hint(ip);
470 offset_fsb = XFS_B_TO_FSBT(mp, offset); 397 offset_fsb = XFS_B_TO_FSBT(mp, offset);
471 398
472 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 400 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
473 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 401 imap, XFS_WRITE_IMAPS, &prealloc);
474 if (error) 402 if (error)
475 return error; 403 return error;
476 404
477retry: 405retry:
478 if (prealloc) { 406 if (prealloc) {
407 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
408
479 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 409 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
480 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 410 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
481 last_fsb = ioalign + mp->m_writeio_blocks; 411 last_fsb = ioalign + alloc_blocks;
482 } else { 412 } else {
483 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 413 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
484 } 414 }
@@ -496,22 +426,31 @@ retry:
496 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 426 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
497 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 427 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
498 &nimaps, NULL); 428 &nimaps, NULL);
499 if (error && (error != ENOSPC)) 429 switch (error) {
430 case 0:
431 case ENOSPC:
432 case EDQUOT:
433 break;
434 default:
500 return XFS_ERROR(error); 435 return XFS_ERROR(error);
436 }
501 437
502 /* 438 /*
503 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 439 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
504 * then we must have run out of space - flush all other inodes with 440 * ENOSPC, * flush all other inodes with delalloc blocks to free up
505 * delalloc blocks and retry without EOF preallocation. 441 * some of the excess reserved metadata space. For both cases, retry
442 * without EOF preallocation.
506 */ 443 */
507 if (nimaps == 0) { 444 if (nimaps == 0) {
508 trace_xfs_delalloc_enospc(ip, offset, count); 445 trace_xfs_delalloc_enospc(ip, offset, count);
509 if (flushed) 446 if (flushed)
510 return XFS_ERROR(ENOSPC); 447 return XFS_ERROR(error ? error : ENOSPC);
511 448
512 xfs_iunlock(ip, XFS_ILOCK_EXCL); 449 if (error == ENOSPC) {
513 xfs_flush_inodes(ip); 450 xfs_iunlock(ip, XFS_ILOCK_EXCL);
514 xfs_ilock(ip, XFS_ILOCK_EXCL); 451 xfs_flush_inodes(ip);
452 xfs_ilock(ip, XFS_ILOCK_EXCL);
453 }
515 454
516 flushed = 1; 455 flushed = 1;
517 error = 0; 456 error = 0;
@@ -523,8 +462,6 @@ retry:
523 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 462 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
524 463
525 *ret_imap = imap[0]; 464 *ret_imap = imap[0];
526 *nmaps = 1;
527
528 return 0; 465 return 0;
529} 466}
530 467
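
The retry logic above amounts to: allow one retry, drop EOF preallocation on it, and flush delalloc inodes only for ENOSPC, since EDQUOT is a per-quota shortage that flushing other inodes cannot fix. A control-flow sketch with hypothetical helpers standing in for xfs_bmapi() and xfs_flush_inodes():

	int demo_bmapi_delay(struct xfs_inode *ip, int prealloc, int *nimaps);
	void demo_flush_delalloc(struct xfs_inode *ip);

	static int demo_delay_alloc(struct xfs_inode *ip, int prealloc)
	{
		int flushed = 0, error, nimaps;

	retry:
		error = demo_bmapi_delay(ip, prealloc, &nimaps);
		if (error && error != ENOSPC && error != EDQUOT)
			return error;
		if (nimaps == 0) {
			if (flushed)	/* second failure: give up */
				return error ? error : ENOSPC;
			if (error == ENOSPC)
				demo_flush_delalloc(ip);
			flushed = 1;
			prealloc = 0;	/* retry without EOF preallocation */
			error = 0;
			goto retry;
		}
		return 0;
	}
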
@@ -538,13 +475,12 @@ retry:
538 * We no longer bother to look at the incoming map - all we have to 475 * We no longer bother to look at the incoming map - all we have to
539 * guarantee is that whatever we allocate fills the required range. 476 * guarantee is that whatever we allocate fills the required range.
540 */ 477 */
541STATIC int 478int
542xfs_iomap_write_allocate( 479xfs_iomap_write_allocate(
543 xfs_inode_t *ip, 480 xfs_inode_t *ip,
544 xfs_off_t offset, 481 xfs_off_t offset,
545 size_t count, 482 size_t count,
546 xfs_bmbt_irec_t *imap, 483 xfs_bmbt_irec_t *imap)
547 int *retmap)
548{ 484{
549 xfs_mount_t *mp = ip->i_mount; 485 xfs_mount_t *mp = ip->i_mount;
550 xfs_fileoff_t offset_fsb, last_block; 486 xfs_fileoff_t offset_fsb, last_block;
@@ -557,8 +493,6 @@ xfs_iomap_write_allocate(
557 int error = 0; 493 int error = 0;
558 int nres; 494 int nres;
559 495
560 *retmap = 0;
561
562 /* 496 /*
563 * Make sure that the dquots are there. 497 * Make sure that the dquots are there.
564 */ 498 */
@@ -680,7 +614,6 @@ xfs_iomap_write_allocate(
680 if ((offset_fsb >= imap->br_startoff) && 614 if ((offset_fsb >= imap->br_startoff) &&
681 (offset_fsb < (imap->br_startoff + 615 (offset_fsb < (imap->br_startoff +
682 imap->br_blockcount))) { 616 imap->br_blockcount))) {
683 *retmap = 1;
684 XFS_STATS_INC(xs_xstrat_quick); 617 XFS_STATS_INC(xs_xstrat_quick);
685 return 0; 618 return 0;
686 } 619 }
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7748a430f50d..80615760959a 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,30 +18,15 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21/* base extent manipulation calls */
22#define BMAPI_READ (1 << 0) /* read extents */
23#define BMAPI_WRITE (1 << 1) /* create extents */
24#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
25
26/* modifiers */
27#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
28#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
29#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
30#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
31
32#define BMAPI_FLAGS \
33 { BMAPI_READ, "READ" }, \
34 { BMAPI_WRITE, "WRITE" }, \
35 { BMAPI_ALLOCATE, "ALLOCATE" }, \
36 { BMAPI_IGNSTATE, "IGNSTATE" }, \
37 { BMAPI_DIRECT, "DIRECT" }, \
38 { BMAPI_TRYLOCK, "TRYLOCK" }
39
40struct xfs_inode; 21struct xfs_inode;
41struct xfs_bmbt_irec; 22struct xfs_bmbt_irec;
42 23
43extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, 24extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
44 struct xfs_bmbt_irec *, int *, int *); 25 struct xfs_bmbt_irec *, int);
26extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
27 struct xfs_bmbt_irec *);
28extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
29 struct xfs_bmbt_irec *);
45extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); 30extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
46 31
47#endif /* __XFS_IOMAP_H__*/ 32#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index cee4ab9f8a9e..0bf24b11d0c4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
47 xfs_buftarg_t *log_target, 47 xfs_buftarg_t *log_target,
48 xfs_daddr_t blk_offset, 48 xfs_daddr_t blk_offset,
49 int num_bblks); 49 int num_bblks);
50STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); 50STATIC int xlog_space_left(struct log *log, atomic64_t *head);
51STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 51STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
52STATIC void xlog_dealloc_log(xlog_t *log); 52STATIC void xlog_dealloc_log(xlog_t *log);
53 53
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
70/* local functions to manipulate grant head */ 70/* local functions to manipulate grant head */
71STATIC int xlog_grant_log_space(xlog_t *log, 71STATIC int xlog_grant_log_space(xlog_t *log,
72 xlog_ticket_t *xtic); 72 xlog_ticket_t *xtic);
73STATIC void xlog_grant_push_ail(xfs_mount_t *mp, 73STATIC void xlog_grant_push_ail(struct log *log,
74 int need_bytes); 74 int need_bytes);
75STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 75STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
76 xlog_ticket_t *ticket); 76 xlog_ticket_t *ticket);
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
81 81
82#if defined(DEBUG) 82#if defined(DEBUG)
83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); 83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
84STATIC void xlog_verify_grant_head(xlog_t *log, int equals); 84STATIC void xlog_verify_grant_tail(struct log *log);
85STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 85STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
86 int count, boolean_t syncing); 86 int count, boolean_t syncing);
87STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, 87STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
88 xfs_lsn_t tail_lsn); 88 xfs_lsn_t tail_lsn);
89#else 89#else
90#define xlog_verify_dest_ptr(a,b) 90#define xlog_verify_dest_ptr(a,b)
91#define xlog_verify_grant_head(a,b) 91#define xlog_verify_grant_tail(a)
92#define xlog_verify_iclog(a,b,c,d) 92#define xlog_verify_iclog(a,b,c,d)
93#define xlog_verify_tail_lsn(a,b,c) 93#define xlog_verify_tail_lsn(a,b,c)
94#endif 94#endif
95 95
96STATIC int xlog_iclogs_empty(xlog_t *log); 96STATIC int xlog_iclogs_empty(xlog_t *log);
97 97
98
99static void 98static void
100xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) 99xlog_grant_sub_space(
100 struct log *log,
101 atomic64_t *head,
102 int bytes)
101{ 103{
102 if (*qp) { 104 int64_t head_val = atomic64_read(head);
103 tic->t_next = (*qp); 105 int64_t new, old;
104 tic->t_prev = (*qp)->t_prev;
105 (*qp)->t_prev->t_next = tic;
106 (*qp)->t_prev = tic;
107 } else {
108 tic->t_prev = tic->t_next = tic;
109 *qp = tic;
110 }
111 106
112 tic->t_flags |= XLOG_TIC_IN_Q; 107 do {
113} 108 int cycle, space;
114 109
115static void 110 xlog_crack_grant_head_val(head_val, &cycle, &space);
116xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
117{
118 if (tic == tic->t_next) {
119 *qp = NULL;
120 } else {
121 *qp = tic->t_next;
122 tic->t_next->t_prev = tic->t_prev;
123 tic->t_prev->t_next = tic->t_next;
124 }
125 111
126 tic->t_next = tic->t_prev = NULL; 112 space -= bytes;
127 tic->t_flags &= ~XLOG_TIC_IN_Q; 113 if (space < 0) {
114 space += log->l_logsize;
115 cycle--;
116 }
117
118 old = head_val;
119 new = xlog_assign_grant_head_val(cycle, space);
120 head_val = atomic64_cmpxchg(head, old, new);
121 } while (head_val != old);
128} 122}
129 123
130static void 124static void
131xlog_grant_sub_space(struct log *log, int bytes) 125xlog_grant_add_space(
126 struct log *log,
127 atomic64_t *head,
128 int bytes)
132{ 129{
133 log->l_grant_write_bytes -= bytes; 130 int64_t head_val = atomic64_read(head);
134 if (log->l_grant_write_bytes < 0) { 131 int64_t new, old;
135 log->l_grant_write_bytes += log->l_logsize;
136 log->l_grant_write_cycle--;
137 }
138
139 log->l_grant_reserve_bytes -= bytes;
140 if ((log)->l_grant_reserve_bytes < 0) {
141 log->l_grant_reserve_bytes += log->l_logsize;
142 log->l_grant_reserve_cycle--;
143 }
144 132
145} 133 do {
134 int tmp;
135 int cycle, space;
146 136
147static void 137 xlog_crack_grant_head_val(head_val, &cycle, &space);
148xlog_grant_add_space_write(struct log *log, int bytes)
149{
150 int tmp = log->l_logsize - log->l_grant_write_bytes;
151 if (tmp > bytes)
152 log->l_grant_write_bytes += bytes;
153 else {
154 log->l_grant_write_cycle++;
155 log->l_grant_write_bytes = bytes - tmp;
156 }
157}
158 138
159static void 139 tmp = log->l_logsize - space;
160xlog_grant_add_space_reserve(struct log *log, int bytes) 140 if (tmp > bytes)
161{ 141 space += bytes;
162 int tmp = log->l_logsize - log->l_grant_reserve_bytes; 142 else {
163 if (tmp > bytes) 143 space = bytes - tmp;
164 log->l_grant_reserve_bytes += bytes; 144 cycle++;
165 else { 145 }
166 log->l_grant_reserve_cycle++;
167 log->l_grant_reserve_bytes = bytes - tmp;
168 }
169}
170 146
171static inline void 147 old = head_val;
172xlog_grant_add_space(struct log *log, int bytes) 148 new = xlog_assign_grant_head_val(cycle, space);
173{ 149 head_val = atomic64_cmpxchg(head, old, new);
174 xlog_grant_add_space_write(log, bytes); 150 } while (head_val != old);
175 xlog_grant_add_space_reserve(log, bytes);
176} 151}
177 152
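
Both grant head updates above use the same lockless idiom: pack (cycle, space) into one 64-bit word and retry with cmpxchg until no other CPU raced. A self-contained sketch, assuming the obvious high/low packing that xlog_crack_grant_head_val()/xlog_assign_grant_head_val() imply:

	static void demo_grant_sub(atomic64_t *head, int bytes, int logsize)
	{
		int64_t old, new, val = atomic64_read(head);

		do {
			int cycle = val >> 32;
			int space = val & 0xffffffff;

			space -= bytes;
			if (space < 0) {	/* wrapped: borrow a cycle */
				space += logsize;
				cycle--;
			}
			old = val;
			new = ((int64_t)cycle << 32) | space;
			val = atomic64_cmpxchg(head, old, new);
		} while (val != old);	/* lost a race: retry with the new value */
	}
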
178static void 153static void
@@ -355,7 +330,7 @@ xfs_log_reserve(
355 330
356 trace_xfs_log_reserve(log, internal_ticket); 331 trace_xfs_log_reserve(log, internal_ticket);
357 332
358 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 333 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
359 retval = xlog_regrant_write_log_space(log, internal_ticket); 334 retval = xlog_regrant_write_log_space(log, internal_ticket);
360 } else { 335 } else {
361 /* may sleep if need to allocate more tickets */ 336 /* may sleep if need to allocate more tickets */
@@ -369,7 +344,7 @@ xfs_log_reserve(
369 344
370 trace_xfs_log_reserve(log, internal_ticket); 345 trace_xfs_log_reserve(log, internal_ticket);
371 346
372 xlog_grant_push_ail(mp, 347 xlog_grant_push_ail(log,
373 (internal_ticket->t_unit_res * 348 (internal_ticket->t_unit_res *
374 internal_ticket->t_cnt)); 349 internal_ticket->t_cnt));
375 retval = xlog_grant_log_space(log, internal_ticket); 350 retval = xlog_grant_log_space(log, internal_ticket);
@@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
584 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 559 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
585 iclog->ic_state == XLOG_STATE_DIRTY)) { 560 iclog->ic_state == XLOG_STATE_DIRTY)) {
586 if (!XLOG_FORCED_SHUTDOWN(log)) { 561 if (!XLOG_FORCED_SHUTDOWN(log)) {
587 sv_wait(&iclog->ic_force_wait, PMEM, 562 xlog_wait(&iclog->ic_force_wait,
588 &log->l_icloglock, s); 563 &log->l_icloglock);
589 } else { 564 } else {
590 spin_unlock(&log->l_icloglock); 565 spin_unlock(&log->l_icloglock);
591 } 566 }
@@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
625 || iclog->ic_state == XLOG_STATE_DIRTY 600 || iclog->ic_state == XLOG_STATE_DIRTY
626 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 601 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
627 602
628 sv_wait(&iclog->ic_force_wait, PMEM, 603 xlog_wait(&iclog->ic_force_wait,
629 &log->l_icloglock, s); 604 &log->l_icloglock);
630 } else { 605 } else {
631 spin_unlock(&log->l_icloglock); 606 spin_unlock(&log->l_icloglock);
632 } 607 }
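
xlog_wait() replaces sv_wait() here; its body is not shown in this hunk, but the idiom it must implement is sleeping on a waitqueue while atomically giving up a held spinlock. A sketch of such a helper:

	static inline void demo_xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
	{
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(wq, &wait);
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(lock);	/* state already set: no lost wakeups */
		schedule();
		remove_wait_queue(wq, &wait);
	}
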
@@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp,
703{ 678{
704 xlog_ticket_t *tic; 679 xlog_ticket_t *tic;
705 xlog_t *log = mp->m_log; 680 xlog_t *log = mp->m_log;
706 int need_bytes, free_bytes, cycle, bytes; 681 int need_bytes, free_bytes;
707 682
708 if (XLOG_FORCED_SHUTDOWN(log)) 683 if (XLOG_FORCED_SHUTDOWN(log))
709 return; 684 return;
710 685
711 if (tail_lsn == 0) { 686 if (tail_lsn == 0)
712 /* needed since sync_lsn is 64 bits */ 687 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
713 spin_lock(&log->l_icloglock);
714 tail_lsn = log->l_last_sync_lsn;
715 spin_unlock(&log->l_icloglock);
716 }
717
718 spin_lock(&log->l_grant_lock);
719 688
720 /* Also an invalid lsn. 1 implies that we aren't passing in a valid 689 /* tail_lsn == 1 implies that we weren't passed a valid value. */
721 * tail_lsn. 690 if (tail_lsn != 1)
722 */ 691 atomic64_set(&log->l_tail_lsn, tail_lsn);
723 if (tail_lsn != 1) {
724 log->l_tail_lsn = tail_lsn;
725 }
726 692
727 if ((tic = log->l_write_headq)) { 693 if (!list_empty_careful(&log->l_writeq)) {
728#ifdef DEBUG 694#ifdef DEBUG
729 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 695 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
730 panic("Recovery problem"); 696 panic("Recovery problem");
731#endif 697#endif
732 cycle = log->l_grant_write_cycle; 698 spin_lock(&log->l_grant_write_lock);
733 bytes = log->l_grant_write_bytes; 699 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
734 free_bytes = xlog_space_left(log, cycle, bytes); 700 list_for_each_entry(tic, &log->l_writeq, t_queue) {
735 do {
736 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); 701 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
737 702
738 if (free_bytes < tic->t_unit_res && tail_lsn != 1) 703 if (free_bytes < tic->t_unit_res && tail_lsn != 1)
739 break; 704 break;
740 tail_lsn = 0; 705 tail_lsn = 0;
741 free_bytes -= tic->t_unit_res; 706 free_bytes -= tic->t_unit_res;
742 sv_signal(&tic->t_wait); 707 trace_xfs_log_regrant_write_wake_up(log, tic);
743 tic = tic->t_next; 708 wake_up(&tic->t_wait);
744 } while (tic != log->l_write_headq); 709 }
710 spin_unlock(&log->l_grant_write_lock);
745 } 711 }
746 if ((tic = log->l_reserve_headq)) { 712
713 if (!list_empty_careful(&log->l_reserveq)) {
747#ifdef DEBUG 714#ifdef DEBUG
748 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 715 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
749 panic("Recovery problem"); 716 panic("Recovery problem");
750#endif 717#endif
751 cycle = log->l_grant_reserve_cycle; 718 spin_lock(&log->l_grant_reserve_lock);
752 bytes = log->l_grant_reserve_bytes; 719 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
753 free_bytes = xlog_space_left(log, cycle, bytes); 720 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
754 do {
755 if (tic->t_flags & XLOG_TIC_PERM_RESERV) 721 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
756 need_bytes = tic->t_unit_res*tic->t_cnt; 722 need_bytes = tic->t_unit_res*tic->t_cnt;
757 else 723 else
@@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
760 break; 726 break;
761 tail_lsn = 0; 727 tail_lsn = 0;
762 free_bytes -= need_bytes; 728 free_bytes -= need_bytes;
763 sv_signal(&tic->t_wait); 729 trace_xfs_log_grant_wake_up(log, tic);
764 tic = tic->t_next; 730 wake_up(&tic->t_wait);
765 } while (tic != log->l_reserve_headq); 731 }
732 spin_unlock(&log->l_grant_reserve_lock);
766 } 733 }
767 spin_unlock(&log->l_grant_lock); 734}
768} /* xfs_log_move_tail */
769 735
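
The wakeup loops above pair with a waiter side that queues a ticket and sleeps until woken with enough space. A sketch of that counterpart using the list and lock names this patch introduces (the exact grant function bodies are outside this hunk, so the shape here is assumed):

	static void demo_wait_for_space(struct log *log, struct xlog_ticket *tic,
					int need_bytes)
	{
		int free_bytes;

		spin_lock(&log->l_grant_reserve_lock);
		list_add_tail(&tic->t_queue, &log->l_reserveq);
		do {
			/* xlog_wait() drops the lock while sleeping */
			xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
			spin_lock(&log->l_grant_reserve_lock);
			free_bytes = xlog_space_left(log,
						&log->l_grant_reserve_head);
		} while (free_bytes < need_bytes);
		list_del_init(&tic->t_queue);
		spin_unlock(&log->l_grant_reserve_lock);
	}
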
770/* 736/*
771 * Determine if we have a transaction that has gone to disk 737 * Determine if we have a transaction that has gone to disk
@@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp)
831 * We may be holding the log iclog lock upon entering this routine. 797 * We may be holding the log iclog lock upon entering this routine.
832 */ 798 */
833xfs_lsn_t 799xfs_lsn_t
834xlog_assign_tail_lsn(xfs_mount_t *mp) 800xlog_assign_tail_lsn(
801 struct xfs_mount *mp)
835{ 802{
836 xfs_lsn_t tail_lsn; 803 xfs_lsn_t tail_lsn;
837 xlog_t *log = mp->m_log; 804 struct log *log = mp->m_log;
838 805
839 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 806 tail_lsn = xfs_trans_ail_tail(mp->m_ail);
840 spin_lock(&log->l_grant_lock); 807 if (!tail_lsn)
841 if (tail_lsn != 0) { 808 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
842 log->l_tail_lsn = tail_lsn;
843 } else {
844 tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
845 }
846 spin_unlock(&log->l_grant_lock);
847 809
810 atomic64_set(&log->l_tail_lsn, tail_lsn);
848 return tail_lsn; 811 return tail_lsn;
849} /* xlog_assign_tail_lsn */ 812}
850
851 813
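
The atomic64 LSN accessors used above (xlog_crack_atomic_lsn, xlog_assign_atomic_lsn) are defined elsewhere in the series; their assumed shape is simply a (cycle, block) pair packed into 64 bits:

	/* sketch of the assumed helpers: cycle in the high 32 bits */
	static inline void demo_crack_lsn(atomic64_t *lsn, uint *cycle,
					  uint *block)
	{
		xfs_lsn_t val = atomic64_read(lsn);

		*cycle = val >> 32;
		*block = val & 0xffffffff;
	}

	static inline void demo_assign_lsn(atomic64_t *lsn, uint cycle,
					   uint block)
	{
		atomic64_set(lsn, ((xfs_lsn_t)cycle << 32) | block);
	}
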
852/* 814/*
853 * Return the space in the log between the tail and the head. The head 815 * Return the space in the log between the tail and the head. The head
@@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
864 * result is that we return the size of the log as the amount of space left. 826 * result is that we return the size of the log as the amount of space left.
865 */ 827 */
866STATIC int 828STATIC int
867xlog_space_left(xlog_t *log, int cycle, int bytes) 829xlog_space_left(
868{ 830 struct log *log,
869 int free_bytes; 831 atomic64_t *head)
870 int tail_bytes; 832{
871 int tail_cycle; 833 int free_bytes;
872 834 int tail_bytes;
873 tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn)); 835 int tail_cycle;
874 tail_cycle = CYCLE_LSN(log->l_tail_lsn); 836 int head_cycle;
875 if ((tail_cycle == cycle) && (bytes >= tail_bytes)) { 837 int head_bytes;
876 free_bytes = log->l_logsize - (bytes - tail_bytes); 838
877 } else if ((tail_cycle + 1) < cycle) { 839 xlog_crack_grant_head(head, &head_cycle, &head_bytes);
840 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
841 tail_bytes = BBTOB(tail_bytes);
842 if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
843 free_bytes = log->l_logsize - (head_bytes - tail_bytes);
844 else if (tail_cycle + 1 < head_cycle)
878 return 0; 845 return 0;
879 } else if (tail_cycle < cycle) { 846 else if (tail_cycle < head_cycle) {
880 ASSERT(tail_cycle == (cycle - 1)); 847 ASSERT(tail_cycle == (head_cycle - 1));
881 free_bytes = tail_bytes - bytes; 848 free_bytes = tail_bytes - head_bytes;
882 } else { 849 } else {
883 /* 850 /*
884 * The reservation head is behind the tail. 851 * The reservation head is behind the tail.
@@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes)
889 "xlog_space_left: head behind tail\n" 856 "xlog_space_left: head behind tail\n"
890 " tail_cycle = %d, tail_bytes = %d\n" 857 " tail_cycle = %d, tail_bytes = %d\n"
891 " GH cycle = %d, GH bytes = %d", 858 " GH cycle = %d, GH bytes = %d",
892 tail_cycle, tail_bytes, cycle, bytes); 859 tail_cycle, tail_bytes, head_cycle, head_bytes);
893 ASSERT(0); 860 ASSERT(0);
894 free_bytes = log->l_logsize; 861 free_bytes = log->l_logsize;
895 } 862 }
896 return free_bytes; 863 return free_bytes;
897} /* xlog_space_left */ 864}
898 865
899 866
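
Stripped of the atomics, the free-space computation above is circular-buffer arithmetic on (cycle, bytes) positions. A standalone sketch of the three regular cases (the head-behind-tail error path is omitted):

	static int demo_space_left(int logsize, int tail_cycle, int tail_bytes,
				   int head_cycle, int head_bytes)
	{
		if (tail_cycle == head_cycle)
			return logsize - (head_bytes - tail_bytes);
		if (tail_cycle + 1 == head_cycle)
			return tail_bytes - head_bytes;
		return 0;	/* head a full cycle or more ahead: no space */
	}
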
900/* 867/*
@@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp,
1047 log->l_flags |= XLOG_ACTIVE_RECOVERY; 1014 log->l_flags |= XLOG_ACTIVE_RECOVERY;
1048 1015
1049 log->l_prev_block = -1; 1016 log->l_prev_block = -1;
1050 log->l_tail_lsn = xlog_assign_lsn(1, 0);
1051 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ 1017 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
1052 log->l_last_sync_lsn = log->l_tail_lsn; 1018 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
1019 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
1053 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1020 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
1054 log->l_grant_reserve_cycle = 1; 1021 xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
1055 log->l_grant_write_cycle = 1; 1022 xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
1023 INIT_LIST_HEAD(&log->l_reserveq);
1024 INIT_LIST_HEAD(&log->l_writeq);
1025 spin_lock_init(&log->l_grant_reserve_lock);
1026 spin_lock_init(&log->l_grant_write_lock);
1056 1027
1057 error = EFSCORRUPTED; 1028 error = EFSCORRUPTED;
1058 if (xfs_sb_version_hassector(&mp->m_sb)) { 1029 if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1094 log->l_xbuf = bp; 1065 log->l_xbuf = bp;
1095 1066
1096 spin_lock_init(&log->l_icloglock); 1067 spin_lock_init(&log->l_icloglock);
1097 spin_lock_init(&log->l_grant_lock); 1068 init_waitqueue_head(&log->l_flush_wait);
1098 sv_init(&log->l_flush_wait, 0, "flush_wait");
1099 1069
1100 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1070 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1101 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1071 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1151 1121
1152 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1122 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1153 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1123 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1154 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1124 init_waitqueue_head(&iclog->ic_force_wait);
1155 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1125 init_waitqueue_head(&iclog->ic_write_wait);
1156 1126
1157 iclogp = &iclog->ic_next; 1127 iclogp = &iclog->ic_next;
1158 } 1128 }
@@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp,
1167out_free_iclog: 1137out_free_iclog:
1168 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { 1138 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
1169 prev_iclog = iclog->ic_next; 1139 prev_iclog = iclog->ic_next;
1170 if (iclog->ic_bp) { 1140 if (iclog->ic_bp)
1171 sv_destroy(&iclog->ic_force_wait);
1172 sv_destroy(&iclog->ic_write_wait);
1173 xfs_buf_free(iclog->ic_bp); 1141 xfs_buf_free(iclog->ic_bp);
1174 }
1175 kmem_free(iclog); 1142 kmem_free(iclog);
1176 } 1143 }
1177 spinlock_destroy(&log->l_icloglock); 1144 spinlock_destroy(&log->l_icloglock);
1178 spinlock_destroy(&log->l_grant_lock);
1179 xfs_buf_free(log->l_xbuf); 1145 xfs_buf_free(log->l_xbuf);
1180out_free_log: 1146out_free_log:
1181 kmem_free(log); 1147 kmem_free(log);
@@ -1223,61 +1189,60 @@ xlog_commit_record(
1223 * water mark. In this manner, we would be creating a low water mark. 1189 * water mark. In this manner, we would be creating a low water mark.
1224 */ 1190 */
1225STATIC void 1191STATIC void
1226xlog_grant_push_ail(xfs_mount_t *mp, 1192xlog_grant_push_ail(
1227 int need_bytes) 1193 struct log *log,
1194 int need_bytes)
1228{ 1195{
1229 xlog_t *log = mp->m_log; /* pointer to the log */ 1196 xfs_lsn_t threshold_lsn = 0;
1230 xfs_lsn_t tail_lsn; /* lsn of the log tail */ 1197 xfs_lsn_t last_sync_lsn;
1231 xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */ 1198 int free_blocks;
1232 int free_blocks; /* free blocks left to write to */ 1199 int free_bytes;
1233 int free_bytes; /* free bytes left to write to */ 1200 int threshold_block;
1234 int threshold_block; /* block in lsn we'd like to be at */ 1201 int threshold_cycle;
1235 int threshold_cycle; /* lsn cycle we'd like to be at */ 1202 int free_threshold;
1236 int free_threshold; 1203
1237 1204 ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
1238 ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1205
1239 1206 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
1240 spin_lock(&log->l_grant_lock); 1207 free_blocks = BTOBBT(free_bytes);
1241 free_bytes = xlog_space_left(log, 1208
1242 log->l_grant_reserve_cycle, 1209 /*
1243 log->l_grant_reserve_bytes); 1210 * Set the threshold for the minimum number of free blocks in the
1244 tail_lsn = log->l_tail_lsn; 1211 * log to the maximum of what the caller needs, one quarter of the
1245 free_blocks = BTOBBT(free_bytes); 1212 * log, and 256 blocks.
1246 1213 */
1247 /* 1214 free_threshold = BTOBB(need_bytes);
1248 * Set the threshold for the minimum number of free blocks in the 1215 free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
1249 * log to the maximum of what the caller needs, one quarter of the 1216 free_threshold = MAX(free_threshold, 256);
1250 * log, and 256 blocks. 1217 if (free_blocks >= free_threshold)
1251 */ 1218 return;
1252 free_threshold = BTOBB(need_bytes); 1219
1253 free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2)); 1220 xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
1254 free_threshold = MAX(free_threshold, 256); 1221 &threshold_block);
1255 if (free_blocks < free_threshold) { 1222 threshold_block += free_threshold;
1256 threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
1257 threshold_cycle = CYCLE_LSN(tail_lsn);
1258 if (threshold_block >= log->l_logBBsize) { 1223 if (threshold_block >= log->l_logBBsize) {
1259 threshold_block -= log->l_logBBsize; 1224 threshold_block -= log->l_logBBsize;
1260 threshold_cycle += 1; 1225 threshold_cycle += 1;
1261 } 1226 }
1262 threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block); 1227 threshold_lsn = xlog_assign_lsn(threshold_cycle,
1228 threshold_block);
1229 /*
1230 * Don't pass in an lsn greater than the lsn of the last
1231 * log record known to be on disk. Use a snapshot of the last sync lsn
1232 * so that it doesn't change between the compare and the set.
1233 */
1234 last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
1235 if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
1236 threshold_lsn = last_sync_lsn;
1263 1237
1264 /* Don't pass in an lsn greater than the lsn of the last 1238 /*
1265 * log record known to be on disk. 1239 * Get the transaction layer to kick the dirty buffers out to
1240 * disk asynchronously. No point in trying to do this if
1241 * the filesystem is shutting down.
1266 */ 1242 */
1267 if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0) 1243 if (!XLOG_FORCED_SHUTDOWN(log))
1268 threshold_lsn = log->l_last_sync_lsn; 1244 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1269 } 1245}
1270 spin_unlock(&log->l_grant_lock);
1271
1272 /*
1273 * Get the transaction layer to kick the dirty buffers out to
1274 * disk asynchronously. No point in trying to do this if
1275 * the filesystem is shutting down.
1276 */
1277 if (threshold_lsn &&
1278 !XLOG_FORCED_SHUTDOWN(log))
1279 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1280} /* xlog_grant_push_ail */
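
With l_grant_lock gone, the rewritten xlog_grant_push_ail() above derives its push target from a cracked snapshot of l_tail_lsn using plain modular arithmetic: add the free-space threshold to the tail block and, if that walks off the physical end of the log, wrap the block and bump the cycle (the result is then clamped to the last LSN known to be on disk). A runnable user-space model of the wraparound step, with all sizes invented and basic-block unit conversions elided:

    #include <stdint.h>
    #include <stdio.h>

    /* model: an LSN packs (cycle << 32) | block; log size is in blocks */
    static uint64_t assign_lsn(uint32_t cycle, uint32_t block)
    {
        return ((uint64_t)cycle << 32) | block;
    }

    int main(void)
    {
        uint32_t log_size = 8192;          /* stands in for l_logBBsize */
        uint32_t tail_cycle = 7, tail_block = 8000;
        uint32_t free_threshold = 512;     /* blocks we want free */

        uint32_t cycle = tail_cycle;
        uint32_t block = tail_block + free_threshold;

        if (block >= log_size) {           /* wrapped past the log's end */
            block -= log_size;
            cycle += 1;
        }
        printf("push target: cycle %u block %u (lsn 0x%016llx)\n",
               cycle, block, (unsigned long long)assign_lsn(cycle, block));
        return 0;
    }
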
1281 1246
1282/* 1247/*
1283 * The bdstrat callback function for log bufs. This gives us a central 1248 * The bdstrat callback function for log bufs. This gives us a central
@@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log,
1372 roundoff < BBTOB(1))); 1337 roundoff < BBTOB(1)));
1373 1338
1374 /* move grant heads by roundoff in sync */ 1339 /* move grant heads by roundoff in sync */
1375 spin_lock(&log->l_grant_lock); 1340 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
1376 xlog_grant_add_space(log, roundoff); 1341 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
1377 spin_unlock(&log->l_grant_lock);
1378 1342
1379 /* put cycle number in every block */ 1343 /* put cycle number in every block */
1380 xlog_pack_data(log, iclog, roundoff); 1344 xlog_pack_data(log, iclog, roundoff);
@@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log)
1489 1453
1490 iclog = log->l_iclog; 1454 iclog = log->l_iclog;
1491 for (i=0; i<log->l_iclog_bufs; i++) { 1455 for (i=0; i<log->l_iclog_bufs; i++) {
1492 sv_destroy(&iclog->ic_force_wait);
1493 sv_destroy(&iclog->ic_write_wait);
1494 xfs_buf_free(iclog->ic_bp); 1456 xfs_buf_free(iclog->ic_bp);
1495 next_iclog = iclog->ic_next; 1457 next_iclog = iclog->ic_next;
1496 kmem_free(iclog); 1458 kmem_free(iclog);
1497 iclog = next_iclog; 1459 iclog = next_iclog;
1498 } 1460 }
1499 spinlock_destroy(&log->l_icloglock); 1461 spinlock_destroy(&log->l_icloglock);
1500 spinlock_destroy(&log->l_grant_lock);
1501 1462
1502 xfs_buf_free(log->l_xbuf); 1463 xfs_buf_free(log->l_xbuf);
1503 log->l_mp->m_log = NULL; 1464 log->l_mp->m_log = NULL;
@@ -2232,7 +2193,7 @@ xlog_state_do_callback(
2232 lowest_lsn = xlog_get_lowest_lsn(log); 2193 lowest_lsn = xlog_get_lowest_lsn(log);
2233 if (lowest_lsn && 2194 if (lowest_lsn &&
2234 XFS_LSN_CMP(lowest_lsn, 2195 XFS_LSN_CMP(lowest_lsn,
2235 be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { 2196 be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
2236 iclog = iclog->ic_next; 2197 iclog = iclog->ic_next;
2237 continue; /* Leave this iclog for 2198 continue; /* Leave this iclog for
2238 * another thread */ 2199 * another thread */
@@ -2240,23 +2201,21 @@ xlog_state_do_callback(
2240 2201
2241 iclog->ic_state = XLOG_STATE_CALLBACK; 2202 iclog->ic_state = XLOG_STATE_CALLBACK;
2242 2203
2243 spin_unlock(&log->l_icloglock);
2244 2204
2245 /* l_last_sync_lsn field protected by 2205 /*
2246 * l_grant_lock. Don't worry about iclog's lsn. 2206 * update the last_sync_lsn before we drop the
2247 * No one else can be here except us. 2207 * icloglock to ensure we are the only one that
2208 * can update it.
2248 */ 2209 */
2249 spin_lock(&log->l_grant_lock); 2210 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2250 ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn, 2211 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2251 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2212 atomic64_set(&log->l_last_sync_lsn,
2252 log->l_last_sync_lsn = 2213 be64_to_cpu(iclog->ic_header.h_lsn));
2253 be64_to_cpu(iclog->ic_header.h_lsn);
2254 spin_unlock(&log->l_grant_lock);
2255 2214
2256 } else { 2215 } else
2257 spin_unlock(&log->l_icloglock);
2258 ioerrors++; 2216 ioerrors++;
2259 } 2217
2218 spin_unlock(&log->l_icloglock);
2260 2219
2261 /* 2220 /*
2262 * Keep processing entries in the callback list until 2221 * Keep processing entries in the callback list until
@@ -2297,7 +2256,7 @@ xlog_state_do_callback(
2297 xlog_state_clean_log(log); 2256 xlog_state_clean_log(log);
2298 2257
2299 /* wake up threads waiting in xfs_log_force() */ 2258 /* wake up threads waiting in xfs_log_force() */
2300 sv_broadcast(&iclog->ic_force_wait); 2259 wake_up_all(&iclog->ic_force_wait);
2301 2260
2302 iclog = iclog->ic_next; 2261 iclog = iclog->ic_next;
2303 } while (first_iclog != iclog); 2262 } while (first_iclog != iclog);
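
Although l_last_sync_lsn is now an atomic64, the update above still happens before icloglock is dropped: the lock serialises the single writer, while readers such as xlog_grant_push_ail() can sample the value with one atomic64_read() and no lock at all. A kernel-style sketch of that writer/reader split (demo names invented; the header providing atomic64_t varies across kernel versions):

    #include <linux/atomic.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct demo_log {
        spinlock_t icloglock;
        atomic64_t last_sync_lsn;
    };

    /* writer: caller holds icloglock, so stores are fully serialised */
    static void demo_record_sync(struct demo_log *log, u64 lsn)
    {
        atomic64_set(&log->last_sync_lsn, lsn);
    }

    /* reader: lock-free; one atomic64_read() yields a consistent LSN */
    static u64 demo_read_sync(struct demo_log *log)
    {
        return atomic64_read(&log->last_sync_lsn);
    }
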
@@ -2344,7 +2303,7 @@ xlog_state_do_callback(
2344 spin_unlock(&log->l_icloglock); 2303 spin_unlock(&log->l_icloglock);
2345 2304
2346 if (wake) 2305 if (wake)
2347 sv_broadcast(&log->l_flush_wait); 2306 wake_up_all(&log->l_flush_wait);
2348} 2307}
2349 2308
2350 2309
@@ -2395,7 +2354,7 @@ xlog_state_done_syncing(
2395 * iclog buffer, we wake them all, one will get to do the 2354 * iclog buffer, we wake them all, one will get to do the
2396 * I/O, the others get to wait for the result. 2355 * I/O, the others get to wait for the result.
2397 */ 2356 */
2398 sv_broadcast(&iclog->ic_write_wait); 2357 wake_up_all(&iclog->ic_write_wait);
2399 spin_unlock(&log->l_icloglock); 2358 spin_unlock(&log->l_icloglock);
2400 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2359 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2401} /* xlog_state_done_syncing */ 2360} /* xlog_state_done_syncing */
@@ -2444,7 +2403,7 @@ restart:
2444 XFS_STATS_INC(xs_log_noiclogs); 2403 XFS_STATS_INC(xs_log_noiclogs);
2445 2404
2446 /* Wait for log writes to have flushed */ 2405 /* Wait for log writes to have flushed */
2447 sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); 2406 xlog_wait(&log->l_flush_wait, &log->l_icloglock);
2448 goto restart; 2407 goto restart;
2449 } 2408 }
2450 2409
@@ -2527,6 +2486,18 @@ restart:
2527 * 2486 *
2528 * Once a ticket gets put onto the reserveq, it will only return after 2487 * Once a ticket gets put onto the reserveq, it will only return after
2529 * the needed reservation is satisfied. 2488 * the needed reservation is satisfied.
2489 *
2490 * This function is structured so that it has a lock free fast path. This is
2491 * necessary because every new transaction reservation will come through this
2492 * path. Hence any lock will be globally hot if we take it unconditionally on
2493 * every pass.
2494 *
2495 * As tickets are only ever moved on and off the reserveq under the
2496 * l_grant_reserve_lock, we only need to take that lock if we are going
2497 * to add the ticket to the queue and sleep. We can avoid taking the lock if the
2498 * ticket was never added to the reserveq because the t_queue list head will be
2499 * empty and we hold the only reference to it so it can safely be checked
2500 * unlocked.
2530 */ 2501 */
2531STATIC int 2502STATIC int
2532xlog_grant_log_space(xlog_t *log, 2503xlog_grant_log_space(xlog_t *log,
@@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log,
2534{ 2505{
2535 int free_bytes; 2506 int free_bytes;
2536 int need_bytes; 2507 int need_bytes;
2537#ifdef DEBUG
2538 xfs_lsn_t tail_lsn;
2539#endif
2540
2541 2508
2542#ifdef DEBUG 2509#ifdef DEBUG
2543 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 2510 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2544 panic("grant Recovery problem"); 2511 panic("grant Recovery problem");
2545#endif 2512#endif
2546 2513
2547 /* Is there space or do we need to sleep? */
2548 spin_lock(&log->l_grant_lock);
2549
2550 trace_xfs_log_grant_enter(log, tic); 2514 trace_xfs_log_grant_enter(log, tic);
2551 2515
2516 need_bytes = tic->t_unit_res;
2517 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2518 need_bytes *= tic->t_ocnt;
2519
2552 /* something is already sleeping; insert new transaction at end */ 2520 /* something is already sleeping; insert new transaction at end */
2553 if (log->l_reserve_headq) { 2521 if (!list_empty_careful(&log->l_reserveq)) {
2554 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2522 spin_lock(&log->l_grant_reserve_lock);
2523 /* recheck the queue now we are locked */
2524 if (list_empty(&log->l_reserveq)) {
2525 spin_unlock(&log->l_grant_reserve_lock);
2526 goto redo;
2527 }
2528 list_add_tail(&tic->t_queue, &log->l_reserveq);
2555 2529
2556 trace_xfs_log_grant_sleep1(log, tic); 2530 trace_xfs_log_grant_sleep1(log, tic);
2557 2531
@@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log,
2563 goto error_return; 2537 goto error_return;
2564 2538
2565 XFS_STATS_INC(xs_sleep_logspace); 2539 XFS_STATS_INC(xs_sleep_logspace);
2566 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2540 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2541
2567 /* 2542 /*
2568 * If we got an error, and the filesystem is shutting down, 2543 * If we got an error, and the filesystem is shutting down,
2569 * we'll catch it down below. So just continue... 2544 * we'll catch it down below. So just continue...
2570 */ 2545 */
2571 trace_xfs_log_grant_wake1(log, tic); 2546 trace_xfs_log_grant_wake1(log, tic);
2572 spin_lock(&log->l_grant_lock);
2573 } 2547 }
2574 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2575 need_bytes = tic->t_unit_res*tic->t_ocnt;
2576 else
2577 need_bytes = tic->t_unit_res;
2578 2548
2579redo: 2549redo:
2580 if (XLOG_FORCED_SHUTDOWN(log)) 2550 if (XLOG_FORCED_SHUTDOWN(log))
2581 goto error_return; 2551 goto error_return_unlocked;
2582 2552
2583 free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle, 2553 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2584 log->l_grant_reserve_bytes);
2585 if (free_bytes < need_bytes) { 2554 if (free_bytes < need_bytes) {
2586 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2555 spin_lock(&log->l_grant_reserve_lock);
2587 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2556 if (list_empty(&tic->t_queue))
2557 list_add_tail(&tic->t_queue, &log->l_reserveq);
2588 2558
2589 trace_xfs_log_grant_sleep2(log, tic); 2559 trace_xfs_log_grant_sleep2(log, tic);
2590 2560
2591 spin_unlock(&log->l_grant_lock);
2592 xlog_grant_push_ail(log->l_mp, need_bytes);
2593 spin_lock(&log->l_grant_lock);
2594
2595 XFS_STATS_INC(xs_sleep_logspace);
2596 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2597
2598 spin_lock(&log->l_grant_lock);
2599 if (XLOG_FORCED_SHUTDOWN(log)) 2561 if (XLOG_FORCED_SHUTDOWN(log))
2600 goto error_return; 2562 goto error_return;
2601 2563
2602 trace_xfs_log_grant_wake2(log, tic); 2564 xlog_grant_push_ail(log, need_bytes);
2565
2566 XFS_STATS_INC(xs_sleep_logspace);
2567 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2603 2568
2569 trace_xfs_log_grant_wake2(log, tic);
2604 goto redo; 2570 goto redo;
2605 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2571 }
2606 xlog_del_ticketq(&log->l_reserve_headq, tic);
2607 2572
2608 /* we've got enough space */ 2573 if (!list_empty(&tic->t_queue)) {
2609 xlog_grant_add_space(log, need_bytes); 2574 spin_lock(&log->l_grant_reserve_lock);
2610#ifdef DEBUG 2575 list_del_init(&tic->t_queue);
2611 tail_lsn = log->l_tail_lsn; 2576 spin_unlock(&log->l_grant_reserve_lock);
2612 /*
2613 * Check to make sure the grant write head didn't just overlap the
2614 * tail. If the cycles are the same, we can't be overlapping.
2615 * Otherwise, make sure that the cycles differ by exactly one and
2616 * check the byte count.
2617 */
2618 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2619 ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2620 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2621 } 2577 }
2622#endif 2578
2579 /* we've got enough space */
2580 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2581 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2623 trace_xfs_log_grant_exit(log, tic); 2582 trace_xfs_log_grant_exit(log, tic);
2624 xlog_verify_grant_head(log, 1); 2583 xlog_verify_grant_tail(log);
2625 spin_unlock(&log->l_grant_lock);
2626 return 0; 2584 return 0;
2627 2585
2628 error_return: 2586error_return_unlocked:
2629 if (tic->t_flags & XLOG_TIC_IN_Q) 2587 spin_lock(&log->l_grant_reserve_lock);
2630 xlog_del_ticketq(&log->l_reserve_headq, tic); 2588error_return:
2631 2589 list_del_init(&tic->t_queue);
2590 spin_unlock(&log->l_grant_reserve_lock);
2632 trace_xfs_log_grant_error(log, tic); 2591 trace_xfs_log_grant_error(log, tic);
2633 2592
2634 /* 2593 /*
@@ -2638,7 +2597,6 @@ redo:
2638 */ 2597 */
2639 tic->t_curr_res = 0; 2598 tic->t_curr_res = 0;
2640 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 2599 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2641 spin_unlock(&log->l_grant_lock);
2642 return XFS_ERROR(EIO); 2600 return XFS_ERROR(EIO);
2643} /* xlog_grant_log_space */ 2601} /* xlog_grant_log_space */
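
The fast path promised by the comment block above this function rests on a double-checked queue test: peek with list_empty_careful() outside the lock, and only when the queue looks busy take the lock and recheck before adding the ticket. A condensed kernel-style sketch of that shape (demo names invented; the real slow path sleeps via xlog_wait() rather than unlocking and returning):

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_ticket { struct list_head t_queue; };

    struct demo_log {
        spinlock_t reserve_lock;
        struct list_head reserveq;
    };

    static void demo_maybe_queue(struct demo_log *log, struct demo_ticket *tic)
    {
        /* unlocked peek; only ever used as a hint, so staleness is fine */
        if (list_empty_careful(&log->reserveq))
            return;                /* fast path: no sleepers, no lock taken */

        spin_lock(&log->reserve_lock);
        /* recheck under the lock: the queue may have drained meanwhile */
        if (list_empty(&log->reserveq)) {
            spin_unlock(&log->reserve_lock);
            return;
        }
        list_add_tail(&tic->t_queue, &log->reserveq);
        spin_unlock(&log->reserve_lock);
        /* the real code sleeps via xlog_wait() instead of unlocking here */
    }
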
2644 2602
@@ -2646,17 +2604,14 @@ redo:
2646/* 2604/*
2647 * Replenish the byte reservation required by moving the grant write head. 2605 * Replenish the byte reservation required by moving the grant write head.
2648 * 2606 *
2649 * 2607 * Similar to xlog_grant_log_space, the function is structured to have a lock
2608 * free fast path.
2650 */ 2609 */
2651STATIC int 2610STATIC int
2652xlog_regrant_write_log_space(xlog_t *log, 2611xlog_regrant_write_log_space(xlog_t *log,
2653 xlog_ticket_t *tic) 2612 xlog_ticket_t *tic)
2654{ 2613{
2655 int free_bytes, need_bytes; 2614 int free_bytes, need_bytes;
2656 xlog_ticket_t *ntic;
2657#ifdef DEBUG
2658 xfs_lsn_t tail_lsn;
2659#endif
2660 2615
2661 tic->t_curr_res = tic->t_unit_res; 2616 tic->t_curr_res = tic->t_unit_res;
2662 xlog_tic_reset_res(tic); 2617 xlog_tic_reset_res(tic);
@@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log,
2669 panic("regrant Recovery problem"); 2624 panic("regrant Recovery problem");
2670#endif 2625#endif
2671 2626
2672 spin_lock(&log->l_grant_lock);
2673
2674 trace_xfs_log_regrant_write_enter(log, tic); 2627 trace_xfs_log_regrant_write_enter(log, tic);
2675
2676 if (XLOG_FORCED_SHUTDOWN(log)) 2628 if (XLOG_FORCED_SHUTDOWN(log))
2677 goto error_return; 2629 goto error_return_unlocked;
2678 2630
2679 /* If there are other waiters on the queue then give them a 2631 /* If there are other waiters on the queue then give them a
2680 * chance at logspace before us. Wake up the first waiters, 2632 * chance at logspace before us. Wake up the first waiters,
@@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log,
2683 * this transaction. 2635 * this transaction.
2684 */ 2636 */
2685 need_bytes = tic->t_unit_res; 2637 need_bytes = tic->t_unit_res;
2686 if ((ntic = log->l_write_headq)) { 2638 if (!list_empty_careful(&log->l_writeq)) {
2687 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2639 struct xlog_ticket *ntic;
2688 log->l_grant_write_bytes); 2640
2689 do { 2641 spin_lock(&log->l_grant_write_lock);
2642 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2643 list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2690 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); 2644 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2691 2645
2692 if (free_bytes < ntic->t_unit_res) 2646 if (free_bytes < ntic->t_unit_res)
2693 break; 2647 break;
2694 free_bytes -= ntic->t_unit_res; 2648 free_bytes -= ntic->t_unit_res;
2695 sv_signal(&ntic->t_wait); 2649 wake_up(&ntic->t_wait);
2696 ntic = ntic->t_next; 2650 }
2697 } while (ntic != log->l_write_headq);
2698
2699 if (ntic != log->l_write_headq) {
2700 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2701 xlog_ins_ticketq(&log->l_write_headq, tic);
2702 2651
2652 if (ntic != list_first_entry(&log->l_writeq,
2653 struct xlog_ticket, t_queue)) {
2654 if (list_empty(&tic->t_queue))
2655 list_add_tail(&tic->t_queue, &log->l_writeq);
2703 trace_xfs_log_regrant_write_sleep1(log, tic); 2656 trace_xfs_log_regrant_write_sleep1(log, tic);
2704 2657
2705 spin_unlock(&log->l_grant_lock); 2658 xlog_grant_push_ail(log, need_bytes);
2706 xlog_grant_push_ail(log->l_mp, need_bytes);
2707 spin_lock(&log->l_grant_lock);
2708 2659
2709 XFS_STATS_INC(xs_sleep_logspace); 2660 XFS_STATS_INC(xs_sleep_logspace);
2710 sv_wait(&tic->t_wait, PINOD|PLTWAIT, 2661 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2711 &log->l_grant_lock, s);
2712
2713 /* If we're shutting down, this tic is already
2714 * off the queue */
2715 spin_lock(&log->l_grant_lock);
2716 if (XLOG_FORCED_SHUTDOWN(log))
2717 goto error_return;
2718
2719 trace_xfs_log_regrant_write_wake1(log, tic); 2662 trace_xfs_log_regrant_write_wake1(log, tic);
2720 } 2663 } else
2664 spin_unlock(&log->l_grant_write_lock);
2721 } 2665 }
2722 2666
2723redo: 2667redo:
2724 if (XLOG_FORCED_SHUTDOWN(log)) 2668 if (XLOG_FORCED_SHUTDOWN(log))
2725 goto error_return; 2669 goto error_return_unlocked;
2726 2670
2727 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2671 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2728 log->l_grant_write_bytes);
2729 if (free_bytes < need_bytes) { 2672 if (free_bytes < need_bytes) {
2730 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2673 spin_lock(&log->l_grant_write_lock);
2731 xlog_ins_ticketq(&log->l_write_headq, tic); 2674 if (list_empty(&tic->t_queue))
2732 spin_unlock(&log->l_grant_lock); 2675 list_add_tail(&tic->t_queue, &log->l_writeq);
2733 xlog_grant_push_ail(log->l_mp, need_bytes);
2734 spin_lock(&log->l_grant_lock);
2735
2736 XFS_STATS_INC(xs_sleep_logspace);
2737 trace_xfs_log_regrant_write_sleep2(log, tic);
2738
2739 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2740 2676
2741 /* If we're shutting down, this tic is already off the queue */
2742 spin_lock(&log->l_grant_lock);
2743 if (XLOG_FORCED_SHUTDOWN(log)) 2677 if (XLOG_FORCED_SHUTDOWN(log))
2744 goto error_return; 2678 goto error_return;
2745 2679
2680 xlog_grant_push_ail(log, need_bytes);
2681
2682 XFS_STATS_INC(xs_sleep_logspace);
2683 trace_xfs_log_regrant_write_sleep2(log, tic);
2684 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2685
2746 trace_xfs_log_regrant_write_wake2(log, tic); 2686 trace_xfs_log_regrant_write_wake2(log, tic);
2747 goto redo; 2687 goto redo;
2748 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2688 }
2749 xlog_del_ticketq(&log->l_write_headq, tic);
2750 2689
2751 /* we've got enough space */ 2690 if (!list_empty(&tic->t_queue)) {
2752 xlog_grant_add_space_write(log, need_bytes); 2691 spin_lock(&log->l_grant_write_lock);
2753#ifdef DEBUG 2692 list_del_init(&tic->t_queue);
2754 tail_lsn = log->l_tail_lsn; 2693 spin_unlock(&log->l_grant_write_lock);
2755 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
2756 ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
2757 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2758 } 2694 }
2759#endif
2760 2695
2696 /* we've got enough space */
2697 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2761 trace_xfs_log_regrant_write_exit(log, tic); 2698 trace_xfs_log_regrant_write_exit(log, tic);
2762 2699 xlog_verify_grant_tail(log);
2763 xlog_verify_grant_head(log, 1);
2764 spin_unlock(&log->l_grant_lock);
2765 return 0; 2700 return 0;
2766 2701
2767 2702
2703 error_return_unlocked:
2704 spin_lock(&log->l_grant_write_lock);
2768 error_return: 2705 error_return:
2769 if (tic->t_flags & XLOG_TIC_IN_Q) 2706 list_del_init(&tic->t_queue);
2770 xlog_del_ticketq(&log->l_reserve_headq, tic); 2707 spin_unlock(&log->l_grant_write_lock);
2771
2772 trace_xfs_log_regrant_write_error(log, tic); 2708 trace_xfs_log_regrant_write_error(log, tic);
2773 2709
2774 /* 2710 /*
@@ -2778,7 +2714,6 @@ redo:
2778 */ 2714 */
2779 tic->t_curr_res = 0; 2715 tic->t_curr_res = 0;
2780 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ 2716 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2781 spin_unlock(&log->l_grant_lock);
2782 return XFS_ERROR(EIO); 2717 return XFS_ERROR(EIO);
2783} /* xlog_regrant_write_log_space */ 2718} /* xlog_regrant_write_log_space */
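
The wakeup loop above hands out space in strict FIFO order: walk l_writeq from the head and wake each ticket only while the remaining free bytes cover its unit reservation, so a large waiter at the front cannot be starved by smaller ones behind it. A kernel-style sketch of the budgeted walk (demo names invented; the caller is assumed to hold the queue's lock):

    #include <linux/list.h>
    #include <linux/wait.h>

    struct demo_ticket {
        struct list_head t_queue;
        wait_queue_head_t t_wait;
        int t_unit_res;
    };

    /* called with the write queue's spinlock held */
    static void demo_wake_in_order(struct list_head *writeq, int free_bytes)
    {
        struct demo_ticket *ntic;

        list_for_each_entry(ntic, writeq, t_queue) {
            if (free_bytes < ntic->t_unit_res)
                break;    /* head doesn't fit: stop, preserving FIFO order */
            free_bytes -= ntic->t_unit_res;
            wake_up(&ntic->t_wait);
        }
    }
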
2784 2719
@@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2799 if (ticket->t_cnt > 0) 2734 if (ticket->t_cnt > 0)
2800 ticket->t_cnt--; 2735 ticket->t_cnt--;
2801 2736
2802 spin_lock(&log->l_grant_lock); 2737 xlog_grant_sub_space(log, &log->l_grant_reserve_head,
2803 xlog_grant_sub_space(log, ticket->t_curr_res); 2738 ticket->t_curr_res);
2739 xlog_grant_sub_space(log, &log->l_grant_write_head,
2740 ticket->t_curr_res);
2804 ticket->t_curr_res = ticket->t_unit_res; 2741 ticket->t_curr_res = ticket->t_unit_res;
2805 xlog_tic_reset_res(ticket); 2742 xlog_tic_reset_res(ticket);
2806 2743
2807 trace_xfs_log_regrant_reserve_sub(log, ticket); 2744 trace_xfs_log_regrant_reserve_sub(log, ticket);
2808 2745
2809 xlog_verify_grant_head(log, 1);
2810
2811 /* just return if we still have some of the pre-reserved space */ 2746 /* just return if we still have some of the pre-reserved space */
2812 if (ticket->t_cnt > 0) { 2747 if (ticket->t_cnt > 0)
2813 spin_unlock(&log->l_grant_lock);
2814 return; 2748 return;
2815 }
2816 2749
2817 xlog_grant_add_space_reserve(log, ticket->t_unit_res); 2750 xlog_grant_add_space(log, &log->l_grant_reserve_head,
2751 ticket->t_unit_res);
2818 2752
2819 trace_xfs_log_regrant_reserve_exit(log, ticket); 2753 trace_xfs_log_regrant_reserve_exit(log, ticket);
2820 2754
2821 xlog_verify_grant_head(log, 0);
2822 spin_unlock(&log->l_grant_lock);
2823 ticket->t_curr_res = ticket->t_unit_res; 2755 ticket->t_curr_res = ticket->t_unit_res;
2824 xlog_tic_reset_res(ticket); 2756 xlog_tic_reset_res(ticket);
2825} /* xlog_regrant_reserve_log_space */ 2757} /* xlog_regrant_reserve_log_space */
@@ -2843,28 +2775,29 @@ STATIC void
2843xlog_ungrant_log_space(xlog_t *log, 2775xlog_ungrant_log_space(xlog_t *log,
2844 xlog_ticket_t *ticket) 2776 xlog_ticket_t *ticket)
2845{ 2777{
2778 int bytes;
2779
2846 if (ticket->t_cnt > 0) 2780 if (ticket->t_cnt > 0)
2847 ticket->t_cnt--; 2781 ticket->t_cnt--;
2848 2782
2849 spin_lock(&log->l_grant_lock);
2850 trace_xfs_log_ungrant_enter(log, ticket); 2783 trace_xfs_log_ungrant_enter(log, ticket);
2851
2852 xlog_grant_sub_space(log, ticket->t_curr_res);
2853
2854 trace_xfs_log_ungrant_sub(log, ticket); 2784 trace_xfs_log_ungrant_sub(log, ticket);
2855 2785
2856 /* If this is a permanent reservation ticket, we may be able to free 2786 /*
2787 * If this is a permanent reservation ticket, we may be able to free
2857 * up more space based on the remaining count. 2788 * up more space based on the remaining count.
2858 */ 2789 */
2790 bytes = ticket->t_curr_res;
2859 if (ticket->t_cnt > 0) { 2791 if (ticket->t_cnt > 0) {
2860 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); 2792 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
2861 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); 2793 bytes += ticket->t_unit_res*ticket->t_cnt;
2862 } 2794 }
2863 2795
2796 xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
2797 xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
2798
2864 trace_xfs_log_ungrant_exit(log, ticket); 2799 trace_xfs_log_ungrant_exit(log, ticket);
2865 2800
2866 xlog_verify_grant_head(log, 1);
2867 spin_unlock(&log->l_grant_lock);
2868 xfs_log_move_tail(log->l_mp, 1); 2801 xfs_log_move_tail(log->l_mp, 1);
2869} /* xlog_ungrant_log_space */ 2802} /* xlog_ungrant_log_space */
2870 2803
@@ -2901,11 +2834,11 @@ xlog_state_release_iclog(
2901 2834
2902 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { 2835 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2903 /* update tail before writing to iclog */ 2836 /* update tail before writing to iclog */
2904 xlog_assign_tail_lsn(log->l_mp); 2837 xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
2905 sync++; 2838 sync++;
2906 iclog->ic_state = XLOG_STATE_SYNCING; 2839 iclog->ic_state = XLOG_STATE_SYNCING;
2907 iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); 2840 iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
2908 xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); 2841 xlog_verify_tail_lsn(log, iclog, tail_lsn);
2909 /* cycle incremented when incrementing curr_block */ 2842 /* cycle incremented when incrementing curr_block */
2910 } 2843 }
2911 spin_unlock(&log->l_icloglock); 2844 spin_unlock(&log->l_icloglock);
@@ -3088,7 +3021,7 @@ maybe_sleep:
3088 return XFS_ERROR(EIO); 3021 return XFS_ERROR(EIO);
3089 } 3022 }
3090 XFS_STATS_INC(xs_log_force_sleep); 3023 XFS_STATS_INC(xs_log_force_sleep);
3091 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); 3024 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3092 /* 3025 /*
3093 * No need to grab the log lock here since we're 3026 * No need to grab the log lock here since we're
3094 * only deciding whether or not to return EIO 3027 * only deciding whether or not to return EIO
@@ -3206,8 +3139,8 @@ try_again:
3206 3139
3207 XFS_STATS_INC(xs_log_force_sleep); 3140 XFS_STATS_INC(xs_log_force_sleep);
3208 3141
3209 sv_wait(&iclog->ic_prev->ic_write_wait, 3142 xlog_wait(&iclog->ic_prev->ic_write_wait,
3210 PSWP, &log->l_icloglock, s); 3143 &log->l_icloglock);
3211 if (log_flushed) 3144 if (log_flushed)
3212 *log_flushed = 1; 3145 *log_flushed = 1;
3213 already_slept = 1; 3146 already_slept = 1;
@@ -3235,7 +3168,7 @@ try_again:
3235 return XFS_ERROR(EIO); 3168 return XFS_ERROR(EIO);
3236 } 3169 }
3237 XFS_STATS_INC(xs_log_force_sleep); 3170 XFS_STATS_INC(xs_log_force_sleep);
3238 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); 3171 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3239 /* 3172 /*
3240 * No need to grab the log lock here since we're 3173 * No need to grab the log lock here since we're
3241 * only deciding whether or not to return EIO 3174 * only deciding whether or not to return EIO
@@ -3310,10 +3243,8 @@ xfs_log_ticket_put(
3310 xlog_ticket_t *ticket) 3243 xlog_ticket_t *ticket)
3311{ 3244{
3312 ASSERT(atomic_read(&ticket->t_ref) > 0); 3245 ASSERT(atomic_read(&ticket->t_ref) > 0);
3313 if (atomic_dec_and_test(&ticket->t_ref)) { 3246 if (atomic_dec_and_test(&ticket->t_ref))
3314 sv_destroy(&ticket->t_wait);
3315 kmem_zone_free(xfs_log_ticket_zone, ticket); 3247 kmem_zone_free(xfs_log_ticket_zone, ticket);
3316 }
3317} 3248}
3318 3249
3319xlog_ticket_t * 3250xlog_ticket_t *
@@ -3435,6 +3366,7 @@ xlog_ticket_alloc(
3435 } 3366 }
3436 3367
3437 atomic_set(&tic->t_ref, 1); 3368 atomic_set(&tic->t_ref, 1);
3369 INIT_LIST_HEAD(&tic->t_queue);
3438 tic->t_unit_res = unit_bytes; 3370 tic->t_unit_res = unit_bytes;
3439 tic->t_curr_res = unit_bytes; 3371 tic->t_curr_res = unit_bytes;
3440 tic->t_cnt = cnt; 3372 tic->t_cnt = cnt;
@@ -3445,7 +3377,7 @@ xlog_ticket_alloc(
3445 tic->t_trans_type = 0; 3377 tic->t_trans_type = 0;
3446 if (xflags & XFS_LOG_PERM_RESERV) 3378 if (xflags & XFS_LOG_PERM_RESERV)
3447 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3379 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3448 sv_init(&tic->t_wait, SV_DEFAULT, "logtick"); 3380 init_waitqueue_head(&tic->t_wait);
3449 3381
3450 xlog_tic_reset_res(tic); 3382 xlog_tic_reset_res(tic);
3451 3383
@@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr(
3484} 3416}
3485 3417
3486STATIC void 3418STATIC void
3487xlog_verify_grant_head(xlog_t *log, int equals) 3419xlog_verify_grant_tail(
3420 struct log *log)
3488{ 3421{
3489 if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) { 3422 int tail_cycle, tail_blocks;
3490 if (equals) 3423 int cycle, space;
3491 ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes); 3424
3492 else 3425 /*
3493 ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes); 3426 * Check to make sure the grant write head didn't just overlap the
3494 } else { 3427 * tail. If the cycles are the same, we can't be overlapping.
3495 ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle); 3428 * Otherwise, make sure that the cycles differ by exactly one and
3496 ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes); 3429 * check the byte count.
3497 } 3430 */
3498} /* xlog_verify_grant_head */ 3431 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
3432 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3433 if (tail_cycle != cycle) {
3434 ASSERT(cycle - 1 == tail_cycle);
3435 ASSERT(space <= BBTOB(tail_blocks));
3436 }
3437}
3499 3438
3500/* check if it will fit */ 3439/* check if it will fit */
3501STATIC void 3440STATIC void
@@ -3716,12 +3655,10 @@ xfs_log_force_umount(
3716 xlog_cil_force(log); 3655 xlog_cil_force(log);
3717 3656
3718 /* 3657 /*
3719 * We must hold both the GRANT lock and the LOG lock, 3658 * mark the filesystem and the log as in a shutdown state and wake
3720 * before we mark the filesystem SHUTDOWN and wake 3659 * everybody up to tell them the bad news.
3721 * everybody up to tell the bad news.
3722 */ 3660 */
3723 spin_lock(&log->l_icloglock); 3661 spin_lock(&log->l_icloglock);
3724 spin_lock(&log->l_grant_lock);
3725 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3662 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3726 if (mp->m_sb_bp) 3663 if (mp->m_sb_bp)
3727 XFS_BUF_DONE(mp->m_sb_bp); 3664 XFS_BUF_DONE(mp->m_sb_bp);
@@ -3742,27 +3679,21 @@ xfs_log_force_umount(
3742 spin_unlock(&log->l_icloglock); 3679 spin_unlock(&log->l_icloglock);
3743 3680
3744 /* 3681 /*
3745 * We don't want anybody waiting for log reservations 3682 * We don't want anybody waiting for log reservations after this. That
3746 * after this. That means we have to wake up everybody 3683 * means we have to wake up everybody queued up on reserveq as well as
3747 * queued up on reserve_headq as well as write_headq. 3684 * writeq. In addition, we make sure in xlog_{re}grant_log_space that
3748 * In addition, we make sure in xlog_{re}grant_log_space 3685 * we don't enqueue anything once the SHUTDOWN flag is set, and this
3749 * that we don't enqueue anything once the SHUTDOWN flag 3686 * action is protected by the grant locks.
3750 * is set, and this action is protected by the GRANTLOCK.
3751 */ 3687 */
3752 if ((tic = log->l_reserve_headq)) { 3688 spin_lock(&log->l_grant_reserve_lock);
3753 do { 3689 list_for_each_entry(tic, &log->l_reserveq, t_queue)
3754 sv_signal(&tic->t_wait); 3690 wake_up(&tic->t_wait);
3755 tic = tic->t_next; 3691 spin_unlock(&log->l_grant_reserve_lock);
3756 } while (tic != log->l_reserve_headq); 3692
3757 } 3693 spin_lock(&log->l_grant_write_lock);
3758 3694 list_for_each_entry(tic, &log->l_writeq, t_queue)
3759 if ((tic = log->l_write_headq)) { 3695 wake_up(&tic->t_wait);
3760 do { 3696 spin_unlock(&log->l_grant_write_lock);
3761 sv_signal(&tic->t_wait);
3762 tic = tic->t_next;
3763 } while (tic != log->l_write_headq);
3764 }
3765 spin_unlock(&log->l_grant_lock);
3766 3697
3767 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3698 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3768 ASSERT(!logerror); 3699 ASSERT(!logerror);
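
The wake-all sweep above is only race-free because the grant paths recheck XLOG_FORCED_SHUTDOWN() after taking the same grant lock and queueing; a ticket that tested the flag only before the lock could enqueue itself just after the sweep and sleep forever. A sketch of that ordering, with a plain flag standing in for XLOG_FORCED_SHUTDOWN() and all names invented:

    #include <linux/errno.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct demo_log {
        spinlock_t grant_lock;
        struct list_head waitq;
        bool shutdown;            /* stand-in for XLOG_FORCED_SHUTDOWN() */
    };

    struct demo_ticket { struct list_head t_queue; };

    static int demo_enqueue(struct demo_log *log, struct demo_ticket *tic)
    {
        spin_lock(&log->grant_lock);
        list_add_tail(&tic->t_queue, &log->waitq);
        if (log->shutdown) {                /* recheck under the lock */
            list_del_init(&tic->t_queue);
            spin_unlock(&log->grant_lock);
            return -EIO;                    /* the wake-all already ran */
        }
        spin_unlock(&log->grant_lock);      /* real code sleeps via xlog_wait() */
        return 0;
    }
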
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23d6ceb5e97b..9dc8125d04e5 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -61,7 +61,7 @@ xlog_cil_init(
61 INIT_LIST_HEAD(&cil->xc_committing); 61 INIT_LIST_HEAD(&cil->xc_committing);
62 spin_lock_init(&cil->xc_cil_lock); 62 spin_lock_init(&cil->xc_cil_lock);
63 init_rwsem(&cil->xc_ctx_lock); 63 init_rwsem(&cil->xc_ctx_lock);
64 sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait"); 64 init_waitqueue_head(&cil->xc_commit_wait);
65 65
66 INIT_LIST_HEAD(&ctx->committing); 66 INIT_LIST_HEAD(&ctx->committing);
67 INIT_LIST_HEAD(&ctx->busy_extents); 67 INIT_LIST_HEAD(&ctx->busy_extents);
@@ -361,15 +361,10 @@ xlog_cil_committed(
361 int abort) 361 int abort)
362{ 362{
363 struct xfs_cil_ctx *ctx = args; 363 struct xfs_cil_ctx *ctx = args;
364 struct xfs_log_vec *lv;
365 int abortflag = abort ? XFS_LI_ABORTED : 0;
366 struct xfs_busy_extent *busyp, *n; 364 struct xfs_busy_extent *busyp, *n;
367 365
368 /* unpin all the log items */ 366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
369 for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) { 367 ctx->start_lsn, abort);
370 xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
371 abortflag);
372 }
373 368
374 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
375 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
@@ -568,7 +563,7 @@ restart:
568 * It is still being pushed! Wait for the push to 563 * It is still being pushed! Wait for the push to
569 * complete, then start again from the beginning. 564 * complete, then start again from the beginning.
570 */ 565 */
571 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 566 xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
572 goto restart; 567 goto restart;
573 } 568 }
574 } 569 }
@@ -592,7 +587,7 @@ restart:
592 */ 587 */
593 spin_lock(&cil->xc_cil_lock); 588 spin_lock(&cil->xc_cil_lock);
594 ctx->commit_lsn = commit_lsn; 589 ctx->commit_lsn = commit_lsn;
595 sv_broadcast(&cil->xc_commit_wait); 590 wake_up_all(&cil->xc_commit_wait);
596 spin_unlock(&cil->xc_cil_lock); 591 spin_unlock(&cil->xc_cil_lock);
597 592
598 /* release the hounds! */ 593 /* release the hounds! */
@@ -757,7 +752,7 @@ restart:
757 * It is still being pushed! Wait for the push to 752 * It is still being pushed! Wait for the push to
758 * complete, then start again from the beginning. 753 * complete, then start again from the beginning.
759 */ 754 */
760 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 755 xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
761 goto restart; 756 goto restart;
762 } 757 }
763 if (ctx->sequence != sequence) 758 if (ctx->sequence != sequence)
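
Both xfs_log_cil.c call sites above use the same wait-and-restart idiom: scan xc_committing under xc_cil_lock, and if a context is still being pushed, sleep on xc_commit_wait (the sleep drops the lock) and restart the scan because the list may have changed. A kernel-style sketch with the xlog_wait() body from the xfs_log_priv.h hunk below open-coded, and all demo names invented:

    #include <linux/list.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>
    #include <linux/wait.h>

    struct demo_ctx {
        struct list_head entry;
        bool pushing;
    };

    static void demo_wait_for_commits(spinlock_t *lock,
                                      struct list_head *committing,
                                      wait_queue_head_t *commit_wait)
    {
        struct demo_ctx *ctx;
    restart:
        spin_lock(lock);
        list_for_each_entry(ctx, committing, entry) {
            if (!ctx->pushing)
                continue;
            {    /* open-coded xlog_wait(): queue, drop the lock, sleep */
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(commit_wait, &wait);
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock(lock);
                schedule();
                remove_wait_queue(commit_wait, &wait);
            }
            goto restart;    /* the list may have changed while we slept */
        }
        spin_unlock(lock);
    }
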
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index edcdfe01617f..d5f8be8f4bf6 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -21,7 +21,6 @@
21struct xfs_buf; 21struct xfs_buf;
22struct log; 22struct log;
23struct xlog_ticket; 23struct xlog_ticket;
24struct xfs_buf_cancel;
25struct xfs_mount; 24struct xfs_mount;
26 25
27/* 26/*
@@ -54,7 +53,6 @@ struct xfs_mount;
54 BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ 53 BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
55 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) 54 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
56 55
57
58static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) 56static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
59{ 57{
60 return ((xfs_lsn_t)cycle << 32) | block; 58 return ((xfs_lsn_t)cycle << 32) | block;
@@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i)
133 */ 131 */
134#define XLOG_TIC_INITED 0x1 /* has been initialized */ 132#define XLOG_TIC_INITED 0x1 /* has been initialized */
135#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ 133#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
136#define XLOG_TIC_IN_Q 0x4
137 134
138#define XLOG_TIC_FLAGS \ 135#define XLOG_TIC_FLAGS \
139 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \ 136 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
140 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \ 137 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
141 { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
142 138
143#endif /* __KERNEL__ */ 139#endif /* __KERNEL__ */
144 140
@@ -244,9 +240,8 @@ typedef struct xlog_res {
244} xlog_res_t; 240} xlog_res_t;
245 241
246typedef struct xlog_ticket { 242typedef struct xlog_ticket {
247 sv_t t_wait; /* ticket wait queue : 20 */ 243 wait_queue_head_t t_wait; /* ticket wait queue */
248 struct xlog_ticket *t_next; /* :4|8 */ 244 struct list_head t_queue; /* reserve/write queue */
249 struct xlog_ticket *t_prev; /* :4|8 */
250 xlog_tid_t t_tid; /* transaction identifier : 4 */ 245 xlog_tid_t t_tid; /* transaction identifier : 4 */
251 atomic_t t_ref; /* ticket reference count : 4 */ 246 atomic_t t_ref; /* ticket reference count : 4 */
252 int t_curr_res; /* current reservation in bytes : 4 */ 247 int t_curr_res; /* current reservation in bytes : 4 */
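
Swapping the hand-rolled t_next/t_prev ring for a list_head is what enables the unlocked "am I queued?" checks in the grant paths: provided removal always uses list_del_init() rather than list_del(), list_empty(&tic->t_queue) on a privately held ticket reliably says whether it sits on a queue, replacing the old XLOG_TIC_IN_Q flag. A small kernel-style sketch of the idiom (demo names invented):

    #include <linux/list.h>
    #include <linux/types.h>

    struct demo_ticket {
        struct list_head t_queue;
    };

    static void demo_ticket_init(struct demo_ticket *tic)
    {
        INIT_LIST_HEAD(&tic->t_queue);    /* empty == not queued anywhere */
    }

    static void demo_dequeue(struct demo_ticket *tic)
    {
        /* list_del_init() re-initialises the node, keeping the test valid */
        list_del_init(&tic->t_queue);
    }

    static bool demo_is_queued(const struct demo_ticket *tic)
    {
        return !list_empty(&tic->t_queue);
    }
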
@@ -353,8 +348,8 @@ typedef union xlog_in_core2 {
353 * and move everything else out to subsequent cachelines. 348 * and move everything else out to subsequent cachelines.
354 */ 349 */
355typedef struct xlog_in_core { 350typedef struct xlog_in_core {
356 sv_t ic_force_wait; 351 wait_queue_head_t ic_force_wait;
357 sv_t ic_write_wait; 352 wait_queue_head_t ic_write_wait;
358 struct xlog_in_core *ic_next; 353 struct xlog_in_core *ic_next;
359 struct xlog_in_core *ic_prev; 354 struct xlog_in_core *ic_prev;
360 struct xfs_buf *ic_bp; 355 struct xfs_buf *ic_bp;
@@ -421,7 +416,7 @@ struct xfs_cil {
421 struct xfs_cil_ctx *xc_ctx; 416 struct xfs_cil_ctx *xc_ctx;
422 struct rw_semaphore xc_ctx_lock; 417 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing; 418 struct list_head xc_committing;
424 sv_t xc_commit_wait; 419 wait_queue_head_t xc_commit_wait;
425 xfs_lsn_t xc_current_sequence; 420 xfs_lsn_t xc_current_sequence;
426}; 421};
427 422
@@ -491,7 +486,7 @@ typedef struct log {
491 struct xfs_buftarg *l_targ; /* buftarg of log */ 486 struct xfs_buftarg *l_targ; /* buftarg of log */
492 uint l_flags; 487 uint l_flags;
493 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 488 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
494 struct xfs_buf_cancel **l_buf_cancel_table; 489 struct list_head *l_buf_cancel_table;
495 int l_iclog_hsize; /* size of iclog header */ 490 int l_iclog_hsize; /* size of iclog header */
496 int l_iclog_heads; /* # of iclog header sectors */ 491 int l_iclog_heads; /* # of iclog header sectors */
497 uint l_sectBBsize; /* sector size in BBs (2^n) */ 492 uint l_sectBBsize; /* sector size in BBs (2^n) */
@@ -503,29 +498,40 @@ typedef struct log {
503 int l_logBBsize; /* size of log in BB chunks */ 498 int l_logBBsize; /* size of log in BB chunks */
504 499
505 /* The following block of fields are changed while holding icloglock */ 500 /* The following block of fields are changed while holding icloglock */
506 sv_t l_flush_wait ____cacheline_aligned_in_smp; 501 wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
507 /* waiting for iclog flush */ 502 /* waiting for iclog flush */
508 int l_covered_state;/* state of "covering disk 503 int l_covered_state;/* state of "covering disk
509 * log entries" */ 504 * log entries" */
510 xlog_in_core_t *l_iclog; /* head log queue */ 505 xlog_in_core_t *l_iclog; /* head log queue */
511 spinlock_t l_icloglock; /* grab to change iclog state */ 506 spinlock_t l_icloglock; /* grab to change iclog state */
512 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
513 * buffers */
514 xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
515 int l_curr_cycle; /* Cycle number of log writes */ 507 int l_curr_cycle; /* Cycle number of log writes */
516 int l_prev_cycle; /* Cycle number before last 508 int l_prev_cycle; /* Cycle number before last
517 * block increment */ 509 * block increment */
518 int l_curr_block; /* current logical log block */ 510 int l_curr_block; /* current logical log block */
519 int l_prev_block; /* previous logical log block */ 511 int l_prev_block; /* previous logical log block */
520 512
521 /* The following block of fields are changed while holding grant_lock */ 513 /*
522 spinlock_t l_grant_lock ____cacheline_aligned_in_smp; 514 * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
523 xlog_ticket_t *l_reserve_headq; 515 * read without needing to hold specific locks. To avoid operations
524 xlog_ticket_t *l_write_headq; 516 * contending with other hot objects, place each of them on a separate
525 int l_grant_reserve_cycle; 517 * cacheline.
526 int l_grant_reserve_bytes; 518 */
527 int l_grant_write_cycle; 519 /* lsn of last LR on disk */
528 int l_grant_write_bytes; 520 atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
521 /* lsn of 1st LR with unflushed buffers */
522 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
523
524 /*
525 * ticket grant locks, queues and accounting have their own cachelines
526 * as these are quite hot and can be operated on concurrently.
527 */
528 spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
529 struct list_head l_reserveq;
530 atomic64_t l_grant_reserve_head;
531
532 spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
533 struct list_head l_writeq;
534 atomic64_t l_grant_write_head;
529 535
530 /* The following field are used for debugging; need to hold icloglock */ 536 /* The following field are used for debugging; need to hold icloglock */
531#ifdef DEBUG 537#ifdef DEBUG
@@ -534,6 +540,9 @@ typedef struct log {
534 540
535} xlog_t; 541} xlog_t;
536 542
543#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
544 ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
545
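
The reorganised struct log above leans on ____cacheline_aligned_in_smp to give each hot object (the two grant heads with their locks and queues, the tail and last-sync LSNs, the iclog state) a cacheline of its own, so concurrent updaters do not false-share. A toy sketch of the annotation (fields invented; header locations vary by kernel version):

    #include <linux/atomic.h>
    #include <linux/cache.h>
    #include <linux/spinlock.h>

    struct demo_hot {
        int size;    /* cold, read-mostly configuration */

        /* each hot group below starts on its own cacheline under SMP */
        spinlock_t reserve_lock ____cacheline_aligned_in_smp;
        atomic64_t reserve_head;

        spinlock_t write_lock ____cacheline_aligned_in_smp;
        atomic64_t write_head;
    };
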
537#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 546#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
538 547
539/* common routines */ 548/* common routines */
@@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
562 xlog_in_core_t **commit_iclog, uint flags); 571 xlog_in_core_t **commit_iclog, uint flags);
563 572
564/* 573/*
574 * When we crack an atomic LSN, we sample it first so that the value will not
575 * change while we are cracking it into the component values. This means we
576 * will always get consistent component values to work from. This should always
577 * be used to sample and crack LSNs that are stored and updated in atomic
578 * variables.
579 */
580static inline void
581xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
582{
583 xfs_lsn_t val = atomic64_read(lsn);
584
585 *cycle = CYCLE_LSN(val);
586 *block = BLOCK_LSN(val);
587}
588
589/*
590 * Calculate and assign a value to an atomic LSN variable from component pieces.
591 */
592static inline void
593xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
594{
595 atomic64_set(lsn, xlog_assign_lsn(cycle, block));
596}
597
598/*
599 * When we crack the grant head, we sample it first so that the value will not
600 * change while we are cracking it into the component values. This means we
601 * will always get consistent component values to work from.
602 */
603static inline void
604xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
605{
606 *cycle = val >> 32;
607 *space = val & 0xffffffff;
608}
609
610static inline void
611xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
612{
613 xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
614}
615
616static inline int64_t
617xlog_assign_grant_head_val(int cycle, int space)
618{
619 return ((int64_t)cycle << 32) | space;
620}
621
622static inline void
623xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
624{
625 atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
626}
627
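
The crack/assign helpers above are pure bit manipulation over the packed 64-bit value, so their round-trip property can be checked in isolation. A runnable user-space model, with plain stdint types standing in for the kernel's atomic64_t:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t assign_lsn(uint32_t cycle, uint32_t block)
    {
        return ((uint64_t)cycle << 32) | block;
    }

    static void crack_lsn(uint64_t lsn, uint32_t *cycle, uint32_t *block)
    {
        *cycle = lsn >> 32;           /* models CYCLE_LSN() */
        *block = lsn & 0xffffffff;    /* models BLOCK_LSN() */
    }

    int main(void)
    {
        uint64_t lsn = assign_lsn(42, 12345);
        uint32_t cycle, block;

        crack_lsn(lsn, &cycle, &block);
        assert(cycle == 42 && block == 12345);
        printf("0x%016llx -> cycle %u, block %u\n",
               (unsigned long long)lsn, cycle, block);
        return 0;
    }
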
628/*
565 * Committed Item List interfaces 629 * Committed Item List interfaces
566 */ 630 */
567int xlog_cil_init(struct log *log); 631int xlog_cil_init(struct log *log);
@@ -585,6 +649,21 @@ xlog_cil_force(struct log *log)
585 */ 649 */
586#define XLOG_UNMOUNT_REC_TYPE (-1U) 650#define XLOG_UNMOUNT_REC_TYPE (-1U)
587 651
652/*
653 * Wrapper function for waiting on a wait queue serialised against wakeups
654 * by a spinlock. This matches the semantics of all the wait queues used in the
655 * log code.
656 */
657static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
658{
659 DECLARE_WAITQUEUE(wait, current);
660
661 add_wait_queue_exclusive(wq, &wait);
662 __set_current_state(TASK_UNINTERRUPTIBLE);
663 spin_unlock(lock);
664 schedule();
665 remove_wait_queue(wq, &wait);
666}
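
xlog_wait() above performs no condition re-test of its own, so correctness depends entirely on the waker's discipline: the condition must be changed and the wakeup issued under the same spinlock the sleeper held when it queued itself, leaving no window between the waiter's test and its schedule(). A kernel-style sketch of the matching waker side (demo names invented):

    #include <linux/spinlock.h>
    #include <linux/types.h>
    #include <linux/wait.h>

    struct demo_state {
        spinlock_t lock;
        bool done;                /* condition waiters test under 'lock' */
        wait_queue_head_t wq;
    };

    static void demo_complete(struct demo_state *s)
    {
        spin_lock(&s->lock);
        s->done = true;           /* update the condition... */
        wake_up_all(&s->wq);      /* ...and wake under the same lock */
        spin_unlock(&s->lock);
    }
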
588#endif /* __KERNEL__ */ 667#endif /* __KERNEL__ */
589 668
590#endif /* __XFS_LOG_PRIV_H__ */ 669#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 966d3f97458c..204d8e5fa7fa 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *);
53#endif 53#endif
54 54
55/* 55/*
56 * This structure is used during recovery to record the buf log items which
57 * have been canceled and should not be replayed.
58 */
59struct xfs_buf_cancel {
60 xfs_daddr_t bc_blkno;
61 uint bc_len;
62 int bc_refcount;
63 struct list_head bc_list;
64};
65
66/*
56 * Sector aligned buffer routines for buffer create/read/write/access 67 * Sector aligned buffer routines for buffer create/read/write/access
57 */ 68 */
58 69
@@ -925,12 +936,12 @@ xlog_find_tail(
925 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 936 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
926 if (found == 2) 937 if (found == 2)
927 log->l_curr_cycle++; 938 log->l_curr_cycle++;
928 log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); 939 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
929 log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); 940 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
930 log->l_grant_reserve_cycle = log->l_curr_cycle; 941 xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
931 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); 942 BBTOB(log->l_curr_block));
932 log->l_grant_write_cycle = log->l_curr_cycle; 943 xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
933 log->l_grant_write_bytes = BBTOB(log->l_curr_block); 944 BBTOB(log->l_curr_block));
934 945
935 /* 946 /*
936 * Look for unmount record. If we find it, then we know there 947 * Look for unmount record. If we find it, then we know there
@@ -960,7 +971,7 @@ xlog_find_tail(
960 } 971 }
961 after_umount_blk = (i + hblks + (int) 972 after_umount_blk = (i + hblks + (int)
962 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; 973 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
963 tail_lsn = log->l_tail_lsn; 974 tail_lsn = atomic64_read(&log->l_tail_lsn);
964 if (*head_blk == after_umount_blk && 975 if (*head_blk == after_umount_blk &&
965 be32_to_cpu(rhead->h_num_logops) == 1) { 976 be32_to_cpu(rhead->h_num_logops) == 1) {
966 umount_data_blk = (i + hblks) % log->l_logBBsize; 977 umount_data_blk = (i + hblks) % log->l_logBBsize;
@@ -975,12 +986,10 @@ xlog_find_tail(
975 * log records will point recovery to after the 986 * log records will point recovery to after the
976 * current unmount record. 987 * current unmount record.
977 */ 988 */
978 log->l_tail_lsn = 989 xlog_assign_atomic_lsn(&log->l_tail_lsn,
979 xlog_assign_lsn(log->l_curr_cycle, 990 log->l_curr_cycle, after_umount_blk);
980 after_umount_blk); 991 xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
981 log->l_last_sync_lsn = 992 log->l_curr_cycle, after_umount_blk);
982 xlog_assign_lsn(log->l_curr_cycle,
983 after_umount_blk);
984 *tail_blk = after_umount_blk; 993 *tail_blk = after_umount_blk;
985 994
986 /* 995 /*
@@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans(
1605 * record in the table to tell us how many times we expect to see this 1614 * record in the table to tell us how many times we expect to see this
1606 * record during the second pass. 1615 * record during the second pass.
1607 */ 1616 */
1608STATIC void 1617STATIC int
1609xlog_recover_do_buffer_pass1( 1618xlog_recover_buffer_pass1(
1610 xlog_t *log, 1619 struct log *log,
1611 xfs_buf_log_format_t *buf_f) 1620 xlog_recover_item_t *item)
1612{ 1621{
1613 xfs_buf_cancel_t *bcp; 1622 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1614 xfs_buf_cancel_t *nextp; 1623 struct list_head *bucket;
1615 xfs_buf_cancel_t *prevp; 1624 struct xfs_buf_cancel *bcp;
1616 xfs_buf_cancel_t **bucket;
1617 xfs_daddr_t blkno = 0;
1618 uint len = 0;
1619 ushort flags = 0;
1620
1621 switch (buf_f->blf_type) {
1622 case XFS_LI_BUF:
1623 blkno = buf_f->blf_blkno;
1624 len = buf_f->blf_len;
1625 flags = buf_f->blf_flags;
1626 break;
1627 }
1628 1625
1629 /* 1626 /*
1630 * If this isn't a cancel buffer item, then just return. 1627 * If this isn't a cancel buffer item, then just return.
1631 */ 1628 */
1632 if (!(flags & XFS_BLF_CANCEL)) { 1629 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
1633 trace_xfs_log_recover_buf_not_cancel(log, buf_f); 1630 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
1634 return; 1631 return 0;
1635 }
1636
1637 /*
1638 * Insert an xfs_buf_cancel record into the hash table of
1639 * them. If there is already an identical record, bump
1640 * its reference count.
1641 */
1642 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1643 XLOG_BC_TABLE_SIZE];
1644 /*
1645 * If the hash bucket is empty then just insert a new record into
1646 * the bucket.
1647 */
1648 if (*bucket == NULL) {
1649 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1650 KM_SLEEP);
1651 bcp->bc_blkno = blkno;
1652 bcp->bc_len = len;
1653 bcp->bc_refcount = 1;
1654 bcp->bc_next = NULL;
1655 *bucket = bcp;
1656 return;
1657 } 1632 }
1658 1633
1659 /* 1634 /*
1660 * The hash bucket is not empty, so search for duplicates of our 1635 * Insert an xfs_buf_cancel record into the hash table.
1661 * record. If we find one them just bump its refcount. If not 1636 * If there is already an identical record, bump its reference count.
1662 * then add us at the end of the list.
1663 */ 1637 */
1664 prevp = NULL; 1638 bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
1665 nextp = *bucket; 1639 list_for_each_entry(bcp, bucket, bc_list) {
1666 while (nextp != NULL) { 1640 if (bcp->bc_blkno == buf_f->blf_blkno &&
1667 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1641 bcp->bc_len == buf_f->blf_len) {
1668 nextp->bc_refcount++; 1642 bcp->bc_refcount++;
1669 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); 1643 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
1670 return; 1644 return 0;
1671 } 1645 }
1672 prevp = nextp; 1646 }
1673 nextp = nextp->bc_next; 1647
1674 } 1648 bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
1675 ASSERT(prevp != NULL); 1649 bcp->bc_blkno = buf_f->blf_blkno;
1676 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1650 bcp->bc_len = buf_f->blf_len;
1677 KM_SLEEP);
1678 bcp->bc_blkno = blkno;
1679 bcp->bc_len = len;
1680 bcp->bc_refcount = 1; 1651 bcp->bc_refcount = 1;
1681 bcp->bc_next = NULL; 1652 list_add_tail(&bcp->bc_list, bucket);
1682 prevp->bc_next = bcp; 1653
1683 trace_xfs_log_recover_buf_cancel_add(log, buf_f); 1654 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
1655 return 0;
1684} 1656}
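
The cancel table that pass 1 fills above is a fixed array of list_head buckets indexed by block number modulo the table size (the XLOG_BUF_CANCEL_BUCKET() macro from the header hunk earlier), so duplicate records for one (blkno, len) pair are found with a single bucket walk. A runnable user-space model of the bucket selection, with the table size invented:

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_BC_TABLE_SIZE 64    /* stand-in for XLOG_BC_TABLE_SIZE */

    /* models XLOG_BUF_CANCEL_BUCKET(): 64-bit block number, modulo hash */
    static unsigned int demo_cancel_bucket(uint64_t blkno)
    {
        return blkno % DEMO_BC_TABLE_SIZE;
    }

    int main(void)
    {
        /* 8 and 72 collide into bucket 8: this is why the bucket walk
         * still compares blkno and len before bumping a refcount */
        uint64_t blknos[] = { 8, 72, 1000003 };
        unsigned int i;

        for (i = 0; i < 3; i++)
            printf("blkno %llu -> bucket %u\n",
                   (unsigned long long)blknos[i],
                   demo_cancel_bucket(blknos[i]));
        return 0;
    }
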
1685 1657
1686/* 1658/*
@@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1(
1698 */ 1670 */
1699STATIC int 1671STATIC int
1700xlog_check_buffer_cancelled( 1672xlog_check_buffer_cancelled(
1701 xlog_t *log, 1673 struct log *log,
1702 xfs_daddr_t blkno, 1674 xfs_daddr_t blkno,
1703 uint len, 1675 uint len,
1704 ushort flags) 1676 ushort flags)
1705{ 1677{
1706 xfs_buf_cancel_t *bcp; 1678 struct list_head *bucket;
1707 xfs_buf_cancel_t *prevp; 1679 struct xfs_buf_cancel *bcp;
1708 xfs_buf_cancel_t **bucket;
1709 1680
1710 if (log->l_buf_cancel_table == NULL) { 1681 if (log->l_buf_cancel_table == NULL) {
1711 /* 1682 /*
@@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled(
1716 return 0; 1687 return 0;
1717 } 1688 }
1718 1689
1719 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1720 XLOG_BC_TABLE_SIZE];
1721 bcp = *bucket;
1722 if (bcp == NULL) {
1723 /*
1724 * There is no corresponding entry in the table built
1725 * in pass one, so this buffer has not been cancelled.
1726 */
1727 ASSERT(!(flags & XFS_BLF_CANCEL));
1728 return 0;
1729 }
1730
1731 /* 1690 /*
1732 * Search for an entry in the buffer cancel table that 1691 * Search for an entry in the cancel table that matches our buffer.
1733 * matches our buffer.
1734 */ 1692 */
1735 prevp = NULL; 1693 bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
1736 while (bcp != NULL) { 1694 list_for_each_entry(bcp, bucket, bc_list) {
1737 if (bcp->bc_blkno == blkno && bcp->bc_len == len) { 1695 if (bcp->bc_blkno == blkno && bcp->bc_len == len)
1738 /* 1696 goto found;
1739 * We've got a match, so return 1 so that the
1740 * recovery of this buffer is cancelled.
1741 * If this buffer is actually a buffer cancel
1742 * log item, then decrement the refcount on the
1743 * one in the table and remove it if this is the
1744 * last reference.
1745 */
1746 if (flags & XFS_BLF_CANCEL) {
1747 bcp->bc_refcount--;
1748 if (bcp->bc_refcount == 0) {
1749 if (prevp == NULL) {
1750 *bucket = bcp->bc_next;
1751 } else {
1752 prevp->bc_next = bcp->bc_next;
1753 }
1754 kmem_free(bcp);
1755 }
1756 }
1757 return 1;
1758 }
1759 prevp = bcp;
1760 bcp = bcp->bc_next;
1761 } 1697 }
1698
1762 /* 1699 /*
1763 * We didn't find a corresponding entry in the table, so 1700 * We didn't find a corresponding entry in the table, so return 0 so
1764 * return 0 so that the buffer is NOT cancelled. 1701 * that the buffer is NOT cancelled.
1765 */ 1702 */
1766 ASSERT(!(flags & XFS_BLF_CANCEL)); 1703 ASSERT(!(flags & XFS_BLF_CANCEL));
1767 return 0; 1704 return 0;
1768}
1769 1705
1770STATIC int 1706found:
1771xlog_recover_do_buffer_pass2( 1707 /*
1772 xlog_t *log, 1708 * We've got a match, so return 1 so that the recovery of this buffer
1773 xfs_buf_log_format_t *buf_f) 1709 * is cancelled. If this buffer is actually a buffer cancel log
1774{ 1710 * item, then decrement the refcount on the one in the table and
1775 xfs_daddr_t blkno = 0; 1711 * remove it if this is the last reference.
1776 ushort flags = 0; 1712 */
1777 uint len = 0; 1713 if (flags & XFS_BLF_CANCEL) {
1778 1714 if (--bcp->bc_refcount == 0) {
1779 switch (buf_f->blf_type) { 1715 list_del(&bcp->bc_list);
1780 case XFS_LI_BUF: 1716 kmem_free(bcp);
1781 blkno = buf_f->blf_blkno; 1717 }
1782 flags = buf_f->blf_flags;
1783 len = buf_f->blf_len;
1784 break;
1785 } 1718 }
1786 1719 return 1;
1787 return xlog_check_buffer_cancelled(log, blkno, len, flags);
1788} 1720}
1789 1721
1790/* 1722/*
1791 * Perform recovery for a buffer full of inodes. In these buffers, 1723 * Perform recovery for a buffer full of inodes. In these buffers, the only
1792 * the only data which should be recovered is that which corresponds 1724 * data which should be recovered is that which corresponds to the
1793 * to the di_next_unlinked pointers in the on disk inode structures. 1725 * di_next_unlinked pointers in the on disk inode structures. The rest of the
1794 * The rest of the data for the inodes is always logged through the 1726 * data for the inodes is always logged through the inodes themselves rather
1795 * inodes themselves rather than the inode buffer and is recovered 1727 * than the inode buffer and is recovered in xlog_recover_inode_pass2().
1796 * in xlog_recover_do_inode_trans().
1797 * 1728 *
1798 * The only time when buffers full of inodes are fully recovered is 1729 * The only time when buffers full of inodes are fully recovered is when the
1799 * when the buffer is full of newly allocated inodes. In this case 1730 * buffer is full of newly allocated inodes. In this case the buffer will
1800 * the buffer will not be marked as an inode buffer and so will be 1731 * not be marked as an inode buffer and so will be sent to
1801 * sent to xlog_recover_do_reg_buffer() below during recovery. 1732 * xlog_recover_do_reg_buffer() below during recovery.
1802 */ 1733 */
1803STATIC int 1734STATIC int
1804xlog_recover_do_inode_buffer( 1735xlog_recover_do_inode_buffer(
1805 xfs_mount_t *mp, 1736 struct xfs_mount *mp,
1806 xlog_recover_item_t *item, 1737 xlog_recover_item_t *item,
1807 xfs_buf_t *bp, 1738 struct xfs_buf *bp,
1808 xfs_buf_log_format_t *buf_f) 1739 xfs_buf_log_format_t *buf_f)
1809{ 1740{
1810 int i; 1741 int i;
1811 int item_index; 1742 int item_index = 0;
1812 int bit; 1743 int bit = 0;
1813 int nbits; 1744 int nbits = 0;
1814 int reg_buf_offset; 1745 int reg_buf_offset = 0;
1815 int reg_buf_bytes; 1746 int reg_buf_bytes = 0;
1816 int next_unlinked_offset; 1747 int next_unlinked_offset;
1817 int inodes_per_buf; 1748 int inodes_per_buf;
1818 xfs_agino_t *logged_nextp; 1749 xfs_agino_t *logged_nextp;
1819 xfs_agino_t *buffer_nextp; 1750 xfs_agino_t *buffer_nextp;
1820 unsigned int *data_map = NULL;
1821 unsigned int map_size = 0;
1822 1751
1823 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1752 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1824 1753
1825 switch (buf_f->blf_type) {
1826 case XFS_LI_BUF:
1827 data_map = buf_f->blf_data_map;
1828 map_size = buf_f->blf_map_size;
1829 break;
1830 }
1831 /*
1832 * Set the variables corresponding to the current region to
1833 * 0 so that we'll initialize them on the first pass through
1834 * the loop.
1835 */
1836 reg_buf_offset = 0;
1837 reg_buf_bytes = 0;
1838 bit = 0;
1839 nbits = 0;
1840 item_index = 0;
1841 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; 1754 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1842 for (i = 0; i < inodes_per_buf; i++) { 1755 for (i = 0; i < inodes_per_buf; i++) {
1843 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 1756 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
@@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer(
1852 * the current di_next_unlinked field. 1765 * the current di_next_unlinked field.
1853 */ 1766 */
1854 bit += nbits; 1767 bit += nbits;
1855 bit = xfs_next_bit(data_map, map_size, bit); 1768 bit = xfs_next_bit(buf_f->blf_data_map,
1769 buf_f->blf_map_size, bit);
1856 1770
1857 /* 1771 /*
1858 * If there are no more logged regions in the 1772 * If there are no more logged regions in the
1859 * buffer, then we're done. 1773 * buffer, then we're done.
1860 */ 1774 */
1861 if (bit == -1) { 1775 if (bit == -1)
1862 return 0; 1776 return 0;
1863 }
1864 1777
1865 nbits = xfs_contig_bits(data_map, map_size, 1778 nbits = xfs_contig_bits(buf_f->blf_data_map,
1866 bit); 1779 buf_f->blf_map_size, bit);
1867 ASSERT(nbits > 0); 1780 ASSERT(nbits > 0);
1868 reg_buf_offset = bit << XFS_BLF_SHIFT; 1781 reg_buf_offset = bit << XFS_BLF_SHIFT;
1869 reg_buf_bytes = nbits << XFS_BLF_SHIFT; 1782 reg_buf_bytes = nbits << XFS_BLF_SHIFT;
@@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer(
1875 * di_next_unlinked field, then move on to the next 1788 * di_next_unlinked field, then move on to the next
1876 * di_next_unlinked field. 1789 * di_next_unlinked field.
1877 */ 1790 */
1878 if (next_unlinked_offset < reg_buf_offset) { 1791 if (next_unlinked_offset < reg_buf_offset)
1879 continue; 1792 continue;
1880 }
1881 1793
1882 ASSERT(item->ri_buf[item_index].i_addr != NULL); 1794 ASSERT(item->ri_buf[item_index].i_addr != NULL);
1883 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 1795 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
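For reference, the arithmetic driving this loop: the buffer holds XFS_BUF_COUNT(bp) >> sb_inodelog inodes, each sb_inodesize bytes long, and the only byte range worth recovering in inode i is its di_next_unlinked field. A hedged restatement of the offset computation, using offsetof on a trimmed-down stand-in for the on-disk inode:

#include <stddef.h>

struct disk_inode {			/* heavily trimmed stand-in */
	unsigned short	di_magic;
	unsigned char	di_version;
	/* ... many fields elided ... */
	unsigned int	di_next_unlinked;	/* unlinked-list pointer */
};

/* Byte offset of inode i's di_next_unlinked field within the buffer. */
static size_t next_unlinked_offset(int i, size_t inodesize)
{
	return (size_t)i * inodesize +
	       offsetof(struct disk_inode, di_next_unlinked);
}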
@@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer(
1913 * given buffer. The bitmap in the buf log format structure indicates 1825 * given buffer. The bitmap in the buf log format structure indicates
1914 * where to place the logged data. 1826 * where to place the logged data.
1915 */ 1827 */
1916/*ARGSUSED*/
1917STATIC void 1828STATIC void
1918xlog_recover_do_reg_buffer( 1829xlog_recover_do_reg_buffer(
1919 struct xfs_mount *mp, 1830 struct xfs_mount *mp,
1920 xlog_recover_item_t *item, 1831 xlog_recover_item_t *item,
1921 xfs_buf_t *bp, 1832 struct xfs_buf *bp,
1922 xfs_buf_log_format_t *buf_f) 1833 xfs_buf_log_format_t *buf_f)
1923{ 1834{
1924 int i; 1835 int i;
1925 int bit; 1836 int bit;
1926 int nbits; 1837 int nbits;
1927 unsigned int *data_map = NULL;
1928 unsigned int map_size = 0;
1929 int error; 1838 int error;
1930 1839
1931 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 1840 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
1932 1841
1933 switch (buf_f->blf_type) {
1934 case XFS_LI_BUF:
1935 data_map = buf_f->blf_data_map;
1936 map_size = buf_f->blf_map_size;
1937 break;
1938 }
1939 bit = 0; 1842 bit = 0;
1940 i = 1; /* 0 is the buf format structure */ 1843 i = 1; /* 0 is the buf format structure */
1941 while (1) { 1844 while (1) {
1942 bit = xfs_next_bit(data_map, map_size, bit); 1845 bit = xfs_next_bit(buf_f->blf_data_map,
1846 buf_f->blf_map_size, bit);
1943 if (bit == -1) 1847 if (bit == -1)
1944 break; 1848 break;
1945 nbits = xfs_contig_bits(data_map, map_size, bit); 1849 nbits = xfs_contig_bits(buf_f->blf_data_map,
1850 buf_f->blf_map_size, bit);
1946 ASSERT(nbits > 0); 1851 ASSERT(nbits > 0);
1947 ASSERT(item->ri_buf[i].i_addr != NULL); 1852 ASSERT(item->ri_buf[i].i_addr != NULL);
1948 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 1853 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
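Both the inode-buffer path above and this regular-buffer loop walk the dirty map the same way: find the next set bit, measure the contiguous run of set bits, and shift by XFS_BLF_SHIFT to turn chunk counts into byte offsets and lengths (each bit covers one 128-byte chunk). A sketch of that iteration using the generic kernel bitmap helpers in place of the XFS-private xfs_next_bit()/xfs_contig_bits():

#include <linux/bitops.h>

#define CHUNK_SHIFT	7	/* 128-byte chunks, as XFS_BLF_SHIFT */

static void walk_dirty_regions(const unsigned long *map, unsigned long nbits)
{
	unsigned long bit = 0;

	while ((bit = find_next_bit(map, nbits, bit)) < nbits) {
		unsigned long end = find_next_zero_bit(map, nbits, bit);
		unsigned long offset = bit << CHUNK_SHIFT;	/* bytes */
		unsigned long bytes = (end - bit) << CHUNK_SHIFT;

		/* copy 'bytes' of logged data into the buffer at 'offset' */
		bit = end;
	}
}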
@@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer(
2176 * for more details on the implementation of the table of cancel records. 2081 * for more details on the implementation of the table of cancel records.
2177 */ 2082 */
2178STATIC int 2083STATIC int
2179xlog_recover_do_buffer_trans( 2084xlog_recover_buffer_pass2(
2180 xlog_t *log, 2085 xlog_t *log,
2181 xlog_recover_item_t *item, 2086 xlog_recover_item_t *item)
2182 int pass)
2183{ 2087{
2184 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 2088 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2185 xfs_mount_t *mp; 2089 xfs_mount_t *mp = log->l_mp;
2186 xfs_buf_t *bp; 2090 xfs_buf_t *bp;
2187 int error; 2091 int error;
2188 int cancel;
2189 xfs_daddr_t blkno;
2190 int len;
2191 ushort flags;
2192 uint buf_flags; 2092 uint buf_flags;
2193 2093
2194 if (pass == XLOG_RECOVER_PASS1) { 2094 /*
2195 /* 2095 * In this pass we only want to recover all the buffers which have
2196 * In this pass we're only looking for buf items 2096 * not been cancelled and are not cancellation buffers themselves.
2197 * with the XFS_BLF_CANCEL bit set. 2097 */
2198 */ 2098 if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
2199 xlog_recover_do_buffer_pass1(log, buf_f); 2099 buf_f->blf_len, buf_f->blf_flags)) {
2100 trace_xfs_log_recover_buf_cancel(log, buf_f);
2200 return 0; 2101 return 0;
2201 } else {
2202 /*
2203 * In this pass we want to recover all the buffers
2204 * which have not been cancelled and are not
2205 * cancellation buffers themselves. The routine
2206 * we call here will tell us whether or not to
2207 * continue with the replay of this buffer.
2208 */
2209 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2210 if (cancel) {
2211 trace_xfs_log_recover_buf_cancel(log, buf_f);
2212 return 0;
2213 }
2214 } 2102 }
2103
2215 trace_xfs_log_recover_buf_recover(log, buf_f); 2104 trace_xfs_log_recover_buf_recover(log, buf_f);
2216 switch (buf_f->blf_type) {
2217 case XFS_LI_BUF:
2218 blkno = buf_f->blf_blkno;
2219 len = buf_f->blf_len;
2220 flags = buf_f->blf_flags;
2221 break;
2222 default:
2223 xfs_fs_cmn_err(CE_ALERT, log->l_mp,
2224 "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
2225 buf_f->blf_type, log->l_mp->m_logname ?
2226 log->l_mp->m_logname : "internal");
2227 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
2228 XFS_ERRLEVEL_LOW, log->l_mp);
2229 return XFS_ERROR(EFSCORRUPTED);
2230 }
2231 2105
2232 mp = log->l_mp;
2233 buf_flags = XBF_LOCK; 2106 buf_flags = XBF_LOCK;
2234 if (!(flags & XFS_BLF_INODE_BUF)) 2107 if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
2235 buf_flags |= XBF_MAPPED; 2108 buf_flags |= XBF_MAPPED;
2236 2109
2237 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); 2110 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
2111 buf_flags);
2238 if (XFS_BUF_ISERROR(bp)) { 2112 if (XFS_BUF_ISERROR(bp)) {
2239 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2113 xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
2240 bp, blkno); 2114 bp, buf_f->blf_blkno);
2241 error = XFS_BUF_GETERROR(bp); 2115 error = XFS_BUF_GETERROR(bp);
2242 xfs_buf_relse(bp); 2116 xfs_buf_relse(bp);
2243 return error; 2117 return error;
2244 } 2118 }
2245 2119
2246 error = 0; 2120 error = 0;
2247 if (flags & XFS_BLF_INODE_BUF) { 2121 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2248 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2122 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2249 } else if (flags & 2123 } else if (buf_f->blf_flags &
2250 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2124 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
2251 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2125 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2252 } else { 2126 } else {
@@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans(
2286} 2160}
2287 2161
2288STATIC int 2162STATIC int
2289xlog_recover_do_inode_trans( 2163xlog_recover_inode_pass2(
2290 xlog_t *log, 2164 xlog_t *log,
2291 xlog_recover_item_t *item, 2165 xlog_recover_item_t *item)
2292 int pass)
2293{ 2166{
2294 xfs_inode_log_format_t *in_f; 2167 xfs_inode_log_format_t *in_f;
2295 xfs_mount_t *mp; 2168 xfs_mount_t *mp = log->l_mp;
2296 xfs_buf_t *bp; 2169 xfs_buf_t *bp;
2297 xfs_dinode_t *dip; 2170 xfs_dinode_t *dip;
2298 xfs_ino_t ino;
2299 int len; 2171 int len;
2300 xfs_caddr_t src; 2172 xfs_caddr_t src;
2301 xfs_caddr_t dest; 2173 xfs_caddr_t dest;
@@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans(
2305 xfs_icdinode_t *dicp; 2177 xfs_icdinode_t *dicp;
2306 int need_free = 0; 2178 int need_free = 0;
2307 2179
2308 if (pass == XLOG_RECOVER_PASS1) {
2309 return 0;
2310 }
2311
2312 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2180 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2313 in_f = item->ri_buf[0].i_addr; 2181 in_f = item->ri_buf[0].i_addr;
2314 } else { 2182 } else {
@@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans(
2318 if (error) 2186 if (error)
2319 goto error; 2187 goto error;
2320 } 2188 }
2321 ino = in_f->ilf_ino;
2322 mp = log->l_mp;
2323 2189
2324 /* 2190 /*
2325 * Inode buffers can be freed, look out for it, 2191 * Inode buffers can be freed, look out for it,
@@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans(
2354 xfs_buf_relse(bp); 2220 xfs_buf_relse(bp);
2355 xfs_fs_cmn_err(CE_ALERT, mp, 2221 xfs_fs_cmn_err(CE_ALERT, mp,
2356 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2222 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
2357 dip, bp, ino); 2223 dip, bp, in_f->ilf_ino);
2358 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)", 2224 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2359 XFS_ERRLEVEL_LOW, mp); 2225 XFS_ERRLEVEL_LOW, mp);
2360 error = EFSCORRUPTED; 2226 error = EFSCORRUPTED;
2361 goto error; 2227 goto error;
@@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans(
2365 xfs_buf_relse(bp); 2231 xfs_buf_relse(bp);
2366 xfs_fs_cmn_err(CE_ALERT, mp, 2232 xfs_fs_cmn_err(CE_ALERT, mp,
2367 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", 2233 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
2368 item, ino); 2234 item, in_f->ilf_ino);
2369 XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)", 2235 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2370 XFS_ERRLEVEL_LOW, mp); 2236 XFS_ERRLEVEL_LOW, mp);
2371 error = EFSCORRUPTED; 2237 error = EFSCORRUPTED;
2372 goto error; 2238 goto error;
@@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans(
2394 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { 2260 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
2395 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2261 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2396 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2262 (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
2397 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)", 2263 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
2398 XFS_ERRLEVEL_LOW, mp, dicp); 2264 XFS_ERRLEVEL_LOW, mp, dicp);
2399 xfs_buf_relse(bp); 2265 xfs_buf_relse(bp);
2400 xfs_fs_cmn_err(CE_ALERT, mp, 2266 xfs_fs_cmn_err(CE_ALERT, mp,
2401 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2267 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2402 item, dip, bp, ino); 2268 item, dip, bp, in_f->ilf_ino);
2403 error = EFSCORRUPTED; 2269 error = EFSCORRUPTED;
2404 goto error; 2270 goto error;
2405 } 2271 }
@@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans(
2407 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2273 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2408 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2274 (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
2409 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2275 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
2410 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)", 2276 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
2411 XFS_ERRLEVEL_LOW, mp, dicp); 2277 XFS_ERRLEVEL_LOW, mp, dicp);
2412 xfs_buf_relse(bp); 2278 xfs_buf_relse(bp);
2413 xfs_fs_cmn_err(CE_ALERT, mp, 2279 xfs_fs_cmn_err(CE_ALERT, mp,
2414 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2280 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2415 item, dip, bp, ino); 2281 item, dip, bp, in_f->ilf_ino);
2416 error = EFSCORRUPTED; 2282 error = EFSCORRUPTED;
2417 goto error; 2283 goto error;
2418 } 2284 }
2419 } 2285 }
2420 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2286 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
2421 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)", 2287 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
2422 XFS_ERRLEVEL_LOW, mp, dicp); 2288 XFS_ERRLEVEL_LOW, mp, dicp);
2423 xfs_buf_relse(bp); 2289 xfs_buf_relse(bp);
2424 xfs_fs_cmn_err(CE_ALERT, mp, 2290 xfs_fs_cmn_err(CE_ALERT, mp,
2425 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2291 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2426 item, dip, bp, ino, 2292 item, dip, bp, in_f->ilf_ino,
2427 dicp->di_nextents + dicp->di_anextents, 2293 dicp->di_nextents + dicp->di_anextents,
2428 dicp->di_nblocks); 2294 dicp->di_nblocks);
2429 error = EFSCORRUPTED; 2295 error = EFSCORRUPTED;
2430 goto error; 2296 goto error;
2431 } 2297 }
2432 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2298 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
2433 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)", 2299 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
2434 XFS_ERRLEVEL_LOW, mp, dicp); 2300 XFS_ERRLEVEL_LOW, mp, dicp);
2435 xfs_buf_relse(bp); 2301 xfs_buf_relse(bp);
2436 xfs_fs_cmn_err(CE_ALERT, mp, 2302 xfs_fs_cmn_err(CE_ALERT, mp,
2437 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2303 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
2438 item, dip, bp, ino, dicp->di_forkoff); 2304 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2439 error = EFSCORRUPTED; 2305 error = EFSCORRUPTED;
2440 goto error; 2306 goto error;
2441 } 2307 }
2442 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2308 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
2443 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2309 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2444 XFS_ERRLEVEL_LOW, mp, dicp); 2310 XFS_ERRLEVEL_LOW, mp, dicp);
2445 xfs_buf_relse(bp); 2311 xfs_buf_relse(bp);
2446 xfs_fs_cmn_err(CE_ALERT, mp, 2312 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans(
2532 break; 2398 break;
2533 2399
2534 default: 2400 default:
2535 xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag"); 2401 xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
2536 ASSERT(0); 2402 ASSERT(0);
2537 xfs_buf_relse(bp); 2403 xfs_buf_relse(bp);
2538 error = EIO; 2404 error = EIO;
@@ -2556,18 +2422,11 @@ error:
2556 * of that type. 2422 * of that type.
2557 */ 2423 */
2558STATIC int 2424STATIC int
2559xlog_recover_do_quotaoff_trans( 2425xlog_recover_quotaoff_pass1(
2560 xlog_t *log, 2426 xlog_t *log,
2561 xlog_recover_item_t *item, 2427 xlog_recover_item_t *item)
2562 int pass)
2563{ 2428{
2564 xfs_qoff_logformat_t *qoff_f; 2429 xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
2565
2566 if (pass == XLOG_RECOVER_PASS2) {
2567 return (0);
2568 }
2569
2570 qoff_f = item->ri_buf[0].i_addr;
2571 ASSERT(qoff_f); 2430 ASSERT(qoff_f);
2572 2431
2573 /* 2432 /*
@@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans(
2588 * Recover a dquot record 2447 * Recover a dquot record
2589 */ 2448 */
2590STATIC int 2449STATIC int
2591xlog_recover_do_dquot_trans( 2450xlog_recover_dquot_pass2(
2592 xlog_t *log, 2451 xlog_t *log,
2593 xlog_recover_item_t *item, 2452 xlog_recover_item_t *item)
2594 int pass)
2595{ 2453{
2596 xfs_mount_t *mp; 2454 xfs_mount_t *mp = log->l_mp;
2597 xfs_buf_t *bp; 2455 xfs_buf_t *bp;
2598 struct xfs_disk_dquot *ddq, *recddq; 2456 struct xfs_disk_dquot *ddq, *recddq;
2599 int error; 2457 int error;
2600 xfs_dq_logformat_t *dq_f; 2458 xfs_dq_logformat_t *dq_f;
2601 uint type; 2459 uint type;
2602 2460
2603 if (pass == XLOG_RECOVER_PASS1) {
2604 return 0;
2605 }
2606 mp = log->l_mp;
2607 2461
2608 /* 2462 /*
2609 * Filesystems are required to send in quota flags at mount time. 2463 * Filesystems are required to send in quota flags at mount time.
@@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans(
2647 if ((error = xfs_qm_dqcheck(recddq, 2501 if ((error = xfs_qm_dqcheck(recddq,
2648 dq_f->qlf_id, 2502 dq_f->qlf_id,
2649 0, XFS_QMOPT_DOWARN, 2503 0, XFS_QMOPT_DOWARN,
2650 "xlog_recover_do_dquot_trans (log copy)"))) { 2504 "xlog_recover_dquot_pass2 (log copy)"))) {
2651 return XFS_ERROR(EIO); 2505 return XFS_ERROR(EIO);
2652 } 2506 }
2653 ASSERT(dq_f->qlf_len == 1); 2507 ASSERT(dq_f->qlf_len == 1);
@@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans(
2670 * minimal initialization then. 2524 * minimal initialization then.
2671 */ 2525 */
2672 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2526 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2673 "xlog_recover_do_dquot_trans")) { 2527 "xlog_recover_dquot_pass2")) {
2674 xfs_buf_relse(bp); 2528 xfs_buf_relse(bp);
2675 return XFS_ERROR(EIO); 2529 return XFS_ERROR(EIO);
2676 } 2530 }
@@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans(
2693 * LSN. 2547 * LSN.
2694 */ 2548 */
2695STATIC int 2549STATIC int
2696xlog_recover_do_efi_trans( 2550xlog_recover_efi_pass2(
2697 xlog_t *log, 2551 xlog_t *log,
2698 xlog_recover_item_t *item, 2552 xlog_recover_item_t *item,
2699 xfs_lsn_t lsn, 2553 xfs_lsn_t lsn)
2700 int pass)
2701{ 2554{
2702 int error; 2555 int error;
2703 xfs_mount_t *mp; 2556 xfs_mount_t *mp = log->l_mp;
2704 xfs_efi_log_item_t *efip; 2557 xfs_efi_log_item_t *efip;
2705 xfs_efi_log_format_t *efi_formatp; 2558 xfs_efi_log_format_t *efi_formatp;
2706 2559
2707 if (pass == XLOG_RECOVER_PASS1) {
2708 return 0;
2709 }
2710
2711 efi_formatp = item->ri_buf[0].i_addr; 2560 efi_formatp = item->ri_buf[0].i_addr;
2712 2561
2713 mp = log->l_mp;
2714 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2562 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
2715 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), 2563 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
2716 &(efip->efi_format)))) { 2564 &(efip->efi_format)))) {
2717 xfs_efi_item_free(efip); 2565 xfs_efi_item_free(efip);
2718 return error; 2566 return error;
2719 } 2567 }
2720 efip->efi_next_extent = efi_formatp->efi_nextents; 2568 atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
2721 efip->efi_flags |= XFS_EFI_COMMITTED;
2722 2569
2723 spin_lock(&log->l_ailp->xa_lock); 2570 spin_lock(&log->l_ailp->xa_lock);
2724 /* 2571 /*
2725 * xfs_trans_ail_update() drops the AIL lock. 2572 * xfs_trans_ail_update() drops the AIL lock.
2726 */ 2573 */
2727 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); 2574 xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
2728 return 0; 2575 return 0;
2729} 2576}
2730 2577
@@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans(
2737 * efd format structure. If we find it, we remove the efi from the 2584 * efd format structure. If we find it, we remove the efi from the
2738 * AIL and free it. 2585 * AIL and free it.
2739 */ 2586 */
2740STATIC void 2587STATIC int
2741xlog_recover_do_efd_trans( 2588xlog_recover_efd_pass2(
2742 xlog_t *log, 2589 xlog_t *log,
2743 xlog_recover_item_t *item, 2590 xlog_recover_item_t *item)
2744 int pass)
2745{ 2591{
2746 xfs_efd_log_format_t *efd_formatp; 2592 xfs_efd_log_format_t *efd_formatp;
2747 xfs_efi_log_item_t *efip = NULL; 2593 xfs_efi_log_item_t *efip = NULL;
@@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans(
2750 struct xfs_ail_cursor cur; 2596 struct xfs_ail_cursor cur;
2751 struct xfs_ail *ailp = log->l_ailp; 2597 struct xfs_ail *ailp = log->l_ailp;
2752 2598
2753 if (pass == XLOG_RECOVER_PASS1) {
2754 return;
2755 }
2756
2757 efd_formatp = item->ri_buf[0].i_addr; 2599 efd_formatp = item->ri_buf[0].i_addr;
2758 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2600 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2759 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2601 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
@@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans(
2785 } 2627 }
2786 xfs_trans_ail_cursor_done(ailp, &cur); 2628 xfs_trans_ail_cursor_done(ailp, &cur);
2787 spin_unlock(&ailp->xa_lock); 2629 spin_unlock(&ailp->xa_lock);
2788}
2789
2790/*
2791 * Perform the transaction
2792 *
2793 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2794 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2795 */
2796STATIC int
2797xlog_recover_do_trans(
2798 xlog_t *log,
2799 xlog_recover_t *trans,
2800 int pass)
2801{
2802 int error = 0;
2803 xlog_recover_item_t *item;
2804
2805 error = xlog_recover_reorder_trans(log, trans, pass);
2806 if (error)
2807 return error;
2808
2809 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2810 trace_xfs_log_recover_item_recover(log, trans, item, pass);
2811 switch (ITEM_TYPE(item)) {
2812 case XFS_LI_BUF:
2813 error = xlog_recover_do_buffer_trans(log, item, pass);
2814 break;
2815 case XFS_LI_INODE:
2816 error = xlog_recover_do_inode_trans(log, item, pass);
2817 break;
2818 case XFS_LI_EFI:
2819 error = xlog_recover_do_efi_trans(log, item,
2820 trans->r_lsn, pass);
2821 break;
2822 case XFS_LI_EFD:
2823 xlog_recover_do_efd_trans(log, item, pass);
2824 error = 0;
2825 break;
2826 case XFS_LI_DQUOT:
2827 error = xlog_recover_do_dquot_trans(log, item, pass);
2828 break;
2829 case XFS_LI_QUOTAOFF:
2830 error = xlog_recover_do_quotaoff_trans(log, item,
2831 pass);
2832 break;
2833 default:
2834 xlog_warn(
2835 "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
2836 ASSERT(0);
2837 error = XFS_ERROR(EIO);
2838 break;
2839 }
2840
2841 if (error)
2842 return error;
2843 }
2844 2630
2845 return 0; 2631 return 0;
2846} 2632}
@@ -2852,7 +2638,7 @@ xlog_recover_do_trans(
2852 */ 2638 */
2853STATIC void 2639STATIC void
2854xlog_recover_free_trans( 2640xlog_recover_free_trans(
2855 xlog_recover_t *trans) 2641 struct xlog_recover *trans)
2856{ 2642{
2857 xlog_recover_item_t *item, *n; 2643 xlog_recover_item_t *item, *n;
2858 int i; 2644 int i;
@@ -2871,17 +2657,95 @@ xlog_recover_free_trans(
2871} 2657}
2872 2658
2873STATIC int 2659STATIC int
2660xlog_recover_commit_pass1(
2661 struct log *log,
2662 struct xlog_recover *trans,
2663 xlog_recover_item_t *item)
2664{
2665 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
2666
2667 switch (ITEM_TYPE(item)) {
2668 case XFS_LI_BUF:
2669 return xlog_recover_buffer_pass1(log, item);
2670 case XFS_LI_QUOTAOFF:
2671 return xlog_recover_quotaoff_pass1(log, item);
2672 case XFS_LI_INODE:
2673 case XFS_LI_EFI:
2674 case XFS_LI_EFD:
2675 case XFS_LI_DQUOT:
2676 /* nothing to do in pass 1 */
2677 return 0;
2678 default:
2679 xlog_warn(
2680 "XFS: invalid item type (%d) xlog_recover_commit_pass1",
2681 ITEM_TYPE(item));
2682 ASSERT(0);
2683 return XFS_ERROR(EIO);
2684 }
2685}
2686
2687STATIC int
2688xlog_recover_commit_pass2(
2689 struct log *log,
2690 struct xlog_recover *trans,
2691 xlog_recover_item_t *item)
2692{
2693 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
2694
2695 switch (ITEM_TYPE(item)) {
2696 case XFS_LI_BUF:
2697 return xlog_recover_buffer_pass2(log, item);
2698 case XFS_LI_INODE:
2699 return xlog_recover_inode_pass2(log, item);
2700 case XFS_LI_EFI:
2701 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
2702 case XFS_LI_EFD:
2703 return xlog_recover_efd_pass2(log, item);
2704 case XFS_LI_DQUOT:
2705 return xlog_recover_dquot_pass2(log, item);
2706 case XFS_LI_QUOTAOFF:
2707 /* nothing to do in pass2 */
2708 return 0;
2709 default:
2710 xlog_warn(
2711 "XFS: invalid item type (%d) xlog_recover_commit_pass2",
2712 ITEM_TYPE(item));
2713 ASSERT(0);
2714 return XFS_ERROR(EIO);
2715 }
2716}
2717
2718/*
2719 * Perform the transaction.
2720 *
2721 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2722 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2723 */
2724STATIC int
2874xlog_recover_commit_trans( 2725xlog_recover_commit_trans(
2875 xlog_t *log, 2726 struct log *log,
2876 xlog_recover_t *trans, 2727 struct xlog_recover *trans,
2877 int pass) 2728 int pass)
2878{ 2729{
2879 int error; 2730 int error = 0;
2731 xlog_recover_item_t *item;
2880 2732
2881 hlist_del(&trans->r_list); 2733 hlist_del(&trans->r_list);
2882 if ((error = xlog_recover_do_trans(log, trans, pass))) 2734
2735 error = xlog_recover_reorder_trans(log, trans, pass);
2736 if (error)
2883 return error; 2737 return error;
2884 xlog_recover_free_trans(trans); /* no error */ 2738
2739 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2740 if (pass == XLOG_RECOVER_PASS1)
2741 error = xlog_recover_commit_pass1(log, trans, item);
2742 else
2743 error = xlog_recover_commit_pass2(log, trans, item);
2744 if (error)
2745 return error;
2746 }
2747
2748 xlog_recover_free_trans(trans);
2885 return 0; 2749 return 0;
2886} 2750}
2887 2751
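The net effect of this restructuring: instead of one xlog_recover_do_trans() that threaded a pass argument into every handler (each of which began with an early return for the wrong pass), there is now one dispatcher per pass and the driver picks which to run. A skeletal restatement of that control flow, with the item list and handlers reduced to stubs:

enum { RECOVER_PASS1, RECOVER_PASS2 };

struct ritem {
	int		type;
	struct ritem	*next;
};

static int commit_pass1(struct ritem *item) { return 0; /* cancel tracking */ }
static int commit_pass2(struct ritem *item) { return 0; /* actual replay */ }

static int commit_trans(struct ritem *items, int pass)
{
	struct ritem *item;
	int error;

	for (item = items; item; item = item->next) {
		error = (pass == RECOVER_PASS1) ? commit_pass1(item)
						: commit_pass2(item);
		if (error)
			return error;	/* stop replay on first failure */
	}
	return 0;
}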
@@ -3011,7 +2875,7 @@ xlog_recover_process_efi(
3011 xfs_extent_t *extp; 2875 xfs_extent_t *extp;
3012 xfs_fsblock_t startblock_fsb; 2876 xfs_fsblock_t startblock_fsb;
3013 2877
3014 ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); 2878 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
3015 2879
3016 /* 2880 /*
3017 * First check the validity of the extents described by the 2881 * First check the validity of the extents described by the
@@ -3050,7 +2914,7 @@ xlog_recover_process_efi(
3050 extp->ext_len); 2914 extp->ext_len);
3051 } 2915 }
3052 2916
3053 efip->efi_flags |= XFS_EFI_RECOVERED; 2917 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3054 error = xfs_trans_commit(tp, 0); 2918 error = xfs_trans_commit(tp, 0);
3055 return error; 2919 return error;
3056 2920
@@ -3107,7 +2971,7 @@ xlog_recover_process_efis(
3107 * Skip EFIs that we've already processed. 2971 * Skip EFIs that we've already processed.
3108 */ 2972 */
3109 efip = (xfs_efi_log_item_t *)lip; 2973 efip = (xfs_efi_log_item_t *)lip;
3110 if (efip->efi_flags & XFS_EFI_RECOVERED) { 2974 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
3111 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2975 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3112 continue; 2976 continue;
3113 } 2977 }
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery(
3724 xfs_daddr_t head_blk, 3588 xfs_daddr_t head_blk,
3725 xfs_daddr_t tail_blk) 3589 xfs_daddr_t tail_blk)
3726{ 3590{
3727 int error; 3591 int error, i;
3728 3592
3729 ASSERT(head_blk != tail_blk); 3593 ASSERT(head_blk != tail_blk);
3730 3594
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery(
3732 * First do a pass to find all of the cancelled buf log items. 3596 * First do a pass to find all of the cancelled buf log items.
3733 * Store them in the buf_cancel_table for use in the second pass. 3597 * Store them in the buf_cancel_table for use in the second pass.
3734 */ 3598 */
3735 log->l_buf_cancel_table = 3599 log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
3736 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * 3600 sizeof(struct list_head),
3737 sizeof(xfs_buf_cancel_t*),
3738 KM_SLEEP); 3601 KM_SLEEP);
3602 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3603 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
3604
3739 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3605 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3740 XLOG_RECOVER_PASS1); 3606 XLOG_RECOVER_PASS1);
3741 if (error != 0) { 3607 if (error != 0) {
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery(
3754 int i; 3620 int i;
3755 3621
3756 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3622 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3757 ASSERT(log->l_buf_cancel_table[i] == NULL); 3623 ASSERT(list_empty(&log->l_buf_cancel_table[i]));
3758 } 3624 }
3759#endif /* DEBUG */ 3625#endif /* DEBUG */
3760 3626
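A side effect of moving the cancel table from xfs_buf_cancel_t pointer buckets to list_heads, visible in this hunk: the table can no longer rely on kmem_zalloc()'s zero fill, because a zero-filled list_head is not a valid empty list. Every bucket needs INIT_LIST_HEAD() before use, and the DEBUG-only emptiness check becomes list_empty() instead of a NULL comparison. A hedged sketch of that allocate/initialise cycle outside XFS (plain kmalloc in place of the kmem wrappers):

#include <linux/list.h>
#include <linux/slab.h>

#define TABLE_SIZE	64	/* stand-in for XLOG_BC_TABLE_SIZE */

static struct list_head *alloc_bucket_table(void)
{
	struct list_head *table;
	int i;

	table = kmalloc(TABLE_SIZE * sizeof(*table), GFP_KERNEL);
	if (!table)
		return NULL;
	/* a zeroed list_head is invalid; point each bucket at itself */
	for (i = 0; i < TABLE_SIZE; i++)
		INIT_LIST_HEAD(&table[i]);
	return table;
}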
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 19e9dfa1c254..d447aef84bc3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -472,7 +472,7 @@ xfs_initialize_perag(
472 goto out_unwind; 472 goto out_unwind;
473 pag->pag_agno = index; 473 pag->pag_agno = index;
474 pag->pag_mount = mp; 474 pag->pag_mount = mp;
475 rwlock_init(&pag->pag_ici_lock); 475 spin_lock_init(&pag->pag_ici_lock);
476 mutex_init(&pag->pag_ici_reclaim_lock); 476 mutex_init(&pag->pag_ici_reclaim_lock);
477 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 477 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
478 spin_lock_init(&pag->pag_buf_lock); 478 spin_lock_init(&pag->pag_buf_lock);
@@ -975,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
975} 975}
976 976
977/* 977/*
978 * precalculate the low space thresholds for dynamic speculative preallocation.
979 */
980void
981xfs_set_low_space_thresholds(
982 struct xfs_mount *mp)
983{
984 int i;
985
986 for (i = 0; i < XFS_LOWSP_MAX; i++) {
987 __uint64_t space = mp->m_sb.sb_dblocks;
988
989 do_div(space, 100);
990 mp->m_low_space[i] = space * (i + 1);
991 }
992}
993
994
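The loop stores m_low_space[i] = (i+1)% of sb_dblocks; do_div() divides the 64-bit block count in place (returning the remainder), which keeps the arithmetic safe on 32-bit platforms. As a worked example under a hypothetical geometry, a 2,000,000-block filesystem gets thresholds of 20,000 through 100,000 blocks for the 1%-5% levels:

/* Userspace restatement of the threshold precalculation above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dblocks = 2000000;	/* hypothetical fs size in blocks */
	int i;

	for (i = 0; i < 5; i++)
		printf("m_low_space[%d] = %llu blocks (%d%%)\n", i,
		       (unsigned long long)(dblocks / 100 * (i + 1)), i + 1);
	return 0;
}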
995/*
978 * Set whether we're using inode alignment. 996 * Set whether we're using inode alignment.
979 */ 997 */
980STATIC void 998STATIC void
@@ -1196,6 +1214,9 @@ xfs_mountfs(
1196 */ 1214 */
1197 xfs_set_rw_sizes(mp); 1215 xfs_set_rw_sizes(mp);
1198 1216
1217 /* set the low space thresholds for dynamic preallocation */
1218 xfs_set_low_space_thresholds(mp);
1219
1199 /* 1220 /*
1200 * Set the inode cluster size. 1221 * Set the inode cluster size.
1201 * This may still be overridden by the file system 1222 * This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5861b4980740..a62e8971539d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
103 xfs_mod_incore_sb(mp, field, delta, rsvd) 103 xfs_mod_incore_sb(mp, field, delta, rsvd)
104#endif 104#endif
105 105
106/* dynamic preallocation free space thresholds, 5% down to 1% */
107enum {
108 XFS_LOWSP_1_PCNT = 0,
109 XFS_LOWSP_2_PCNT,
110 XFS_LOWSP_3_PCNT,
111 XFS_LOWSP_4_PCNT,
112 XFS_LOWSP_5_PCNT,
113 XFS_LOWSP_MAX,
114};
115
106typedef struct xfs_mount { 116typedef struct xfs_mount {
107 struct super_block *m_super; 117 struct super_block *m_super;
108 xfs_tid_t m_tid; /* next unused tid for fs */ 118 xfs_tid_t m_tid; /* next unused tid for fs */
@@ -202,6 +212,8 @@ typedef struct xfs_mount {
202 __int64_t m_update_flags; /* sb flags we need to update 212 __int64_t m_update_flags; /* sb flags we need to update
203 on the next remount,rw */ 213 on the next remount,rw */
204 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
215 int64_t m_low_space[XFS_LOWSP_MAX];
216 /* low free space thresholds */
205} xfs_mount_t; 217} xfs_mount_t;
206 218
207/* 219/*
@@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
379 391
380extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 392extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
381 393
394extern void xfs_set_low_space_thresholds(struct xfs_mount *);
395
382#endif /* __KERNEL__ */ 396#endif /* __KERNEL__ */
383 397
384extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 398extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f6d956b7711e..f80a067a4658 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
1350 * they could be immediately flushed and we'd have to race with the flusher 1350 * they could be immediately flushed and we'd have to race with the flusher
1351 * trying to pull the item from the AIL as we add it. 1351 * trying to pull the item from the AIL as we add it.
1352 */ 1352 */
1353void 1353static void
1354xfs_trans_item_committed( 1354xfs_trans_item_committed(
1355 struct xfs_log_item *lip, 1355 struct xfs_log_item *lip,
1356 xfs_lsn_t commit_lsn, 1356 xfs_lsn_t commit_lsn,
@@ -1425,6 +1425,83 @@ xfs_trans_committed(
1425 xfs_trans_free(tp); 1425 xfs_trans_free(tp);
1426} 1426}
1427 1427
1428static inline void
1429xfs_log_item_batch_insert(
1430 struct xfs_ail *ailp,
1431 struct xfs_log_item **log_items,
1432 int nr_items,
1433 xfs_lsn_t commit_lsn)
1434{
1435 int i;
1436
1437 spin_lock(&ailp->xa_lock);
1438 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
1439 xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
1440
1441 for (i = 0; i < nr_items; i++)
1442 IOP_UNPIN(log_items[i], 0);
1443}
1444
1445/*
1446 * Bulk operation version of xfs_trans_committed that takes a log vector of
1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to
1448 * minimise lock traffic.
1449 */
1450void
1451xfs_trans_committed_bulk(
1452 struct xfs_ail *ailp,
1453 struct xfs_log_vec *log_vector,
1454 xfs_lsn_t commit_lsn,
1455 int aborted)
1456{
1457#define LOG_ITEM_BATCH_SIZE 32
1458 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
1459 struct xfs_log_vec *lv;
1460 int i = 0;
1461
1462 /* unpin all the log items */
1463 for (lv = log_vector; lv; lv = lv->lv_next) {
1464 struct xfs_log_item *lip = lv->lv_item;
1465 xfs_lsn_t item_lsn;
1466
1467 if (aborted)
1468 lip->li_flags |= XFS_LI_ABORTED;
1469 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1470
1471 /* item_lsn of -1 means the item was freed */
1472 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1473 continue;
1474
1475 if (item_lsn != commit_lsn) {
1476
1477 /*
1478 * Not a bulk update candidate due to an unusual item_lsn.
1479 * Push into AIL immediately, rechecking the lsn once
1480 * we have the ail lock. Then unpin the item.
1481 */
1482 spin_lock(&ailp->xa_lock);
1483 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
1484 xfs_trans_ail_update(ailp, lip, item_lsn);
1485 else
1486 spin_unlock(&ailp->xa_lock);
1487 IOP_UNPIN(lip, 0);
1488 continue;
1489 }
1490
1491 /* Item is a candidate for bulk AIL insert. */
1492 log_items[i++] = lv->lv_item;
1493 if (i >= LOG_ITEM_BATCH_SIZE) {
1494 xfs_log_item_batch_insert(ailp, log_items,
1495 LOG_ITEM_BATCH_SIZE, commit_lsn);
1496 i = 0;
1497 }
1498 }
1499
1500 /* make sure we insert the remainder! */
1501 if (i)
1502 xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
1503}
1504
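xfs_trans_committed_bulk() amortises AIL lock traffic by staging up to LOG_ITEM_BATCH_SIZE items in a stack array and inserting each full batch under one lock hold; the trailing if (i) call is what flushes a final partial batch. The accumulate/flush idiom in isolation, with hypothetical names:

#define BATCH	32

static void flush(void **items, int n)
{
	/* take the shared lock once, process all n items, drop it */
}

static void process_all(void **src, int count)
{
	void *batch[BATCH];
	int i = 0, j;

	for (j = 0; j < count; j++) {
		batch[i++] = src[j];
		if (i == BATCH) {		/* batch full: flush it */
			flush(batch, BATCH);
			i = 0;
		}
	}
	if (i)					/* don't lose the remainder */
		flush(batch, i);
}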
1428/* 1505/*
1429 * Called from the trans_commit code when we notice that 1506 * Called from the trans_commit code when we notice that
1430 * the filesystem is in the middle of a forced shutdown. 1507 * the filesystem is in the middle of a forced shutdown.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 246286b77a86..c2042b736b81 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -294,8 +294,8 @@ struct xfs_log_item_desc {
294#define XFS_ALLOC_BTREE_REF 2 294#define XFS_ALLOC_BTREE_REF 2
295#define XFS_BMAP_BTREE_REF 2 295#define XFS_BMAP_BTREE_REF 2
296#define XFS_DIR_BTREE_REF 2 296#define XFS_DIR_BTREE_REF 2
297#define XFS_INO_REF 2
297#define XFS_ATTR_BTREE_REF 1 298#define XFS_ATTR_BTREE_REF 1
298#define XFS_INO_REF 1
299#define XFS_DQUOT_REF 1 299#define XFS_DQUOT_REF 1
300 300
301#ifdef __KERNEL__ 301#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dc9069568ff7..c5bbbc45db91 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,8 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); 31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
32STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); 32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); 33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); 34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 35
@@ -449,129 +449,152 @@ xfs_trans_unlocked_item(
449 xfs_log_move_tail(ailp->xa_mount, 1); 449 xfs_log_move_tail(ailp->xa_mount, 1);
450} /* xfs_trans_unlocked_item */ 450} /* xfs_trans_unlocked_item */
451 451
452
453/* 452/*
454 * Update the position of the item in the AIL with the new 453 * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
455 * lsn. If it is not yet in the AIL, add it. Otherwise, move 454 *
456 * it to its new position by removing it and re-adding it. 455 * @xfs_trans_ail_update_bulk takes an array of log items that all need to be
456 * positioned at the same LSN in the AIL. If an item is not in the AIL, it will
457 * be added. Otherwise, it will be repositioned by removing it and re-adding
458 * it to the AIL. If we move the first item in the AIL, update the log tail to
459 * match the new minimum LSN in the AIL.
457 * 460 *
458 * Wakeup anyone with an lsn less than the item's lsn. If the item 461 * This function takes the AIL lock once to execute the update operations on
459 * we move in the AIL is the minimum one, update the tail lsn in the 462 * all the items in the array rather than once per item. Once we have the AIL
460 * log manager. 463 * lock, we need to check each log item LSN to confirm it actually needs to
464 * be moved forward in the AIL.
461 * 465 *
462 * This function must be called with the AIL lock held. The lock 466 * To optimise the insert operation, we delete all the items from the AIL in
463 * is dropped before returning. 467 * the first pass, moving them into a temporary list, then splice the temporary
468 * list into the correct position in the AIL. This avoids needing to do an
469 * insert operation on every item.
470 *
471 * This function must be called with the AIL lock held. The lock is dropped
472 * before returning.
464 */ 473 */
465void 474void
466xfs_trans_ail_update( 475xfs_trans_ail_update_bulk(
467 struct xfs_ail *ailp, 476 struct xfs_ail *ailp,
468 xfs_log_item_t *lip, 477 struct xfs_log_item **log_items,
469 xfs_lsn_t lsn) __releases(ailp->xa_lock) 478 int nr_items,
479 xfs_lsn_t lsn) __releases(ailp->xa_lock)
470{ 480{
471 xfs_log_item_t *dlip = NULL; 481 xfs_log_item_t *mlip;
472 xfs_log_item_t *mlip; /* ptr to minimum lip */
473 xfs_lsn_t tail_lsn; 482 xfs_lsn_t tail_lsn;
483 int mlip_changed = 0;
484 int i;
485 LIST_HEAD(tmp);
474 486
475 mlip = xfs_ail_min(ailp); 487 mlip = xfs_ail_min(ailp);
476 488
477 if (lip->li_flags & XFS_LI_IN_AIL) { 489 for (i = 0; i < nr_items; i++) {
478 dlip = xfs_ail_delete(ailp, lip); 490 struct xfs_log_item *lip = log_items[i];
479 ASSERT(dlip == lip); 491 if (lip->li_flags & XFS_LI_IN_AIL) {
480 xfs_trans_ail_cursor_clear(ailp, dlip); 492 /* check if we really need to move the item */
481 } else { 493 if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
482 lip->li_flags |= XFS_LI_IN_AIL; 494 continue;
495
496 xfs_ail_delete(ailp, lip);
497 if (mlip == lip)
498 mlip_changed = 1;
499 } else {
500 lip->li_flags |= XFS_LI_IN_AIL;
501 }
502 lip->li_lsn = lsn;
503 list_add(&lip->li_ail, &tmp);
483 } 504 }
484 505
485 lip->li_lsn = lsn; 506 xfs_ail_splice(ailp, &tmp, lsn);
486 xfs_ail_insert(ailp, lip);
487 507
488 if (mlip == dlip) { 508 if (!mlip_changed) {
489 mlip = xfs_ail_min(ailp);
490 /*
491 * It is not safe to access mlip after the AIL lock is
492 * dropped, so we must get a copy of li_lsn before we do
493 * so. This is especially important on 32-bit platforms
494 * where accessing and updating 64-bit values like li_lsn
495 * is not atomic.
496 */
497 tail_lsn = mlip->li_lsn;
498 spin_unlock(&ailp->xa_lock);
499 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
500 } else {
501 spin_unlock(&ailp->xa_lock); 509 spin_unlock(&ailp->xa_lock);
510 return;
502 } 511 }
503 512
504 513 /*
505} /* xfs_trans_update_ail */ 514 * It is not safe to access mlip after the AIL lock is dropped, so we
515 * must get a copy of li_lsn before we do so. This is especially
516 * important on 32-bit platforms where accessing and updating 64-bit
517 * values like li_lsn is not atomic.
518 */
519 mlip = xfs_ail_min(ailp);
520 tail_lsn = mlip->li_lsn;
521 spin_unlock(&ailp->xa_lock);
522 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
523}
506 524
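As the rewritten comment explains, the bulk update is two-phase: unlink every item that actually has to move onto a private list, then splice that list into the AIL at the target LSN, so the reverse sorted-position search runs once per call rather than once per item. A hedged sketch of the idea with generic list primitives, assuming every node is already on the sorted list:

#include <linux/list.h>

struct node {
	struct list_head	link;
	unsigned long long	lsn;
};

/* Move n nodes to 'lsn', keeping 'ail' sorted by ascending lsn. */
static void bulk_move(struct list_head *ail, struct node **nodes, int n,
		      unsigned long long lsn)
{
	LIST_HEAD(tmp);
	struct node *cur;
	int i;

	/* phase 1: collect everything that really needs to move */
	for (i = 0; i < n; i++) {
		if (nodes[i]->lsn >= lsn)
			continue;	/* already at or past the target */
		list_del(&nodes[i]->link);
		nodes[i]->lsn = lsn;
		list_add(&nodes[i]->link, &tmp);
	}

	/* phase 2: one reverse search, one splice */
	list_for_each_entry_reverse(cur, ail, link)
		if (cur->lsn <= lsn)
			break;
	/* if no entry qualifies, &cur->link ends up equal to 'ail' */
	list_splice_init(&tmp, &cur->link);
}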
507/* 525/*
508 * Delete the given item from the AIL. It must already be in 526 * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
509 * the AIL.
510 * 527 *
511 * Wakeup anyone with an lsn less than item's lsn. If the item 528 * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
512 * we delete in the AIL is the minimum one, update the tail lsn in the 529 * removed from the AIL. The caller is already holding the AIL lock, and done
513 * log manager. 530 * all the checks necessary to ensure the items passed in via @log_items are
531 * ready for deletion. This includes checking that the items are in the AIL.
514 * 532 *
515 * Clear the IN_AIL flag from the item, reset its lsn to 0, and 533 * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
516 * bump the AIL's generation count to indicate that the tree 534 * flag from the item and reset the item's lsn to 0. If we remove the first
517 * has changed. 535 * item in the AIL, update the log tail to match the new minimum LSN in the
536 * AIL.
518 * 537 *
519 * This function must be called with the AIL lock held. The lock 538 * This function will not drop the AIL lock until all items are removed from
520 * is dropped before returning. 539 * the AIL to minimise the amount of lock traffic on the AIL. This does not
540 * greatly increase the AIL hold time, but does significantly reduce the amount
541 * of traffic on the lock, especially during IO completion.
542 *
543 * This function must be called with the AIL lock held. The lock is dropped
544 * before returning.
521 */ 545 */
522void 546void
523xfs_trans_ail_delete( 547xfs_trans_ail_delete_bulk(
524 struct xfs_ail *ailp, 548 struct xfs_ail *ailp,
525 xfs_log_item_t *lip) __releases(ailp->xa_lock) 549 struct xfs_log_item **log_items,
550 int nr_items) __releases(ailp->xa_lock)
526{ 551{
527 xfs_log_item_t *dlip;
528 xfs_log_item_t *mlip; 552 xfs_log_item_t *mlip;
529 xfs_lsn_t tail_lsn; 553 xfs_lsn_t tail_lsn;
554 int mlip_changed = 0;
555 int i;
530 556
531 if (lip->li_flags & XFS_LI_IN_AIL) { 557 mlip = xfs_ail_min(ailp);
532 mlip = xfs_ail_min(ailp);
533 dlip = xfs_ail_delete(ailp, lip);
534 ASSERT(dlip == lip);
535 xfs_trans_ail_cursor_clear(ailp, dlip);
536
537 558
538 lip->li_flags &= ~XFS_LI_IN_AIL; 559 for (i = 0; i < nr_items; i++) {
539 lip->li_lsn = 0; 560 struct xfs_log_item *lip = log_items[i];
561 if (!(lip->li_flags & XFS_LI_IN_AIL)) {
562 struct xfs_mount *mp = ailp->xa_mount;
540 563
541 if (mlip == dlip) {
542 mlip = xfs_ail_min(ailp);
543 /*
544 * It is not safe to access mlip after the AIL lock
545 * is dropped, so we must get a copy of li_lsn
546 * before we do so. This is especially important
547 * on 32-bit platforms where accessing and updating
548 * 64-bit values like li_lsn is not atomic.
549 */
550 tail_lsn = mlip ? mlip->li_lsn : 0;
551 spin_unlock(&ailp->xa_lock);
552 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
553 } else {
554 spin_unlock(&ailp->xa_lock); 564 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) {
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
570 }
571 return;
555 } 572 }
573
574 xfs_ail_delete(ailp, lip);
575 lip->li_flags &= ~XFS_LI_IN_AIL;
576 lip->li_lsn = 0;
577 if (mlip == lip)
578 mlip_changed = 1;
556 } 579 }
557 else {
558 /*
559 * If the file system is not being shutdown, we are in
560 * serious trouble if we get to this stage.
561 */
562 struct xfs_mount *mp = ailp->xa_mount;
563 580
581 if (!mlip_changed) {
564 spin_unlock(&ailp->xa_lock); 582 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) { 583 return;
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
570 }
571 } 584 }
572}
573
574 585
586 /*
587 * It is not safe to access mlip after the AIL lock is dropped, so we
588 * must get a copy of li_lsn before we do so. This is especially
589 * important on 32-bit platforms where accessing and updating 64-bit
590 * values like li_lsn is not atomic. It is possible we've emptied the
591 * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
592 */
593 mlip = xfs_ail_min(ailp);
594 tail_lsn = mlip ? mlip->li_lsn : 0;
595 spin_unlock(&ailp->xa_lock);
596 xfs_log_move_tail(ailp->xa_mount, tail_lsn);
597}
575 598
576/* 599/*
577 * The active item list (AIL) is a doubly linked list of log 600 * The active item list (AIL) is a doubly linked list of log
@@ -623,16 +646,13 @@ xfs_trans_ail_destroy(
623} 646}
624 647
625/* 648/*
626 * Insert the given log item into the AIL. 649 * Splice the log item list into the AIL at the given LSN.
627 * We almost always insert at the end of the list, so on inserts
628 * we search from the end of the list to find where the
629 * new item belongs.
630 */ 650 */
631STATIC void 651STATIC void
632xfs_ail_insert( 652xfs_ail_splice(
633 struct xfs_ail *ailp, 653 struct xfs_ail *ailp,
634 xfs_log_item_t *lip) 654 struct list_head *list,
635/* ARGSUSED */ 655 xfs_lsn_t lsn)
636{ 656{
637 xfs_log_item_t *next_lip; 657 xfs_log_item_t *next_lip;
638 658
@@ -640,39 +660,33 @@ xfs_ail_insert(
640 * If the list is empty, just insert the item. 660 * If the list is empty, just insert the item.
641 */ 661 */
642 if (list_empty(&ailp->xa_ail)) { 662 if (list_empty(&ailp->xa_ail)) {
643 list_add(&lip->li_ail, &ailp->xa_ail); 663 list_splice(list, &ailp->xa_ail);
644 return; 664 return;
645 } 665 }
646 666
647 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
648 if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) 668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
649 break; 669 break;
650 } 670 }
651 671
652 ASSERT((&next_lip->li_ail == &ailp->xa_ail) || 672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
653 (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); 673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
654
655 list_add(&lip->li_ail, &next_lip->li_ail);
656 674
657 xfs_ail_check(ailp, lip); 675 list_splice_init(list, &next_lip->li_ail);
658 return; 676 return;
659} 677}
660 678
661/* 679/*
662 * Delete the given item from the AIL. Return a pointer to the item. 680 * Delete the given item from the AIL. Return a pointer to the item.
663 */ 681 */
664/*ARGSUSED*/ 682STATIC void
665STATIC xfs_log_item_t *
666xfs_ail_delete( 683xfs_ail_delete(
667 struct xfs_ail *ailp, 684 struct xfs_ail *ailp,
668 xfs_log_item_t *lip) 685 xfs_log_item_t *lip)
669/* ARGSUSED */
670{ 686{
671 xfs_ail_check(ailp, lip); 687 xfs_ail_check(ailp, lip);
672
673 list_del(&lip->li_ail); 688 list_del(&lip->li_ail);
674 689 xfs_trans_ail_cursor_clear(ailp, lip);
675 return lip;
676} 690}
677 691
678/* 692/*
@@ -682,7 +696,6 @@ xfs_ail_delete(
682STATIC xfs_log_item_t * 696STATIC xfs_log_item_t *
683xfs_ail_min( 697xfs_ail_min(
684 struct xfs_ail *ailp) 698 struct xfs_ail *ailp)
685/* ARGSUSED */
686{ 699{
687 if (list_empty(&ailp->xa_ail)) 700 if (list_empty(&ailp->xa_ail))
688 return NULL; 701 return NULL;
@@ -699,7 +712,6 @@ STATIC xfs_log_item_t *
699xfs_ail_next( 712xfs_ail_next(
700 struct xfs_ail *ailp, 713 struct xfs_ail *ailp,
701 xfs_log_item_t *lip) 714 xfs_log_item_t *lip)
702/* ARGSUSED */
703{ 715{
704 if (lip->li_ail.next == &ailp->xa_ail) 716 if (lip->li_ail.next == &ailp->xa_ail)
705 return NULL; 717 return NULL;
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index f783d5e9fa70..f7590f5badea 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
69 tp->t_flags |= XFS_TRANS_DIRTY; 69 tp->t_flags |= XFS_TRANS_DIRTY;
70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; 70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
71 71
72 next_extent = efip->efi_next_extent; 72 /*
73 * atomic_inc_return gives us the value after the increment;
74 * we want to use it as an array index so we need to subtract 1 from
75 * it.
76 */
77 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
73 ASSERT(next_extent < efip->efi_format.efi_nextents); 78 ASSERT(next_extent < efip->efi_format.efi_nextents);
74 extp = &(efip->efi_format.efi_extents[next_extent]); 79 extp = &(efip->efi_format.efi_extents[next_extent]);
75 extp->ext_start = start_block; 80 extp->ext_start = start_block;
76 extp->ext_len = ext_len; 81 extp->ext_len = ext_len;
77 efip->efi_next_extent++;
78} 82}
79 83
80 84
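Switching efi_next_extent to an atomic_t and claiming the slot with atomic_inc_return() closes the window where two racing loggers could read the same counter and fill the same extent slot; subtracting 1 converts the post-increment return value back into the index this caller owns. The slot-claiming idiom on its own:

#include <linux/atomic.h>

static atomic_t next_slot = ATOMIC_INIT(0);

static int claim_slot(void)
{
	/*
	 * atomic_inc_return() yields the value *after* the increment,
	 * so this caller's slot is one less. Concurrent callers are
	 * guaranteed distinct indices.
	 */
	return atomic_inc_return(&next_slot) - 1;
}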
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 62da86c90de5..35162c238fa3 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -22,15 +22,17 @@ struct xfs_log_item;
22struct xfs_log_item_desc; 22struct xfs_log_item_desc;
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25struct xfs_ail;
26struct xfs_log_vec;
25 27
26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 28void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27void xfs_trans_del_item(struct xfs_log_item *); 29void xfs_trans_del_item(struct xfs_log_item *);
28void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 30void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
29 int flags); 31 int flags);
30void xfs_trans_item_committed(struct xfs_log_item *lip,
31 xfs_lsn_t commit_lsn, int aborted);
32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
33 33
34void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
35 xfs_lsn_t commit_lsn, int aborted);
34/* 36/*
35 * AIL traversal cursor. 37 * AIL traversal cursor.
36 * 38 *
@@ -73,12 +75,29 @@ struct xfs_ail {
73/* 75/*
74 * From xfs_trans_ail.c 76 * From xfs_trans_ail.c
75 */ 77 */
76void xfs_trans_ail_update(struct xfs_ail *ailp, 78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
77 struct xfs_log_item *lip, xfs_lsn_t lsn) 79 struct xfs_log_item **log_items, int nr_items,
78 __releases(ailp->xa_lock); 80 xfs_lsn_t lsn) __releases(ailp->xa_lock);
79void xfs_trans_ail_delete(struct xfs_ail *ailp, 81static inline void
80 struct xfs_log_item *lip) 82xfs_trans_ail_update(
81 __releases(ailp->xa_lock); 83 struct xfs_ail *ailp,
84 struct xfs_log_item *lip,
85 xfs_lsn_t lsn) __releases(ailp->xa_lock)
86{
87 xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
88}
89
90void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
91 struct xfs_log_item **log_items, int nr_items)
92 __releases(ailp->xa_lock);
93static inline void
94xfs_trans_ail_delete(
95 struct xfs_ail *ailp,
96 xfs_log_item_t *lip) __releases(ailp->xa_lock)
97{
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99}
100
82void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
83void xfs_trans_unlocked_item(struct xfs_ail *, 102void xfs_trans_unlocked_item(struct xfs_ail *,
84 xfs_log_item_t *); 103 xfs_log_item_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c4151a..d8e6f8cd6f0c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
965 } 965 }
966 966
967 if (ip->i_d.di_nlink != 0) { 967 if (ip->i_d.di_nlink == 0)
968 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 968 return 0;
969 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
970 ip->i_delayed_blks > 0)) &&
971 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
972 (!(ip->i_d.di_flags &
973 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
974 969
975 /* 970 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
976 * If we can't get the iolock just skip truncating 971 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
977 * the blocks past EOF because we could deadlock 972 ip->i_delayed_blks > 0)) &&
978 * with the mmap_sem otherwise. We'll get another 973 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
979 * chance to drop them once the last reference to 974 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
980 * the inode is dropped, so we'll never leak blocks
981 * permanently.
982 */
983 error = xfs_free_eofblocks(mp, ip,
984 XFS_FREE_EOF_TRYLOCK);
985 if (error)
986 return error;
987 }
988 }
989 975
976 /*
977 * If we can't get the iolock just skip truncating the blocks
978 * past EOF because we could deadlock with the mmap_sem
979 * otherwise. We'll get another chance to drop them once the
980 * last reference to the inode is dropped, so we'll never leak
981 * blocks permanently.
982 *
983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks
985 * oustanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to
987 * occur.
988 *
989 * In this case don't do the truncation, either, but we have to
990 * be careful how we detect this case. Blocks beyond EOF show
991 * up as i_delayed_blks even when the inode is clean, so we
992 * need to truncate them away first before checking for a dirty
993 * release. Hence on the first dirty close we will still remove
994 * the speculative allocation, but after that we will leave it
995 * in place.
996 */
997 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
998 return 0;
999
1000 error = xfs_free_eofblocks(mp, ip,
1001 XFS_FREE_EOF_TRYLOCK);
1002 if (error)
1003 return error;
1004
1005 /* delalloc blocks after truncation means it really is dirty */
1006 if (ip->i_delayed_blks)
1007 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1008 }
990 return 0; 1009 return 0;
991} 1010}
992 1011
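The rewritten xfs_release() turns EOF-block trimming into a once-per-inode decision: the first dirty close still frees the speculative preallocation, but if delalloc blocks survive that trim the inode is flagged XFS_IDIRTY_RELEASE and later closes leave the preallocation alone, which is what prevents fragmentation for repeated open/write/close patterns such as NFS server writes. A condensed, self-contained restatement of that state machine (plain fields in place of the xfs_iflags helpers, and a stub trim function):

/* hypothetical stand-ins for the inode flags and fields used above */
struct inode_state {
	unsigned int	flags;
	long		delayed_blks;	/* outstanding delalloc blocks */
};
#define IDIRTY_RELEASE	0x1

static int free_eofblocks(struct inode_state *ip)
{
	return 0;	/* pretend to trim blocks past EOF */
}

static int release(struct inode_state *ip)
{
	if (ip->flags & IDIRTY_RELEASE)
		return 0;	/* decided earlier: keep the prealloc */

	if (free_eofblocks(ip))
		return -1;

	/* delalloc blocks after the trim mean a genuinely dirty close */
	if (ip->delayed_blks)
		ip->flags |= IDIRTY_RELEASE;
	return 0;
}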