aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2012-05-30 15:11:26 -0400
committerH. Peter Anvin <hpa@zytor.com>2012-05-30 15:11:32 -0400
commitbbd771474ec44b516107685d77e1c80bbe09f141 (patch)
tree0cb15781539a68f27b4ea6c89f827282630cbce6 /fs
parent403e1c5b7495d7b80fae9fc4d0a7a6f5abdc3307 (diff)
parent319b6ffc6df892e4ccffff823cc5521a4a5d2dca (diff)
Merge branch 'x86/trampoline' into x86/urgent
x86/trampoline contains an urgent commit which is necessarily on a newer baseline. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/bad_inode.c1
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/cifs/Kconfig20
-rw-r--r--fs/cifs/Makefile4
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cifs_debug.c56
-rw-r--r--fs/cifs/cifs_debug.h4
-rw-r--r--fs/cifs/cifsfs.c25
-rw-r--r--fs/cifs/cifsglob.h107
-rw-r--r--fs/cifs/cifsproto.h19
-rw-r--r--fs/cifs/cifssmb.c181
-rw-r--r--fs/cifs/connect.c178
-rw-r--r--fs/cifs/file.c683
-rw-r--r--fs/cifs/ioctl.c8
-rw-r--r--fs/cifs/misc.c66
-rw-r--r--fs/cifs/readdir.c15
-rw-r--r--fs/cifs/smb1ops.c154
-rw-r--r--fs/cifs/smb2ops.c27
-rw-r--r--fs/cifs/transport.c76
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/debugfs/file.c128
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/exofs.h14
-rw-r--r--fs/exofs/inode.c4
-rw-r--r--fs/exofs/super.c16
-rw-r--r--fs/exofs/sys.c200
-rw-r--r--fs/ext2/balloc.c4
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c18
-rw-r--r--fs/ext2/xattr.c1
-rw-r--r--fs/ext3/dir.c167
-rw-r--r--fs/ext3/ext3.h6
-rw-r--r--fs/ext3/hash.c4
-rw-r--r--fs/ext3/ialloc.c20
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fs-writeback.c336
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c15
-rw-r--r--fs/jbd/checkpoint.c23
-rw-r--r--fs/jbd/commit.c21
-rw-r--r--fs/jbd/journal.c206
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/minix/inode.c2
-rw-r--r--fs/namei.c22
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c90
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c2
-rw-r--r--fs/nfs/client.c268
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c39
-rw-r--r--fs/nfs/direct.c746
-rw-r--r--fs/nfs/file.c8
-rw-r--r--fs/nfs/fscache.c15
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c85
-rw-r--r--fs/nfs/idmap.c30
-rw-r--r--fs/nfs/inode.c122
-rw-r--r--fs/nfs/internal.h135
-rw-r--r--fs/nfs/namespace.c103
-rw-r--r--fs/nfs/netns.h5
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3proc.c27
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs4_fs.h23
-rw-r--r--fs/nfs/nfs4filelayout.c688
-rw-r--r--fs/nfs/nfs4filelayout.h63
-rw-r--r--fs/nfs/nfs4filelayoutdev.c102
-rw-r--r--fs/nfs/nfs4namespace.c55
-rw-r--r--fs/nfs/nfs4proc.c537
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4state.c225
-rw-r--r--fs/nfs/nfs4xdr.c399
-rw-r--r--fs/nfs/objlayout/objio_osd.c18
-rw-r--r--fs/nfs/objlayout/objlayout.c19
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.c352
-rw-r--r--fs/nfs/pnfs.h127
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c437
-rw-r--r--fs/nfs/super.c760
-rw-r--r--fs/nfs/write.c809
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/base.c5
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/quota/dquot.c32
-rw-r--r--fs/reiserfs/inode.c4
-rw-r--r--fs/reiserfs/super.c6
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/xfs_super.c2
125 files changed, 5736 insertions, 3786 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 014c8dd62962..57ccb7537dae 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
448 struct v9fs_inode *v9inode = V9FS_I(inode); 448 struct v9fs_inode *v9inode = V9FS_I(inode);
449 449
450 truncate_inode_pages(inode->i_mapping, 0); 450 truncate_inode_pages(inode->i_mapping, 0);
451 end_writeback(inode); 451 clear_inode(inode);
452 filemap_fdatawrite(inode->i_mapping); 452 filemap_fdatawrite(inode->i_mapping);
453 453
454#ifdef CONFIG_9P_FSCACHE 454#ifdef CONFIG_9P_FSCACHE
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 88a4b0b50058..8bc4a59f4e7e 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode)
264 } 264 }
265 265
266 invalidate_inode_buffers(inode); 266 invalidate_inode_buffers(inode);
267 end_writeback(inode); 267 clear_inode(inode);
268 affs_free_prealloc(inode); 268 affs_free_prealloc(inode);
269 cache_page = (unsigned long)AFFS_I(inode)->i_lc; 269 cache_page = (unsigned long)AFFS_I(inode)->i_lc;
270 if (cache_page) { 270 if (cache_page) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d890ae3b2ce6..95cffd38239f 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode)
423 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); 423 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
424 424
425 truncate_inode_pages(&inode->i_data, 0); 425 truncate_inode_pages(&inode->i_data, 0);
426 end_writeback(inode); 426 clear_inode(inode);
427 427
428 afs_give_up_callback(vnode); 428 afs_give_up_callback(vnode);
429 429
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 6e488ebe7784..8a4fed8ead30 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -100,7 +100,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
100 100
101static void autofs4_evict_inode(struct inode *inode) 101static void autofs4_evict_inode(struct inode *inode)
102{ 102{
103 end_writeback(inode); 103 clear_inode(inode);
104 kfree(inode->i_private); 104 kfree(inode->i_private);
105} 105}
106 106
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 37268c5bb98b..1b35d6bd06b0 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops =
292 .getxattr = bad_inode_getxattr, 292 .getxattr = bad_inode_getxattr,
293 .listxattr = bad_inode_listxattr, 293 .listxattr = bad_inode_listxattr,
294 .removexattr = bad_inode_removexattr, 294 .removexattr = bad_inode_removexattr,
295 /* truncate_range returns void */
296}; 295};
297 296
298 297
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index e23dc7c8b884..9870417c26e7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode)
174 174
175 truncate_inode_pages(&inode->i_data, 0); 175 truncate_inode_pages(&inode->i_data, 0);
176 invalidate_inode_buffers(inode); 176 invalidate_inode_buffers(inode);
177 end_writeback(inode); 177 clear_inode(inode);
178 178
179 if (inode->i_nlink) 179 if (inode->i_nlink)
180 return; 180 return;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 613aa0618235..790b3cddca67 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -505,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
505 505
506static void bm_evict_inode(struct inode *inode) 506static void bm_evict_inode(struct inode *inode)
507{ 507{
508 end_writeback(inode); 508 clear_inode(inode);
509 kfree(inode->i_private); 509 kfree(inode->i_private);
510} 510}
511 511
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba11c30f302d..c2bbe1fb1326 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -487,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode)
487 struct list_head *p; 487 struct list_head *p;
488 truncate_inode_pages(&inode->i_data, 0); 488 truncate_inode_pages(&inode->i_data, 0);
489 invalidate_inode_buffers(inode); /* is it needed here? */ 489 invalidate_inode_buffers(inode); /* is it needed here? */
490 end_writeback(inode); 490 clear_inode(inode);
491 spin_lock(&bdev_lock); 491 spin_lock(&bdev_lock);
492 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { 492 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
493 __bd_forget(list_entry(p, struct inode, i_devices)); 493 __bd_forget(list_entry(p, struct inode, i_devices));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 61b16c641ce0..ceb7b9c9edcc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3756,7 +3756,7 @@ void btrfs_evict_inode(struct inode *inode)
3756 btrfs_end_transaction(trans, root); 3756 btrfs_end_transaction(trans, root);
3757 btrfs_btree_balance_dirty(root, nr); 3757 btrfs_btree_balance_dirty(root, nr);
3758no_delete: 3758no_delete:
3759 end_writeback(inode); 3759 clear_inode(inode);
3760 return; 3760 return;
3761} 3761}
3762 3762
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 2b243af70aa3..a08306a8bec9 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -158,3 +158,23 @@ config CIFS_NFSD_EXPORT
158 depends on CIFS && EXPERIMENTAL && BROKEN 158 depends on CIFS && EXPERIMENTAL && BROKEN
159 help 159 help
160 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 160 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
161
162config CIFS_SMB2
163 bool "SMB2 network file system support (EXPERIMENTAL)"
164 depends on EXPERIMENTAL && INET && BROKEN
165 select NLS
166 select KEYS
167 select FSCACHE
168 select DNS_RESOLVER
169
170 help
171 This enables experimental support for the SMB2 (Server Message Block
172 version 2) protocol. The SMB2 protocol is the successor to the
173 popular CIFS and SMB network file sharing protocols. SMB2 is the
174 native file sharing mechanism for recent versions of Windows
175 operating systems (since Vista). SMB2 enablement will eventually
176 allow users better performance, security and features, than would be
177 possible with cifs. Note that smb2 mount options also are simpler
178 (compared to cifs) due to protocol improvements.
179
180 Unless you are a developer or tester, say N.
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 005d524c3a4a..4b4127544349 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_CIFS) += cifs.o
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o smb1ops.o
10 10
11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o 11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
12 12
@@ -15,3 +15,5 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
15cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o 15cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
16 16
17cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o 17cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
18
19cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o
diff --git a/fs/cifs/README b/fs/cifs/README
index b7d782bab797..22ab7b5b8da7 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -608,11 +608,6 @@ Stats Lists summary resource usage information as well as per
608 in the kernel configuration. 608 in the kernel configuration.
609 609
610Configuration pseudo-files: 610Configuration pseudo-files:
611MultiuserMount If set to one, more than one CIFS session to
612 the same server ip address can be established
613 if more than one uid accesses the same mount
614 point and if the uids user/password mapping
615 information is available. (default is 0)
616PacketSigningEnabled If set to one, cifs packet signing is enabled 611PacketSigningEnabled If set to one, cifs packet signing is enabled
617 and will be used if the server requires 612 and will be used if the server requires
618 it. If set to two, cifs packet signing is 613 it. If set to two, cifs packet signing is
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 270464629416..e8140528ca5c 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -57,19 +57,21 @@ cifs_dump_mem(char *label, void *data, int length)
57 } 57 }
58} 58}
59 59
60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(void *buf) 60void cifs_dump_detail(void *buf)
62{ 61{
62#ifdef CONFIG_CIFS_DEBUG2
63 struct smb_hdr *smb = (struct smb_hdr *)buf; 63 struct smb_hdr *smb = (struct smb_hdr *)buf;
64 64
65 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 65 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
66 smb->Command, smb->Status.CifsError, 66 smb->Command, smb->Status.CifsError,
67 smb->Flags, smb->Flags2, smb->Mid, smb->Pid); 67 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
68 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb)); 68 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
69#endif /* CONFIG_CIFS_DEBUG2 */
69} 70}
70 71
71void cifs_dump_mids(struct TCP_Server_Info *server) 72void cifs_dump_mids(struct TCP_Server_Info *server)
72{ 73{
74#ifdef CONFIG_CIFS_DEBUG2
73 struct list_head *tmp; 75 struct list_head *tmp;
74 struct mid_q_entry *mid_entry; 76 struct mid_q_entry *mid_entry;
75 77
@@ -102,8 +104,8 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
102 } 104 }
103 } 105 }
104 spin_unlock(&GlobalMid_Lock); 106 spin_unlock(&GlobalMid_Lock);
105}
106#endif /* CONFIG_CIFS_DEBUG2 */ 107#endif /* CONFIG_CIFS_DEBUG2 */
108}
107 109
108#ifdef CONFIG_PROC_FS 110#ifdef CONFIG_PROC_FS
109static int cifs_debug_data_proc_show(struct seq_file *m, void *v) 111static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
@@ -420,7 +422,6 @@ static struct proc_dir_entry *proc_fs_cifs;
420static const struct file_operations cifsFYI_proc_fops; 422static const struct file_operations cifsFYI_proc_fops;
421static const struct file_operations cifs_lookup_cache_proc_fops; 423static const struct file_operations cifs_lookup_cache_proc_fops;
422static const struct file_operations traceSMB_proc_fops; 424static const struct file_operations traceSMB_proc_fops;
423static const struct file_operations cifs_multiuser_mount_proc_fops;
424static const struct file_operations cifs_security_flags_proc_fops; 425static const struct file_operations cifs_security_flags_proc_fops;
425static const struct file_operations cifs_linux_ext_proc_fops; 426static const struct file_operations cifs_linux_ext_proc_fops;
426 427
@@ -440,8 +441,6 @@ cifs_proc_init(void)
440 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); 441 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
441 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, 442 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
442 &cifs_linux_ext_proc_fops); 443 &cifs_linux_ext_proc_fops);
443 proc_create("MultiuserMount", 0, proc_fs_cifs,
444 &cifs_multiuser_mount_proc_fops);
445 proc_create("SecurityFlags", 0, proc_fs_cifs, 444 proc_create("SecurityFlags", 0, proc_fs_cifs,
446 &cifs_security_flags_proc_fops); 445 &cifs_security_flags_proc_fops);
447 proc_create("LookupCacheEnabled", 0, proc_fs_cifs, 446 proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
@@ -460,7 +459,6 @@ cifs_proc_clean(void)
460#ifdef CONFIG_CIFS_STATS 459#ifdef CONFIG_CIFS_STATS
461 remove_proc_entry("Stats", proc_fs_cifs); 460 remove_proc_entry("Stats", proc_fs_cifs);
462#endif 461#endif
463 remove_proc_entry("MultiuserMount", proc_fs_cifs);
464 remove_proc_entry("SecurityFlags", proc_fs_cifs); 462 remove_proc_entry("SecurityFlags", proc_fs_cifs);
465 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 463 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
466 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 464 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
@@ -617,52 +615,6 @@ static const struct file_operations traceSMB_proc_fops = {
617 .write = traceSMB_proc_write, 615 .write = traceSMB_proc_write,
618}; 616};
619 617
620static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v)
621{
622 seq_printf(m, "%d\n", multiuser_mount);
623 return 0;
624}
625
626static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh)
627{
628 return single_open(fh, cifs_multiuser_mount_proc_show, NULL);
629}
630
631static ssize_t cifs_multiuser_mount_proc_write(struct file *file,
632 const char __user *buffer, size_t count, loff_t *ppos)
633{
634 char c;
635 int rc;
636 static bool warned;
637
638 rc = get_user(c, buffer);
639 if (rc)
640 return rc;
641 if (c == '0' || c == 'n' || c == 'N')
642 multiuser_mount = 0;
643 else if (c == '1' || c == 'y' || c == 'Y') {
644 multiuser_mount = 1;
645 if (!warned) {
646 warned = true;
647 printk(KERN_WARNING "CIFS VFS: The legacy multiuser "
648 "mount code is scheduled to be deprecated in "
649 "3.5. Please switch to using the multiuser "
650 "mount option.");
651 }
652 }
653
654 return count;
655}
656
657static const struct file_operations cifs_multiuser_mount_proc_fops = {
658 .owner = THIS_MODULE,
659 .open = cifs_multiuser_mount_proc_open,
660 .read = seq_read,
661 .llseek = seq_lseek,
662 .release = single_release,
663 .write = cifs_multiuser_mount_proc_write,
664};
665
666static int cifs_security_flags_proc_show(struct seq_file *m, void *v) 618static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
667{ 619{
668 seq_printf(m, "0x%x\n", global_secflags); 620 seq_printf(m, "0x%x\n", global_secflags);
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 566e0ae8dc2c..c0c68bb492d7 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -24,10 +24,10 @@
24#define _H_CIFS_DEBUG 24#define _H_CIFS_DEBUG
25 25
26void cifs_dump_mem(char *label, void *data, int length); 26void cifs_dump_mem(char *label, void *data, int length);
27#ifdef CONFIG_CIFS_DEBUG2
28#define DBG2 2
29void cifs_dump_detail(void *); 27void cifs_dump_detail(void *);
30void cifs_dump_mids(struct TCP_Server_Info *); 28void cifs_dump_mids(struct TCP_Server_Info *);
29#ifdef CONFIG_CIFS_DEBUG2
30#define DBG2 2
31#else 31#else
32#define DBG2 0 32#define DBG2 0
33#endif 33#endif
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 541ef81f6ae8..8b6e344eb0ba 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -56,7 +56,6 @@ int traceSMB = 0;
56bool enable_oplocks = true; 56bool enable_oplocks = true;
57unsigned int linuxExtEnabled = 1; 57unsigned int linuxExtEnabled = 1;
58unsigned int lookupCacheEnabled = 1; 58unsigned int lookupCacheEnabled = 1;
59unsigned int multiuser_mount = 0;
60unsigned int global_secflags = CIFSSEC_DEF; 59unsigned int global_secflags = CIFSSEC_DEF;
61/* unsigned int ntlmv2_support = 0; */ 60/* unsigned int ntlmv2_support = 0; */
62unsigned int sign_CIFS_PDUs = 1; 61unsigned int sign_CIFS_PDUs = 1;
@@ -125,7 +124,7 @@ cifs_read_super(struct super_block *sb)
125 goto out_no_root; 124 goto out_no_root;
126 } 125 }
127 126
128 /* do that *after* d_alloc_root() - we want NULL ->d_op for root here */ 127 /* do that *after* d_make_root() - we want NULL ->d_op for root here */
129 if (cifs_sb_master_tcon(cifs_sb)->nocase) 128 if (cifs_sb_master_tcon(cifs_sb)->nocase)
130 sb->s_d_op = &cifs_ci_dentry_ops; 129 sb->s_d_op = &cifs_ci_dentry_ops;
131 else 130 else
@@ -272,7 +271,7 @@ static void
272cifs_evict_inode(struct inode *inode) 271cifs_evict_inode(struct inode *inode)
273{ 272{
274 truncate_inode_pages(&inode->i_data, 0); 273 truncate_inode_pages(&inode->i_data, 0);
275 end_writeback(inode); 274 clear_inode(inode);
276 cifs_fscache_release_inode_cookie(inode); 275 cifs_fscache_release_inode_cookie(inode);
277} 276}
278 277
@@ -329,6 +328,19 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
329 seq_printf(s, "i"); 328 seq_printf(s, "i");
330} 329}
331 330
331static void
332cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
333{
334 seq_printf(s, ",cache=");
335
336 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
337 seq_printf(s, "strict");
338 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
339 seq_printf(s, "none");
340 else
341 seq_printf(s, "loose");
342}
343
332/* 344/*
333 * cifs_show_options() is for displaying mount options in /proc/mounts. 345 * cifs_show_options() is for displaying mount options in /proc/mounts.
334 * Not all settable options are displayed but most of the important 346 * Not all settable options are displayed but most of the important
@@ -342,7 +354,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
342 struct sockaddr *srcaddr; 354 struct sockaddr *srcaddr;
343 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 355 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
344 356
357 seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string);
345 cifs_show_security(s, tcon->ses->server); 358 cifs_show_security(s, tcon->ses->server);
359 cifs_show_cache_flavor(s, cifs_sb);
346 360
347 seq_printf(s, ",unc=%s", tcon->treeName); 361 seq_printf(s, ",unc=%s", tcon->treeName);
348 362
@@ -408,8 +422,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
408 seq_printf(s, ",rwpidforward"); 422 seq_printf(s, ",rwpidforward");
409 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) 423 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL)
410 seq_printf(s, ",forcemand"); 424 seq_printf(s, ",forcemand");
411 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
412 seq_printf(s, ",directio");
413 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 425 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
414 seq_printf(s, ",nouser_xattr"); 426 seq_printf(s, ",nouser_xattr");
415 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 427 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
@@ -432,8 +444,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
432 seq_printf(s, ",nostrictsync"); 444 seq_printf(s, ",nostrictsync");
433 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 445 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
434 seq_printf(s, ",noperm"); 446 seq_printf(s, ",noperm");
435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
436 seq_printf(s, ",strictcache");
437 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) 447 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
438 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid); 448 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid);
439 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) 449 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
@@ -945,7 +955,6 @@ cifs_init_once(void *inode)
945 struct cifsInodeInfo *cifsi = inode; 955 struct cifsInodeInfo *cifsi = inode;
946 956
947 inode_init_once(&cifsi->vfs_inode); 957 inode_init_once(&cifsi->vfs_inode);
948 INIT_LIST_HEAD(&cifsi->llist);
949 mutex_init(&cifsi->lock_mutex); 958 mutex_init(&cifsi->lock_mutex);
950} 959}
951 960
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4ff6313f0a91..20350a93ed99 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -43,6 +43,7 @@
43 43
44#define CIFS_MIN_RCV_POOL 4 44#define CIFS_MIN_RCV_POOL 4
45 45
46#define MAX_REOPEN_ATT 5 /* these many maximum attempts to reopen a file */
46/* 47/*
47 * default attribute cache timeout (jiffies) 48 * default attribute cache timeout (jiffies)
48 */ 49 */
@@ -150,6 +151,57 @@ struct cifs_cred {
150 ***************************************************************** 151 *****************************************************************
151 */ 152 */
152 153
154enum smb_version {
155 Smb_1 = 1,
156 Smb_21,
157};
158
159struct mid_q_entry;
160struct TCP_Server_Info;
161struct cifsFileInfo;
162struct cifs_ses;
163
164struct smb_version_operations {
165 int (*send_cancel)(struct TCP_Server_Info *, void *,
166 struct mid_q_entry *);
167 bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *);
168 /* setup request: allocate mid, sign message */
169 int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int,
170 struct mid_q_entry **);
171 /* check response: verify signature, map error */
172 int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *,
173 bool);
174 void (*add_credits)(struct TCP_Server_Info *, const unsigned int);
175 void (*set_credits)(struct TCP_Server_Info *, const int);
176 int * (*get_credits_field)(struct TCP_Server_Info *);
177 /* data offset from read response message */
178 unsigned int (*read_data_offset)(char *);
179 /* data length from read response message */
180 unsigned int (*read_data_length)(char *);
181 /* map smb to linux error */
182 int (*map_error)(char *, bool);
183 /* find mid corresponding to the response message */
184 struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *);
185 void (*dump_detail)(void *);
186 /* verify the message */
187 int (*check_message)(char *, unsigned int);
188 bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
189};
190
191struct smb_version_values {
192 char *version_string;
193 __u32 large_lock_type;
194 __u32 exclusive_lock_type;
195 __u32 shared_lock_type;
196 __u32 unlock_lock_type;
197 size_t header_size;
198 size_t max_header_size;
199 size_t read_rsp_size;
200};
201
202#define HEADER_SIZE(server) (server->vals->header_size)
203#define MAX_HEADER_SIZE(server) (server->vals->max_header_size)
204
153struct smb_vol { 205struct smb_vol {
154 char *username; 206 char *username;
155 char *password; 207 char *password;
@@ -205,6 +257,8 @@ struct smb_vol {
205 bool sockopt_tcp_nodelay:1; 257 bool sockopt_tcp_nodelay:1;
206 unsigned short int port; 258 unsigned short int port;
207 unsigned long actimeo; /* attribute cache timeout (jiffies) */ 259 unsigned long actimeo; /* attribute cache timeout (jiffies) */
260 struct smb_version_operations *ops;
261 struct smb_version_values *vals;
208 char *prepath; 262 char *prepath;
209 struct sockaddr_storage srcaddr; /* allow binding to a local IP */ 263 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
210 struct nls_table *local_nls; 264 struct nls_table *local_nls;
@@ -242,6 +296,8 @@ struct TCP_Server_Info {
242 int srv_count; /* reference counter */ 296 int srv_count; /* reference counter */
243 /* 15 character server name + 0x20 16th byte indicating type = srv */ 297 /* 15 character server name + 0x20 16th byte indicating type = srv */
244 char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 298 char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
299 struct smb_version_operations *ops;
300 struct smb_version_values *vals;
245 enum statusEnum tcpStatus; /* what we think the status is */ 301 enum statusEnum tcpStatus; /* what we think the status is */
246 char *hostname; /* hostname portion of UNC string */ 302 char *hostname; /* hostname portion of UNC string */
247 struct socket *ssocket; 303 struct socket *ssocket;
@@ -321,16 +377,6 @@ in_flight(struct TCP_Server_Info *server)
321 return num; 377 return num;
322} 378}
323 379
324static inline int*
325get_credits_field(struct TCP_Server_Info *server)
326{
327 /*
328 * This will change to switch statement when we reserve slots for echos
329 * and oplock breaks.
330 */
331 return &server->credits;
332}
333
334static inline bool 380static inline bool
335has_credits(struct TCP_Server_Info *server, int *credits) 381has_credits(struct TCP_Server_Info *server, int *credits)
336{ 382{
@@ -341,16 +387,16 @@ has_credits(struct TCP_Server_Info *server, int *credits)
341 return num > 0; 387 return num > 0;
342} 388}
343 389
344static inline size_t 390static inline void
345header_size(void) 391add_credits(struct TCP_Server_Info *server, const unsigned int add)
346{ 392{
347 return sizeof(struct smb_hdr); 393 server->ops->add_credits(server, add);
348} 394}
349 395
350static inline size_t 396static inline void
351max_header_size(void) 397set_credits(struct TCP_Server_Info *server, const int val)
352{ 398{
353 return MAX_CIFS_HDR_SIZE; 399 server->ops->set_credits(server, val);
354} 400}
355 401
356/* 402/*
@@ -547,8 +593,7 @@ struct cifsLockInfo {
547 __u64 offset; 593 __u64 offset;
548 __u64 length; 594 __u64 length;
549 __u32 pid; 595 __u32 pid;
550 __u8 type; 596 __u32 type;
551 __u16 netfid;
552}; 597};
553 598
554/* 599/*
@@ -573,6 +618,10 @@ struct cifs_search_info {
573struct cifsFileInfo { 618struct cifsFileInfo {
574 struct list_head tlist; /* pointer to next fid owned by tcon */ 619 struct list_head tlist; /* pointer to next fid owned by tcon */
575 struct list_head flist; /* next fid (file instance) for this inode */ 620 struct list_head flist; /* next fid (file instance) for this inode */
621 struct list_head llist; /*
622 * brlocks held by this fid, protected by
623 * lock_mutex from cifsInodeInfo structure
624 */
576 unsigned int uid; /* allows finding which FileInfo structure */ 625 unsigned int uid; /* allows finding which FileInfo structure */
577 __u32 pid; /* process id who opened file */ 626 __u32 pid; /* process id who opened file */
578 __u16 netfid; /* file id from remote */ 627 __u16 netfid; /* file id from remote */
@@ -615,9 +664,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
615 */ 664 */
616 665
617struct cifsInodeInfo { 666struct cifsInodeInfo {
618 struct list_head llist; /* brlocks for this inode */
619 bool can_cache_brlcks; 667 bool can_cache_brlcks;
620 struct mutex lock_mutex; /* protect two fields above */ 668 struct mutex lock_mutex; /*
669 * protect the field above and llist
670 * from every cifsFileInfo structure
671 * from openFileList
672 */
621 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 673 /* BB add in lists for dirty pages i.e. write caching info for oplock */
622 struct list_head openFileList; 674 struct list_head openFileList;
623 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 675 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
@@ -703,7 +755,6 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon,
703 755
704#endif 756#endif
705 757
706struct mid_q_entry;
707 758
708/* 759/*
709 * This is the prototype for the mid receive function. This function is for 760 * This is the prototype for the mid receive function. This function is for
@@ -1042,12 +1093,7 @@ GLOBAL_EXTERN atomic_t smBufAllocCount;
1042GLOBAL_EXTERN atomic_t midCount; 1093GLOBAL_EXTERN atomic_t midCount;
1043 1094
1044/* Misc globals */ 1095/* Misc globals */
1045GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions 1096GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
1046 to be established on existing mount if we
1047 have the uid/password or Kerberos credential
1048 or equivalent for current user */
1049/* enable or disable oplocks */
1050GLOBAL_EXTERN bool enable_oplocks;
1051GLOBAL_EXTERN unsigned int lookupCacheEnabled; 1097GLOBAL_EXTERN unsigned int lookupCacheEnabled;
1052GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent 1098GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
1053 with more secure ntlmssp2 challenge/resp */ 1099 with more secure ntlmssp2 challenge/resp */
@@ -1074,4 +1120,11 @@ void cifs_oplock_break(struct work_struct *work);
1074extern const struct slow_work_ops cifs_oplock_break_ops; 1120extern const struct slow_work_ops cifs_oplock_break_ops;
1075extern struct workqueue_struct *cifsiod_wq; 1121extern struct workqueue_struct *cifsiod_wq;
1076 1122
1123/* Operations for different SMB versions */
1124#define SMB1_VERSION_STRING "1.0"
1125extern struct smb_version_operations smb1_operations;
1126extern struct smb_version_values smb1_values;
1127#define SMB21_VERSION_STRING "2.1"
1128extern struct smb_version_operations smb21_operations;
1129extern struct smb_version_values smb21_values;
1077#endif /* _CIFS_GLOB_H */ 1130#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 96192c1e380a..5ec21ecf7980 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -78,6 +78,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
78 int * /* bytes returned */ , const int long_op); 78 int * /* bytes returned */ , const int long_op);
79extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, 79extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
80 char *in_buf, int flags); 80 char *in_buf, int flags);
81extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int,
82 struct mid_q_entry **);
81extern int cifs_check_receive(struct mid_q_entry *mid, 83extern int cifs_check_receive(struct mid_q_entry *mid,
82 struct TCP_Server_Info *server, bool log_error); 84 struct TCP_Server_Info *server, bool log_error);
83extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 85extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
@@ -88,9 +90,6 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
88 struct smb_hdr *in_buf , 90 struct smb_hdr *in_buf ,
89 struct smb_hdr *out_buf, 91 struct smb_hdr *out_buf,
90 int *bytes_returned); 92 int *bytes_returned);
91extern void cifs_add_credits(struct TCP_Server_Info *server,
92 const unsigned int add);
93extern void cifs_set_credits(struct TCP_Server_Info *server, const int val);
94extern int checkSMB(char *buf, unsigned int length); 93extern int checkSMB(char *buf, unsigned int length);
95extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *); 94extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
96extern bool backup_cred(struct cifs_sb_info *); 95extern bool backup_cred(struct cifs_sb_info *);
@@ -192,11 +191,13 @@ extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses,
192 191
193extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon, 192extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
194 const char *searchName, const struct nls_table *nls_codepage, 193 const char *searchName, const struct nls_table *nls_codepage,
195 __u16 *searchHandle, struct cifs_search_info *psrch_inf, 194 __u16 *searchHandle, __u16 search_flags,
195 struct cifs_search_info *psrch_inf,
196 int map, const char dirsep); 196 int map, const char dirsep);
197 197
198extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon, 198extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
199 __u16 searchHandle, struct cifs_search_info *psrch_inf); 199 __u16 searchHandle, __u16 search_flags,
200 struct cifs_search_info *psrch_inf);
200 201
201extern int CIFSFindClose(const int, struct cifs_tcon *tcon, 202extern int CIFSFindClose(const int, struct cifs_tcon *tcon,
202 const __u16 search_handle); 203 const __u16 search_handle);
@@ -464,6 +465,9 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
464 465
465/* asynchronous read support */ 466/* asynchronous read support */
466struct cifs_readdata { 467struct cifs_readdata {
468 struct kref refcount;
469 struct list_head list;
470 struct completion done;
467 struct cifsFileInfo *cfile; 471 struct cifsFileInfo *cfile;
468 struct address_space *mapping; 472 struct address_space *mapping;
469 __u64 offset; 473 __u64 offset;
@@ -472,12 +476,13 @@ struct cifs_readdata {
472 int result; 476 int result;
473 struct list_head pages; 477 struct list_head pages;
474 struct work_struct work; 478 struct work_struct work;
479 int (*marshal_iov) (struct cifs_readdata *rdata,
480 unsigned int remaining);
475 unsigned int nr_iov; 481 unsigned int nr_iov;
476 struct kvec iov[1]; 482 struct kvec iov[1];
477}; 483};
478 484
479struct cifs_readdata *cifs_readdata_alloc(unsigned int nr_pages); 485void cifs_readdata_release(struct kref *refcount);
480void cifs_readdata_free(struct cifs_readdata *rdata);
481int cifs_async_readv(struct cifs_readdata *rdata); 486int cifs_async_readv(struct cifs_readdata *rdata);
482 487
483/* asynchronous write support */ 488/* asynchronous write support */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index da2f5446fa7a..b5ad716b2642 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -87,7 +87,6 @@ static struct {
87#endif /* CIFS_POSIX */ 87#endif /* CIFS_POSIX */
88 88
89/* Forward declarations */ 89/* Forward declarations */
90static void cifs_readv_complete(struct work_struct *work);
91 90
92/* Mark as invalid, all open files on tree connections since they 91/* Mark as invalid, all open files on tree connections since they
93 were closed when session to server was lost */ 92 were closed when session to server was lost */
@@ -461,7 +460,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
461 server->maxReq = min_t(unsigned int, 460 server->maxReq = min_t(unsigned int,
462 le16_to_cpu(rsp->MaxMpxCount), 461 le16_to_cpu(rsp->MaxMpxCount),
463 cifs_max_pending); 462 cifs_max_pending);
464 cifs_set_credits(server, server->maxReq); 463 set_credits(server, server->maxReq);
465 server->maxBuf = le16_to_cpu(rsp->MaxBufSize); 464 server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
466 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); 465 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
467 /* even though we do not use raw we might as well set this 466 /* even though we do not use raw we might as well set this
@@ -569,7 +568,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
569 little endian */ 568 little endian */
570 server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), 569 server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount),
571 cifs_max_pending); 570 cifs_max_pending);
572 cifs_set_credits(server, server->maxReq); 571 set_credits(server, server->maxReq);
573 /* probably no need to store and check maxvcs */ 572 /* probably no need to store and check maxvcs */
574 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); 573 server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize);
575 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); 574 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
@@ -721,7 +720,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
721 struct TCP_Server_Info *server = mid->callback_data; 720 struct TCP_Server_Info *server = mid->callback_data;
722 721
723 DeleteMidQEntry(mid); 722 DeleteMidQEntry(mid);
724 cifs_add_credits(server, 1); 723 add_credits(server, 1);
725} 724}
726 725
727int 726int
@@ -1385,28 +1384,6 @@ openRetry:
1385 return rc; 1384 return rc;
1386} 1385}
1387 1386
1388struct cifs_readdata *
1389cifs_readdata_alloc(unsigned int nr_pages)
1390{
1391 struct cifs_readdata *rdata;
1392
1393 /* readdata + 1 kvec for each page */
1394 rdata = kzalloc(sizeof(*rdata) +
1395 sizeof(struct kvec) * nr_pages, GFP_KERNEL);
1396 if (rdata != NULL) {
1397 INIT_WORK(&rdata->work, cifs_readv_complete);
1398 INIT_LIST_HEAD(&rdata->pages);
1399 }
1400 return rdata;
1401}
1402
1403void
1404cifs_readdata_free(struct cifs_readdata *rdata)
1405{
1406 cifsFileInfo_put(rdata->cfile);
1407 kfree(rdata);
1408}
1409
1410/* 1387/*
1411 * Discard any remaining data in the current SMB. To do this, we borrow the 1388 * Discard any remaining data in the current SMB. To do this, we borrow the
1412 * current bigbuf. 1389 * current bigbuf.
@@ -1423,7 +1400,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1423 1400
1424 length = cifs_read_from_socket(server, server->bigbuf, 1401 length = cifs_read_from_socket(server, server->bigbuf,
1425 min_t(unsigned int, remaining, 1402 min_t(unsigned int, remaining,
1426 CIFSMaxBufSize + max_header_size())); 1403 CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
1427 if (length < 0) 1404 if (length < 0)
1428 return length; 1405 return length;
1429 server->total_read += length; 1406 server->total_read += length;
@@ -1434,38 +1411,14 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1434 return 0; 1411 return 0;
1435} 1412}
1436 1413
1437static inline size_t
1438read_rsp_size(void)
1439{
1440 return sizeof(READ_RSP);
1441}
1442
1443static inline unsigned int
1444read_data_offset(char *buf)
1445{
1446 READ_RSP *rsp = (READ_RSP *)buf;
1447 return le16_to_cpu(rsp->DataOffset);
1448}
1449
1450static inline unsigned int
1451read_data_length(char *buf)
1452{
1453 READ_RSP *rsp = (READ_RSP *)buf;
1454 return (le16_to_cpu(rsp->DataLengthHigh) << 16) +
1455 le16_to_cpu(rsp->DataLength);
1456}
1457
1458static int 1414static int
1459cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1415cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1460{ 1416{
1461 int length, len; 1417 int length, len;
1462 unsigned int data_offset, remaining, data_len; 1418 unsigned int data_offset, data_len;
1463 struct cifs_readdata *rdata = mid->callback_data; 1419 struct cifs_readdata *rdata = mid->callback_data;
1464 char *buf = server->smallbuf; 1420 char *buf = server->smallbuf;
1465 unsigned int buflen = get_rfc1002_length(buf) + 4; 1421 unsigned int buflen = get_rfc1002_length(buf) + 4;
1466 u64 eof;
1467 pgoff_t eof_index;
1468 struct page *page, *tpage;
1469 1422
1470 cFYI(1, "%s: mid=%llu offset=%llu bytes=%u", __func__, 1423 cFYI(1, "%s: mid=%llu offset=%llu bytes=%u", __func__,
1471 mid->mid, rdata->offset, rdata->bytes); 1424 mid->mid, rdata->offset, rdata->bytes);
@@ -1475,9 +1428,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1475 * can if there's not enough data. At this point, we've read down to 1428 * can if there's not enough data. At this point, we've read down to
1476 * the Mid. 1429 * the Mid.
1477 */ 1430 */
1478 len = min_t(unsigned int, buflen, read_rsp_size()) - header_size() + 1; 1431 len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
1432 HEADER_SIZE(server) + 1;
1479 1433
1480 rdata->iov[0].iov_base = buf + header_size() - 1; 1434 rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1;
1481 rdata->iov[0].iov_len = len; 1435 rdata->iov[0].iov_len = len;
1482 1436
1483 length = cifs_readv_from_socket(server, rdata->iov, 1, len); 1437 length = cifs_readv_from_socket(server, rdata->iov, 1, len);
@@ -1486,7 +1440,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1486 server->total_read += length; 1440 server->total_read += length;
1487 1441
1488 /* Was the SMB read successful? */ 1442 /* Was the SMB read successful? */
1489 rdata->result = map_smb_to_linux_error(buf, false); 1443 rdata->result = server->ops->map_error(buf, false);
1490 if (rdata->result != 0) { 1444 if (rdata->result != 0) {
1491 cFYI(1, "%s: server returned error %d", __func__, 1445 cFYI(1, "%s: server returned error %d", __func__,
1492 rdata->result); 1446 rdata->result);
@@ -1494,14 +1448,15 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1494 } 1448 }
1495 1449
1496 /* Is there enough to get to the rest of the READ_RSP header? */ 1450 /* Is there enough to get to the rest of the READ_RSP header? */
1497 if (server->total_read < read_rsp_size()) { 1451 if (server->total_read < server->vals->read_rsp_size) {
1498 cFYI(1, "%s: server returned short header. got=%u expected=%zu", 1452 cFYI(1, "%s: server returned short header. got=%u expected=%zu",
1499 __func__, server->total_read, read_rsp_size()); 1453 __func__, server->total_read,
1454 server->vals->read_rsp_size);
1500 rdata->result = -EIO; 1455 rdata->result = -EIO;
1501 return cifs_readv_discard(server, mid); 1456 return cifs_readv_discard(server, mid);
1502 } 1457 }
1503 1458
1504 data_offset = read_data_offset(buf) + 4; 1459 data_offset = server->ops->read_data_offset(buf) + 4;
1505 if (data_offset < server->total_read) { 1460 if (data_offset < server->total_read) {
1506 /* 1461 /*
1507 * win2k8 sometimes sends an offset of 0 when the read 1462 * win2k8 sometimes sends an offset of 0 when the read
@@ -1540,7 +1495,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1540 rdata->iov[0].iov_base, rdata->iov[0].iov_len); 1495 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1541 1496
1542 /* how much data is in the response? */ 1497 /* how much data is in the response? */
1543 data_len = read_data_length(buf); 1498 data_len = server->ops->read_data_length(buf);
1544 if (data_offset + data_len > buflen) { 1499 if (data_offset + data_len > buflen) {
1545 /* data_len is corrupt -- discard frame */ 1500 /* data_len is corrupt -- discard frame */
1546 rdata->result = -EIO; 1501 rdata->result = -EIO;
@@ -1548,64 +1503,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1548 } 1503 }
1549 1504
1550 /* marshal up the page array */ 1505 /* marshal up the page array */
1551 len = 0; 1506 len = rdata->marshal_iov(rdata, data_len);
1552 remaining = data_len; 1507 data_len -= len;
1553 rdata->nr_iov = 1;
1554
1555 /* determine the eof that the server (probably) has */
1556 eof = CIFS_I(rdata->mapping->host)->server_eof;
1557 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
1558 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
1559
1560 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
1561 if (remaining >= PAGE_CACHE_SIZE) {
1562 /* enough data to fill the page */
1563 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
1564 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
1565 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
1566 rdata->nr_iov, page->index,
1567 rdata->iov[rdata->nr_iov].iov_base,
1568 rdata->iov[rdata->nr_iov].iov_len);
1569 ++rdata->nr_iov;
1570 len += PAGE_CACHE_SIZE;
1571 remaining -= PAGE_CACHE_SIZE;
1572 } else if (remaining > 0) {
1573 /* enough for partial page, fill and zero the rest */
1574 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
1575 rdata->iov[rdata->nr_iov].iov_len = remaining;
1576 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
1577 rdata->nr_iov, page->index,
1578 rdata->iov[rdata->nr_iov].iov_base,
1579 rdata->iov[rdata->nr_iov].iov_len);
1580 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
1581 '\0', PAGE_CACHE_SIZE - remaining);
1582 ++rdata->nr_iov;
1583 len += remaining;
1584 remaining = 0;
1585 } else if (page->index > eof_index) {
1586 /*
1587 * The VFS will not try to do readahead past the
1588 * i_size, but it's possible that we have outstanding
1589 * writes with gaps in the middle and the i_size hasn't
1590 * caught up yet. Populate those with zeroed out pages
1591 * to prevent the VFS from repeatedly attempting to
1592 * fill them until the writes are flushed.
1593 */
1594 zero_user(page, 0, PAGE_CACHE_SIZE);
1595 list_del(&page->lru);
1596 lru_cache_add_file(page);
1597 flush_dcache_page(page);
1598 SetPageUptodate(page);
1599 unlock_page(page);
1600 page_cache_release(page);
1601 } else {
1602 /* no need to hold page hostage */
1603 list_del(&page->lru);
1604 lru_cache_add_file(page);
1605 unlock_page(page);
1606 page_cache_release(page);
1607 }
1608 }
1609 1508
1610 /* issue the read if we have any iovecs left to fill */ 1509 /* issue the read if we have any iovecs left to fill */
1611 if (rdata->nr_iov > 1) { 1510 if (rdata->nr_iov > 1) {
@@ -1621,7 +1520,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1621 rdata->bytes = length; 1520 rdata->bytes = length;
1622 1521
1623 cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, 1522 cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read,
1624 buflen, remaining); 1523 buflen, data_len);
1625 1524
1626 /* discard anything left over */ 1525 /* discard anything left over */
1627 if (server->total_read < buflen) 1526 if (server->total_read < buflen)
@@ -1632,33 +1531,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1632} 1531}
1633 1532
1634static void 1533static void
1635cifs_readv_complete(struct work_struct *work)
1636{
1637 struct cifs_readdata *rdata = container_of(work,
1638 struct cifs_readdata, work);
1639 struct page *page, *tpage;
1640
1641 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
1642 list_del(&page->lru);
1643 lru_cache_add_file(page);
1644
1645 if (rdata->result == 0) {
1646 kunmap(page);
1647 flush_dcache_page(page);
1648 SetPageUptodate(page);
1649 }
1650
1651 unlock_page(page);
1652
1653 if (rdata->result == 0)
1654 cifs_readpage_to_fscache(rdata->mapping->host, page);
1655
1656 page_cache_release(page);
1657 }
1658 cifs_readdata_free(rdata);
1659}
1660
1661static void
1662cifs_readv_callback(struct mid_q_entry *mid) 1534cifs_readv_callback(struct mid_q_entry *mid)
1663{ 1535{
1664 struct cifs_readdata *rdata = mid->callback_data; 1536 struct cifs_readdata *rdata = mid->callback_data;
@@ -1691,7 +1563,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
1691 1563
1692 queue_work(cifsiod_wq, &rdata->work); 1564 queue_work(cifsiod_wq, &rdata->work);
1693 DeleteMidQEntry(mid); 1565 DeleteMidQEntry(mid);
1694 cifs_add_credits(server, 1); 1566 add_credits(server, 1);
1695} 1567}
1696 1568
1697/* cifs_async_readv - send an async write, and set up mid to handle result */ 1569/* cifs_async_readv - send an async write, and set up mid to handle result */
@@ -1744,12 +1616,15 @@ cifs_async_readv(struct cifs_readdata *rdata)
1744 rdata->iov[0].iov_base = smb; 1616 rdata->iov[0].iov_base = smb;
1745 rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; 1617 rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
1746 1618
1619 kref_get(&rdata->refcount);
1747 rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, 1620 rc = cifs_call_async(tcon->ses->server, rdata->iov, 1,
1748 cifs_readv_receive, cifs_readv_callback, 1621 cifs_readv_receive, cifs_readv_callback,
1749 rdata, false); 1622 rdata, false);
1750 1623
1751 if (rc == 0) 1624 if (rc == 0)
1752 cifs_stats_inc(&tcon->num_reads); 1625 cifs_stats_inc(&tcon->num_reads);
1626 else
1627 kref_put(&rdata->refcount, cifs_readdata_release);
1753 1628
1754 cifs_small_buf_release(smb); 1629 cifs_small_buf_release(smb);
1755 return rc; 1630 return rc;
@@ -2135,7 +2010,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
2135 2010
2136 queue_work(cifsiod_wq, &wdata->work); 2011 queue_work(cifsiod_wq, &wdata->work);
2137 DeleteMidQEntry(mid); 2012 DeleteMidQEntry(mid);
2138 cifs_add_credits(tcon->ses->server, 1); 2013 add_credits(tcon->ses->server, 1);
2139} 2014}
2140 2015
2141/* cifs_async_writev - send an async write, and set up mid to handle result */ 2016/* cifs_async_writev - send an async write, and set up mid to handle result */
@@ -4344,7 +4219,7 @@ int
4344CIFSFindFirst(const int xid, struct cifs_tcon *tcon, 4219CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
4345 const char *searchName, 4220 const char *searchName,
4346 const struct nls_table *nls_codepage, 4221 const struct nls_table *nls_codepage,
4347 __u16 *pnetfid, 4222 __u16 *pnetfid, __u16 search_flags,
4348 struct cifs_search_info *psrch_inf, int remap, const char dirsep) 4223 struct cifs_search_info *psrch_inf, int remap, const char dirsep)
4349{ 4224{
4350/* level 257 SMB_ */ 4225/* level 257 SMB_ */
@@ -4416,8 +4291,7 @@ findFirstRetry:
4416 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | 4291 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM |
4417 ATTR_DIRECTORY); 4292 ATTR_DIRECTORY);
4418 pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO)); 4293 pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO));
4419 pSMB->SearchFlags = cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | 4294 pSMB->SearchFlags = cpu_to_le16(search_flags);
4420 CIFS_SEARCH_RETURN_RESUME);
4421 pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); 4295 pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level);
4422 4296
4423 /* BB what should we set StorageType to? Does it matter? BB */ 4297 /* BB what should we set StorageType to? Does it matter? BB */
@@ -4487,8 +4361,8 @@ findFirstRetry:
4487 return rc; 4361 return rc;
4488} 4362}
4489 4363
4490int CIFSFindNext(const int xid, struct cifs_tcon *tcon, 4364int CIFSFindNext(const int xid, struct cifs_tcon *tcon, __u16 searchHandle,
4491 __u16 searchHandle, struct cifs_search_info *psrch_inf) 4365 __u16 search_flags, struct cifs_search_info *psrch_inf)
4492{ 4366{
4493 TRANSACTION2_FNEXT_REQ *pSMB = NULL; 4367 TRANSACTION2_FNEXT_REQ *pSMB = NULL;
4494 TRANSACTION2_FNEXT_RSP *pSMBr = NULL; 4368 TRANSACTION2_FNEXT_RSP *pSMBr = NULL;
@@ -4531,8 +4405,7 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
4531 cpu_to_le16(CIFSMaxBufSize / sizeof(FILE_UNIX_INFO)); 4405 cpu_to_le16(CIFSMaxBufSize / sizeof(FILE_UNIX_INFO));
4532 pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); 4406 pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level);
4533 pSMB->ResumeKey = psrch_inf->resume_key; 4407 pSMB->ResumeKey = psrch_inf->resume_key;
4534 pSMB->SearchFlags = 4408 pSMB->SearchFlags = cpu_to_le16(search_flags);
4535 cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME);
4536 4409
4537 name_len = psrch_inf->resume_name_len; 4410 name_len = psrch_inf->resume_name_len;
4538 params += name_len; 4411 params += name_len;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e0b56d7a19c5..ccafdedd0dbc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/connect.c 2 * fs/cifs/connect.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2009 4 * Copyright (C) International Business Machines Corp., 2002,2011
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -102,7 +102,7 @@ enum {
102 Opt_srcaddr, Opt_prefixpath, 102 Opt_srcaddr, Opt_prefixpath,
103 Opt_iocharset, Opt_sockopt, 103 Opt_iocharset, Opt_sockopt,
104 Opt_netbiosname, Opt_servern, 104 Opt_netbiosname, Opt_servern,
105 Opt_ver, Opt_sec, 105 Opt_ver, Opt_vers, Opt_sec, Opt_cache,
106 106
107 /* Mount options to be ignored */ 107 /* Mount options to be ignored */
108 Opt_ignore, 108 Opt_ignore,
@@ -210,9 +210,9 @@ static const match_table_t cifs_mount_option_tokens = {
210 { Opt_netbiosname, "netbiosname=%s" }, 210 { Opt_netbiosname, "netbiosname=%s" },
211 { Opt_servern, "servern=%s" }, 211 { Opt_servern, "servern=%s" },
212 { Opt_ver, "ver=%s" }, 212 { Opt_ver, "ver=%s" },
213 { Opt_ver, "vers=%s" }, 213 { Opt_vers, "vers=%s" },
214 { Opt_ver, "version=%s" },
215 { Opt_sec, "sec=%s" }, 214 { Opt_sec, "sec=%s" },
215 { Opt_cache, "cache=%s" },
216 216
217 { Opt_ignore, "cred" }, 217 { Opt_ignore, "cred" },
218 { Opt_ignore, "credentials" }, 218 { Opt_ignore, "credentials" },
@@ -261,6 +261,26 @@ static const match_table_t cifs_secflavor_tokens = {
261 { Opt_sec_err, NULL } 261 { Opt_sec_err, NULL }
262}; 262};
263 263
264/* cache flavors */
265enum {
266 Opt_cache_loose,
267 Opt_cache_strict,
268 Opt_cache_none,
269 Opt_cache_err
270};
271
272static const match_table_t cifs_cacheflavor_tokens = {
273 { Opt_cache_loose, "loose" },
274 { Opt_cache_strict, "strict" },
275 { Opt_cache_none, "none" },
276 { Opt_cache_err, NULL }
277};
278
279static const match_table_t cifs_smb_version_tokens = {
280 { Smb_1, SMB1_VERSION_STRING },
281 { Smb_21, SMB21_VERSION_STRING },
282};
283
264static int ip_connect(struct TCP_Server_Info *server); 284static int ip_connect(struct TCP_Server_Info *server);
265static int generic_ip_connect(struct TCP_Server_Info *server); 285static int generic_ip_connect(struct TCP_Server_Info *server);
266static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); 286static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
@@ -549,7 +569,7 @@ allocate_buffers(struct TCP_Server_Info *server)
549 } 569 }
550 } else if (server->large_buf) { 570 } else if (server->large_buf) {
551 /* we are reusing a dirty large buf, clear its start */ 571 /* we are reusing a dirty large buf, clear its start */
552 memset(server->bigbuf, 0, header_size()); 572 memset(server->bigbuf, 0, HEADER_SIZE(server));
553 } 573 }
554 574
555 if (!server->smallbuf) { 575 if (!server->smallbuf) {
@@ -563,7 +583,7 @@ allocate_buffers(struct TCP_Server_Info *server)
563 /* beginning of smb buffer is cleared in our buf_get */ 583 /* beginning of smb buffer is cleared in our buf_get */
564 } else { 584 } else {
565 /* if existing small buf clear beginning */ 585 /* if existing small buf clear beginning */
566 memset(server->smallbuf, 0, header_size()); 586 memset(server->smallbuf, 0, HEADER_SIZE(server));
567 } 587 }
568 588
569 return true; 589 return true;
@@ -764,25 +784,6 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
764 return false; 784 return false;
765} 785}
766 786
767static struct mid_q_entry *
768find_mid(struct TCP_Server_Info *server, char *buffer)
769{
770 struct smb_hdr *buf = (struct smb_hdr *)buffer;
771 struct mid_q_entry *mid;
772
773 spin_lock(&GlobalMid_Lock);
774 list_for_each_entry(mid, &server->pending_mid_q, qhead) {
775 if (mid->mid == buf->Mid &&
776 mid->mid_state == MID_REQUEST_SUBMITTED &&
777 le16_to_cpu(mid->command) == buf->Command) {
778 spin_unlock(&GlobalMid_Lock);
779 return mid;
780 }
781 }
782 spin_unlock(&GlobalMid_Lock);
783 return NULL;
784}
785
786void 787void
787dequeue_mid(struct mid_q_entry *mid, bool malformed) 788dequeue_mid(struct mid_q_entry *mid, bool malformed)
788{ 789{
@@ -934,7 +935,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
934 unsigned int pdu_length = get_rfc1002_length(buf); 935 unsigned int pdu_length = get_rfc1002_length(buf);
935 936
936 /* make sure this will fit in a large buffer */ 937 /* make sure this will fit in a large buffer */
937 if (pdu_length > CIFSMaxBufSize + max_header_size() - 4) { 938 if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - 4) {
938 cERROR(1, "SMB response too long (%u bytes)", 939 cERROR(1, "SMB response too long (%u bytes)",
939 pdu_length); 940 pdu_length);
940 cifs_reconnect(server); 941 cifs_reconnect(server);
@@ -950,8 +951,8 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
950 } 951 }
951 952
952 /* now read the rest */ 953 /* now read the rest */
953 length = cifs_read_from_socket(server, buf + header_size() - 1, 954 length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
954 pdu_length - header_size() + 1 + 4); 955 pdu_length - HEADER_SIZE(server) + 1 + 4);
955 if (length < 0) 956 if (length < 0)
956 return length; 957 return length;
957 server->total_read += length; 958 server->total_read += length;
@@ -967,7 +968,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
967 * 48 bytes is enough to display the header and a little bit 968 * 48 bytes is enough to display the header and a little bit
968 * into the payload for debugging purposes. 969 * into the payload for debugging purposes.
969 */ 970 */
970 length = checkSMB(buf, server->total_read); 971 length = server->ops->check_message(buf, server->total_read);
971 if (length != 0) 972 if (length != 0)
972 cifs_dump_mem("Bad SMB: ", buf, 973 cifs_dump_mem("Bad SMB: ", buf,
973 min_t(unsigned int, server->total_read, 48)); 974 min_t(unsigned int, server->total_read, 48));
@@ -1025,7 +1026,7 @@ cifs_demultiplex_thread(void *p)
1025 continue; 1026 continue;
1026 1027
1027 /* make sure we have enough to get to the MID */ 1028 /* make sure we have enough to get to the MID */
1028 if (pdu_length < header_size() - 1 - 4) { 1029 if (pdu_length < HEADER_SIZE(server) - 1 - 4) {
1029 cERROR(1, "SMB response too short (%u bytes)", 1030 cERROR(1, "SMB response too short (%u bytes)",
1030 pdu_length); 1031 pdu_length);
1031 cifs_reconnect(server); 1032 cifs_reconnect(server);
@@ -1035,12 +1036,12 @@ cifs_demultiplex_thread(void *p)
1035 1036
1036 /* read down to the MID */ 1037 /* read down to the MID */
1037 length = cifs_read_from_socket(server, buf + 4, 1038 length = cifs_read_from_socket(server, buf + 4,
1038 header_size() - 1 - 4); 1039 HEADER_SIZE(server) - 1 - 4);
1039 if (length < 0) 1040 if (length < 0)
1040 continue; 1041 continue;
1041 server->total_read += length; 1042 server->total_read += length;
1042 1043
1043 mid_entry = find_mid(server, buf); 1044 mid_entry = server->ops->find_mid(server, buf);
1044 1045
1045 if (!mid_entry || !mid_entry->receive) 1046 if (!mid_entry || !mid_entry->receive)
1046 length = standard_receive3(server, mid_entry); 1047 length = standard_receive3(server, mid_entry);
@@ -1057,12 +1058,13 @@ cifs_demultiplex_thread(void *p)
1057 if (mid_entry != NULL) { 1058 if (mid_entry != NULL) {
1058 if (!mid_entry->multiRsp || mid_entry->multiEnd) 1059 if (!mid_entry->multiRsp || mid_entry->multiEnd)
1059 mid_entry->callback(mid_entry); 1060 mid_entry->callback(mid_entry);
1060 } else if (!is_valid_oplock_break(buf, server)) { 1061 } else if (!server->ops->is_oplock_break(buf, server)) {
1061 cERROR(1, "No task to wake, unknown frame received! " 1062 cERROR(1, "No task to wake, unknown frame received! "
1062 "NumMids %d", atomic_read(&midCount)); 1063 "NumMids %d", atomic_read(&midCount));
1063 cifs_dump_mem("Received Data is: ", buf, header_size()); 1064 cifs_dump_mem("Received Data is: ", buf,
1065 HEADER_SIZE(server));
1064#ifdef CONFIG_CIFS_DEBUG2 1066#ifdef CONFIG_CIFS_DEBUG2
1065 cifs_dump_detail(buf); 1067 server->ops->dump_detail(buf);
1066 cifs_dump_mids(server); 1068 cifs_dump_mids(server);
1067#endif /* CIFS_DEBUG2 */ 1069#endif /* CIFS_DEBUG2 */
1068 1070
@@ -1186,6 +1188,54 @@ static int cifs_parse_security_flavors(char *value,
1186} 1188}
1187 1189
1188static int 1190static int
1191cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
1192{
1193 substring_t args[MAX_OPT_ARGS];
1194
1195 switch (match_token(value, cifs_cacheflavor_tokens, args)) {
1196 case Opt_cache_loose:
1197 vol->direct_io = false;
1198 vol->strict_io = false;
1199 break;
1200 case Opt_cache_strict:
1201 vol->direct_io = false;
1202 vol->strict_io = true;
1203 break;
1204 case Opt_cache_none:
1205 vol->direct_io = true;
1206 vol->strict_io = false;
1207 break;
1208 default:
1209 cERROR(1, "bad cache= option: %s", value);
1210 return 1;
1211 }
1212 return 0;
1213}
1214
1215static int
1216cifs_parse_smb_version(char *value, struct smb_vol *vol)
1217{
1218 substring_t args[MAX_OPT_ARGS];
1219
1220 switch (match_token(value, cifs_smb_version_tokens, args)) {
1221 case Smb_1:
1222 vol->ops = &smb1_operations;
1223 vol->vals = &smb1_values;
1224 break;
1225#ifdef CONFIG_CIFS_SMB2
1226 case Smb_21:
1227 vol->ops = &smb21_operations;
1228 vol->vals = &smb21_values;
1229 break;
1230#endif
1231 default:
1232 cERROR(1, "Unknown vers= option specified: %s", value);
1233 return 1;
1234 }
1235 return 0;
1236}
1237
1238static int
1189cifs_parse_mount_options(const char *mountdata, const char *devname, 1239cifs_parse_mount_options(const char *mountdata, const char *devname,
1190 struct smb_vol *vol) 1240 struct smb_vol *vol)
1191{ 1241{
@@ -1203,6 +1253,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1203 char *string = NULL; 1253 char *string = NULL;
1204 char *tmp_end, *value; 1254 char *tmp_end, *value;
1205 char delim; 1255 char delim;
1256 bool cache_specified = false;
1257 static bool cache_warned = false;
1206 1258
1207 separator[0] = ','; 1259 separator[0] = ',';
1208 separator[1] = 0; 1260 separator[1] = 0;
@@ -1236,6 +1288,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1236 1288
1237 vol->actimeo = CIFS_DEF_ACTIMEO; 1289 vol->actimeo = CIFS_DEF_ACTIMEO;
1238 1290
1291 /* FIXME: add autonegotiation -- for now, SMB1 is default */
1292 vol->ops = &smb1_operations;
1293 vol->vals = &smb1_values;
1294
1239 if (!mountdata) 1295 if (!mountdata)
1240 goto cifs_parse_mount_err; 1296 goto cifs_parse_mount_err;
1241 1297
@@ -1414,10 +1470,20 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1414 vol->seal = 1; 1470 vol->seal = 1;
1415 break; 1471 break;
1416 case Opt_direct: 1472 case Opt_direct:
1417 vol->direct_io = 1; 1473 cache_specified = true;
1474 vol->direct_io = true;
1475 vol->strict_io = false;
1476 cERROR(1, "The \"directio\" option will be removed in "
1477 "3.7. Please switch to the \"cache=none\" "
1478 "option.");
1418 break; 1479 break;
1419 case Opt_strictcache: 1480 case Opt_strictcache:
1420 vol->strict_io = 1; 1481 cache_specified = true;
1482 vol->direct_io = false;
1483 vol->strict_io = true;
1484 cERROR(1, "The \"strictcache\" option will be removed "
1485 "in 3.7. Please switch to the \"cache=strict\" "
1486 "option.");
1421 break; 1487 break;
1422 case Opt_noac: 1488 case Opt_noac:
1423 printk(KERN_WARNING "CIFS: Mount option noac not " 1489 printk(KERN_WARNING "CIFS: Mount option noac not "
@@ -1821,8 +1887,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1821 if (string == NULL) 1887 if (string == NULL)
1822 goto out_nomem; 1888 goto out_nomem;
1823 1889
1824 if (strnicmp(string, "cifs", 4) == 0 || 1890 if (strnicmp(string, "1", 1) == 0) {
1825 strnicmp(string, "1", 1) == 0) {
1826 /* This is the default */ 1891 /* This is the default */
1827 break; 1892 break;
1828 } 1893 }
@@ -1830,6 +1895,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1830 printk(KERN_WARNING "CIFS: Invalid version" 1895 printk(KERN_WARNING "CIFS: Invalid version"
1831 " specified\n"); 1896 " specified\n");
1832 goto cifs_parse_mount_err; 1897 goto cifs_parse_mount_err;
1898 case Opt_vers:
1899 string = match_strdup(args);
1900 if (string == NULL)
1901 goto out_nomem;
1902
1903 if (cifs_parse_smb_version(string, vol) != 0)
1904 goto cifs_parse_mount_err;
1905 break;
1833 case Opt_sec: 1906 case Opt_sec:
1834 string = match_strdup(args); 1907 string = match_strdup(args);
1835 if (string == NULL) 1908 if (string == NULL)
@@ -1838,6 +1911,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1838 if (cifs_parse_security_flavors(string, vol) != 0) 1911 if (cifs_parse_security_flavors(string, vol) != 0)
1839 goto cifs_parse_mount_err; 1912 goto cifs_parse_mount_err;
1840 break; 1913 break;
1914 case Opt_cache:
1915 cache_specified = true;
1916 string = match_strdup(args);
1917 if (string == NULL)
1918 goto out_nomem;
1919
1920 if (cifs_parse_cache_flavor(string, vol) != 0)
1921 goto cifs_parse_mount_err;
1922 break;
1841 default: 1923 default:
1842 /* 1924 /*
1843 * An option we don't recognize. Save it off for later 1925 * An option we don't recognize. Save it off for later
@@ -1881,6 +1963,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1881 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1963 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1882 "specified with no gid= option.\n"); 1964 "specified with no gid= option.\n");
1883 1965
1966 /* FIXME: remove this block in 3.7 */
1967 if (!cache_specified && !cache_warned) {
1968 cache_warned = true;
1969 printk(KERN_NOTICE "CIFS: no cache= option specified, using "
1970 "\"cache=loose\". This default will change "
1971 "to \"cache=strict\" in 3.7.\n");
1972 }
1973
1884 kfree(mountdata_copy); 1974 kfree(mountdata_copy);
1885 return 0; 1975 return 0;
1886 1976
@@ -2041,6 +2131,9 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
2041static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr, 2131static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr,
2042 struct smb_vol *vol) 2132 struct smb_vol *vol)
2043{ 2133{
2134 if ((server->vals != vol->vals) || (server->ops != vol->ops))
2135 return 0;
2136
2044 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) 2137 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
2045 return 0; 2138 return 0;
2046 2139
@@ -2163,6 +2256,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
2163 goto out_err; 2256 goto out_err;
2164 } 2257 }
2165 2258
2259 tcp_ses->ops = volume_info->ops;
2260 tcp_ses->vals = volume_info->vals;
2166 cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); 2261 cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
2167 tcp_ses->hostname = extract_hostname(volume_info->UNC); 2262 tcp_ses->hostname = extract_hostname(volume_info->UNC);
2168 if (IS_ERR(tcp_ses->hostname)) { 2263 if (IS_ERR(tcp_ses->hostname)) {
@@ -3569,6 +3664,7 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
3569 if (cifs_parse_mount_options(mount_data, devname, volume_info)) 3664 if (cifs_parse_mount_options(mount_data, devname, volume_info))
3570 return -EINVAL; 3665 return -EINVAL;
3571 3666
3667
3572 if (volume_info->nullauth) { 3668 if (volume_info->nullauth) {
3573 cFYI(1, "Anonymous login"); 3669 cFYI(1, "Anonymous login");
3574 kfree(volume_info->username); 3670 kfree(volume_info->username);
@@ -4010,11 +4106,11 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
4010 if (server->maxBuf != 0) 4106 if (server->maxBuf != 0)
4011 return 0; 4107 return 0;
4012 4108
4013 cifs_set_credits(server, 1); 4109 set_credits(server, 1);
4014 rc = CIFSSMBNegotiate(xid, ses); 4110 rc = CIFSSMBNegotiate(xid, ses);
4015 if (rc == -EAGAIN) { 4111 if (rc == -EAGAIN) {
4016 /* retry only once on 1st time connection */ 4112 /* retry only once on 1st time connection */
4017 cifs_set_credits(server, 1); 4113 set_credits(server, 1);
4018 rc = CIFSSMBNegotiate(xid, ses); 4114 rc = CIFSSMBNegotiate(xid, ses);
4019 if (rc == -EAGAIN) 4115 if (rc == -EAGAIN)
4020 rc = -EHOSTDOWN; 4116 rc = -EHOSTDOWN;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 81725e9286e9..253170dfa716 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -264,6 +264,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
264 pCifsFile->tlink = cifs_get_tlink(tlink); 264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex); 265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); 266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
267 INIT_LIST_HEAD(&pCifsFile->llist);
267 268
268 spin_lock(&cifs_file_list_lock); 269 spin_lock(&cifs_file_list_lock);
269 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); 270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
@@ -334,9 +335,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
334 * is closed anyway. 335 * is closed anyway.
335 */ 336 */
336 mutex_lock(&cifsi->lock_mutex); 337 mutex_lock(&cifsi->lock_mutex);
337 list_for_each_entry_safe(li, tmp, &cifsi->llist, llist) { 338 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
338 if (li->netfid != cifs_file->netfid)
339 continue;
340 list_del(&li->llist); 339 list_del(&li->llist);
341 cifs_del_lock_waiters(li); 340 cifs_del_lock_waiters(li);
342 kfree(li); 341 kfree(li);
@@ -645,7 +644,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
645} 644}
646 645
647static struct cifsLockInfo * 646static struct cifsLockInfo *
648cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 netfid) 647cifs_lock_init(__u64 offset, __u64 length, __u8 type)
649{ 648{
650 struct cifsLockInfo *lock = 649 struct cifsLockInfo *lock =
651 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); 650 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
@@ -654,7 +653,6 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 netfid)
654 lock->offset = offset; 653 lock->offset = offset;
655 lock->length = length; 654 lock->length = length;
656 lock->type = type; 655 lock->type = type;
657 lock->netfid = netfid;
658 lock->pid = current->tgid; 656 lock->pid = current->tgid;
659 INIT_LIST_HEAD(&lock->blist); 657 INIT_LIST_HEAD(&lock->blist);
660 init_waitqueue_head(&lock->block_q); 658 init_waitqueue_head(&lock->block_q);
@@ -672,19 +670,20 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
672} 670}
673 671
674static bool 672static bool
675__cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset, 673cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
676 __u64 length, __u8 type, __u16 netfid, 674 __u64 length, __u8 type, struct cifsFileInfo *cur,
677 struct cifsLockInfo **conf_lock) 675 struct cifsLockInfo **conf_lock)
678{ 676{
679 struct cifsLockInfo *li, *tmp; 677 struct cifsLockInfo *li;
678 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
680 679
681 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { 680 list_for_each_entry(li, &cfile->llist, llist) {
682 if (offset + length <= li->offset || 681 if (offset + length <= li->offset ||
683 offset >= li->offset + li->length) 682 offset >= li->offset + li->length)
684 continue; 683 continue;
685 else if ((type & LOCKING_ANDX_SHARED_LOCK) && 684 else if ((type & server->vals->shared_lock_type) &&
686 ((netfid == li->netfid && current->tgid == li->pid) || 685 ((server->ops->compare_fids(cur, cfile) &&
687 type == li->type)) 686 current->tgid == li->pid) || type == li->type))
688 continue; 687 continue;
689 else { 688 else {
690 *conf_lock = li; 689 *conf_lock = li;
@@ -695,11 +694,23 @@ __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset,
695} 694}
696 695
697static bool 696static bool
698cifs_find_lock_conflict(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock, 697cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
699 struct cifsLockInfo **conf_lock) 698 __u8 type, struct cifsLockInfo **conf_lock)
700{ 699{
701 return __cifs_find_lock_conflict(cinode, lock->offset, lock->length, 700 bool rc = false;
702 lock->type, lock->netfid, conf_lock); 701 struct cifsFileInfo *fid, *tmp;
702 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
703
704 spin_lock(&cifs_file_list_lock);
705 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
706 rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
707 cfile, conf_lock);
708 if (rc)
709 break;
710 }
711 spin_unlock(&cifs_file_list_lock);
712
713 return rc;
703} 714}
704 715
705/* 716/*
@@ -710,22 +721,24 @@ cifs_find_lock_conflict(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock,
710 * the server or 1 otherwise. 721 * the server or 1 otherwise.
711 */ 722 */
712static int 723static int
713cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, 724cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
714 __u8 type, __u16 netfid, struct file_lock *flock) 725 __u8 type, struct file_lock *flock)
715{ 726{
716 int rc = 0; 727 int rc = 0;
717 struct cifsLockInfo *conf_lock; 728 struct cifsLockInfo *conf_lock;
729 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
730 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
718 bool exist; 731 bool exist;
719 732
720 mutex_lock(&cinode->lock_mutex); 733 mutex_lock(&cinode->lock_mutex);
721 734
722 exist = __cifs_find_lock_conflict(cinode, offset, length, type, netfid, 735 exist = cifs_find_lock_conflict(cfile, offset, length, type,
723 &conf_lock); 736 &conf_lock);
724 if (exist) { 737 if (exist) {
725 flock->fl_start = conf_lock->offset; 738 flock->fl_start = conf_lock->offset;
726 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 739 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
727 flock->fl_pid = conf_lock->pid; 740 flock->fl_pid = conf_lock->pid;
728 if (conf_lock->type & LOCKING_ANDX_SHARED_LOCK) 741 if (conf_lock->type & server->vals->shared_lock_type)
729 flock->fl_type = F_RDLCK; 742 flock->fl_type = F_RDLCK;
730 else 743 else
731 flock->fl_type = F_WRLCK; 744 flock->fl_type = F_WRLCK;
@@ -739,10 +752,11 @@ cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length,
739} 752}
740 753
741static void 754static void
742cifs_lock_add(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock) 755cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
743{ 756{
757 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
744 mutex_lock(&cinode->lock_mutex); 758 mutex_lock(&cinode->lock_mutex);
745 list_add_tail(&lock->llist, &cinode->llist); 759 list_add_tail(&lock->llist, &cfile->llist);
746 mutex_unlock(&cinode->lock_mutex); 760 mutex_unlock(&cinode->lock_mutex);
747} 761}
748 762
@@ -753,10 +767,11 @@ cifs_lock_add(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock)
753 * 3) -EACCESS, if there is a lock that prevents us and wait is false. 767 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
754 */ 768 */
755static int 769static int
756cifs_lock_add_if(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock, 770cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
757 bool wait) 771 bool wait)
758{ 772{
759 struct cifsLockInfo *conf_lock; 773 struct cifsLockInfo *conf_lock;
774 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
760 bool exist; 775 bool exist;
761 int rc = 0; 776 int rc = 0;
762 777
@@ -764,9 +779,10 @@ try_again:
764 exist = false; 779 exist = false;
765 mutex_lock(&cinode->lock_mutex); 780 mutex_lock(&cinode->lock_mutex);
766 781
767 exist = cifs_find_lock_conflict(cinode, lock, &conf_lock); 782 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
783 lock->type, &conf_lock);
768 if (!exist && cinode->can_cache_brlcks) { 784 if (!exist && cinode->can_cache_brlcks) {
769 list_add_tail(&lock->llist, &cinode->llist); 785 list_add_tail(&lock->llist, &cfile->llist);
770 mutex_unlock(&cinode->lock_mutex); 786 mutex_unlock(&cinode->lock_mutex);
771 return rc; 787 return rc;
772 } 788 }
@@ -888,7 +904,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
888 for (i = 0; i < 2; i++) { 904 for (i = 0; i < 2; i++) {
889 cur = buf; 905 cur = buf;
890 num = 0; 906 num = 0;
891 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { 907 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
892 if (li->type != types[i]) 908 if (li->type != types[i])
893 continue; 909 continue;
894 cur->Pid = cpu_to_le16(li->pid); 910 cur->Pid = cpu_to_le16(li->pid);
@@ -898,7 +914,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
898 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 914 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
899 if (++num == max_num) { 915 if (++num == max_num) {
900 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 916 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
901 li->type, 0, num, buf); 917 (__u8)li->type, 0, num,
918 buf);
902 if (stored_rc) 919 if (stored_rc)
903 rc = stored_rc; 920 rc = stored_rc;
904 cur = buf; 921 cur = buf;
@@ -909,7 +926,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
909 926
910 if (num) { 927 if (num) {
911 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 928 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
912 types[i], 0, num, buf); 929 (__u8)types[i], 0, num, buf);
913 if (stored_rc) 930 if (stored_rc)
914 rc = stored_rc; 931 rc = stored_rc;
915 } 932 }
@@ -1053,8 +1070,8 @@ cifs_push_locks(struct cifsFileInfo *cfile)
1053} 1070}
1054 1071
1055static void 1072static void
1056cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock, 1073cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1057 bool *wait_flag) 1074 bool *wait_flag, struct TCP_Server_Info *server)
1058{ 1075{
1059 if (flock->fl_flags & FL_POSIX) 1076 if (flock->fl_flags & FL_POSIX)
1060 cFYI(1, "Posix"); 1077 cFYI(1, "Posix");
@@ -1073,38 +1090,50 @@ cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
1073 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) 1090 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
1074 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); 1091 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1075 1092
1076 *type = LOCKING_ANDX_LARGE_FILES; 1093 *type = server->vals->large_lock_type;
1077 if (flock->fl_type == F_WRLCK) { 1094 if (flock->fl_type == F_WRLCK) {
1078 cFYI(1, "F_WRLCK "); 1095 cFYI(1, "F_WRLCK ");
1096 *type |= server->vals->exclusive_lock_type;
1079 *lock = 1; 1097 *lock = 1;
1080 } else if (flock->fl_type == F_UNLCK) { 1098 } else if (flock->fl_type == F_UNLCK) {
1081 cFYI(1, "F_UNLCK"); 1099 cFYI(1, "F_UNLCK");
1100 *type |= server->vals->unlock_lock_type;
1082 *unlock = 1; 1101 *unlock = 1;
1083 /* Check if unlock includes more than one lock range */ 1102 /* Check if unlock includes more than one lock range */
1084 } else if (flock->fl_type == F_RDLCK) { 1103 } else if (flock->fl_type == F_RDLCK) {
1085 cFYI(1, "F_RDLCK"); 1104 cFYI(1, "F_RDLCK");
1086 *type |= LOCKING_ANDX_SHARED_LOCK; 1105 *type |= server->vals->shared_lock_type;
1087 *lock = 1; 1106 *lock = 1;
1088 } else if (flock->fl_type == F_EXLCK) { 1107 } else if (flock->fl_type == F_EXLCK) {
1089 cFYI(1, "F_EXLCK"); 1108 cFYI(1, "F_EXLCK");
1109 *type |= server->vals->exclusive_lock_type;
1090 *lock = 1; 1110 *lock = 1;
1091 } else if (flock->fl_type == F_SHLCK) { 1111 } else if (flock->fl_type == F_SHLCK) {
1092 cFYI(1, "F_SHLCK"); 1112 cFYI(1, "F_SHLCK");
1093 *type |= LOCKING_ANDX_SHARED_LOCK; 1113 *type |= server->vals->shared_lock_type;
1094 *lock = 1; 1114 *lock = 1;
1095 } else 1115 } else
1096 cFYI(1, "Unknown type of lock"); 1116 cFYI(1, "Unknown type of lock");
1097} 1117}
1098 1118
1099static int 1119static int
1100cifs_getlk(struct file *file, struct file_lock *flock, __u8 type, 1120cifs_mandatory_lock(int xid, struct cifsFileInfo *cfile, __u64 offset,
1121 __u64 length, __u32 type, int lock, int unlock, bool wait)
1122{
1123 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1124 current->tgid, length, offset, unlock, lock,
1125 (__u8)type, wait, 0);
1126}
1127
1128static int
1129cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1101 bool wait_flag, bool posix_lck, int xid) 1130 bool wait_flag, bool posix_lck, int xid)
1102{ 1131{
1103 int rc = 0; 1132 int rc = 0;
1104 __u64 length = 1 + flock->fl_end - flock->fl_start; 1133 __u64 length = 1 + flock->fl_end - flock->fl_start;
1105 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1134 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1106 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1135 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1107 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 1136 struct TCP_Server_Info *server = tcon->ses->server;
1108 __u16 netfid = cfile->netfid; 1137 __u16 netfid = cfile->netfid;
1109 1138
1110 if (posix_lck) { 1139 if (posix_lck) {
@@ -1114,7 +1143,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u8 type,
1114 if (!rc) 1143 if (!rc)
1115 return rc; 1144 return rc;
1116 1145
1117 if (type & LOCKING_ANDX_SHARED_LOCK) 1146 if (type & server->vals->shared_lock_type)
1118 posix_lock_type = CIFS_RDLCK; 1147 posix_lock_type = CIFS_RDLCK;
1119 else 1148 else
1120 posix_lock_type = CIFS_WRLCK; 1149 posix_lock_type = CIFS_WRLCK;
@@ -1124,38 +1153,35 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u8 type,
1124 return rc; 1153 return rc;
1125 } 1154 }
1126 1155
1127 rc = cifs_lock_test(cinode, flock->fl_start, length, type, netfid, 1156 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1128 flock);
1129 if (!rc) 1157 if (!rc)
1130 return rc; 1158 return rc;
1131 1159
1132 /* BB we could chain these into one lock request BB */ 1160 /* BB we could chain these into one lock request BB */
1133 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, 1161 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
1134 flock->fl_start, 0, 1, type, 0, 0); 1162 1, 0, false);
1135 if (rc == 0) { 1163 if (rc == 0) {
1136 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, 1164 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1137 length, flock->fl_start, 1, 0, 1165 type, 0, 1, false);
1138 type, 0, 0);
1139 flock->fl_type = F_UNLCK; 1166 flock->fl_type = F_UNLCK;
1140 if (rc != 0) 1167 if (rc != 0)
1141 cERROR(1, "Error unlocking previously locked " 1168 cERROR(1, "Error unlocking previously locked "
1142 "range %d during test of lock", rc); 1169 "range %d during test of lock", rc);
1143 return 0; 1170 return 0;
1144 } 1171 }
1145 1172
1146 if (type & LOCKING_ANDX_SHARED_LOCK) { 1173 if (type & server->vals->shared_lock_type) {
1147 flock->fl_type = F_WRLCK; 1174 flock->fl_type = F_WRLCK;
1148 return 0; 1175 return 0;
1149 } 1176 }
1150 1177
1151 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, 1178 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1152 flock->fl_start, 0, 1, 1179 type | server->vals->shared_lock_type, 1, 0,
1153 type | LOCKING_ANDX_SHARED_LOCK, 0, 0); 1180 false);
1154 if (rc == 0) { 1181 if (rc == 0) {
1155 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, 1182 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1156 length, flock->fl_start, 1, 0, 1183 type | server->vals->shared_lock_type,
1157 type | LOCKING_ANDX_SHARED_LOCK, 1184 0, 1, false);
1158 0, 0);
1159 flock->fl_type = F_RDLCK; 1185 flock->fl_type = F_RDLCK;
1160 if (rc != 0) 1186 if (rc != 0)
1161 cERROR(1, "Error unlocking previously locked " 1187 cERROR(1, "Error unlocking previously locked "
@@ -1212,15 +1238,13 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1212 for (i = 0; i < 2; i++) { 1238 for (i = 0; i < 2; i++) {
1213 cur = buf; 1239 cur = buf;
1214 num = 0; 1240 num = 0;
1215 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) { 1241 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
1216 if (flock->fl_start > li->offset || 1242 if (flock->fl_start > li->offset ||
1217 (flock->fl_start + length) < 1243 (flock->fl_start + length) <
1218 (li->offset + li->length)) 1244 (li->offset + li->length))
1219 continue; 1245 continue;
1220 if (current->tgid != li->pid) 1246 if (current->tgid != li->pid)
1221 continue; 1247 continue;
1222 if (cfile->netfid != li->netfid)
1223 continue;
1224 if (types[i] != li->type) 1248 if (types[i] != li->type)
1225 continue; 1249 continue;
1226 if (!cinode->can_cache_brlcks) { 1250 if (!cinode->can_cache_brlcks) {
@@ -1233,7 +1257,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1233 cpu_to_le32((u32)(li->offset>>32)); 1257 cpu_to_le32((u32)(li->offset>>32));
1234 /* 1258 /*
1235 * We need to save a lock here to let us add 1259 * We need to save a lock here to let us add
1236 * it again to the inode list if the unlock 1260 * it again to the file's list if the unlock
1237 * range request fails on the server. 1261 * range request fails on the server.
1238 */ 1262 */
1239 list_move(&li->llist, &tmp_llist); 1263 list_move(&li->llist, &tmp_llist);
@@ -1247,10 +1271,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1247 * We failed on the unlock range 1271 * We failed on the unlock range
1248 * request - add all locks from 1272 * request - add all locks from
1249 * the tmp list to the head of 1273 * the tmp list to the head of
1250 * the inode list. 1274 * the file's list.
1251 */ 1275 */
1252 cifs_move_llist(&tmp_llist, 1276 cifs_move_llist(&tmp_llist,
1253 &cinode->llist); 1277 &cfile->llist);
1254 rc = stored_rc; 1278 rc = stored_rc;
1255 } else 1279 } else
1256 /* 1280 /*
@@ -1265,7 +1289,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1265 } else { 1289 } else {
1266 /* 1290 /*
1267 * We can cache brlock requests - simply remove 1291 * We can cache brlock requests - simply remove
1268 * a lock from the inode list. 1292 * a lock from the file's list.
1269 */ 1293 */
1270 list_del(&li->llist); 1294 list_del(&li->llist);
1271 cifs_del_lock_waiters(li); 1295 cifs_del_lock_waiters(li);
@@ -1276,7 +1300,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1276 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1300 stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
1277 types[i], num, 0, buf); 1301 types[i], num, 0, buf);
1278 if (stored_rc) { 1302 if (stored_rc) {
1279 cifs_move_llist(&tmp_llist, &cinode->llist); 1303 cifs_move_llist(&tmp_llist, &cfile->llist);
1280 rc = stored_rc; 1304 rc = stored_rc;
1281 } else 1305 } else
1282 cifs_free_llist(&tmp_llist); 1306 cifs_free_llist(&tmp_llist);
@@ -1289,14 +1313,14 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid)
1289} 1313}
1290 1314
1291static int 1315static int
1292cifs_setlk(struct file *file, struct file_lock *flock, __u8 type, 1316cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1293 bool wait_flag, bool posix_lck, int lock, int unlock, int xid) 1317 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
1294{ 1318{
1295 int rc = 0; 1319 int rc = 0;
1296 __u64 length = 1 + flock->fl_end - flock->fl_start; 1320 __u64 length = 1 + flock->fl_end - flock->fl_start;
1297 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1321 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1298 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1322 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1299 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 1323 struct TCP_Server_Info *server = tcon->ses->server;
1300 __u16 netfid = cfile->netfid; 1324 __u16 netfid = cfile->netfid;
1301 1325
1302 if (posix_lck) { 1326 if (posix_lck) {
@@ -1306,7 +1330,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
1306 if (!rc || rc < 0) 1330 if (!rc || rc < 0)
1307 return rc; 1331 return rc;
1308 1332
1309 if (type & LOCKING_ANDX_SHARED_LOCK) 1333 if (type & server->vals->shared_lock_type)
1310 posix_lock_type = CIFS_RDLCK; 1334 posix_lock_type = CIFS_RDLCK;
1311 else 1335 else
1312 posix_lock_type = CIFS_WRLCK; 1336 posix_lock_type = CIFS_WRLCK;
@@ -1323,24 +1347,24 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
1323 if (lock) { 1347 if (lock) {
1324 struct cifsLockInfo *lock; 1348 struct cifsLockInfo *lock;
1325 1349
1326 lock = cifs_lock_init(flock->fl_start, length, type, netfid); 1350 lock = cifs_lock_init(flock->fl_start, length, type);
1327 if (!lock) 1351 if (!lock)
1328 return -ENOMEM; 1352 return -ENOMEM;
1329 1353
1330 rc = cifs_lock_add_if(cinode, lock, wait_flag); 1354 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1331 if (rc < 0) 1355 if (rc < 0)
1332 kfree(lock); 1356 kfree(lock);
1333 if (rc <= 0) 1357 if (rc <= 0)
1334 goto out; 1358 goto out;
1335 1359
1336 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, 1360 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
1337 flock->fl_start, 0, 1, type, wait_flag, 0); 1361 type, 1, 0, wait_flag);
1338 if (rc) { 1362 if (rc) {
1339 kfree(lock); 1363 kfree(lock);
1340 goto out; 1364 goto out;
1341 } 1365 }
1342 1366
1343 cifs_lock_add(cinode, lock); 1367 cifs_lock_add(cfile, lock);
1344 } else if (unlock) 1368 } else if (unlock)
1345 rc = cifs_unlock_range(cfile, flock, xid); 1369 rc = cifs_unlock_range(cfile, flock, xid);
1346 1370
@@ -1361,7 +1385,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1361 struct cifsInodeInfo *cinode; 1385 struct cifsInodeInfo *cinode;
1362 struct cifsFileInfo *cfile; 1386 struct cifsFileInfo *cfile;
1363 __u16 netfid; 1387 __u16 netfid;
1364 __u8 type; 1388 __u32 type;
1365 1389
1366 rc = -EACCES; 1390 rc = -EACCES;
1367 xid = GetXid(); 1391 xid = GetXid();
@@ -1370,11 +1394,13 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1370 "end: %lld", cmd, flock->fl_flags, flock->fl_type, 1394 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1371 flock->fl_start, flock->fl_end); 1395 flock->fl_start, flock->fl_end);
1372 1396
1373 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);
1374
1375 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1376 cfile = (struct cifsFileInfo *)file->private_data; 1397 cfile = (struct cifsFileInfo *)file->private_data;
1377 tcon = tlink_tcon(cfile->tlink); 1398 tcon = tlink_tcon(cfile->tlink);
1399
1400 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1401 tcon->ses->server);
1402
1403 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1378 netfid = cfile->netfid; 1404 netfid = cfile->netfid;
1379 cinode = CIFS_I(file->f_path.dentry->d_inode); 1405 cinode = CIFS_I(file->f_path.dentry->d_inode);
1380 1406
@@ -1539,10 +1565,11 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1539struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, 1565struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1540 bool fsuid_only) 1566 bool fsuid_only)
1541{ 1567{
1542 struct cifsFileInfo *open_file; 1568 struct cifsFileInfo *open_file, *inv_file = NULL;
1543 struct cifs_sb_info *cifs_sb; 1569 struct cifs_sb_info *cifs_sb;
1544 bool any_available = false; 1570 bool any_available = false;
1545 int rc; 1571 int rc;
1572 unsigned int refind = 0;
1546 1573
1547 /* Having a null inode here (because mapping->host was set to zero by 1574 /* Having a null inode here (because mapping->host was set to zero by
1548 the VFS or MM) should not happen but we had reports of on oops (due to 1575 the VFS or MM) should not happen but we had reports of on oops (due to
@@ -1562,40 +1589,25 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1562 1589
1563 spin_lock(&cifs_file_list_lock); 1590 spin_lock(&cifs_file_list_lock);
1564refind_writable: 1591refind_writable:
1592 if (refind > MAX_REOPEN_ATT) {
1593 spin_unlock(&cifs_file_list_lock);
1594 return NULL;
1595 }
1565 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1596 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1566 if (!any_available && open_file->pid != current->tgid) 1597 if (!any_available && open_file->pid != current->tgid)
1567 continue; 1598 continue;
1568 if (fsuid_only && open_file->uid != current_fsuid()) 1599 if (fsuid_only && open_file->uid != current_fsuid())
1569 continue; 1600 continue;
1570 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 1601 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1571 cifsFileInfo_get(open_file);
1572
1573 if (!open_file->invalidHandle) { 1602 if (!open_file->invalidHandle) {
1574 /* found a good writable file */ 1603 /* found a good writable file */
1604 cifsFileInfo_get(open_file);
1575 spin_unlock(&cifs_file_list_lock); 1605 spin_unlock(&cifs_file_list_lock);
1576 return open_file; 1606 return open_file;
1607 } else {
1608 if (!inv_file)
1609 inv_file = open_file;
1577 } 1610 }
1578
1579 spin_unlock(&cifs_file_list_lock);
1580
1581 /* Had to unlock since following call can block */
1582 rc = cifs_reopen_file(open_file, false);
1583 if (!rc)
1584 return open_file;
1585
1586 /* if it fails, try another handle if possible */
1587 cFYI(1, "wp failed on reopen file");
1588 cifsFileInfo_put(open_file);
1589
1590 spin_lock(&cifs_file_list_lock);
1591
1592 /* else we simply continue to the next entry. Thus
1593 we do not loop on reopen errors. If we
1594 can not reopen the file, for example if we
1595 reconnected to a server with another client
1596 racing to delete or lock the file we would not
1597 make progress if we restarted before the beginning
1598 of the loop here. */
1599 } 1611 }
1600 } 1612 }
1601 /* couldn't find useable FH with same pid, try any available */ 1613 /* couldn't find useable FH with same pid, try any available */
@@ -1603,7 +1615,30 @@ refind_writable:
1603 any_available = true; 1615 any_available = true;
1604 goto refind_writable; 1616 goto refind_writable;
1605 } 1617 }
1618
1619 if (inv_file) {
1620 any_available = false;
1621 cifsFileInfo_get(inv_file);
1622 }
1623
1606 spin_unlock(&cifs_file_list_lock); 1624 spin_unlock(&cifs_file_list_lock);
1625
1626 if (inv_file) {
1627 rc = cifs_reopen_file(inv_file, false);
1628 if (!rc)
1629 return inv_file;
1630 else {
1631 spin_lock(&cifs_file_list_lock);
1632 list_move_tail(&inv_file->flist,
1633 &cifs_inode->openFileList);
1634 spin_unlock(&cifs_file_list_lock);
1635 cifsFileInfo_put(inv_file);
1636 spin_lock(&cifs_file_list_lock);
1637 ++refind;
1638 goto refind_writable;
1639 }
1640 }
1641
1607 return NULL; 1642 return NULL;
1608} 1643}
1609 1644
@@ -2339,24 +2374,224 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2339 return cifs_user_writev(iocb, iov, nr_segs, pos); 2374 return cifs_user_writev(iocb, iov, nr_segs, pos);
2340} 2375}
2341 2376
2377static struct cifs_readdata *
2378cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
2379{
2380 struct cifs_readdata *rdata;
2381
2382 rdata = kzalloc(sizeof(*rdata) +
2383 sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
2384 if (rdata != NULL) {
2385 kref_init(&rdata->refcount);
2386 INIT_LIST_HEAD(&rdata->list);
2387 init_completion(&rdata->done);
2388 INIT_WORK(&rdata->work, complete);
2389 INIT_LIST_HEAD(&rdata->pages);
2390 }
2391 return rdata;
2392}
2393
2394void
2395cifs_readdata_release(struct kref *refcount)
2396{
2397 struct cifs_readdata *rdata = container_of(refcount,
2398 struct cifs_readdata, refcount);
2399
2400 if (rdata->cfile)
2401 cifsFileInfo_put(rdata->cfile);
2402
2403 kfree(rdata);
2404}
2405
2406static int
2407cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2408{
2409 int rc = 0;
2410 struct page *page, *tpage;
2411 unsigned int i;
2412
2413 for (i = 0; i < npages; i++) {
2414 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2415 if (!page) {
2416 rc = -ENOMEM;
2417 break;
2418 }
2419 list_add(&page->lru, list);
2420 }
2421
2422 if (rc) {
2423 list_for_each_entry_safe(page, tpage, list, lru) {
2424 list_del(&page->lru);
2425 put_page(page);
2426 }
2427 }
2428 return rc;
2429}
2430
2431static void
2432cifs_uncached_readdata_release(struct kref *refcount)
2433{
2434 struct page *page, *tpage;
2435 struct cifs_readdata *rdata = container_of(refcount,
2436 struct cifs_readdata, refcount);
2437
2438 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2439 list_del(&page->lru);
2440 put_page(page);
2441 }
2442 cifs_readdata_release(refcount);
2443}
2444
2445static int
2446cifs_retry_async_readv(struct cifs_readdata *rdata)
2447{
2448 int rc;
2449
2450 do {
2451 if (rdata->cfile->invalidHandle) {
2452 rc = cifs_reopen_file(rdata->cfile, true);
2453 if (rc != 0)
2454 continue;
2455 }
2456 rc = cifs_async_readv(rdata);
2457 } while (rc == -EAGAIN);
2458
2459 return rc;
2460}
2461
2462/**
2463 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2464 * @rdata: the readdata response with list of pages holding data
2465 * @iov: vector in which we should copy the data
2466 * @nr_segs: number of segments in vector
2467 * @offset: offset into file of the first iovec
2468 * @copied: used to return the amount of data copied to the iov
2469 *
2470 * This function copies data from a list of pages in a readdata response into
2471 * an array of iovecs. It will first calculate where the data should go
2472 * based on the info in the readdata and then copy the data into that spot.
2473 */
2474static ssize_t
2475cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2476 unsigned long nr_segs, loff_t offset, ssize_t *copied)
2477{
2478 int rc = 0;
2479 struct iov_iter ii;
2480 size_t pos = rdata->offset - offset;
2481 struct page *page, *tpage;
2482 ssize_t remaining = rdata->bytes;
2483 unsigned char *pdata;
2484
2485 /* set up iov_iter and advance to the correct offset */
2486 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2487 iov_iter_advance(&ii, pos);
2488
2489 *copied = 0;
2490 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2491 ssize_t copy;
2492
2493 /* copy a whole page or whatever's left */
2494 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2495
2496 /* ...but limit it to whatever space is left in the iov */
2497 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2498
2499 /* go while there's data to be copied and no errors */
2500 if (copy && !rc) {
2501 pdata = kmap(page);
2502 rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2503 (int)copy);
2504 kunmap(page);
2505 if (!rc) {
2506 *copied += copy;
2507 remaining -= copy;
2508 iov_iter_advance(&ii, copy);
2509 }
2510 }
2511
2512 list_del(&page->lru);
2513 put_page(page);
2514 }
2515
2516 return rc;
2517}
2518
2519static void
2520cifs_uncached_readv_complete(struct work_struct *work)
2521{
2522 struct cifs_readdata *rdata = container_of(work,
2523 struct cifs_readdata, work);
2524
2525 /* if the result is non-zero then the pages weren't kmapped */
2526 if (rdata->result == 0) {
2527 struct page *page;
2528
2529 list_for_each_entry(page, &rdata->pages, lru)
2530 kunmap(page);
2531 }
2532
2533 complete(&rdata->done);
2534 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2535}
2536
2537static int
2538cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
2539 unsigned int remaining)
2540{
2541 int len = 0;
2542 struct page *page, *tpage;
2543
2544 rdata->nr_iov = 1;
2545 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2546 if (remaining >= PAGE_SIZE) {
2547 /* enough data to fill the page */
2548 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2549 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
2550 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2551 rdata->nr_iov, page->index,
2552 rdata->iov[rdata->nr_iov].iov_base,
2553 rdata->iov[rdata->nr_iov].iov_len);
2554 ++rdata->nr_iov;
2555 len += PAGE_SIZE;
2556 remaining -= PAGE_SIZE;
2557 } else if (remaining > 0) {
2558 /* enough for partial page, fill and zero the rest */
2559 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2560 rdata->iov[rdata->nr_iov].iov_len = remaining;
2561 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2562 rdata->nr_iov, page->index,
2563 rdata->iov[rdata->nr_iov].iov_base,
2564 rdata->iov[rdata->nr_iov].iov_len);
2565 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2566 '\0', PAGE_SIZE - remaining);
2567 ++rdata->nr_iov;
2568 len += remaining;
2569 remaining = 0;
2570 } else {
2571 /* no need to hold page hostage */
2572 list_del(&page->lru);
2573 put_page(page);
2574 }
2575 }
2576
2577 return len;
2578}
2579
2342static ssize_t 2580static ssize_t
2343cifs_iovec_read(struct file *file, const struct iovec *iov, 2581cifs_iovec_read(struct file *file, const struct iovec *iov,
2344 unsigned long nr_segs, loff_t *poffset) 2582 unsigned long nr_segs, loff_t *poffset)
2345{ 2583{
2346 int rc; 2584 ssize_t rc;
2347 int xid;
2348 ssize_t total_read;
2349 unsigned int bytes_read = 0;
2350 size_t len, cur_len; 2585 size_t len, cur_len;
2351 int iov_offset = 0; 2586 ssize_t total_read = 0;
2587 loff_t offset = *poffset;
2588 unsigned int npages;
2352 struct cifs_sb_info *cifs_sb; 2589 struct cifs_sb_info *cifs_sb;
2353 struct cifs_tcon *pTcon; 2590 struct cifs_tcon *tcon;
2354 struct cifsFileInfo *open_file; 2591 struct cifsFileInfo *open_file;
2355 struct smb_com_read_rsp *pSMBr; 2592 struct cifs_readdata *rdata, *tmp;
2356 struct cifs_io_parms io_parms; 2593 struct list_head rdata_list;
2357 char *read_data; 2594 pid_t pid;
2358 unsigned int rsize;
2359 __u32 pid;
2360 2595
2361 if (!nr_segs) 2596 if (!nr_segs)
2362 return 0; 2597 return 0;
@@ -2365,14 +2600,10 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
2365 if (!len) 2600 if (!len)
2366 return 0; 2601 return 0;
2367 2602
2368 xid = GetXid(); 2603 INIT_LIST_HEAD(&rdata_list);
2369 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2604 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2370
2371 /* FIXME: set up handlers for larger reads and/or convert to async */
2372 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2373
2374 open_file = file->private_data; 2605 open_file = file->private_data;
2375 pTcon = tlink_tcon(open_file->tlink); 2606 tcon = tlink_tcon(open_file->tlink);
2376 2607
2377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2608 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2378 pid = open_file->pid; 2609 pid = open_file->pid;
@@ -2382,56 +2613,78 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
2382 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 2613 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2383 cFYI(1, "attempting read on write only file instance"); 2614 cFYI(1, "attempting read on write only file instance");
2384 2615
2385 for (total_read = 0; total_read < len; total_read += bytes_read) { 2616 do {
2386 cur_len = min_t(const size_t, len - total_read, rsize); 2617 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2387 rc = -EAGAIN; 2618 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2388 read_data = NULL;
2389 2619
2390 while (rc == -EAGAIN) { 2620 /* allocate a readdata struct */
2391 int buf_type = CIFS_NO_BUFFER; 2621 rdata = cifs_readdata_alloc(npages,
2392 if (open_file->invalidHandle) { 2622 cifs_uncached_readv_complete);
2393 rc = cifs_reopen_file(open_file, true); 2623 if (!rdata) {
2394 if (rc != 0) 2624 rc = -ENOMEM;
2395 break; 2625 goto error;
2396 }
2397 io_parms.netfid = open_file->netfid;
2398 io_parms.pid = pid;
2399 io_parms.tcon = pTcon;
2400 io_parms.offset = *poffset;
2401 io_parms.length = cur_len;
2402 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2403 &read_data, &buf_type);
2404 pSMBr = (struct smb_com_read_rsp *)read_data;
2405 if (read_data) {
2406 char *data_offset = read_data + 4 +
2407 le16_to_cpu(pSMBr->DataOffset);
2408 if (memcpy_toiovecend(iov, data_offset,
2409 iov_offset, bytes_read))
2410 rc = -EFAULT;
2411 if (buf_type == CIFS_SMALL_BUFFER)
2412 cifs_small_buf_release(read_data);
2413 else if (buf_type == CIFS_LARGE_BUFFER)
2414 cifs_buf_release(read_data);
2415 read_data = NULL;
2416 iov_offset += bytes_read;
2417 }
2418 } 2626 }
2419 2627
2420 if (rc || (bytes_read == 0)) { 2628 rc = cifs_read_allocate_pages(&rdata->pages, npages);
2421 if (total_read) { 2629 if (rc)
2422 break; 2630 goto error;
2423 } else { 2631
2424 FreeXid(xid); 2632 rdata->cfile = cifsFileInfo_get(open_file);
2425 return rc; 2633 rdata->offset = offset;
2634 rdata->bytes = cur_len;
2635 rdata->pid = pid;
2636 rdata->marshal_iov = cifs_uncached_read_marshal_iov;
2637
2638 rc = cifs_retry_async_readv(rdata);
2639error:
2640 if (rc) {
2641 kref_put(&rdata->refcount,
2642 cifs_uncached_readdata_release);
2643 break;
2644 }
2645
2646 list_add_tail(&rdata->list, &rdata_list);
2647 offset += cur_len;
2648 len -= cur_len;
2649 } while (len > 0);
2650
2651 /* if at least one read request send succeeded, then reset rc */
2652 if (!list_empty(&rdata_list))
2653 rc = 0;
2654
2655 /* the loop below should proceed in the order of increasing offsets */
2656restart_loop:
2657 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2658 if (!rc) {
2659 ssize_t copied;
2660
2661 /* FIXME: freezable sleep too? */
2662 rc = wait_for_completion_killable(&rdata->done);
2663 if (rc)
2664 rc = -EINTR;
2665 else if (rdata->result)
2666 rc = rdata->result;
2667 else {
2668 rc = cifs_readdata_to_iov(rdata, iov,
2669 nr_segs, *poffset,
2670 &copied);
2671 total_read += copied;
2672 }
2673
2674 /* resend call if it's a retryable error */
2675 if (rc == -EAGAIN) {
2676 rc = cifs_retry_async_readv(rdata);
2677 goto restart_loop;
2426 } 2678 }
2427 } else {
2428 cifs_stats_bytes_read(pTcon, bytes_read);
2429 *poffset += bytes_read;
2430 } 2679 }
2680 list_del_init(&rdata->list);
2681 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2431 } 2682 }
2432 2683
2433 FreeXid(xid); 2684 cifs_stats_bytes_read(tcon, total_read);
2434 return total_read; 2685 *poffset += total_read;
2686
2687 return total_read ? total_read : rc;
2435} 2688}
2436 2689
2437ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, 2690ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
@@ -2606,6 +2859,100 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2606 return rc; 2859 return rc;
2607} 2860}
2608 2861
2862static void
2863cifs_readv_complete(struct work_struct *work)
2864{
2865 struct cifs_readdata *rdata = container_of(work,
2866 struct cifs_readdata, work);
2867 struct page *page, *tpage;
2868
2869 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2870 list_del(&page->lru);
2871 lru_cache_add_file(page);
2872
2873 if (rdata->result == 0) {
2874 kunmap(page);
2875 flush_dcache_page(page);
2876 SetPageUptodate(page);
2877 }
2878
2879 unlock_page(page);
2880
2881 if (rdata->result == 0)
2882 cifs_readpage_to_fscache(rdata->mapping->host, page);
2883
2884 page_cache_release(page);
2885 }
2886 kref_put(&rdata->refcount, cifs_readdata_release);
2887}
2888
2889static int
2890cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2891{
2892 int len = 0;
2893 struct page *page, *tpage;
2894 u64 eof;
2895 pgoff_t eof_index;
2896
2897 /* determine the eof that the server (probably) has */
2898 eof = CIFS_I(rdata->mapping->host)->server_eof;
2899 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2900 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2901
2902 rdata->nr_iov = 1;
2903 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2904 if (remaining >= PAGE_CACHE_SIZE) {
2905 /* enough data to fill the page */
2906 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2907 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
2908 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2909 rdata->nr_iov, page->index,
2910 rdata->iov[rdata->nr_iov].iov_base,
2911 rdata->iov[rdata->nr_iov].iov_len);
2912 ++rdata->nr_iov;
2913 len += PAGE_CACHE_SIZE;
2914 remaining -= PAGE_CACHE_SIZE;
2915 } else if (remaining > 0) {
2916 /* enough for partial page, fill and zero the rest */
2917 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
2918 rdata->iov[rdata->nr_iov].iov_len = remaining;
2919 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2920 rdata->nr_iov, page->index,
2921 rdata->iov[rdata->nr_iov].iov_base,
2922 rdata->iov[rdata->nr_iov].iov_len);
2923 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
2924 '\0', PAGE_CACHE_SIZE - remaining);
2925 ++rdata->nr_iov;
2926 len += remaining;
2927 remaining = 0;
2928 } else if (page->index > eof_index) {
2929 /*
2930 * The VFS will not try to do readahead past the
2931 * i_size, but it's possible that we have outstanding
2932 * writes with gaps in the middle and the i_size hasn't
2933 * caught up yet. Populate those with zeroed out pages
2934 * to prevent the VFS from repeatedly attempting to
2935 * fill them until the writes are flushed.
2936 */
2937 zero_user(page, 0, PAGE_CACHE_SIZE);
2938 list_del(&page->lru);
2939 lru_cache_add_file(page);
2940 flush_dcache_page(page);
2941 SetPageUptodate(page);
2942 unlock_page(page);
2943 page_cache_release(page);
2944 } else {
2945 /* no need to hold page hostage */
2946 list_del(&page->lru);
2947 lru_cache_add_file(page);
2948 unlock_page(page);
2949 page_cache_release(page);
2950 }
2951 }
2952
2953 return len;
2954}
2955
2609static int cifs_readpages(struct file *file, struct address_space *mapping, 2956static int cifs_readpages(struct file *file, struct address_space *mapping,
2610 struct list_head *page_list, unsigned num_pages) 2957 struct list_head *page_list, unsigned num_pages)
2611{ 2958{
@@ -2708,7 +3055,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2708 nr_pages++; 3055 nr_pages++;
2709 } 3056 }
2710 3057
2711 rdata = cifs_readdata_alloc(nr_pages); 3058 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
2712 if (!rdata) { 3059 if (!rdata) {
2713 /* best to give up if we're out of mem */ 3060 /* best to give up if we're out of mem */
2714 list_for_each_entry_safe(page, tpage, &tmplist, lru) { 3061 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
@@ -2722,24 +3069,16 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2722 } 3069 }
2723 3070
2724 spin_lock(&cifs_file_list_lock); 3071 spin_lock(&cifs_file_list_lock);
2725 cifsFileInfo_get(open_file);
2726 spin_unlock(&cifs_file_list_lock); 3072 spin_unlock(&cifs_file_list_lock);
2727 rdata->cfile = open_file; 3073 rdata->cfile = cifsFileInfo_get(open_file);
2728 rdata->mapping = mapping; 3074 rdata->mapping = mapping;
2729 rdata->offset = offset; 3075 rdata->offset = offset;
2730 rdata->bytes = bytes; 3076 rdata->bytes = bytes;
2731 rdata->pid = pid; 3077 rdata->pid = pid;
3078 rdata->marshal_iov = cifs_readpages_marshal_iov;
2732 list_splice_init(&tmplist, &rdata->pages); 3079 list_splice_init(&tmplist, &rdata->pages);
2733 3080
2734 do { 3081 rc = cifs_retry_async_readv(rdata);
2735 if (open_file->invalidHandle) {
2736 rc = cifs_reopen_file(open_file, true);
2737 if (rc != 0)
2738 continue;
2739 }
2740 rc = cifs_async_readv(rdata);
2741 } while (rc == -EAGAIN);
2742
2743 if (rc != 0) { 3082 if (rc != 0) {
2744 list_for_each_entry_safe(page, tpage, &rdata->pages, 3083 list_for_each_entry_safe(page, tpage, &rdata->pages,
2745 lru) { 3084 lru) {
@@ -2748,9 +3087,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2748 unlock_page(page); 3087 unlock_page(page);
2749 page_cache_release(page); 3088 page_cache_release(page);
2750 } 3089 }
2751 cifs_readdata_free(rdata); 3090 kref_put(&rdata->refcount, cifs_readdata_release);
2752 break; 3091 break;
2753 } 3092 }
3093
3094 kref_put(&rdata->refcount, cifs_readdata_release);
2754 } 3095 }
2755 3096
2756 return rc; 3097 return rc;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 4221b5e48a42..6d2667f0c98c 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -51,7 +51,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
51 cifs_sb = CIFS_SB(inode->i_sb); 51 cifs_sb = CIFS_SB(inode->i_sb);
52 52
53 switch (command) { 53 switch (command) {
54 static bool warned = false;
54 case CIFS_IOC_CHECKUMOUNT: 55 case CIFS_IOC_CHECKUMOUNT:
56 if (!warned) {
57 warned = true;
58 cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will "
59 "be deprecated in 3.7. Please "
60 "migrate away from the use of "
61 "umount.cifs");
62 }
55 cFYI(1, "User unmount attempted"); 63 cFYI(1, "User unmount attempted");
56 if (cifs_sb->mnt_uid == current_uid()) 64 if (cifs_sb->mnt_uid == current_uid())
57 rc = 0; 65 rc = 0;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index c29d1aa2c54f..e2552d2b2e42 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -306,8 +306,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
306 const struct cifs_tcon *treeCon, int word_count 306 const struct cifs_tcon *treeCon, int word_count
307 /* length of fixed section (word count) in two byte units */) 307 /* length of fixed section (word count) in two byte units */)
308{ 308{
309 struct list_head *temp_item;
310 struct cifs_ses *ses;
311 char *temp = (char *) buffer; 309 char *temp = (char *) buffer;
312 310
313 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 311 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
@@ -337,51 +335,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
337 /* Uid is not converted */ 335 /* Uid is not converted */
338 buffer->Uid = treeCon->ses->Suid; 336 buffer->Uid = treeCon->ses->Suid;
339 buffer->Mid = GetNextMid(treeCon->ses->server); 337 buffer->Mid = GetNextMid(treeCon->ses->server);
340 if (multiuser_mount != 0) {
341 /* For the multiuser case, there are few obvious technically */
342 /* possible mechanisms to match the local linux user (uid) */
343 /* to a valid remote smb user (smb_uid): */
344 /* 1) Query Winbind (or other local pam/nss daemon */
345 /* for userid/password/logon_domain or credential */
346 /* 2) Query Winbind for uid to sid to username mapping */
347 /* and see if we have a matching password for existing*/
348 /* session for that user perhas getting password by */
349 /* adding a new pam_cifs module that stores passwords */
350 /* so that the cifs vfs can get at that for all logged*/
351 /* on users */
352 /* 3) (Which is the mechanism we have chosen) */
353 /* Search through sessions to the same server for a */
354 /* a match on the uid that was passed in on mount */
355 /* with the current processes uid (or euid?) and use */
356 /* that smb uid. If no existing smb session for */
357 /* that uid found, use the default smb session ie */
358 /* the smb session for the volume mounted which is */
359 /* the same as would be used if the multiuser mount */
360 /* flag were disabled. */
361
362 /* BB Add support for establishing new tCon and SMB Session */
363 /* with userid/password pairs found on the smb session */
364 /* for other target tcp/ip addresses BB */
365 if (current_fsuid() != treeCon->ses->linux_uid) {
366 cFYI(1, "Multiuser mode and UID "
367 "did not match tcon uid");
368 spin_lock(&cifs_tcp_ses_lock);
369 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
370 ses = list_entry(temp_item, struct cifs_ses, smb_ses_list);
371 if (ses->linux_uid == current_fsuid()) {
372 if (ses->server == treeCon->ses->server) {
373 cFYI(1, "found matching uid substitute right smb_uid");
374 buffer->Uid = ses->Suid;
375 break;
376 } else {
377 /* BB eventually call cifs_setup_session here */
378 cFYI(1, "local UID found but no smb sess with this server exists");
379 }
380 }
381 }
382 spin_unlock(&cifs_tcp_ses_lock);
383 }
384 }
385 } 338 }
386 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) 339 if (treeCon->Flags & SMB_SHARE_IS_IN_DFS)
387 buffer->Flags2 |= SMBFLG2_DFS; 340 buffer->Flags2 |= SMBFLG2_DFS;
@@ -700,22 +653,3 @@ backup_cred(struct cifs_sb_info *cifs_sb)
700 653
701 return false; 654 return false;
702} 655}
703
704void
705cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add)
706{
707 spin_lock(&server->req_lock);
708 server->credits += add;
709 server->in_flight--;
710 spin_unlock(&server->req_lock);
711 wake_up(&server->request_q);
712}
713
714void
715cifs_set_credits(struct TCP_Server_Info *server, const int val)
716{
717 spin_lock(&server->req_lock);
718 server->credits = val;
719 server->oplocks = val > 1 ? enable_oplocks : false;
720 spin_unlock(&server->req_lock);
721}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index e2bbc683e018..0a8224d1c4c5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -219,6 +219,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
219 219
220static int initiate_cifs_search(const int xid, struct file *file) 220static int initiate_cifs_search(const int xid, struct file *file)
221{ 221{
222 __u16 search_flags;
222 int rc = 0; 223 int rc = 0;
223 char *full_path = NULL; 224 char *full_path = NULL;
224 struct cifsFileInfo *cifsFile; 225 struct cifsFileInfo *cifsFile;
@@ -270,8 +271,12 @@ ffirst_retry:
270 cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; 271 cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
271 } 272 }
272 273
274 search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
275 if (backup_cred(cifs_sb))
276 search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
277
273 rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls, 278 rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls,
274 &cifsFile->netfid, &cifsFile->srch_inf, 279 &cifsFile->netfid, search_flags, &cifsFile->srch_inf,
275 cifs_sb->mnt_cifs_flags & 280 cifs_sb->mnt_cifs_flags &
276 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 281 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
277 if (rc == 0) 282 if (rc == 0)
@@ -502,11 +507,13 @@ static int cifs_save_resume_key(const char *current_entry,
502static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon, 507static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
503 struct file *file, char **ppCurrentEntry, int *num_to_ret) 508 struct file *file, char **ppCurrentEntry, int *num_to_ret)
504{ 509{
510 __u16 search_flags;
505 int rc = 0; 511 int rc = 0;
506 int pos_in_buf = 0; 512 int pos_in_buf = 0;
507 loff_t first_entry_in_buffer; 513 loff_t first_entry_in_buffer;
508 loff_t index_to_find = file->f_pos; 514 loff_t index_to_find = file->f_pos;
509 struct cifsFileInfo *cifsFile = file->private_data; 515 struct cifsFileInfo *cifsFile = file->private_data;
516 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
510 /* check if index in the buffer */ 517 /* check if index in the buffer */
511 518
512 if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || 519 if ((cifsFile == NULL) || (ppCurrentEntry == NULL) ||
@@ -560,10 +567,14 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
560 cifsFile); 567 cifsFile);
561 } 568 }
562 569
570 search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
571 if (backup_cred(cifs_sb))
572 search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
573
563 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 574 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
564 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 575 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
565 cFYI(1, "calling findnext2"); 576 cFYI(1, "calling findnext2");
566 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 577 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags,
567 &cifsFile->srch_inf); 578 &cifsFile->srch_inf);
568 /* FindFirst/Next set last_entry to NULL on malformed reply */ 579 /* FindFirst/Next set last_entry to NULL on malformed reply */
569 if (cifsFile->srch_inf.last_entry) 580 if (cifsFile->srch_inf.last_entry)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
new file mode 100644
index 000000000000..d9d615fbed3f
--- /dev/null
+++ b/fs/cifs/smb1ops.c
@@ -0,0 +1,154 @@
1/*
2 * SMB1 (CIFS) version specific operations
3 *
4 * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com>
5 *
6 * This library is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License v2 as published
8 * by the Free Software Foundation.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include "cifsglob.h"
21#include "cifsproto.h"
22#include "cifs_debug.h"
23#include "cifspdu.h"
24
25/*
26 * An NT cancel request header looks just like the original request except:
27 *
28 * The Command is SMB_COM_NT_CANCEL
29 * The WordCount is zeroed out
30 * The ByteCount is zeroed out
31 *
32 * This function mangles an existing request buffer into a
33 * SMB_COM_NT_CANCEL request and then sends it.
34 */
35static int
36send_nt_cancel(struct TCP_Server_Info *server, void *buf,
37 struct mid_q_entry *mid)
38{
39 int rc = 0;
40 struct smb_hdr *in_buf = (struct smb_hdr *)buf;
41
42 /* -4 for RFC1001 length and +2 for BCC field */
43 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
44 in_buf->Command = SMB_COM_NT_CANCEL;
45 in_buf->WordCount = 0;
46 put_bcc(0, in_buf);
47
48 mutex_lock(&server->srv_mutex);
49 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
50 if (rc) {
51 mutex_unlock(&server->srv_mutex);
52 return rc;
53 }
54 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
55 mutex_unlock(&server->srv_mutex);
56
57 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
58 in_buf->Mid, rc);
59
60 return rc;
61}
62
63static bool
64cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
65{
66 return ob1->netfid == ob2->netfid;
67}
68
69static unsigned int
70cifs_read_data_offset(char *buf)
71{
72 READ_RSP *rsp = (READ_RSP *)buf;
73 return le16_to_cpu(rsp->DataOffset);
74}
75
76static unsigned int
77cifs_read_data_length(char *buf)
78{
79 READ_RSP *rsp = (READ_RSP *)buf;
80 return (le16_to_cpu(rsp->DataLengthHigh) << 16) +
81 le16_to_cpu(rsp->DataLength);
82}
83
84static struct mid_q_entry *
85cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
86{
87 struct smb_hdr *buf = (struct smb_hdr *)buffer;
88 struct mid_q_entry *mid;
89
90 spin_lock(&GlobalMid_Lock);
91 list_for_each_entry(mid, &server->pending_mid_q, qhead) {
92 if (mid->mid == buf->Mid &&
93 mid->mid_state == MID_REQUEST_SUBMITTED &&
94 le16_to_cpu(mid->command) == buf->Command) {
95 spin_unlock(&GlobalMid_Lock);
96 return mid;
97 }
98 }
99 spin_unlock(&GlobalMid_Lock);
100 return NULL;
101}
102
103static void
104cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add)
105{
106 spin_lock(&server->req_lock);
107 server->credits += add;
108 server->in_flight--;
109 spin_unlock(&server->req_lock);
110 wake_up(&server->request_q);
111}
112
113static void
114cifs_set_credits(struct TCP_Server_Info *server, const int val)
115{
116 spin_lock(&server->req_lock);
117 server->credits = val;
118 server->oplocks = val > 1 ? enable_oplocks : false;
119 spin_unlock(&server->req_lock);
120}
121
122static int *
123cifs_get_credits_field(struct TCP_Server_Info *server)
124{
125 return &server->credits;
126}
127
128struct smb_version_operations smb1_operations = {
129 .send_cancel = send_nt_cancel,
130 .compare_fids = cifs_compare_fids,
131 .setup_request = cifs_setup_request,
132 .check_receive = cifs_check_receive,
133 .add_credits = cifs_add_credits,
134 .set_credits = cifs_set_credits,
135 .get_credits_field = cifs_get_credits_field,
136 .read_data_offset = cifs_read_data_offset,
137 .read_data_length = cifs_read_data_length,
138 .map_error = map_smb_to_linux_error,
139 .find_mid = cifs_find_mid,
140 .check_message = checkSMB,
141 .dump_detail = cifs_dump_detail,
142 .is_oplock_break = is_valid_oplock_break,
143};
144
145struct smb_version_values smb1_values = {
146 .version_string = SMB1_VERSION_STRING,
147 .large_lock_type = LOCKING_ANDX_LARGE_FILES,
148 .exclusive_lock_type = 0,
149 .shared_lock_type = LOCKING_ANDX_SHARED_LOCK,
150 .unlock_lock_type = 0,
151 .header_size = sizeof(struct smb_hdr),
152 .max_header_size = MAX_CIFS_HDR_SIZE,
153 .read_rsp_size = sizeof(READ_RSP),
154};
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
new file mode 100644
index 000000000000..f065e89756a1
--- /dev/null
+++ b/fs/cifs/smb2ops.c
@@ -0,0 +1,27 @@
1/*
2 * SMB2 version specific operations
3 *
4 * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com>
5 *
6 * This library is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License v2 as published
8 * by the Free Software Foundation.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include "cifsglob.h"
21
22struct smb_version_operations smb21_operations = {
23};
24
25struct smb_version_values smb21_values = {
26 .version_string = SMB21_VERSION_STRING,
27};
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0961336513d5..1b36ffe6a47b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -304,7 +304,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int optype,
304static int 304static int
305wait_for_free_request(struct TCP_Server_Info *server, const int optype) 305wait_for_free_request(struct TCP_Server_Info *server, const int optype)
306{ 306{
307 return wait_for_free_credits(server, optype, get_credits_field(server)); 307 return wait_for_free_credits(server, optype,
308 server->ops->get_credits_field(server));
308} 309}
309 310
310static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, 311static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
@@ -396,7 +397,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
396 rc = cifs_setup_async_request(server, iov, nvec, &mid); 397 rc = cifs_setup_async_request(server, iov, nvec, &mid);
397 if (rc) { 398 if (rc) {
398 mutex_unlock(&server->srv_mutex); 399 mutex_unlock(&server->srv_mutex);
399 cifs_add_credits(server, 1); 400 add_credits(server, 1);
400 wake_up(&server->request_q); 401 wake_up(&server->request_q);
401 return rc; 402 return rc;
402 } 403 }
@@ -418,7 +419,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
418 return rc; 419 return rc;
419out_err: 420out_err:
420 delete_mid(mid); 421 delete_mid(mid);
421 cifs_add_credits(server, 1); 422 add_credits(server, 1);
422 wake_up(&server->request_q); 423 wake_up(&server->request_q);
423 return rc; 424 return rc;
424} 425}
@@ -483,41 +484,11 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
483 return rc; 484 return rc;
484} 485}
485 486
486/* 487static inline int
487 * An NT cancel request header looks just like the original request except: 488send_cancel(struct TCP_Server_Info *server, void *buf, struct mid_q_entry *mid)
488 *
489 * The Command is SMB_COM_NT_CANCEL
490 * The WordCount is zeroed out
491 * The ByteCount is zeroed out
492 *
493 * This function mangles an existing request buffer into a
494 * SMB_COM_NT_CANCEL request and then sends it.
495 */
496static int
497send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
498 struct mid_q_entry *mid)
499{ 489{
500 int rc = 0; 490 return server->ops->send_cancel ?
501 491 server->ops->send_cancel(server, buf, mid) : 0;
502 /* -4 for RFC1001 length and +2 for BCC field */
503 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
504 in_buf->Command = SMB_COM_NT_CANCEL;
505 in_buf->WordCount = 0;
506 put_bcc(0, in_buf);
507
508 mutex_lock(&server->srv_mutex);
509 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
510 if (rc) {
511 mutex_unlock(&server->srv_mutex);
512 return rc;
513 }
514 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
515 mutex_unlock(&server->srv_mutex);
516
517 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
518 in_buf->Mid, rc);
519
520 return rc;
521} 492}
522 493
523int 494int
@@ -544,7 +515,7 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
544 return map_smb_to_linux_error(mid->resp_buf, log_error); 515 return map_smb_to_linux_error(mid->resp_buf, log_error);
545} 516}
546 517
547static int 518int
548cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, 519cifs_setup_request(struct cifs_ses *ses, struct kvec *iov,
549 unsigned int nvec, struct mid_q_entry **ret_mid) 520 unsigned int nvec, struct mid_q_entry **ret_mid)
550{ 521{
@@ -607,12 +578,12 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
607 578
608 mutex_lock(&ses->server->srv_mutex); 579 mutex_lock(&ses->server->srv_mutex);
609 580
610 rc = cifs_setup_request(ses, iov, n_vec, &midQ); 581 rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ);
611 if (rc) { 582 if (rc) {
612 mutex_unlock(&ses->server->srv_mutex); 583 mutex_unlock(&ses->server->srv_mutex);
613 cifs_small_buf_release(buf); 584 cifs_small_buf_release(buf);
614 /* Update # of requests on wire to server */ 585 /* Update # of requests on wire to server */
615 cifs_add_credits(ses->server, 1); 586 add_credits(ses->server, 1);
616 return rc; 587 return rc;
617 } 588 }
618 589
@@ -636,13 +607,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
636 607
637 rc = wait_for_response(ses->server, midQ); 608 rc = wait_for_response(ses->server, midQ);
638 if (rc != 0) { 609 if (rc != 0) {
639 send_nt_cancel(ses->server, (struct smb_hdr *)buf, midQ); 610 send_cancel(ses->server, buf, midQ);
640 spin_lock(&GlobalMid_Lock); 611 spin_lock(&GlobalMid_Lock);
641 if (midQ->mid_state == MID_REQUEST_SUBMITTED) { 612 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
642 midQ->callback = DeleteMidQEntry; 613 midQ->callback = DeleteMidQEntry;
643 spin_unlock(&GlobalMid_Lock); 614 spin_unlock(&GlobalMid_Lock);
644 cifs_small_buf_release(buf); 615 cifs_small_buf_release(buf);
645 cifs_add_credits(ses->server, 1); 616 add_credits(ses->server, 1);
646 return rc; 617 return rc;
647 } 618 }
648 spin_unlock(&GlobalMid_Lock); 619 spin_unlock(&GlobalMid_Lock);
@@ -652,7 +623,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
652 623
653 rc = cifs_sync_mid_result(midQ, ses->server); 624 rc = cifs_sync_mid_result(midQ, ses->server);
654 if (rc != 0) { 625 if (rc != 0) {
655 cifs_add_credits(ses->server, 1); 626 add_credits(ses->server, 1);
656 return rc; 627 return rc;
657 } 628 }
658 629
@@ -670,14 +641,15 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
670 else 641 else
671 *pRespBufType = CIFS_SMALL_BUFFER; 642 *pRespBufType = CIFS_SMALL_BUFFER;
672 643
673 rc = cifs_check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR); 644 rc = ses->server->ops->check_receive(midQ, ses->server,
645 flags & CIFS_LOG_ERROR);
674 646
675 /* mark it so buf will not be freed by delete_mid */ 647 /* mark it so buf will not be freed by delete_mid */
676 if ((flags & CIFS_NO_RESP) == 0) 648 if ((flags & CIFS_NO_RESP) == 0)
677 midQ->resp_buf = NULL; 649 midQ->resp_buf = NULL;
678out: 650out:
679 delete_mid(midQ); 651 delete_mid(midQ);
680 cifs_add_credits(ses->server, 1); 652 add_credits(ses->server, 1);
681 653
682 return rc; 654 return rc;
683} 655}
@@ -727,7 +699,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
727 if (rc) { 699 if (rc) {
728 mutex_unlock(&ses->server->srv_mutex); 700 mutex_unlock(&ses->server->srv_mutex);
729 /* Update # of requests on wire to server */ 701 /* Update # of requests on wire to server */
730 cifs_add_credits(ses->server, 1); 702 add_credits(ses->server, 1);
731 return rc; 703 return rc;
732 } 704 }
733 705
@@ -753,13 +725,13 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
753 725
754 rc = wait_for_response(ses->server, midQ); 726 rc = wait_for_response(ses->server, midQ);
755 if (rc != 0) { 727 if (rc != 0) {
756 send_nt_cancel(ses->server, in_buf, midQ); 728 send_cancel(ses->server, in_buf, midQ);
757 spin_lock(&GlobalMid_Lock); 729 spin_lock(&GlobalMid_Lock);
758 if (midQ->mid_state == MID_REQUEST_SUBMITTED) { 730 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
759 /* no longer considered to be "in-flight" */ 731 /* no longer considered to be "in-flight" */
760 midQ->callback = DeleteMidQEntry; 732 midQ->callback = DeleteMidQEntry;
761 spin_unlock(&GlobalMid_Lock); 733 spin_unlock(&GlobalMid_Lock);
762 cifs_add_credits(ses->server, 1); 734 add_credits(ses->server, 1);
763 return rc; 735 return rc;
764 } 736 }
765 spin_unlock(&GlobalMid_Lock); 737 spin_unlock(&GlobalMid_Lock);
@@ -767,7 +739,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
767 739
768 rc = cifs_sync_mid_result(midQ, ses->server); 740 rc = cifs_sync_mid_result(midQ, ses->server);
769 if (rc != 0) { 741 if (rc != 0) {
770 cifs_add_credits(ses->server, 1); 742 add_credits(ses->server, 1);
771 return rc; 743 return rc;
772 } 744 }
773 745
@@ -783,7 +755,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
783 rc = cifs_check_receive(midQ, ses->server, 0); 755 rc = cifs_check_receive(midQ, ses->server, 0);
784out: 756out:
785 delete_mid(midQ); 757 delete_mid(midQ);
786 cifs_add_credits(ses->server, 1); 758 add_credits(ses->server, 1);
787 759
788 return rc; 760 return rc;
789} 761}
@@ -898,7 +870,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
898 if (in_buf->Command == SMB_COM_TRANSACTION2) { 870 if (in_buf->Command == SMB_COM_TRANSACTION2) {
899 /* POSIX lock. We send a NT_CANCEL SMB to cause the 871 /* POSIX lock. We send a NT_CANCEL SMB to cause the
900 blocking lock to return. */ 872 blocking lock to return. */
901 rc = send_nt_cancel(ses->server, in_buf, midQ); 873 rc = send_cancel(ses->server, in_buf, midQ);
902 if (rc) { 874 if (rc) {
903 delete_mid(midQ); 875 delete_mid(midQ);
904 return rc; 876 return rc;
@@ -919,7 +891,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
919 891
920 rc = wait_for_response(ses->server, midQ); 892 rc = wait_for_response(ses->server, midQ);
921 if (rc) { 893 if (rc) {
922 send_nt_cancel(ses->server, in_buf, midQ); 894 send_cancel(ses->server, in_buf, midQ);
923 spin_lock(&GlobalMid_Lock); 895 spin_lock(&GlobalMid_Lock);
924 if (midQ->mid_state == MID_REQUEST_SUBMITTED) { 896 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
925 /* no longer considered to be "in-flight" */ 897 /* no longer considered to be "in-flight" */
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2870597b5c9d..f1813120d753 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -244,7 +244,7 @@ static void coda_put_super(struct super_block *sb)
244static void coda_evict_inode(struct inode *inode) 244static void coda_evict_inode(struct inode *inode)
245{ 245{
246 truncate_inode_pages(&inode->i_data, 0); 246 truncate_inode_pages(&inode->i_data, 0);
247 end_writeback(inode); 247 clear_inode(inode);
248 coda_cache_clear_inode(inode); 248 coda_cache_clear_inode(inode);
249} 249}
250 250
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 5dfafdd1dbd3..2340f6978d6e 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -20,6 +20,7 @@
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/debugfs.h> 21#include <linux/debugfs.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/slab.h>
23 24
24static ssize_t default_read_file(struct file *file, char __user *buf, 25static ssize_t default_read_file(struct file *file, char __user *buf,
25 size_t count, loff_t *ppos) 26 size_t count, loff_t *ppos)
@@ -520,6 +521,133 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
520} 521}
521EXPORT_SYMBOL_GPL(debugfs_create_blob); 522EXPORT_SYMBOL_GPL(debugfs_create_blob);
522 523
524struct array_data {
525 void *array;
526 u32 elements;
527};
528
529static int u32_array_open(struct inode *inode, struct file *file)
530{
531 file->private_data = NULL;
532 return nonseekable_open(inode, file);
533}
534
535static size_t format_array(char *buf, size_t bufsize, const char *fmt,
536 u32 *array, u32 array_size)
537{
538 size_t ret = 0;
539 u32 i;
540
541 for (i = 0; i < array_size; i++) {
542 size_t len;
543
544 len = snprintf(buf, bufsize, fmt, array[i]);
545 len++; /* ' ' or '\n' */
546 ret += len;
547
548 if (buf) {
549 buf += len;
550 bufsize -= len;
551 buf[-1] = (i == array_size-1) ? '\n' : ' ';
552 }
553 }
554
555 ret++; /* \0 */
556 if (buf)
557 *buf = '\0';
558
559 return ret;
560}
561
562static char *format_array_alloc(const char *fmt, u32 *array,
563 u32 array_size)
564{
565 size_t len = format_array(NULL, 0, fmt, array, array_size);
566 char *ret;
567
568 ret = kmalloc(len, GFP_KERNEL);
569 if (ret == NULL)
570 return NULL;
571
572 format_array(ret, len, fmt, array, array_size);
573 return ret;
574}
575
576static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
577 loff_t *ppos)
578{
579 struct inode *inode = file->f_path.dentry->d_inode;
580 struct array_data *data = inode->i_private;
581 size_t size;
582
583 if (*ppos == 0) {
584 if (file->private_data) {
585 kfree(file->private_data);
586 file->private_data = NULL;
587 }
588
589 file->private_data = format_array_alloc("%u", data->array,
590 data->elements);
591 }
592
593 size = 0;
594 if (file->private_data)
595 size = strlen(file->private_data);
596
597 return simple_read_from_buffer(buf, len, ppos,
598 file->private_data, size);
599}
600
601static int u32_array_release(struct inode *inode, struct file *file)
602{
603 kfree(file->private_data);
604
605 return 0;
606}
607
608static const struct file_operations u32_array_fops = {
609 .owner = THIS_MODULE,
610 .open = u32_array_open,
611 .release = u32_array_release,
612 .read = u32_array_read,
613 .llseek = no_llseek,
614};
615
616/**
617 * debugfs_create_u32_array - create a debugfs file that is used to read u32
618 * array.
619 * @name: a pointer to a string containing the name of the file to create.
620 * @mode: the permission that the file should have.
621 * @parent: a pointer to the parent dentry for this file. This should be a
622 * directory dentry if set. If this parameter is %NULL, then the
623 * file will be created in the root of the debugfs filesystem.
624 * @array: u32 array that provides data.
625 * @elements: total number of elements in the array.
626 *
627 * This function creates a file in debugfs with the given name that exports
628 * @array as data. If the @mode variable is so set it can be read from.
629 * Writing is not supported. Seek within the file is also not supported.
630 * Once array is created its size can not be changed.
631 *
632 * The function returns a pointer to dentry on success. If debugfs is not
633 * enabled in the kernel, the value -%ENODEV will be returned.
634 */
635struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
636 struct dentry *parent,
637 u32 *array, u32 elements)
638{
639 struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
640
641 if (data == NULL)
642 return NULL;
643
644 data->array = array;
645 data->elements = elements;
646
647 return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
648}
649EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
650
523#ifdef CONFIG_HAS_IOMEM 651#ifdef CONFIG_HAS_IOMEM
524 652
525/* 653/*
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2dd946b636d2..e879cf8ff0b1 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -133,7 +133,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
133static void ecryptfs_evict_inode(struct inode *inode) 133static void ecryptfs_evict_inode(struct inode *inode)
134{ 134{
135 truncate_inode_pages(&inode->i_data, 0); 135 truncate_inode_pages(&inode->i_data, 0);
136 end_writeback(inode); 136 clear_inode(inode);
137 iput(ecryptfs_inode_to_lower(inode)); 137 iput(ecryptfs_inode_to_lower(inode));
138} 138}
139 139
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 352ba149d23e..389ba8312d5d 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -16,5 +16,5 @@
16libore-y := ore.o ore_raid.o 16libore-y := ore.o ore_raid.o
17obj-$(CONFIG_ORE) += libore.o 17obj-$(CONFIG_ORE) += libore.o
18 18
19exofs-y := inode.o file.o symlink.o namei.o dir.o super.o 19exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
20obj-$(CONFIG_EXOFS_FS) += exofs.o 20obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index ca9d49665ef6..fffe86fd7a42 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -56,6 +56,9 @@
56struct exofs_dev { 56struct exofs_dev {
57 struct ore_dev ored; 57 struct ore_dev ored;
58 unsigned did; 58 unsigned did;
59 unsigned urilen;
60 uint8_t *uri;
61 struct kobject ed_kobj;
59}; 62};
60/* 63/*
61 * our extension to the in-memory superblock 64 * our extension to the in-memory superblock
@@ -73,6 +76,7 @@ struct exofs_sb_info {
73 struct ore_layout layout; /* Default files layout */ 76 struct ore_layout layout; /* Default files layout */
74 struct ore_comp one_comp; /* id & cred of partition id=0*/ 77 struct ore_comp one_comp; /* id & cred of partition id=0*/
75 struct ore_components oc; /* comps for the partition */ 78 struct ore_components oc; /* comps for the partition */
79 struct kobject s_kobj; /* holds per-sbi kobject */
76}; 80};
77 81
78/* 82/*
@@ -176,6 +180,16 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
176 const struct osd_obj_id *obj); 180 const struct osd_obj_id *obj);
177int exofs_sbi_write_stats(struct exofs_sb_info *sbi); 181int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
178 182
183/* sys.c */
184int exofs_sysfs_init(void);
185void exofs_sysfs_uninit(void);
186int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
187 struct exofs_dt_device_info *dt_dev);
188void exofs_sysfs_sb_del(struct exofs_sb_info *sbi);
189int exofs_sysfs_odev_add(struct exofs_dev *edev,
190 struct exofs_sb_info *sbi);
191void exofs_sysfs_dbg_print(void);
192
179/********************* 193/*********************
180 * operation vectors * 194 * operation vectors *
181 *********************/ 195 *********************/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ea5e1f97806a..5badb0c039de 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode)
1473 goto no_delete; 1473 goto no_delete;
1474 1474
1475 inode->i_size = 0; 1475 inode->i_size = 0;
1476 end_writeback(inode); 1476 clear_inode(inode);
1477 1477
1478 /* if we are deleting an obj that hasn't been created yet, wait. 1478 /* if we are deleting an obj that hasn't been created yet, wait.
1479 * This also makes sure that create_done cannot be called with an 1479 * This also makes sure that create_done cannot be called with an
@@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode)
1503 return; 1503 return;
1504 1504
1505no_delete: 1505no_delete:
1506 end_writeback(inode); 1506 clear_inode(inode);
1507} 1507}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 735ca06430ac..433783624d10 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -472,6 +472,7 @@ static void exofs_put_super(struct super_block *sb)
472 _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), 472 _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
473 sbi->one_comp.obj.partition); 473 sbi->one_comp.obj.partition);
474 474
475 exofs_sysfs_sb_del(sbi);
475 bdi_destroy(&sbi->bdi); 476 bdi_destroy(&sbi->bdi);
476 exofs_free_sbi(sbi); 477 exofs_free_sbi(sbi);
477 sb->s_fs_info = NULL; 478 sb->s_fs_info = NULL;
@@ -632,6 +633,12 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
632 memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], 633 memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
633 (numdevs - 1) * sizeof(sbi->oc.ods[0])); 634 (numdevs - 1) * sizeof(sbi->oc.ods[0]));
634 635
636 /* create sysfs subdir under which we put the device table
637 * And cluster layout. A Superblock is identified by the string:
638 * "dev[0].osdname"_"pid"
639 */
640 exofs_sysfs_sb_add(sbi, &dt->dt_dev_table[0]);
641
635 for (i = 0; i < numdevs; i++) { 642 for (i = 0; i < numdevs; i++) {
636 struct exofs_fscb fscb; 643 struct exofs_fscb fscb;
637 struct osd_dev_info odi; 644 struct osd_dev_info odi;
@@ -657,6 +664,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
657 eds[i].ored.od = fscb_od; 664 eds[i].ored.od = fscb_od;
658 ++sbi->oc.numdevs; 665 ++sbi->oc.numdevs;
659 fscb_od = NULL; 666 fscb_od = NULL;
667 exofs_sysfs_odev_add(&eds[i], sbi);
660 continue; 668 continue;
661 } 669 }
662 670
@@ -682,6 +690,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
682 odi.osdname); 690 odi.osdname);
683 goto out; 691 goto out;
684 } 692 }
693 exofs_sysfs_odev_add(&eds[i], sbi);
685 694
686 /* TODO: verify other information is correct and FS-uuid 695 /* TODO: verify other information is correct and FS-uuid
687 * matches. Benny what did you say about device table 696 * matches. Benny what did you say about device table
@@ -745,7 +754,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
745 sbi->one_comp.obj.partition = opts->pid; 754 sbi->one_comp.obj.partition = opts->pid;
746 sbi->one_comp.obj.id = 0; 755 sbi->one_comp.obj.id = 0;
747 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); 756 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
748 sbi->oc.numdevs = 1;
749 sbi->oc.single_comp = EC_SINGLE_COMP; 757 sbi->oc.single_comp = EC_SINGLE_COMP;
750 sbi->oc.comps = &sbi->one_comp; 758 sbi->oc.comps = &sbi->one_comp;
751 759
@@ -804,6 +812,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
804 goto free_sbi; 812 goto free_sbi;
805 813
806 ore_comp_set_dev(&sbi->oc, 0, od); 814 ore_comp_set_dev(&sbi->oc, 0, od);
815 sbi->oc.numdevs = 1;
807 } 816 }
808 817
809 __sbi_read_stats(sbi); 818 __sbi_read_stats(sbi);
@@ -844,6 +853,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
844 goto free_sbi; 853 goto free_sbi;
845 } 854 }
846 855
856 exofs_sysfs_dbg_print();
847 _exofs_print_device("Mounting", opts->dev_name, 857 _exofs_print_device("Mounting", opts->dev_name,
848 ore_comp_dev(&sbi->oc, 0), 858 ore_comp_dev(&sbi->oc, 0),
849 sbi->one_comp.obj.partition); 859 sbi->one_comp.obj.partition);
@@ -1023,6 +1033,9 @@ static int __init init_exofs(void)
1023 if (err) 1033 if (err)
1024 goto out_d; 1034 goto out_d;
1025 1035
1036 /* We don't fail if sysfs creation failed */
1037 exofs_sysfs_init();
1038
1026 return 0; 1039 return 0;
1027out_d: 1040out_d:
1028 destroy_inodecache(); 1041 destroy_inodecache();
@@ -1032,6 +1045,7 @@ out:
1032 1045
1033static void __exit exit_exofs(void) 1046static void __exit exit_exofs(void)
1034{ 1047{
1048 exofs_sysfs_uninit();
1035 unregister_filesystem(&exofs_type); 1049 unregister_filesystem(&exofs_type);
1036 destroy_inodecache(); 1050 destroy_inodecache();
1037} 1051}
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
new file mode 100644
index 000000000000..e32bc919e4e3
--- /dev/null
+++ b/fs/exofs/sys.c
@@ -0,0 +1,200 @@
1/*
2 * Copyright (C) 2012
3 * Sachin Bhamare <sbhamare@panasas.com>
4 * Boaz Harrosh <bharrosh@panasas.com>
5 *
6 * This file is part of exofs.
7 *
8 * exofs is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License 2 as published by
10 * the Free Software Foundation.
11 *
12 * exofs is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with exofs; if not, write to the:
19 * Free Software Foundation <licensing@fsf.org>
20 */
21
22#include <linux/kobject.h>
23#include <linux/device.h>
24
25#include "exofs.h"
26
27struct odev_attr {
28 struct attribute attr;
29 ssize_t (*show)(struct exofs_dev *, char *);
30 ssize_t (*store)(struct exofs_dev *, const char *, size_t);
31};
32
33static ssize_t odev_attr_show(struct kobject *kobj, struct attribute *attr,
34 char *buf)
35{
36 struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
37 struct odev_attr *a = container_of(attr, struct odev_attr, attr);
38
39 return a->show ? a->show(edp, buf) : 0;
40}
41
42static ssize_t odev_attr_store(struct kobject *kobj, struct attribute *attr,
43 const char *buf, size_t len)
44{
45 struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
46 struct odev_attr *a = container_of(attr, struct odev_attr, attr);
47
48 return a->store ? a->store(edp, buf, len) : len;
49}
50
51static const struct sysfs_ops odev_attr_ops = {
52 .show = odev_attr_show,
53 .store = odev_attr_store,
54};
55
56
/* Created by exofs_sysfs_init(); stays NULL if that call fails. */
static struct kset *exofs_kset;
58
59static ssize_t osdname_show(struct exofs_dev *edp, char *buf)
60{
61 struct osd_dev *odev = edp->ored.od;
62 const struct osd_dev_info *odi = osduld_device_info(odev);
63
64 return snprintf(buf, odi->osdname_len + 1, "%s", odi->osdname);
65}
66
67static ssize_t systemid_show(struct exofs_dev *edp, char *buf)
68{
69 struct osd_dev *odev = edp->ored.od;
70 const struct osd_dev_info *odi = osduld_device_info(odev);
71
72 memcpy(buf, odi->systemid, odi->systemid_len);
73 return odi->systemid_len;
74}
75
76static ssize_t uri_show(struct exofs_dev *edp, char *buf)
77{
78 return snprintf(buf, edp->urilen, "%s", edp->uri);
79}
80
81static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len)
82{
83 edp->urilen = strlen(buf) + 1;
84 edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL);
85 strncpy(edp->uri, buf, edp->urilen);
86 return edp->urilen;
87}
88
89#define OSD_ATTR(name, mode, show, store) \
90 static struct odev_attr odev_attr_##name = \
91 __ATTR(name, mode, show, store)
92
93OSD_ATTR(osdname, S_IRUGO, osdname_show, NULL);
94OSD_ATTR(systemid, S_IRUGO, systemid_show, NULL);
95OSD_ATTR(uri, S_IRWXU, uri_show, uri_store);
96
97static struct attribute *odev_attrs[] = {
98 &odev_attr_osdname.attr,
99 &odev_attr_systemid.attr,
100 &odev_attr_uri.attr,
101 NULL,
102};
103
104static struct kobj_type odev_ktype = {
105 .default_attrs = odev_attrs,
106 .sysfs_ops = &odev_attr_ops,
107};
108
109static struct kobj_type uuid_ktype = {
110};
111
/*
 * Debug helper: when CONFIG_EXOFS_DEBUG is set, log the name and the
 * reference count of every kobject on the exofs kset. Does nothing
 * otherwise, or when the kset was never created.
 */
void exofs_sysfs_dbg_print(void)
{
#ifdef CONFIG_EXOFS_DEBUG
	struct kobject *k_name, *k_tmp;

	/*
	 * Module init tolerates a failed exofs_sysfs_init(), which leaves
	 * exofs_kset NULL; there is nothing to walk in that case.
	 */
	if (!exofs_kset)
		return;

	list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
		printk(KERN_INFO "%s: name %s ref %d\n",
			__func__, kobject_name(k_name),
			(int)atomic_read(&k_name->kref.refcount));
	}
#endif
}
124/*
125 * This function removes all kobjects under exofs_kset
126 * At the end of it, exofs_kset kobject will have a refcount
127 * of 1 which gets decremented only on exofs module unload
128 */
129void exofs_sysfs_sb_del(struct exofs_sb_info *sbi)
130{
131 struct kobject *k_name, *k_tmp;
132 struct kobject *s_kobj = &sbi->s_kobj;
133
134 list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
135 /* Remove all that are children of this SBI */
136 if (k_name->parent == s_kobj)
137 kobject_put(k_name);
138 }
139 kobject_put(s_kobj);
140}
141
142/*
143 * This function creates sysfs entries to hold the current exofs cluster
144 * instance (uniquely identified by osdname,pid tuple).
145 * This function gets called once per exofs mount instance.
146 */
147int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
148 struct exofs_dt_device_info *dt_dev)
149{
150 struct kobject *s_kobj;
151 int retval = 0;
152 uint64_t pid = sbi->one_comp.obj.partition;
153
154 /* allocate new uuid dirent */
155 s_kobj = &sbi->s_kobj;
156 s_kobj->kset = exofs_kset;
157 retval = kobject_init_and_add(s_kobj, &uuid_ktype,
158 &exofs_kset->kobj, "%s_%llx", dt_dev->osdname, pid);
159 if (retval) {
160 EXOFS_ERR("ERROR: Failed to create sysfs entry for "
161 "uuid-%s_%llx => %d\n", dt_dev->osdname, pid, retval);
162 return -ENOMEM;
163 }
164 return 0;
165}
166
167int exofs_sysfs_odev_add(struct exofs_dev *edev, struct exofs_sb_info *sbi)
168{
169 struct kobject *d_kobj;
170 int retval = 0;
171
172 /* create osd device group which contains following attributes
173 * osdname, systemid & uri
174 */
175 d_kobj = &edev->ed_kobj;
176 d_kobj->kset = exofs_kset;
177 retval = kobject_init_and_add(d_kobj, &odev_ktype,
178 &sbi->s_kobj, "dev%u", edev->did);
179 if (retval) {
180 EXOFS_ERR("ERROR: Failed to create sysfs entry for "
181 "device dev%u\n", edev->did);
182 return retval;
183 }
184 return 0;
185}
186
187int exofs_sysfs_init(void)
188{
189 exofs_kset = kset_create_and_add("exofs", NULL, fs_kobj);
190 if (!exofs_kset) {
191 EXOFS_ERR("ERROR: kset_create_and_add exofs failed\n");
192 return -ENOMEM;
193 }
194 return 0;
195}
196
197void exofs_sysfs_uninit(void)
198{
199 kset_unregister(exofs_kset);
200}
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 030c6d277e14..1c3613998862 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -165,7 +165,6 @@ static void release_blocks(struct super_block *sb, int count)
165 struct ext2_sb_info *sbi = EXT2_SB(sb); 165 struct ext2_sb_info *sbi = EXT2_SB(sb);
166 166
167 percpu_counter_add(&sbi->s_freeblocks_counter, count); 167 percpu_counter_add(&sbi->s_freeblocks_counter, count);
168 sb->s_dirt = 1;
169 } 168 }
170} 169}
171 170
@@ -180,7 +179,6 @@ static void group_adjust_blocks(struct super_block *sb, int group_no,
180 free_blocks = le16_to_cpu(desc->bg_free_blocks_count); 179 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
181 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); 180 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
182 spin_unlock(sb_bgl_lock(sbi, group_no)); 181 spin_unlock(sb_bgl_lock(sbi, group_no));
183 sb->s_dirt = 1;
184 mark_buffer_dirty(bh); 182 mark_buffer_dirty(bh);
185 } 183 }
186} 184}
@@ -479,7 +477,7 @@ void ext2_discard_reservation(struct inode *inode)
479} 477}
480 478
481/** 479/**
482 * ext2_free_blocks_sb() -- Free given blocks and update quota and i_blocks 480 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
483 * @inode: inode 481 * @inode: inode
 484 * @block: start physical block to free 482 * @block: start physical block to free
485 * @count: number of blocks to free 483 * @count: number of blocks to free
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8b15cf8cef37..c13eb7b91a11 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -81,7 +81,6 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
81 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); 81 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
82 if (dir) 82 if (dir)
83 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); 83 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
84 sb->s_dirt = 1;
85 mark_buffer_dirty(bh); 84 mark_buffer_dirty(bh);
86} 85}
87 86
@@ -543,7 +542,6 @@ got:
543 } 542 }
544 spin_unlock(sb_bgl_lock(sbi, group)); 543 spin_unlock(sb_bgl_lock(sbi, group));
545 544
546 sb->s_dirt = 1;
547 mark_buffer_dirty(bh2); 545 mark_buffer_dirty(bh2);
548 if (test_opt(sb, GRPID)) { 546 if (test_opt(sb, GRPID)) {
549 inode->i_mode = mode; 547 inode->i_mode = mode;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f9fa95f8443d..264d315f6c47 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -90,7 +90,7 @@ void ext2_evict_inode(struct inode * inode)
90 } 90 }
91 91
92 invalidate_inode_buffers(inode); 92 invalidate_inode_buffers(inode);
93 end_writeback(inode); 93 clear_inode(inode);
94 94
95 ext2_discard_reservation(inode); 95 ext2_discard_reservation(inode);
96 rsv = EXT2_I(inode)->i_block_alloc_info; 96 rsv = EXT2_I(inode)->i_block_alloc_info;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 38f816071ddb..b3621cb7ea31 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -130,9 +130,6 @@ static void ext2_put_super (struct super_block * sb)
130 130
131 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 131 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
132 132
133 if (sb->s_dirt)
134 ext2_write_super(sb);
135
136 ext2_xattr_put_super(sb); 133 ext2_xattr_put_super(sb);
137 if (!(sb->s_flags & MS_RDONLY)) { 134 if (!(sb->s_flags & MS_RDONLY)) {
138 struct ext2_super_block *es = sbi->s_es; 135 struct ext2_super_block *es = sbi->s_es;
@@ -307,7 +304,6 @@ static const struct super_operations ext2_sops = {
307 .write_inode = ext2_write_inode, 304 .write_inode = ext2_write_inode,
308 .evict_inode = ext2_evict_inode, 305 .evict_inode = ext2_evict_inode,
309 .put_super = ext2_put_super, 306 .put_super = ext2_put_super,
310 .write_super = ext2_write_super,
311 .sync_fs = ext2_sync_fs, 307 .sync_fs = ext2_sync_fs,
312 .statfs = ext2_statfs, 308 .statfs = ext2_statfs,
313 .remount_fs = ext2_remount, 309 .remount_fs = ext2_remount,
@@ -358,11 +354,6 @@ static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid,
358 ext2_nfs_get_inode); 354 ext2_nfs_get_inode);
359} 355}
360 356
361/* Yes, most of these are left as NULL!!
362 * A NULL value implies the default, which works with ext2-like file
363 * systems, but can be improved upon.
364 * Currently only get_parent is required.
365 */
366static const struct export_operations ext2_export_ops = { 357static const struct export_operations ext2_export_ops = {
367 .fh_to_dentry = ext2_fh_to_dentry, 358 .fh_to_dentry = ext2_fh_to_dentry,
368 .fh_to_parent = ext2_fh_to_parent, 359 .fh_to_parent = ext2_fh_to_parent,
@@ -1176,7 +1167,6 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
1176 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1167 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1177 if (wait) 1168 if (wait)
1178 sync_dirty_buffer(EXT2_SB(sb)->s_sbh); 1169 sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
1179 sb->s_dirt = 0;
1180} 1170}
1181 1171
1182/* 1172/*
@@ -1209,8 +1199,6 @@ void ext2_write_super(struct super_block *sb)
1209{ 1199{
1210 if (!(sb->s_flags & MS_RDONLY)) 1200 if (!(sb->s_flags & MS_RDONLY))
1211 ext2_sync_fs(sb, 1); 1201 ext2_sync_fs(sb, 1);
1212 else
1213 sb->s_dirt = 0;
1214} 1202}
1215 1203
1216static int ext2_remount (struct super_block * sb, int * flags, char * data) 1204static int ext2_remount (struct super_block * sb, int * flags, char * data)
@@ -1456,7 +1444,6 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1456 struct buffer_head tmp_bh; 1444 struct buffer_head tmp_bh;
1457 struct buffer_head *bh; 1445 struct buffer_head *bh;
1458 1446
1459 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
1460 while (towrite > 0) { 1447 while (towrite > 0) {
1461 tocopy = sb->s_blocksize - offset < towrite ? 1448 tocopy = sb->s_blocksize - offset < towrite ?
1462 sb->s_blocksize - offset : towrite; 1449 sb->s_blocksize - offset : towrite;
@@ -1486,16 +1473,13 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1486 blk++; 1473 blk++;
1487 } 1474 }
1488out: 1475out:
1489 if (len == towrite) { 1476 if (len == towrite)
1490 mutex_unlock(&inode->i_mutex);
1491 return err; 1477 return err;
1492 }
1493 if (inode->i_size < off+len-towrite) 1478 if (inode->i_size < off+len-towrite)
1494 i_size_write(inode, off+len-towrite); 1479 i_size_write(inode, off+len-towrite);
1495 inode->i_version++; 1480 inode->i_version++;
1496 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1481 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1497 mark_inode_dirty(inode); 1482 mark_inode_dirty(inode);
1498 mutex_unlock(&inode->i_mutex);
1499 return len - towrite; 1483 return len - towrite;
1500} 1484}
1501 1485
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 6dcafc7efdfd..b6754dbbce3c 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -339,7 +339,6 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
339 spin_lock(&EXT2_SB(sb)->s_lock); 339 spin_lock(&EXT2_SB(sb)->s_lock);
340 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); 340 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
341 spin_unlock(&EXT2_SB(sb)->s_lock); 341 spin_unlock(&EXT2_SB(sb)->s_lock);
342 sb->s_dirt = 1;
343 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 342 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
344} 343}
345 344
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index cc761ad8fa57..92490e9f85ca 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,30 +21,15 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/compat.h>
24#include "ext3.h" 25#include "ext3.h"
25 26
26static unsigned char ext3_filetype_table[] = { 27static unsigned char ext3_filetype_table[] = {
27 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 28 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
28}; 29};
29 30
30static int ext3_readdir(struct file *, void *, filldir_t);
31static int ext3_dx_readdir(struct file * filp, 31static int ext3_dx_readdir(struct file * filp,
32 void * dirent, filldir_t filldir); 32 void * dirent, filldir_t filldir);
33static int ext3_release_dir (struct inode * inode,
34 struct file * filp);
35
36const struct file_operations ext3_dir_operations = {
37 .llseek = generic_file_llseek,
38 .read = generic_read_dir,
39 .readdir = ext3_readdir, /* we take BKL. needed?*/
40 .unlocked_ioctl = ext3_ioctl,
41#ifdef CONFIG_COMPAT
42 .compat_ioctl = ext3_compat_ioctl,
43#endif
44 .fsync = ext3_sync_file, /* BKL held */
45 .release = ext3_release_dir,
46};
47
48 33
49static unsigned char get_dtype(struct super_block *sb, int filetype) 34static unsigned char get_dtype(struct super_block *sb, int filetype)
50{ 35{
@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
55 return (ext3_filetype_table[filetype]); 40 return (ext3_filetype_table[filetype]);
56} 41}
57 42
43/**
44 * Check if the given dir-inode refers to an htree-indexed directory
45 * (or a directory which could potentially get converted to use htree
46 * indexing).
47 *
48 * Return 1 if it is a dx dir, 0 if not
49 */
50static int is_dx_dir(struct inode *inode)
51{
52 struct super_block *sb = inode->i_sb;
53
54 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
55 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
56 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
57 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
58 return 1;
59
60 return 0;
61}
58 62
59int ext3_check_dir_entry (const char * function, struct inode * dir, 63int ext3_check_dir_entry (const char * function, struct inode * dir,
60 struct ext3_dir_entry_2 * de, 64 struct ext3_dir_entry_2 * de,
@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp,
94 unsigned long offset; 98 unsigned long offset;
95 int i, stored; 99 int i, stored;
96 struct ext3_dir_entry_2 *de; 100 struct ext3_dir_entry_2 *de;
97 struct super_block *sb;
98 int err; 101 int err;
99 struct inode *inode = filp->f_path.dentry->d_inode; 102 struct inode *inode = filp->f_path.dentry->d_inode;
103 struct super_block *sb = inode->i_sb;
100 int ret = 0; 104 int ret = 0;
101 int dir_has_error = 0; 105 int dir_has_error = 0;
102 106
103 sb = inode->i_sb; 107 if (is_dx_dir(inode)) {
104
105 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
106 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
107 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
108 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
109 err = ext3_dx_readdir(filp, dirent, filldir); 108 err = ext3_dx_readdir(filp, dirent, filldir);
110 if (err != ERR_BAD_DX_DIR) { 109 if (err != ERR_BAD_DX_DIR) {
111 ret = err; 110 ret = err;
@@ -227,22 +226,87 @@ out:
227 return ret; 226 return ret;
228} 227}
229 228
229static inline int is_32bit_api(void)
230{
231#ifdef CONFIG_COMPAT
232 return is_compat_task();
233#else
234 return (BITS_PER_LONG == 32);
235#endif
236}
237
230/* 238/*
231 * These functions convert from the major/minor hash to an f_pos 239 * These functions convert from the major/minor hash to an f_pos
232 * value. 240 * value for dx directories
233 * 241 *
234 * Currently we only use major hash numer. This is unfortunate, but 242 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
235 * on 32-bit machines, the same VFS interface is used for lseek and 243 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
236 * llseek, so if we use the 64 bit offset, then the 32-bit versions of 244 * directly on both 32-bit and 64-bit nodes, under such case, neither
237 * lseek/telldir/seekdir will blow out spectacularly, and from within 245 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
238 * the ext2 low-level routine, we don't know if we're being called by
239 * a 64-bit version of the system call or the 32-bit version of the
240 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
241 * cookie. Sigh.
242 */ 246 */
243#define hash2pos(major, minor) (major >> 1) 247static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
244#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) 248{
245#define pos2min_hash(pos) (0) 249 if ((filp->f_mode & FMODE_32BITHASH) ||
250 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
251 return major >> 1;
252 else
253 return ((__u64)(major >> 1) << 32) | (__u64)minor;
254}
255
256static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
257{
258 if ((filp->f_mode & FMODE_32BITHASH) ||
259 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
260 return (pos << 1) & 0xffffffff;
261 else
262 return ((pos >> 32) << 1) & 0xffffffff;
263}
264
265static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
266{
267 if ((filp->f_mode & FMODE_32BITHASH) ||
268 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
269 return 0;
270 else
271 return pos & 0xffffffff;
272}
273
274/*
275 * Return 32- or 64-bit end-of-file for dx directories
276 */
277static inline loff_t ext3_get_htree_eof(struct file *filp)
278{
279 if ((filp->f_mode & FMODE_32BITHASH) ||
280 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
281 return EXT3_HTREE_EOF_32BIT;
282 else
283 return EXT3_HTREE_EOF_64BIT;
284}
285
286
287/*
288 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
289 * non-htree and htree directories, where the "offset" is in terms
290 * of the filename hash value instead of the byte offset.
291 *
292 * Because we may return a 64-bit hash that is well beyond s_maxbytes,
293 * we need to pass the max hash as the maximum allowable offset in
294 * the htree directory case.
295 *
296 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
297 * will be invalid once the directory was converted into a dx directory
298 */
299loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
300{
301 struct inode *inode = file->f_mapping->host;
302 int dx_dir = is_dx_dir(inode);
303
304 if (likely(dx_dir))
305 return generic_file_llseek_size(file, offset, origin,
306 ext3_get_htree_eof(file));
307 else
308 return generic_file_llseek(file, offset, origin);
309}
246 310
247/* 311/*
248 * This structure holds the nodes of the red-black tree used to store 312 * This structure holds the nodes of the red-black tree used to store
@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root)
303} 367}
304 368
305 369
306static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) 370static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
371 loff_t pos)
307{ 372{
308 struct dir_private_info *p; 373 struct dir_private_info *p;
309 374
310 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 375 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
311 if (!p) 376 if (!p)
312 return NULL; 377 return NULL;
313 p->curr_hash = pos2maj_hash(pos); 378 p->curr_hash = pos2maj_hash(filp, pos);
314 p->curr_minor_hash = pos2min_hash(pos); 379 p->curr_minor_hash = pos2min_hash(filp, pos);
315 return p; 380 return p;
316} 381}
317 382
@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent,
401 printk("call_filldir: called with null fname?!?\n"); 466 printk("call_filldir: called with null fname?!?\n");
402 return 0; 467 return 0;
403 } 468 }
404 curr_pos = hash2pos(fname->hash, fname->minor_hash); 469 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
405 while (fname) { 470 while (fname) {
406 error = filldir(dirent, fname->name, 471 error = filldir(dirent, fname->name,
407 fname->name_len, curr_pos, 472 fname->name_len, curr_pos,
@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp,
426 int ret; 491 int ret;
427 492
428 if (!info) { 493 if (!info) {
429 info = ext3_htree_create_dir_info(filp->f_pos); 494 info = ext3_htree_create_dir_info(filp, filp->f_pos);
430 if (!info) 495 if (!info)
431 return -ENOMEM; 496 return -ENOMEM;
432 filp->private_data = info; 497 filp->private_data = info;
433 } 498 }
434 499
435 if (filp->f_pos == EXT3_HTREE_EOF) 500 if (filp->f_pos == ext3_get_htree_eof(filp))
436 return 0; /* EOF */ 501 return 0; /* EOF */
437 502
438 /* Some one has messed with f_pos; reset the world */ 503 /* Some one has messed with f_pos; reset the world */
@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp,
440 free_rb_tree_fname(&info->root); 505 free_rb_tree_fname(&info->root);
441 info->curr_node = NULL; 506 info->curr_node = NULL;
442 info->extra_fname = NULL; 507 info->extra_fname = NULL;
443 info->curr_hash = pos2maj_hash(filp->f_pos); 508 info->curr_hash = pos2maj_hash(filp, filp->f_pos);
444 info->curr_minor_hash = pos2min_hash(filp->f_pos); 509 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
445 } 510 }
446 511
447 /* 512 /*
@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp,
473 if (ret < 0) 538 if (ret < 0)
474 return ret; 539 return ret;
475 if (ret == 0) { 540 if (ret == 0) {
476 filp->f_pos = EXT3_HTREE_EOF; 541 filp->f_pos = ext3_get_htree_eof(filp);
477 break; 542 break;
478 } 543 }
479 info->curr_node = rb_first(&info->root); 544 info->curr_node = rb_first(&info->root);
@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp,
493 info->curr_minor_hash = fname->minor_hash; 558 info->curr_minor_hash = fname->minor_hash;
494 } else { 559 } else {
495 if (info->next_hash == ~0) { 560 if (info->next_hash == ~0) {
496 filp->f_pos = EXT3_HTREE_EOF; 561 filp->f_pos = ext3_get_htree_eof(filp);
497 break; 562 break;
498 } 563 }
499 info->curr_hash = info->next_hash; 564 info->curr_hash = info->next_hash;
@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
512 577
513 return 0; 578 return 0;
514} 579}
580
581const struct file_operations ext3_dir_operations = {
582 .llseek = ext3_dir_llseek,
583 .read = generic_read_dir,
584 .readdir = ext3_readdir,
585 .unlocked_ioctl = ext3_ioctl,
586#ifdef CONFIG_COMPAT
587 .compat_ioctl = ext3_compat_ioctl,
588#endif
589 .fsync = ext3_sync_file,
590 .release = ext3_release_dir,
591};
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
index 7977973a24f0..e85ff15a060e 100644
--- a/fs/ext3/ext3.h
+++ b/fs/ext3/ext3.h
@@ -920,7 +920,11 @@ struct dx_hash_info
920 u32 *seed; 920 u32 *seed;
921}; 921};
922 922
923#define EXT3_HTREE_EOF 0x7fffffff 923
924/* 32 and 64 bit signed EOF for dx directories */
925#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
926#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
927
924 928
925/* 929/*
926 * Control parameters used by ext3_htree_next_block 930 * Control parameters used by ext3_htree_next_block
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index d10231ddcf8a..ede315cdf126 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
198 return -1; 198 return -1;
199 } 199 }
200 hash = hash & ~1; 200 hash = hash & ~1;
201 if (hash == (EXT3_HTREE_EOF << 1)) 201 if (hash == (EXT3_HTREE_EOF_32BIT << 1))
202 hash = (EXT3_HTREE_EOF-1) << 1; 202 hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
203 hinfo->hash = hash; 203 hinfo->hash = hash;
204 hinfo->minor_hash = minor_hash; 204 hinfo->minor_hash = minor_hash;
205 return 0; 205 return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index e3c39e4cec19..082afd78b107 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -180,8 +180,7 @@ error_return:
180 * It's OK to put directory into a group unless 180 * It's OK to put directory into a group unless
181 * it has too many directories already (max_dirs) or 181 * it has too many directories already (max_dirs) or
182 * it has too few free inodes left (min_inodes) or 182 * it has too few free inodes left (min_inodes) or
183 * it has too few free blocks left (min_blocks) or 183 * it has too few free blocks left (min_blocks).
184 * it's already running too large debt (max_debt).
185 * Parent's group is preferred, if it doesn't satisfy these 184 * Parent's group is preferred, if it doesn't satisfy these
186 * conditions we search cyclically through the rest. If none 185 * conditions we search cyclically through the rest. If none
187 * of the groups look good we just look for a group with more 186 * of the groups look good we just look for a group with more
@@ -191,21 +190,16 @@ error_return:
191 * when we allocate an inode, within 0--255. 190 * when we allocate an inode, within 0--255.
192 */ 191 */
193 192
194#define INODE_COST 64
195#define BLOCK_COST 256
196
197static int find_group_orlov(struct super_block *sb, struct inode *parent) 193static int find_group_orlov(struct super_block *sb, struct inode *parent)
198{ 194{
199 int parent_group = EXT3_I(parent)->i_block_group; 195 int parent_group = EXT3_I(parent)->i_block_group;
200 struct ext3_sb_info *sbi = EXT3_SB(sb); 196 struct ext3_sb_info *sbi = EXT3_SB(sb);
201 struct ext3_super_block *es = sbi->s_es;
202 int ngroups = sbi->s_groups_count; 197 int ngroups = sbi->s_groups_count;
203 int inodes_per_group = EXT3_INODES_PER_GROUP(sb); 198 int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
204 unsigned int freei, avefreei; 199 unsigned int freei, avefreei;
205 ext3_fsblk_t freeb, avefreeb; 200 ext3_fsblk_t freeb, avefreeb;
206 ext3_fsblk_t blocks_per_dir;
207 unsigned int ndirs; 201 unsigned int ndirs;
208 int max_debt, max_dirs, min_inodes; 202 int max_dirs, min_inodes;
209 ext3_grpblk_t min_blocks; 203 ext3_grpblk_t min_blocks;
210 int group = -1, i; 204 int group = -1, i;
211 struct ext3_group_desc *desc; 205 struct ext3_group_desc *desc;
@@ -242,20 +236,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
242 goto fallback; 236 goto fallback;
243 } 237 }
244 238
245 blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
246
247 max_dirs = ndirs / ngroups + inodes_per_group / 16; 239 max_dirs = ndirs / ngroups + inodes_per_group / 16;
248 min_inodes = avefreei - inodes_per_group / 4; 240 min_inodes = avefreei - inodes_per_group / 4;
249 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; 241 min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
250 242
251 max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
252 if (max_debt * INODE_COST > inodes_per_group)
253 max_debt = inodes_per_group / INODE_COST;
254 if (max_debt > 255)
255 max_debt = 255;
256 if (max_debt == 0)
257 max_debt = 1;
258
259 for (i = 0; i < ngroups; i++) { 243 for (i = 0; i < ngroups; i++) {
260 group = (parent_group + i) % ngroups; 244 group = (parent_group + i) % ngroups;
261 desc = ext3_get_group_desc (sb, group, NULL); 245 desc = ext3_get_group_desc (sb, group, NULL);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a09790a412b1..9a4a5c48b1c9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -272,18 +272,18 @@ void ext3_evict_inode (struct inode *inode)
272 if (ext3_mark_inode_dirty(handle, inode)) { 272 if (ext3_mark_inode_dirty(handle, inode)) {
273 /* If that failed, just dquot_drop() and be done with that */ 273 /* If that failed, just dquot_drop() and be done with that */
274 dquot_drop(inode); 274 dquot_drop(inode);
275 end_writeback(inode); 275 clear_inode(inode);
276 } else { 276 } else {
277 ext3_xattr_delete_inode(handle, inode); 277 ext3_xattr_delete_inode(handle, inode);
278 dquot_free_inode(inode); 278 dquot_free_inode(inode);
279 dquot_drop(inode); 279 dquot_drop(inode);
280 end_writeback(inode); 280 clear_inode(inode);
281 ext3_free_inode(handle, inode); 281 ext3_free_inode(handle, inode);
282 } 282 }
283 ext3_journal_stop(handle); 283 ext3_journal_stop(handle);
284 return; 284 return;
285no_delete: 285no_delete:
286 end_writeback(inode); 286 clear_inode(inode);
287 dquot_drop(inode); 287 dquot_drop(inode);
288} 288}
289 289
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 94ef7e616129..8c3a44b7c375 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -3015,7 +3015,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
3015 (unsigned long long)off, (unsigned long long)len); 3015 (unsigned long long)off, (unsigned long long)len);
3016 return -EIO; 3016 return -EIO;
3017 } 3017 }
3018 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3019 bh = ext3_bread(handle, inode, blk, 1, &err); 3018 bh = ext3_bread(handle, inode, blk, 1, &err);
3020 if (!bh) 3019 if (!bh)
3021 goto out; 3020 goto out;
@@ -3039,10 +3038,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
3039 } 3038 }
3040 brelse(bh); 3039 brelse(bh);
3041out: 3040out:
3042 if (err) { 3041 if (err)
3043 mutex_unlock(&inode->i_mutex);
3044 return err; 3042 return err;
3045 }
3046 if (inode->i_size < off + len) { 3043 if (inode->i_size < off + len) {
3047 i_size_write(inode, off + len); 3044 i_size_write(inode, off + len);
3048 EXT3_I(inode)->i_disksize = inode->i_size; 3045 EXT3_I(inode)->i_disksize = inode->i_size;
@@ -3050,7 +3047,6 @@ out:
3050 inode->i_version++; 3047 inode->i_version++;
3051 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3048 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3052 ext3_mark_inode_dirty(handle, inode); 3049 ext3_mark_inode_dirty(handle, inode);
3053 mutex_unlock(&inode->i_mutex);
3054 return len; 3050 return len;
3055} 3051}
3056 3052
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 436b4223df66..35b5954489ee 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1007,7 +1007,7 @@ static void destroy_inodecache(void)
1007void ext4_clear_inode(struct inode *inode) 1007void ext4_clear_inode(struct inode *inode)
1008{ 1008{
1009 invalidate_inode_buffers(inode); 1009 invalidate_inode_buffers(inode);
1010 end_writeback(inode); 1010 clear_inode(inode);
1011 dquot_drop(inode); 1011 dquot_drop(inode);
1012 ext4_discard_preallocations(inode); 1012 ext4_discard_preallocations(inode);
1013 if (EXT4_I(inode)->jinode) { 1013 if (EXT4_I(inode)->jinode) {
@@ -4758,7 +4758,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
4758 return -EIO; 4758 return -EIO;
4759 } 4759 }
4760 4760
4761 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
4762 bh = ext4_bread(handle, inode, blk, 1, &err); 4761 bh = ext4_bread(handle, inode, blk, 1, &err);
4763 if (!bh) 4762 if (!bh)
4764 goto out; 4763 goto out;
@@ -4774,16 +4773,13 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
4774 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4773 err = ext4_handle_dirty_metadata(handle, NULL, bh);
4775 brelse(bh); 4774 brelse(bh);
4776out: 4775out:
4777 if (err) { 4776 if (err)
4778 mutex_unlock(&inode->i_mutex);
4779 return err; 4777 return err;
4780 }
4781 if (inode->i_size < off + len) { 4778 if (inode->i_size < off + len) {
4782 i_size_write(inode, off + len); 4779 i_size_write(inode, off + len);
4783 EXT4_I(inode)->i_disksize = inode->i_size; 4780 EXT4_I(inode)->i_disksize = inode->i_size;
4784 ext4_mark_inode_dirty(handle, inode); 4781 ext4_mark_inode_dirty(handle, inode);
4785 } 4782 }
4786 mutex_unlock(&inode->i_mutex);
4787 return len; 4783 return len;
4788} 4784}
4789 4785
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 21687e31acc0..b3d290c1b513 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -454,7 +454,7 @@ static void fat_evict_inode(struct inode *inode)
454 fat_truncate_blocks(inode, 0); 454 fat_truncate_blocks(inode, 0);
455 } 455 }
456 invalidate_inode_buffers(inode); 456 invalidate_inode_buffers(inode);
457 end_writeback(inode); 457 clear_inode(inode);
458 fat_cache_inval_inode(inode); 458 fat_cache_inval_inode(inode);
459 fat_detach(inode); 459 fat_detach(inode);
460} 460}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index cf9ef918a2a9..ef67c95f12d4 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -355,6 +355,6 @@ void
355vxfs_evict_inode(struct inode *ip) 355vxfs_evict_inode(struct inode *ip)
356{ 356{
357 truncate_inode_pages(&ip->i_data, 0); 357 truncate_inode_pages(&ip->i_data, 0);
358 end_writeback(ip); 358 clear_inode(ip);
359 call_rcu(&ip->i_rcu, vxfs_i_callback); 359 call_rcu(&ip->i_rcu, vxfs_i_callback);
360} 360}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 539f36cf3e4a..8d2fb8c88cf3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -231,11 +231,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
231 231
232static void inode_sync_complete(struct inode *inode) 232static void inode_sync_complete(struct inode *inode)
233{ 233{
234 /* 234 inode->i_state &= ~I_SYNC;
235 * Prevent speculative execution through 235 /* Waiters must see I_SYNC cleared before being woken up */
236 * spin_unlock(&wb->list_lock);
237 */
238
239 smp_mb(); 236 smp_mb();
240 wake_up_bit(&inode->i_state, __I_SYNC); 237 wake_up_bit(&inode->i_state, __I_SYNC);
241} 238}
@@ -329,10 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
329} 326}
330 327
331/* 328/*
332 * Wait for writeback on an inode to complete. 329 * Wait for writeback on an inode to complete. Called with i_lock held.
330 * Caller must make sure inode cannot go away when we drop i_lock.
333 */ 331 */
334static void inode_wait_for_writeback(struct inode *inode, 332static void __inode_wait_for_writeback(struct inode *inode)
335 struct bdi_writeback *wb) 333 __releases(inode->i_lock)
334 __acquires(inode->i_lock)
336{ 335{
337 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); 336 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
338 wait_queue_head_t *wqh; 337 wait_queue_head_t *wqh;
@@ -340,70 +339,119 @@ static void inode_wait_for_writeback(struct inode *inode,
340 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 339 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
341 while (inode->i_state & I_SYNC) { 340 while (inode->i_state & I_SYNC) {
342 spin_unlock(&inode->i_lock); 341 spin_unlock(&inode->i_lock);
343 spin_unlock(&wb->list_lock);
344 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 342 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
345 spin_lock(&wb->list_lock);
346 spin_lock(&inode->i_lock); 343 spin_lock(&inode->i_lock);
347 } 344 }
348} 345}
349 346
350/* 347/*
351 * Write out an inode's dirty pages. Called under wb->list_lock and 348 * Wait for writeback on an inode to complete. Caller must have inode pinned.
352 * inode->i_lock. Either the caller has an active reference on the inode or
353 * the inode has I_WILL_FREE set.
354 *
355 * If `wait' is set, wait on the writeout.
356 *
357 * The whole writeout design is quite complex and fragile. We want to avoid
358 * starvation of particular inodes when others are being redirtied, prevent
359 * livelocks, etc.
360 */ 349 */
361static int 350void inode_wait_for_writeback(struct inode *inode)
362writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
363 struct writeback_control *wbc)
364{ 351{
365 struct address_space *mapping = inode->i_mapping; 352 spin_lock(&inode->i_lock);
366 long nr_to_write = wbc->nr_to_write; 353 __inode_wait_for_writeback(inode);
367 unsigned dirty; 354 spin_unlock(&inode->i_lock);
368 int ret; 355}
369 356
370 assert_spin_locked(&wb->list_lock); 357/*
371 assert_spin_locked(&inode->i_lock); 358 * Sleep until I_SYNC is cleared. This function must be called with i_lock
359 * held and drops it. It is aimed for callers not holding any inode reference
360 * so once i_lock is dropped, inode can go away.
361 */
362static void inode_sleep_on_writeback(struct inode *inode)
363 __releases(inode->i_lock)
364{
365 DEFINE_WAIT(wait);
366 wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
367 int sleep;
372 368
373 if (!atomic_read(&inode->i_count)) 369 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
374 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 370 sleep = inode->i_state & I_SYNC;
375 else 371 spin_unlock(&inode->i_lock);
376 WARN_ON(inode->i_state & I_WILL_FREE); 372 if (sleep)
373 schedule();
374 finish_wait(wqh, &wait);
375}
377 376
378 if (inode->i_state & I_SYNC) { 377/*
378 * Find proper writeback list for the inode depending on its current state and
379 * possibly also change of its state while we were doing writeback. Here we
380 * handle things such as livelock prevention or fairness of writeback among
381 * inodes. This function can be called only by flusher thread - no one else
382 * processes all inodes in writeback lists and requeueing inodes behind flusher
383 * thread's back can have unexpected consequences.
384 */
385static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
386 struct writeback_control *wbc)
387{
388 if (inode->i_state & I_FREEING)
389 return;
390
391 /*
392 * Sync livelock prevention. Each inode is tagged and synced in one
393 * shot. If still dirty, it will be redirty_tail()'ed below. Update
394 * the dirty time to prevent enqueue and sync it again.
395 */
396 if ((inode->i_state & I_DIRTY) &&
397 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
398 inode->dirtied_when = jiffies;
399
400 if (wbc->pages_skipped) {
379 /* 401 /*
380 * If this inode is locked for writeback and we are not doing 402 * writeback is not making progress due to locked
381 * writeback-for-data-integrity, move it to b_more_io so that 403 * buffers. Skip this inode for now.
382 * writeback can proceed with the other inodes on s_io.
383 *
384 * We'll have another go at writing back this inode when we
385 * completed a full scan of b_io.
386 */ 404 */
387 if (wbc->sync_mode != WB_SYNC_ALL) { 405 redirty_tail(inode, wb);
406 return;
407 }
408
409 if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
410 /*
411 * We didn't write back all the pages. nfs_writepages()
412 * sometimes bales out without doing anything.
413 */
414 if (wbc->nr_to_write <= 0) {
415 /* Slice used up. Queue for next turn. */
388 requeue_io(inode, wb); 416 requeue_io(inode, wb);
389 trace_writeback_single_inode_requeue(inode, wbc, 417 } else {
390 nr_to_write); 418 /*
391 return 0; 419 * Writeback blocked by something other than
420 * congestion. Delay the inode for some time to
421 * avoid spinning on the CPU (100% iowait)
422 * retrying writeback of the dirty page/inode
423 * that cannot be performed immediately.
424 */
425 redirty_tail(inode, wb);
392 } 426 }
393 427 } else if (inode->i_state & I_DIRTY) {
394 /* 428 /*
395 * It's a data-integrity sync. We must wait. 429 * Filesystems can dirty the inode during writeback operations,
430 * such as delayed allocation during submission or metadata
431 * updates after data IO completion.
396 */ 432 */
397 inode_wait_for_writeback(inode, wb); 433 redirty_tail(inode, wb);
434 } else {
435 /* The inode is clean. Remove from writeback lists. */
436 list_del_init(&inode->i_wb_list);
398 } 437 }
438}
399 439
400 BUG_ON(inode->i_state & I_SYNC); 440/*
441 * Write out an inode and its dirty pages. Do not update the writeback list
442 * linkage. That is left to the caller. The caller is also responsible for
443 * setting I_SYNC flag and calling inode_sync_complete() to clear it.
444 */
445static int
446__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
447 struct writeback_control *wbc)
448{
449 struct address_space *mapping = inode->i_mapping;
450 long nr_to_write = wbc->nr_to_write;
451 unsigned dirty;
452 int ret;
401 453
402 /* Set I_SYNC, reset I_DIRTY_PAGES */ 454 WARN_ON(!(inode->i_state & I_SYNC));
403 inode->i_state |= I_SYNC;
404 inode->i_state &= ~I_DIRTY_PAGES;
405 spin_unlock(&inode->i_lock);
406 spin_unlock(&wb->list_lock);
407 455
408 ret = do_writepages(mapping, wbc); 456 ret = do_writepages(mapping, wbc);
409 457
@@ -424,6 +472,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
424 * write_inode() 472 * write_inode()
425 */ 473 */
426 spin_lock(&inode->i_lock); 474 spin_lock(&inode->i_lock);
475 /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
476 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
477 inode->i_state &= ~I_DIRTY_PAGES;
427 dirty = inode->i_state & I_DIRTY; 478 dirty = inode->i_state & I_DIRTY;
428 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 479 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
429 spin_unlock(&inode->i_lock); 480 spin_unlock(&inode->i_lock);
@@ -433,60 +484,67 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
433 if (ret == 0) 484 if (ret == 0)
434 ret = err; 485 ret = err;
435 } 486 }
487 trace_writeback_single_inode(inode, wbc, nr_to_write);
488 return ret;
489}
490
491/*
492 * Write out an inode's dirty pages. Either the caller has an active reference
493 * on the inode or the inode has I_WILL_FREE set.
494 *
495 * This function is designed to be called for writing back one inode which
496 * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
497 * and does more profound writeback list handling in writeback_sb_inodes().
498 */
499static int
500writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
501 struct writeback_control *wbc)
502{
503 int ret = 0;
436 504
437 spin_lock(&wb->list_lock);
438 spin_lock(&inode->i_lock); 505 spin_lock(&inode->i_lock);
439 inode->i_state &= ~I_SYNC; 506 if (!atomic_read(&inode->i_count))
440 if (!(inode->i_state & I_FREEING)) { 507 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
508 else
509 WARN_ON(inode->i_state & I_WILL_FREE);
510
511 if (inode->i_state & I_SYNC) {
512 if (wbc->sync_mode != WB_SYNC_ALL)
513 goto out;
441 /* 514 /*
442 * Sync livelock prevention. Each inode is tagged and synced in 515 * It's a data-integrity sync. We must wait. Since callers hold
443 * one shot. If still dirty, it will be redirty_tail()'ed below. 516 * inode reference or inode has I_WILL_FREE set, it cannot go
444 * Update the dirty time to prevent enqueue and sync it again. 517 * away under us.
445 */ 518 */
446 if ((inode->i_state & I_DIRTY) && 519 __inode_wait_for_writeback(inode);
447 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
448 inode->dirtied_when = jiffies;
449
450 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
451 /*
452 * We didn't write back all the pages. nfs_writepages()
453 * sometimes bales out without doing anything.
454 */
455 inode->i_state |= I_DIRTY_PAGES;
456 if (wbc->nr_to_write <= 0) {
457 /*
458 * slice used up: queue for next turn
459 */
460 requeue_io(inode, wb);
461 } else {
462 /*
463 * Writeback blocked by something other than
464 * congestion. Delay the inode for some time to
465 * avoid spinning on the CPU (100% iowait)
466 * retrying writeback of the dirty page/inode
467 * that cannot be performed immediately.
468 */
469 redirty_tail(inode, wb);
470 }
471 } else if (inode->i_state & I_DIRTY) {
472 /*
473 * Filesystems can dirty the inode during writeback
474 * operations, such as delayed allocation during
475 * submission or metadata updates after data IO
476 * completion.
477 */
478 redirty_tail(inode, wb);
479 } else {
480 /*
481 * The inode is clean. At this point we either have
482 * a reference to the inode or it's on it's way out.
483 * No need to add it back to the LRU.
484 */
485 list_del_init(&inode->i_wb_list);
486 }
487 } 520 }
521 WARN_ON(inode->i_state & I_SYNC);
522 /*
523 * Skip inode if it is clean. We don't want to mess with writeback
524 * lists in this function since flusher thread may be doing for example
525 * sync in parallel and if we move the inode, it could get skipped. So
526 * here we make sure inode is on some writeback list and leave it there
527 * unless we have completely cleaned the inode.
528 */
529 if (!(inode->i_state & I_DIRTY))
530 goto out;
531 inode->i_state |= I_SYNC;
532 spin_unlock(&inode->i_lock);
533
534 ret = __writeback_single_inode(inode, wb, wbc);
535
536 spin_lock(&wb->list_lock);
537 spin_lock(&inode->i_lock);
538 /*
539 * If inode is clean, remove it from writeback lists. Otherwise don't
540 * touch it. See comment above for explanation.
541 */
542 if (!(inode->i_state & I_DIRTY))
543 list_del_init(&inode->i_wb_list);
544 spin_unlock(&wb->list_lock);
488 inode_sync_complete(inode); 545 inode_sync_complete(inode);
489 trace_writeback_single_inode(inode, wbc, nr_to_write); 546out:
547 spin_unlock(&inode->i_lock);
490 return ret; 548 return ret;
491} 549}
492 550
@@ -580,29 +638,57 @@ static long writeback_sb_inodes(struct super_block *sb,
580 redirty_tail(inode, wb); 638 redirty_tail(inode, wb);
581 continue; 639 continue;
582 } 640 }
583 __iget(inode); 641 if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
642 /*
643 * If this inode is locked for writeback and we are not
644 * doing writeback-for-data-integrity, move it to
645 * b_more_io so that writeback can proceed with the
646 * other inodes on s_io.
647 *
648 * We'll have another go at writing back this inode
649 * when we completed a full scan of b_io.
650 */
651 spin_unlock(&inode->i_lock);
652 requeue_io(inode, wb);
653 trace_writeback_sb_inodes_requeue(inode);
654 continue;
655 }
656 spin_unlock(&wb->list_lock);
657
658 /*
659 * We already requeued the inode if it had I_SYNC set and we
660 * are doing WB_SYNC_NONE writeback. So this catches only the
661 * WB_SYNC_ALL case.
662 */
663 if (inode->i_state & I_SYNC) {
664 /* Wait for I_SYNC. This function drops i_lock... */
665 inode_sleep_on_writeback(inode);
666 /* Inode may be gone, start again */
667 continue;
668 }
669 inode->i_state |= I_SYNC;
670 spin_unlock(&inode->i_lock);
671
584 write_chunk = writeback_chunk_size(wb->bdi, work); 672 write_chunk = writeback_chunk_size(wb->bdi, work);
585 wbc.nr_to_write = write_chunk; 673 wbc.nr_to_write = write_chunk;
586 wbc.pages_skipped = 0; 674 wbc.pages_skipped = 0;
587 675
588 writeback_single_inode(inode, wb, &wbc); 676 /*
677 * We use I_SYNC to pin the inode in memory. While it is set
678 * evict_inode() will wait so the inode cannot be freed.
679 */
680 __writeback_single_inode(inode, wb, &wbc);
589 681
590 work->nr_pages -= write_chunk - wbc.nr_to_write; 682 work->nr_pages -= write_chunk - wbc.nr_to_write;
591 wrote += write_chunk - wbc.nr_to_write; 683 wrote += write_chunk - wbc.nr_to_write;
684 spin_lock(&wb->list_lock);
685 spin_lock(&inode->i_lock);
592 if (!(inode->i_state & I_DIRTY)) 686 if (!(inode->i_state & I_DIRTY))
593 wrote++; 687 wrote++;
594 if (wbc.pages_skipped) { 688 requeue_inode(inode, wb, &wbc);
595 /* 689 inode_sync_complete(inode);
596 * writeback is not making progress due to locked
597 * buffers. Skip this inode for now.
598 */
599 redirty_tail(inode, wb);
600 }
601 spin_unlock(&inode->i_lock); 690 spin_unlock(&inode->i_lock);
602 spin_unlock(&wb->list_lock); 691 cond_resched_lock(&wb->list_lock);
603 iput(inode);
604 cond_resched();
605 spin_lock(&wb->list_lock);
606 /* 692 /*
607 * bail out to wb_writeback() often enough to check 693 * bail out to wb_writeback() often enough to check
608 * background threshold and other termination conditions. 694 * background threshold and other termination conditions.
@@ -796,8 +882,10 @@ static long wb_writeback(struct bdi_writeback *wb,
796 trace_writeback_wait(wb->bdi, work); 882 trace_writeback_wait(wb->bdi, work);
797 inode = wb_inode(wb->b_more_io.prev); 883 inode = wb_inode(wb->b_more_io.prev);
798 spin_lock(&inode->i_lock); 884 spin_lock(&inode->i_lock);
799 inode_wait_for_writeback(inode, wb); 885 spin_unlock(&wb->list_lock);
800 spin_unlock(&inode->i_lock); 886 /* This function drops i_lock... */
887 inode_sleep_on_writeback(inode);
888 spin_lock(&wb->list_lock);
801 } 889 }
802 } 890 }
803 spin_unlock(&wb->list_lock); 891 spin_unlock(&wb->list_lock);
@@ -1331,7 +1419,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
1331int write_inode_now(struct inode *inode, int sync) 1419int write_inode_now(struct inode *inode, int sync)
1332{ 1420{
1333 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 1421 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1334 int ret;
1335 struct writeback_control wbc = { 1422 struct writeback_control wbc = {
1336 .nr_to_write = LONG_MAX, 1423 .nr_to_write = LONG_MAX,
1337 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, 1424 .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -1343,12 +1430,7 @@ int write_inode_now(struct inode *inode, int sync)
1343 wbc.nr_to_write = 0; 1430 wbc.nr_to_write = 0;
1344 1431
1345 might_sleep(); 1432 might_sleep();
1346 spin_lock(&wb->list_lock); 1433 return writeback_single_inode(inode, wb, &wbc);
1347 spin_lock(&inode->i_lock);
1348 ret = writeback_single_inode(inode, wb, &wbc);
1349 spin_unlock(&inode->i_lock);
1350 spin_unlock(&wb->list_lock);
1351 return ret;
1352} 1434}
1353EXPORT_SYMBOL(write_inode_now); 1435EXPORT_SYMBOL(write_inode_now);
1354 1436
@@ -1365,15 +1447,7 @@ EXPORT_SYMBOL(write_inode_now);
1365 */ 1447 */
1366int sync_inode(struct inode *inode, struct writeback_control *wbc) 1448int sync_inode(struct inode *inode, struct writeback_control *wbc)
1367{ 1449{
1368 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 1450 return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
1369 int ret;
1370
1371 spin_lock(&wb->list_lock);
1372 spin_lock(&inode->i_lock);
1373 ret = writeback_single_inode(inode, wb, wbc);
1374 spin_unlock(&inode->i_lock);
1375 spin_unlock(&wb->list_lock);
1376 return ret;
1377} 1451}
1378EXPORT_SYMBOL(sync_inode); 1452EXPORT_SYMBOL(sync_inode);
1379 1453
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 26783eb2b1fc..56f6dcf30768 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,7 +122,7 @@ static void fuse_destroy_inode(struct inode *inode)
122static void fuse_evict_inode(struct inode *inode) 122static void fuse_evict_inode(struct inode *inode)
123{ 123{
124 truncate_inode_pages(&inode->i_data, 0); 124 truncate_inode_pages(&inode->i_data, 0);
125 end_writeback(inode); 125 clear_inode(inode);
126 if (inode->i_sb->s_flags & MS_ACTIVE) { 126 if (inode->i_sb->s_flags & MS_ACTIVE) {
127 struct fuse_conn *fc = get_fuse_conn(inode); 127 struct fuse_conn *fc = get_fuse_conn(inode);
128 struct fuse_inode *fi = get_fuse_inode(inode); 128 struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6172fa77ad59..713e621c240b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1554,7 +1554,7 @@ out_unlock:
1554out: 1554out:
1555 /* Case 3 starts here */ 1555 /* Case 3 starts here */
1556 truncate_inode_pages(&inode->i_data, 0); 1556 truncate_inode_pages(&inode->i_data, 0);
1557 end_writeback(inode); 1557 clear_inode(inode);
1558 gfs2_dir_hash_inval(ip); 1558 gfs2_dir_hash_inval(ip);
1559 ip->i_gl->gl_object = NULL; 1559 ip->i_gl->gl_object = NULL;
1560 flush_delayed_work_sync(&ip->i_gl->gl_work); 1560 flush_delayed_work_sync(&ip->i_gl->gl_work);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 737dbeb64320..761ec06354b4 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -532,7 +532,7 @@ out:
532void hfs_evict_inode(struct inode *inode) 532void hfs_evict_inode(struct inode *inode)
533{ 533{
534 truncate_inode_pages(&inode->i_data, 0); 534 truncate_inode_pages(&inode->i_data, 0);
535 end_writeback(inode); 535 clear_inode(inode);
536 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { 536 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
537 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; 537 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
538 iput(HFS_I(inode)->rsrc_inode); 538 iput(HFS_I(inode)->rsrc_inode);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ceb1c281eefb..a9bca4b8768b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -154,7 +154,7 @@ static void hfsplus_evict_inode(struct inode *inode)
154{ 154{
155 dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); 155 dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
156 truncate_inode_pages(&inode->i_data, 0); 156 truncate_inode_pages(&inode->i_data, 0);
157 end_writeback(inode); 157 clear_inode(inode);
158 if (HFSPLUS_IS_RSRC(inode)) { 158 if (HFSPLUS_IS_RSRC(inode)) {
159 HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; 159 HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
160 iput(HFSPLUS_I(inode)->rsrc_inode); 160 iput(HFSPLUS_I(inode)->rsrc_inode);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07c516bfea76..2afa5bbccf9b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -240,7 +240,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
240static void hostfs_evict_inode(struct inode *inode) 240static void hostfs_evict_inode(struct inode *inode)
241{ 241{
242 truncate_inode_pages(&inode->i_data, 0); 242 truncate_inode_pages(&inode->i_data, 0);
243 end_writeback(inode); 243 clear_inode(inode);
244 if (HOSTFS_I(inode)->fd != -1) { 244 if (HOSTFS_I(inode)->fd != -1) {
245 close_file(&HOSTFS_I(inode)->fd); 245 close_file(&HOSTFS_I(inode)->fd);
246 HOSTFS_I(inode)->fd = -1; 246 HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 3b2cec29972b..b43066cbdc6a 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -299,7 +299,7 @@ void hpfs_write_if_changed(struct inode *inode)
299void hpfs_evict_inode(struct inode *inode) 299void hpfs_evict_inode(struct inode *inode)
300{ 300{
301 truncate_inode_pages(&inode->i_data, 0); 301 truncate_inode_pages(&inode->i_data, 0);
302 end_writeback(inode); 302 clear_inode(inode);
303 if (!inode->i_nlink) { 303 if (!inode->i_nlink) {
304 hpfs_lock(inode->i_sb); 304 hpfs_lock(inode->i_sb);
305 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 305 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a80e45a690ac..d4f93b52cec5 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -614,7 +614,7 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
614 614
615void hppfs_evict_inode(struct inode *ino) 615void hppfs_evict_inode(struct inode *ino)
616{ 616{
617 end_writeback(ino); 617 clear_inode(ino);
618 dput(HPPFS_I(ino)->proc_dentry); 618 dput(HPPFS_I(ino)->proc_dentry);
619 mntput(ino->i_sb->s_fs_info); 619 mntput(ino->i_sb->s_fs_info);
620} 620}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 001ef01d2fe2..cc9281b6c628 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -393,7 +393,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
393static void hugetlbfs_evict_inode(struct inode *inode) 393static void hugetlbfs_evict_inode(struct inode *inode)
394{ 394{
395 truncate_hugepages(inode, 0); 395 truncate_hugepages(inode, 0);
396 end_writeback(inode); 396 clear_inode(inode);
397} 397}
398 398
399static inline void 399static inline void
diff --git a/fs/inode.c b/fs/inode.c
index da93f7d160d4..6bc8761cc333 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode)
486} 486}
487EXPORT_SYMBOL(__remove_inode_hash); 487EXPORT_SYMBOL(__remove_inode_hash);
488 488
489void end_writeback(struct inode *inode) 489void clear_inode(struct inode *inode)
490{ 490{
491 might_sleep(); 491 might_sleep();
492 /* 492 /*
@@ -500,11 +500,10 @@ void end_writeback(struct inode *inode)
500 BUG_ON(!list_empty(&inode->i_data.private_list)); 500 BUG_ON(!list_empty(&inode->i_data.private_list));
501 BUG_ON(!(inode->i_state & I_FREEING)); 501 BUG_ON(!(inode->i_state & I_FREEING));
502 BUG_ON(inode->i_state & I_CLEAR); 502 BUG_ON(inode->i_state & I_CLEAR);
503 inode_sync_wait(inode);
504 /* don't need i_lock here, no concurrent mods to i_state */ 503 /* don't need i_lock here, no concurrent mods to i_state */
505 inode->i_state = I_FREEING | I_CLEAR; 504 inode->i_state = I_FREEING | I_CLEAR;
506} 505}
507EXPORT_SYMBOL(end_writeback); 506EXPORT_SYMBOL(clear_inode);
508 507
509/* 508/*
510 * Free the inode passed in, removing it from the lists it is still connected 509 * Free the inode passed in, removing it from the lists it is still connected
@@ -531,12 +530,20 @@ static void evict(struct inode *inode)
531 530
532 inode_sb_list_del(inode); 531 inode_sb_list_del(inode);
533 532
533 /*
534 * Wait for flusher thread to be done with the inode so that filesystem
535 * does not start destroying it while writeback is still running. Since
536 * the inode has I_FREEING set, flusher thread won't start new work on
537 * the inode. We just have to wait for running writeback to finish.
538 */
539 inode_wait_for_writeback(inode);
540
534 if (op->evict_inode) { 541 if (op->evict_inode) {
535 op->evict_inode(inode); 542 op->evict_inode(inode);
536 } else { 543 } else {
537 if (inode->i_data.nrpages) 544 if (inode->i_data.nrpages)
538 truncate_inode_pages(&inode->i_data, 0); 545 truncate_inode_pages(&inode->i_data, 0);
539 end_writeback(inode); 546 clear_inode(inode);
540 } 547 }
541 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 548 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
542 bd_forget(inode); 549 bd_forget(inode);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05f0754f2b46..08c03044abdd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal)
508 /* 508 /*
509 * We need to make sure that any blocks that were recently written out 509 * We need to make sure that any blocks that were recently written out
510 * --- perhaps by log_do_checkpoint() --- are flushed out before we 510 * --- perhaps by log_do_checkpoint() --- are flushed out before we
511 * drop the transactions from the journal. It's unlikely this will be 511 * drop the transactions from the journal. Similarly we need to be sure
512 * necessary, especially with an appropriately sized journal, but we 512 * superblock makes it to disk before next transaction starts reusing
513 * need this to guarantee correctness. Fortunately 513 * freed space (otherwise we could replay some blocks of the new
514 * cleanup_journal_tail() doesn't get called all that often. 514 * transaction thinking they belong to the old one). So we use
515 * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially
516 * with an appropriately sized journal, but we need this to guarantee
517 * correctness. Fortunately cleanup_journal_tail() doesn't get called
518 * all that often.
515 */ 519 */
516 if (journal->j_flags & JFS_BARRIER) 520 journal_update_sb_log_tail(journal, first_tid, blocknr,
517 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 521 WRITE_FLUSH_FUA);
518 522
519 spin_lock(&journal->j_state_lock); 523 spin_lock(&journal->j_state_lock);
520 if (!tid_gt(first_tid, journal->j_tail_sequence)) {
521 spin_unlock(&journal->j_state_lock);
522 /* Someone else cleaned up journal so return 0 */
523 return 0;
524 }
525 /* OK, update the superblock to recover the freed space. 524 /* OK, update the superblock to recover the freed space.
526 * Physical blocks come first: have we wrapped beyond the end of 525 * Physical blocks come first: have we wrapped beyond the end of
527 * the log? */ 526 * the log? */
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal)
539 journal->j_tail_sequence = first_tid; 538 journal->j_tail_sequence = first_tid;
540 journal->j_tail = blocknr; 539 journal->j_tail = blocknr;
541 spin_unlock(&journal->j_state_lock); 540 spin_unlock(&journal->j_state_lock);
542 if (!(journal->j_flags & JFS_ABORT))
543 journal_update_superblock(journal, 1);
544 return 0; 541 return 0;
545} 542}
546 543
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f2b9a571f4cf..52c15c776029 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -298,6 +298,7 @@ void journal_commit_transaction(journal_t *journal)
298 int tag_flag; 298 int tag_flag;
299 int i; 299 int i;
300 struct blk_plug plug; 300 struct blk_plug plug;
301 int write_op = WRITE;
301 302
302 /* 303 /*
303 * First job: lock down the current transaction and wait for 304 * First job: lock down the current transaction and wait for
@@ -307,7 +308,16 @@ void journal_commit_transaction(journal_t *journal)
307 /* Do we need to erase the effects of a prior journal_flush? */ 308 /* Do we need to erase the effects of a prior journal_flush? */
308 if (journal->j_flags & JFS_FLUSHED) { 309 if (journal->j_flags & JFS_FLUSHED) {
309 jbd_debug(3, "super block updated\n"); 310 jbd_debug(3, "super block updated\n");
310 journal_update_superblock(journal, 1); 311 mutex_lock(&journal->j_checkpoint_mutex);
312 /*
313 * We hold j_checkpoint_mutex so tail cannot change under us.
314 * We don't need any special data guarantees for writing sb
315 * since journal is empty and it is ok for write to be
316 * flushed only with transaction commit.
317 */
318 journal_update_sb_log_tail(journal, journal->j_tail_sequence,
319 journal->j_tail, WRITE_SYNC);
320 mutex_unlock(&journal->j_checkpoint_mutex);
311 } else { 321 } else {
312 jbd_debug(3, "superblock not updated\n"); 322 jbd_debug(3, "superblock not updated\n");
313 } 323 }
@@ -413,13 +423,16 @@ void journal_commit_transaction(journal_t *journal)
413 423
414 jbd_debug (3, "JBD: commit phase 2\n"); 424 jbd_debug (3, "JBD: commit phase 2\n");
415 425
426 if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
427 write_op = WRITE_SYNC;
428
416 /* 429 /*
417 * Now start flushing things to disk, in the order they appear 430 * Now start flushing things to disk, in the order they appear
418 * on the transaction lists. Data blocks go first. 431 * on the transaction lists. Data blocks go first.
419 */ 432 */
420 blk_start_plug(&plug); 433 blk_start_plug(&plug);
421 err = journal_submit_data_buffers(journal, commit_transaction, 434 err = journal_submit_data_buffers(journal, commit_transaction,
422 WRITE_SYNC); 435 write_op);
423 blk_finish_plug(&plug); 436 blk_finish_plug(&plug);
424 437
425 /* 438 /*
@@ -478,7 +491,7 @@ void journal_commit_transaction(journal_t *journal)
478 491
479 blk_start_plug(&plug); 492 blk_start_plug(&plug);
480 493
481 journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC); 494 journal_write_revoke_records(journal, commit_transaction, write_op);
482 495
483 /* 496 /*
484 * If we found any dirty or locked buffers, then we should have 497 * If we found any dirty or locked buffers, then we should have
@@ -649,7 +662,7 @@ start_journal_io:
649 clear_buffer_dirty(bh); 662 clear_buffer_dirty(bh);
650 set_buffer_uptodate(bh); 663 set_buffer_uptodate(bh);
651 bh->b_end_io = journal_end_buffer_io_sync; 664 bh->b_end_io = journal_end_buffer_io_sync;
652 submit_bh(WRITE_SYNC, bh); 665 submit_bh(write_op, bh);
653 } 666 }
654 cond_resched(); 667 cond_resched();
655 668
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0971e9217808..425c2f2cf170 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -563,6 +563,8 @@ int log_wait_commit(journal_t *journal, tid_t tid)
563 spin_unlock(&journal->j_state_lock); 563 spin_unlock(&journal->j_state_lock);
564#endif 564#endif
565 spin_lock(&journal->j_state_lock); 565 spin_lock(&journal->j_state_lock);
566 if (!tid_geq(journal->j_commit_waited, tid))
567 journal->j_commit_waited = tid;
566 while (tid_gt(tid, journal->j_commit_sequence)) { 568 while (tid_gt(tid, journal->j_commit_sequence)) {
567 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 569 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
568 tid, journal->j_commit_sequence); 570 tid, journal->j_commit_sequence);
@@ -921,8 +923,33 @@ static int journal_reset(journal_t *journal)
921 923
922 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 924 journal->j_max_transaction_buffers = journal->j_maxlen / 4;
923 925
924 /* Add the dynamic fields and write it to disk. */ 926 /*
925 journal_update_superblock(journal, 1); 927 * As a special case, if the on-disk copy is already marked as needing
928 * no recovery (s_start == 0), then we can safely defer the superblock
929 * update until the next commit by setting JFS_FLUSHED. This avoids
930 * attempting a write to a potential-readonly device.
931 */
932 if (sb->s_start == 0) {
933 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
934 "(start %u, seq %d, errno %d)\n",
935 journal->j_tail, journal->j_tail_sequence,
936 journal->j_errno);
937 journal->j_flags |= JFS_FLUSHED;
938 } else {
939 /* Lock here to make assertions happy... */
940 mutex_lock(&journal->j_checkpoint_mutex);
941 /*
942 * Update log tail information. We use WRITE_FUA since new
943 * transaction will start reusing journal space and so we
944 * must make sure information about current log tail is on
945 * disk before that.
946 */
947 journal_update_sb_log_tail(journal,
948 journal->j_tail_sequence,
949 journal->j_tail,
950 WRITE_FUA);
951 mutex_unlock(&journal->j_checkpoint_mutex);
952 }
926 return journal_start_thread(journal); 953 return journal_start_thread(journal);
927} 954}
928 955
@@ -999,35 +1026,15 @@ int journal_create(journal_t *journal)
999 return journal_reset(journal); 1026 return journal_reset(journal);
1000} 1027}
1001 1028
1002/** 1029static void journal_write_superblock(journal_t *journal, int write_op)
1003 * void journal_update_superblock() - Update journal sb on disk.
1004 * @journal: The journal to update.
1005 * @wait: Set to '0' if you don't want to wait for IO completion.
1006 *
1007 * Update a journal's dynamic superblock fields and write it to disk,
1008 * optionally waiting for the IO to complete.
1009 */
1010void journal_update_superblock(journal_t *journal, int wait)
1011{ 1030{
1012 journal_superblock_t *sb = journal->j_superblock;
1013 struct buffer_head *bh = journal->j_sb_buffer; 1031 struct buffer_head *bh = journal->j_sb_buffer;
1032 int ret;
1014 1033
1015 /* 1034 trace_journal_write_superblock(journal, write_op);
1016 * As a special case, if the on-disk copy is already marked as needing 1035 if (!(journal->j_flags & JFS_BARRIER))
1017 * no recovery (s_start == 0) and there are no outstanding transactions 1036 write_op &= ~(REQ_FUA | REQ_FLUSH);
1018 * in the filesystem, then we can safely defer the superblock update 1037 lock_buffer(bh);
1019 * until the next commit by setting JFS_FLUSHED. This avoids
1020 * attempting a write to a potential-readonly device.
1021 */
1022 if (sb->s_start == 0 && journal->j_tail_sequence ==
1023 journal->j_transaction_sequence) {
1024 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
1025 "(start %u, seq %d, errno %d)\n",
1026 journal->j_tail, journal->j_tail_sequence,
1027 journal->j_errno);
1028 goto out;
1029 }
1030
1031 if (buffer_write_io_error(bh)) { 1038 if (buffer_write_io_error(bh)) {
1032 char b[BDEVNAME_SIZE]; 1039 char b[BDEVNAME_SIZE];
1033 /* 1040 /*
@@ -1045,42 +1052,100 @@ void journal_update_superblock(journal_t *journal, int wait)
1045 set_buffer_uptodate(bh); 1052 set_buffer_uptodate(bh);
1046 } 1053 }
1047 1054
1055 get_bh(bh);
1056 bh->b_end_io = end_buffer_write_sync;
1057 ret = submit_bh(write_op, bh);
1058 wait_on_buffer(bh);
1059 if (buffer_write_io_error(bh)) {
1060 clear_buffer_write_io_error(bh);
1061 set_buffer_uptodate(bh);
1062 ret = -EIO;
1063 }
1064 if (ret) {
1065 char b[BDEVNAME_SIZE];
1066 printk(KERN_ERR "JBD: Error %d detected "
1067 "when updating journal superblock for %s.\n",
1068 ret, journal_dev_name(journal, b));
1069 }
1070}
1071
1072/**
1073 * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1074 * @journal: The journal to update.
1075 * @tail_tid: TID of the new transaction at the tail of the log
1076 * @tail_block: The first block of the transaction at the tail of the log
1077 * @write_op: With which operation should we write the journal sb
1078 *
1079 * Update a journal's superblock information about log tail and write it to
1080 * disk, waiting for the IO to complete.
1081 */
1082void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1083 unsigned int tail_block, int write_op)
1084{
1085 journal_superblock_t *sb = journal->j_superblock;
1086
1087 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1088 jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
1089 tail_block, tail_tid);
1090
1091 sb->s_sequence = cpu_to_be32(tail_tid);
1092 sb->s_start = cpu_to_be32(tail_block);
1093
1094 journal_write_superblock(journal, write_op);
1095
1096 /* Log is no longer empty */
1097 spin_lock(&journal->j_state_lock);
1098 WARN_ON(!sb->s_sequence);
1099 journal->j_flags &= ~JFS_FLUSHED;
1100 spin_unlock(&journal->j_state_lock);
1101}
1102
1103/**
1104 * mark_journal_empty() - Mark on disk journal as empty.
1105 * @journal: The journal to update.
1106 *
1107 * Update a journal's dynamic superblock fields to show that journal is empty.
1108 * Write updated superblock to disk waiting for IO to complete.
1109 */
1110static void mark_journal_empty(journal_t *journal)
1111{
1112 journal_superblock_t *sb = journal->j_superblock;
1113
1114 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1048 spin_lock(&journal->j_state_lock); 1115 spin_lock(&journal->j_state_lock);
1049 jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", 1116 jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
1050 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1117 journal->j_tail_sequence);
1051 1118
1052 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1119 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1053 sb->s_start = cpu_to_be32(journal->j_tail); 1120 sb->s_start = cpu_to_be32(0);
1054 sb->s_errno = cpu_to_be32(journal->j_errno);
1055 spin_unlock(&journal->j_state_lock); 1121 spin_unlock(&journal->j_state_lock);
1056 1122
1057 BUFFER_TRACE(bh, "marking dirty"); 1123 journal_write_superblock(journal, WRITE_FUA);
1058 mark_buffer_dirty(bh);
1059 if (wait) {
1060 sync_dirty_buffer(bh);
1061 if (buffer_write_io_error(bh)) {
1062 char b[BDEVNAME_SIZE];
1063 printk(KERN_ERR "JBD: I/O error detected "
1064 "when updating journal superblock for %s.\n",
1065 journal_dev_name(journal, b));
1066 clear_buffer_write_io_error(bh);
1067 set_buffer_uptodate(bh);
1068 }
1069 } else
1070 write_dirty_buffer(bh, WRITE);
1071 1124
1072 trace_jbd_update_superblock_end(journal, wait); 1125 spin_lock(&journal->j_state_lock);
1073out: 1126 /* Log is empty */
1074 /* If we have just flushed the log (by marking s_start==0), then 1127 journal->j_flags |= JFS_FLUSHED;
1075 * any future commit will have to be careful to update the 1128 spin_unlock(&journal->j_state_lock);
1076 * superblock again to re-record the true start of the log. */ 1129}
1130
1131/**
1132 * journal_update_sb_errno() - Update error in the journal.
1133 * @journal: The journal to update.
1134 *
1135 * Update a journal's errno. Write updated superblock to disk waiting for IO
1136 * to complete.
1137 */
1138static void journal_update_sb_errno(journal_t *journal)
1139{
1140 journal_superblock_t *sb = journal->j_superblock;
1077 1141
1078 spin_lock(&journal->j_state_lock); 1142 spin_lock(&journal->j_state_lock);
1079 if (sb->s_start) 1143 jbd_debug(1, "JBD: updating superblock error (errno %d)\n",
1080 journal->j_flags &= ~JFS_FLUSHED; 1144 journal->j_errno);
1081 else 1145 sb->s_errno = cpu_to_be32(journal->j_errno);
1082 journal->j_flags |= JFS_FLUSHED;
1083 spin_unlock(&journal->j_state_lock); 1146 spin_unlock(&journal->j_state_lock);
1147
1148 journal_write_superblock(journal, WRITE_SYNC);
1084} 1149}
1085 1150
1086/* 1151/*
@@ -1251,6 +1316,8 @@ int journal_destroy(journal_t *journal)
1251 1316
1252 /* Force any old transactions to disk */ 1317 /* Force any old transactions to disk */
1253 1318
1319 /* We cannot race with anybody but must keep assertions happy */
1320 mutex_lock(&journal->j_checkpoint_mutex);
1254 /* Totally anal locking here... */ 1321 /* Totally anal locking here... */
1255 spin_lock(&journal->j_list_lock); 1322 spin_lock(&journal->j_list_lock);
1256 while (journal->j_checkpoint_transactions != NULL) { 1323 while (journal->j_checkpoint_transactions != NULL) {
@@ -1266,16 +1333,14 @@ int journal_destroy(journal_t *journal)
1266 1333
1267 if (journal->j_sb_buffer) { 1334 if (journal->j_sb_buffer) {
1268 if (!is_journal_aborted(journal)) { 1335 if (!is_journal_aborted(journal)) {
1269 /* We can now mark the journal as empty. */
1270 journal->j_tail = 0;
1271 journal->j_tail_sequence = 1336 journal->j_tail_sequence =
1272 ++journal->j_transaction_sequence; 1337 ++journal->j_transaction_sequence;
1273 journal_update_superblock(journal, 1); 1338 mark_journal_empty(journal);
1274 } else { 1339 } else
1275 err = -EIO; 1340 err = -EIO;
1276 }
1277 brelse(journal->j_sb_buffer); 1341 brelse(journal->j_sb_buffer);
1278 } 1342 }
1343 mutex_unlock(&journal->j_checkpoint_mutex);
1279 1344
1280 if (journal->j_inode) 1345 if (journal->j_inode)
1281 iput(journal->j_inode); 1346 iput(journal->j_inode);
@@ -1455,7 +1520,6 @@ int journal_flush(journal_t *journal)
1455{ 1520{
1456 int err = 0; 1521 int err = 0;
1457 transaction_t *transaction = NULL; 1522 transaction_t *transaction = NULL;
1458 unsigned int old_tail;
1459 1523
1460 spin_lock(&journal->j_state_lock); 1524 spin_lock(&journal->j_state_lock);
1461 1525
@@ -1490,6 +1554,7 @@ int journal_flush(journal_t *journal)
1490 if (is_journal_aborted(journal)) 1554 if (is_journal_aborted(journal))
1491 return -EIO; 1555 return -EIO;
1492 1556
1557 mutex_lock(&journal->j_checkpoint_mutex);
1493 cleanup_journal_tail(journal); 1558 cleanup_journal_tail(journal);
1494 1559
1495 /* Finally, mark the journal as really needing no recovery. 1560 /* Finally, mark the journal as really needing no recovery.
@@ -1497,14 +1562,9 @@ int journal_flush(journal_t *journal)
1497 * the magic code for a fully-recovered superblock. Any future 1562 * the magic code for a fully-recovered superblock. Any future
1498 * commits of data to the journal will restore the current 1563 * commits of data to the journal will restore the current
1499 * s_start value. */ 1564 * s_start value. */
1565 mark_journal_empty(journal);
1566 mutex_unlock(&journal->j_checkpoint_mutex);
1500 spin_lock(&journal->j_state_lock); 1567 spin_lock(&journal->j_state_lock);
1501 old_tail = journal->j_tail;
1502 journal->j_tail = 0;
1503 spin_unlock(&journal->j_state_lock);
1504 journal_update_superblock(journal, 1);
1505 spin_lock(&journal->j_state_lock);
1506 journal->j_tail = old_tail;
1507
1508 J_ASSERT(!journal->j_running_transaction); 1568 J_ASSERT(!journal->j_running_transaction);
1509 J_ASSERT(!journal->j_committing_transaction); 1569 J_ASSERT(!journal->j_committing_transaction);
1510 J_ASSERT(!journal->j_checkpoint_transactions); 1570 J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1544,8 +1604,12 @@ int journal_wipe(journal_t *journal, int write)
1544 write ? "Clearing" : "Ignoring"); 1604 write ? "Clearing" : "Ignoring");
1545 1605
1546 err = journal_skip_recovery(journal); 1606 err = journal_skip_recovery(journal);
1547 if (write) 1607 if (write) {
1548 journal_update_superblock(journal, 1); 1608 /* Lock to make assertions happy... */
1609 mutex_lock(&journal->j_checkpoint_mutex);
1610 mark_journal_empty(journal);
1611 mutex_unlock(&journal->j_checkpoint_mutex);
1612 }
1549 1613
1550 no_recovery: 1614 no_recovery:
1551 return err; 1615 return err;
@@ -1613,7 +1677,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1613 __journal_abort_hard(journal); 1677 __journal_abort_hard(journal);
1614 1678
1615 if (errno) 1679 if (errno)
1616 journal_update_superblock(journal, 1); 1680 journal_update_sb_errno(journal);
1617} 1681}
1618 1682
1619/** 1683/**
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index b2a7e5244e39..febc10db5ced 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1433,8 +1433,6 @@ int journal_stop(handle_t *handle)
1433 } 1433 }
1434 } 1434 }
1435 1435
1436 if (handle->h_sync)
1437 transaction->t_synchronous_commit = 1;
1438 current->journal_info = NULL; 1436 current->journal_info = NULL;
1439 spin_lock(&journal->j_state_lock); 1437 spin_lock(&journal->j_state_lock);
1440 spin_lock(&transaction->t_handle_lock); 1438 spin_lock(&transaction->t_handle_lock);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bb6f993ebca9..3d3092eda811 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -240,7 +240,7 @@ void jffs2_evict_inode (struct inode *inode)
240 jffs2_dbg(1, "%s(): ino #%lu mode %o\n", 240 jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
241 __func__, inode->i_ino, inode->i_mode); 241 __func__, inode->i_ino, inode->i_mode);
242 truncate_inode_pages(&inode->i_data, 0); 242 truncate_inode_pages(&inode->i_data, 0);
243 end_writeback(inode); 243 clear_inode(inode);
244 jffs2_do_clear_inode(c, f); 244 jffs2_do_clear_inode(c, f);
245} 245}
246 246
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 77b69b27f825..4692bf3ca8cb 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -169,7 +169,7 @@ void jfs_evict_inode(struct inode *inode)
169 } else { 169 } else {
170 truncate_inode_pages(&inode->i_data, 0); 170 truncate_inode_pages(&inode->i_data, 0);
171 } 171 }
172 end_writeback(inode); 172 clear_inode(inode);
173 dquot_drop(inode); 173 dquot_drop(inode);
174} 174}
175 175
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index e3ab5e5a904c..f1cb512c5019 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2175,7 +2175,7 @@ void logfs_evict_inode(struct inode *inode)
2175 } 2175 }
2176 } 2176 }
2177 truncate_inode_pages(&inode->i_data, 0); 2177 truncate_inode_pages(&inode->i_data, 0);
2178 end_writeback(inode); 2178 clear_inode(inode);
2179 2179
2180 /* Cheaper version of write_inode. All changes are concealed in 2180 /* Cheaper version of write_inode. All changes are concealed in
2181 * aliases, which are moved back. No write to the medium happens. 2181 * aliases, which are moved back. No write to the medium happens.
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fcb05d2c6b5f..2a503ad020d5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -32,7 +32,7 @@ static void minix_evict_inode(struct inode *inode)
32 minix_truncate(inode); 32 minix_truncate(inode);
33 } 33 }
34 invalidate_inode_buffers(inode); 34 invalidate_inode_buffers(inode);
35 end_writeback(inode); 35 clear_inode(inode);
36 if (!inode->i_nlink) 36 if (!inode->i_nlink)
37 minix_free_inode(inode); 37 minix_free_inode(inode);
38} 38}
diff --git a/fs/namei.c b/fs/namei.c
index 93ff12b1a1de..c651f02c9fec 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1452,7 +1452,8 @@ EXPORT_SYMBOL(full_name_hash);
1452 */ 1452 */
1453static inline unsigned long hash_name(const char *name, unsigned int *hashp) 1453static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1454{ 1454{
1455 unsigned long a, mask, hash, len; 1455 unsigned long a, b, adata, bdata, mask, hash, len;
1456 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
1456 1457
1457 hash = a = 0; 1458 hash = a = 0;
1458 len = -sizeof(unsigned long); 1459 len = -sizeof(unsigned long);
@@ -1460,17 +1461,18 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1460 hash = (hash + a) * 9; 1461 hash = (hash + a) * 9;
1461 len += sizeof(unsigned long); 1462 len += sizeof(unsigned long);
1462 a = load_unaligned_zeropad(name+len); 1463 a = load_unaligned_zeropad(name+len);
1463 /* Do we have any NUL or '/' bytes in this word? */ 1464 b = a ^ REPEAT_BYTE('/');
1464 mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/')); 1465 } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
1465 } while (!mask); 1466
1466 1467 adata = prep_zero_mask(a, adata, &constants);
1467 /* The mask *below* the first high bit set */ 1468 bdata = prep_zero_mask(b, bdata, &constants);
1468 mask = (mask - 1) & ~mask; 1469
1469 mask >>= 7; 1470 mask = create_zero_mask(adata | bdata);
1470 hash += a & mask; 1471
1472 hash += a & zero_bytemask(mask);
1471 *hashp = fold_hash(hash); 1473 *hashp = fold_hash(hash);
1472 1474
1473 return len + count_masked_bytes(mask); 1475 return len + find_zero(mask);
1474} 1476}
1475 1477
1476#else 1478#else
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 87484fb8d177..333df07ae3bd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -292,7 +292,7 @@ static void
292ncp_evict_inode(struct inode *inode) 292ncp_evict_inode(struct inode *inode)
293{ 293{
294 truncate_inode_pages(&inode->i_data, 0); 294 truncate_inode_pages(&inode->i_data, 0);
295 end_writeback(inode); 295 clear_inode(inode);
296 296
297 if (S_ISDIR(inode->i_mode)) { 297 if (S_ISDIR(inode->i_mode)) {
298 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); 298 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a0e6c599147..f90f4f5cd421 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFS_V2
33 bool "NFS client support for NFS version 2"
34 depends on NFS_FS
35 default y
36 help
37 This option enables support for version 2 of the NFS protocol
38 (RFC 1094) in the kernel's NFS client.
39
40 If unsure, say Y.
41
32config NFS_V3 42config NFS_V3
33 bool "NFS client support for NFS version 3" 43 bool "NFS client support for NFS version 3"
34 depends on NFS_FS 44 depends on NFS_FS
45 default y
35 help 46 help
36 This option enables support for version 3 of the NFS protocol 47 This option enables support for version 3 of the NFS protocol
37 (RFC 1813) in the kernel's NFS client. 48 (RFC 1813) in the kernel's NFS client.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d0abb3..7ddd45d9f170 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o \ 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o 10 dns_resolve.o cache_lib.o
11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 13nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 14nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
14nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ 15nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..7ae8a608956f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
187 struct parallel_io *par = bio->bi_private; 187 struct parallel_io *par = bio->bi_private;
188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 188 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 189 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
190 struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
191 190
192 do { 191 do {
193 struct page *page = bvec->bv_page; 192 struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
198 SetPageUptodate(page); 197 SetPageUptodate(page);
199 } while (bvec >= bio->bi_io_vec); 198 } while (bvec >= bio->bi_io_vec);
200 if (!uptodate) { 199 if (!uptodate) {
201 if (!rdata->pnfs_error) 200 struct nfs_read_data *rdata = par->data;
202 rdata->pnfs_error = -EIO; 201 struct nfs_pgio_header *header = rdata->header;
203 pnfs_set_lo_fail(rdata->lseg); 202
203 if (!header->pnfs_error)
204 header->pnfs_error = -EIO;
205 pnfs_set_lo_fail(header->lseg);
204 } 206 }
205 bio_put(bio); 207 bio_put(bio);
206 put_parallel(par); 208 put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
221{ 223{
222 struct nfs_read_data *rdata = data; 224 struct nfs_read_data *rdata = data;
223 225
224 rdata->task.tk_status = rdata->pnfs_error; 226 rdata->task.tk_status = rdata->header->pnfs_error;
225 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 227 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
226 schedule_work(&rdata->task.u.tk_work); 228 schedule_work(&rdata->task.u.tk_work);
227} 229}
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
229static enum pnfs_try_status 231static enum pnfs_try_status
230bl_read_pagelist(struct nfs_read_data *rdata) 232bl_read_pagelist(struct nfs_read_data *rdata)
231{ 233{
234 struct nfs_pgio_header *header = rdata->header;
232 int i, hole; 235 int i, hole;
233 struct bio *bio = NULL; 236 struct bio *bio = NULL;
234 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 237 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
239 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 242 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
240 243
241 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 244 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
242 rdata->npages, f_offset, (unsigned int)rdata->args.count); 245 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
243 246
244 par = alloc_parallel(rdata); 247 par = alloc_parallel(rdata);
245 if (!par) 248 if (!par)
@@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)
249 252
250 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 253 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
251 /* Code assumes extents are page-aligned */ 254 /* Code assumes extents are page-aligned */
252 for (i = pg_index; i < rdata->npages; i++) { 255 for (i = pg_index; i < rdata->pages.npages; i++) {
253 if (!extent_length) { 256 if (!extent_length) {
254 /* We've used up the previous extent */ 257 /* We've used up the previous extent */
255 bl_put_extent(be); 258 bl_put_extent(be);
256 bl_put_extent(cow_read); 259 bl_put_extent(cow_read);
257 bio = bl_submit_bio(READ, bio); 260 bio = bl_submit_bio(READ, bio);
258 /* Get the next one */ 261 /* Get the next one */
259 be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), 262 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
260 isect, &cow_read); 263 isect, &cow_read);
261 if (!be) { 264 if (!be) {
262 rdata->pnfs_error = -EIO; 265 header->pnfs_error = -EIO;
263 goto out; 266 goto out;
264 } 267 }
265 extent_length = be->be_length - 268 extent_length = be->be_length -
@@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
282 struct pnfs_block_extent *be_read; 285 struct pnfs_block_extent *be_read;
283 286
284 be_read = (hole && cow_read) ? cow_read : be; 287 be_read = (hole && cow_read) ? cow_read : be;
285 bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, 288 bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
289 READ,
286 isect, pages[i], be_read, 290 isect, pages[i], be_read,
287 bl_end_io_read, par); 291 bl_end_io_read, par);
288 if (IS_ERR(bio)) { 292 if (IS_ERR(bio)) {
289 rdata->pnfs_error = PTR_ERR(bio); 293 header->pnfs_error = PTR_ERR(bio);
290 bio = NULL; 294 bio = NULL;
291 goto out; 295 goto out;
292 } 296 }
@@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
294 isect += PAGE_CACHE_SECTORS; 298 isect += PAGE_CACHE_SECTORS;
295 extent_length -= PAGE_CACHE_SECTORS; 299 extent_length -= PAGE_CACHE_SECTORS;
296 } 300 }
297 if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { 301 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
298 rdata->res.eof = 1; 302 rdata->res.eof = 1;
299 rdata->res.count = rdata->inode->i_size - f_offset; 303 rdata->res.count = header->inode->i_size - f_offset;
300 } else { 304 } else {
301 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; 305 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
302 } 306 }
@@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
345 struct parallel_io *par = bio->bi_private; 349 struct parallel_io *par = bio->bi_private;
346 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 350 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
347 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 351 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
348 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
349 352
350 do { 353 do {
351 struct page *page = bvec->bv_page; 354 struct page *page = bvec->bv_page;
@@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
358 } while (bvec >= bio->bi_io_vec); 361 } while (bvec >= bio->bi_io_vec);
359 362
360 if (unlikely(!uptodate)) { 363 if (unlikely(!uptodate)) {
361 if (!wdata->pnfs_error) 364 struct nfs_write_data *data = par->data;
362 wdata->pnfs_error = -EIO; 365 struct nfs_pgio_header *header = data->header;
363 pnfs_set_lo_fail(wdata->lseg); 366
367 if (!header->pnfs_error)
368 header->pnfs_error = -EIO;
369 pnfs_set_lo_fail(header->lseg);
364 } 370 }
365 bio_put(bio); 371 bio_put(bio);
366 put_parallel(par); 372 put_parallel(par);
@@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)
370{ 376{
371 struct parallel_io *par = bio->bi_private; 377 struct parallel_io *par = bio->bi_private;
372 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 378 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
373 struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; 379 struct nfs_write_data *data = par->data;
380 struct nfs_pgio_header *header = data->header;
374 381
375 if (!uptodate) { 382 if (!uptodate) {
376 if (!wdata->pnfs_error) 383 if (!header->pnfs_error)
377 wdata->pnfs_error = -EIO; 384 header->pnfs_error = -EIO;
378 pnfs_set_lo_fail(wdata->lseg); 385 pnfs_set_lo_fail(header->lseg);
379 } 386 }
380 bio_put(bio); 387 bio_put(bio);
381 put_parallel(par); 388 put_parallel(par);
@@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)
391 dprintk("%s enter\n", __func__); 398 dprintk("%s enter\n", __func__);
392 task = container_of(work, struct rpc_task, u.tk_work); 399 task = container_of(work, struct rpc_task, u.tk_work);
393 wdata = container_of(task, struct nfs_write_data, task); 400 wdata = container_of(task, struct nfs_write_data, task);
394 if (likely(!wdata->pnfs_error)) { 401 if (likely(!wdata->header->pnfs_error)) {
395 /* Marks for LAYOUTCOMMIT */ 402 /* Marks for LAYOUTCOMMIT */
396 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 403 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
397 wdata->args.offset, wdata->args.count); 404 wdata->args.offset, wdata->args.count);
398 } 405 }
399 pnfs_ld_write_done(wdata); 406 pnfs_ld_write_done(wdata);
@@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)
404{ 411{
405 struct nfs_write_data *wdata = data; 412 struct nfs_write_data *wdata = data;
406 413
407 if (unlikely(wdata->pnfs_error)) { 414 if (unlikely(wdata->header->pnfs_error)) {
408 bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, 415 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
409 num_se); 416 num_se);
410 } 417 }
411 418
412 wdata->task.tk_status = wdata->pnfs_error; 419 wdata->task.tk_status = wdata->header->pnfs_error;
413 wdata->verf.committed = NFS_FILE_SYNC; 420 wdata->verf.committed = NFS_FILE_SYNC;
414 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 421 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
415 schedule_work(&wdata->task.u.tk_work); 422 schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +547,7 @@ check_page:
540static enum pnfs_try_status 547static enum pnfs_try_status
541bl_write_pagelist(struct nfs_write_data *wdata, int sync) 548bl_write_pagelist(struct nfs_write_data *wdata, int sync)
542{ 549{
550 struct nfs_pgio_header *header = wdata->header;
543 int i, ret, npg_zero, pg_index, last = 0; 551 int i, ret, npg_zero, pg_index, last = 0;
544 struct bio *bio = NULL; 552 struct bio *bio = NULL;
545 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 553 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
552 pgoff_t index; 560 pgoff_t index;
553 u64 temp; 561 u64 temp;
554 int npg_per_block = 562 int npg_per_block =
555 NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 563 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
556 564
557 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 565 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
558 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 566 /* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
566 /* At this point, have to be more careful with error handling */ 574 /* At this point, have to be more careful with error handling */
567 575
568 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 576 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
569 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); 577 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
570 if (!be || !is_writable(be, isect)) { 578 if (!be || !is_writable(be, isect)) {
571 dprintk("%s no matching extents!\n", __func__); 579 dprintk("%s no matching extents!\n", __func__);
572 goto out_mds; 580 goto out_mds;
@@ -597,10 +605,10 @@ fill_invalid_ext:
597 dprintk("%s zero %dth page: index %lu isect %llu\n", 605 dprintk("%s zero %dth page: index %lu isect %llu\n",
598 __func__, npg_zero, index, 606 __func__, npg_zero, index,
599 (unsigned long long)isect); 607 (unsigned long long)isect);
600 page = bl_find_get_zeroing_page(wdata->inode, index, 608 page = bl_find_get_zeroing_page(header->inode, index,
601 cow_read); 609 cow_read);
602 if (unlikely(IS_ERR(page))) { 610 if (unlikely(IS_ERR(page))) {
603 wdata->pnfs_error = PTR_ERR(page); 611 header->pnfs_error = PTR_ERR(page);
604 goto out; 612 goto out;
605 } else if (page == NULL) 613 } else if (page == NULL)
606 goto next_page; 614 goto next_page;
@@ -612,7 +620,7 @@ fill_invalid_ext:
612 __func__, ret); 620 __func__, ret);
613 end_page_writeback(page); 621 end_page_writeback(page);
614 page_cache_release(page); 622 page_cache_release(page);
615 wdata->pnfs_error = ret; 623 header->pnfs_error = ret;
616 goto out; 624 goto out;
617 } 625 }
618 if (likely(!bl_push_one_short_extent(be->be_inval))) 626 if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +628,11 @@ fill_invalid_ext:
620 else { 628 else {
621 end_page_writeback(page); 629 end_page_writeback(page);
622 page_cache_release(page); 630 page_cache_release(page);
623 wdata->pnfs_error = -ENOMEM; 631 header->pnfs_error = -ENOMEM;
624 goto out; 632 goto out;
625 } 633 }
626 /* FIXME: This should be done in bi_end_io */ 634 /* FIXME: This should be done in bi_end_io */
627 mark_extents_written(BLK_LSEG2EXT(wdata->lseg), 635 mark_extents_written(BLK_LSEG2EXT(header->lseg),
628 page->index << PAGE_CACHE_SHIFT, 636 page->index << PAGE_CACHE_SHIFT,
629 PAGE_CACHE_SIZE); 637 PAGE_CACHE_SIZE);
630 638
@@ -632,7 +640,7 @@ fill_invalid_ext:
632 isect, page, be, 640 isect, page, be,
633 bl_end_io_write_zero, par); 641 bl_end_io_write_zero, par);
634 if (IS_ERR(bio)) { 642 if (IS_ERR(bio)) {
635 wdata->pnfs_error = PTR_ERR(bio); 643 header->pnfs_error = PTR_ERR(bio);
636 bio = NULL; 644 bio = NULL;
637 goto out; 645 goto out;
638 } 646 }
@@ -647,16 +655,16 @@ next_page:
647 655
648 /* Middle pages */ 656 /* Middle pages */
649 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 657 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
650 for (i = pg_index; i < wdata->npages; i++) { 658 for (i = pg_index; i < wdata->pages.npages; i++) {
651 if (!extent_length) { 659 if (!extent_length) {
652 /* We've used up the previous extent */ 660 /* We've used up the previous extent */
653 bl_put_extent(be); 661 bl_put_extent(be);
654 bio = bl_submit_bio(WRITE, bio); 662 bio = bl_submit_bio(WRITE, bio);
655 /* Get the next one */ 663 /* Get the next one */
656 be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), 664 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
657 isect, NULL); 665 isect, NULL);
658 if (!be || !is_writable(be, isect)) { 666 if (!be || !is_writable(be, isect)) {
659 wdata->pnfs_error = -EINVAL; 667 header->pnfs_error = -EINVAL;
660 goto out; 668 goto out;
661 } 669 }
662 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 670 if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +672,7 @@ next_page:
664 be->be_inval))) 672 be->be_inval)))
665 par->bse_count++; 673 par->bse_count++;
666 else { 674 else {
667 wdata->pnfs_error = -ENOMEM; 675 header->pnfs_error = -ENOMEM;
668 goto out; 676 goto out;
669 } 677 }
670 } 678 }
@@ -677,15 +685,15 @@ next_page:
677 if (unlikely(ret)) { 685 if (unlikely(ret)) {
678 dprintk("%s bl_mark_sectors_init fail %d\n", 686 dprintk("%s bl_mark_sectors_init fail %d\n",
679 __func__, ret); 687 __func__, ret);
680 wdata->pnfs_error = ret; 688 header->pnfs_error = ret;
681 goto out; 689 goto out;
682 } 690 }
683 } 691 }
684 bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, 692 bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
685 isect, pages[i], be, 693 isect, pages[i], be,
686 bl_end_io_write, par); 694 bl_end_io_write, par);
687 if (IS_ERR(bio)) { 695 if (IS_ERR(bio)) {
688 wdata->pnfs_error = PTR_ERR(bio); 696 header->pnfs_error = PTR_ERR(bio);
689 bio = NULL; 697 bio = NULL;
690 goto out; 698 goto out;
691 } 699 }
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a5c88a554d92..c96554245ccf 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
123 uint8_t *dataptr; 123 uint8_t *dataptr;
124 DECLARE_WAITQUEUE(wq, current); 124 DECLARE_WAITQUEUE(wq, current);
125 int offset, len, i, rc; 125 int offset, len, i, rc;
126 struct net *net = server->nfs_client->net; 126 struct net *net = server->nfs_client->cl_net;
127 struct nfs_net *nn = net_generic(net, nfs_net_id); 127 struct nfs_net *nn = net_generic(net, nfs_net_id);
128 struct bl_dev_msg *reply = &nn->bl_mount_reply; 128 struct bl_dev_msg *reply = &nn->bl_mount_reply;
129 129
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..7d108753af81 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) 65static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
66{ 66{
67 int ret = 0; 67 int ret = 0;
68 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 68 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
69 69
70 if (clp->rpc_ops->version != 4 || minorversion != 0) 70 if (clp->rpc_ops->version != 4 || minorversion != 0)
71 return ret; 71 return ret;
@@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;
90 * RPC cruft for NFS 90 * RPC cruft for NFS
91 */ 91 */
92static const struct rpc_version *nfs_version[5] = { 92static const struct rpc_version *nfs_version[5] = {
93#ifdef CONFIG_NFS_V2
93 [2] = &nfs_version2, 94 [2] = &nfs_version2,
95#endif
94#ifdef CONFIG_NFS_V3 96#ifdef CONFIG_NFS_V3
95 [3] = &nfs_version3, 97 [3] = &nfs_version3,
96#endif 98#endif
@@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {
129#endif /* CONFIG_NFS_V3_ACL */ 131#endif /* CONFIG_NFS_V3_ACL */
130 132
131struct nfs_client_initdata { 133struct nfs_client_initdata {
134 unsigned long init_flags;
132 const char *hostname; 135 const char *hostname;
133 const struct sockaddr *addr; 136 const struct sockaddr *addr;
134 size_t addrlen; 137 size_t addrlen;
@@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
172 clp->cl_rpcclient = ERR_PTR(-EINVAL); 175 clp->cl_rpcclient = ERR_PTR(-EINVAL);
173 176
174 clp->cl_proto = cl_init->proto; 177 clp->cl_proto = cl_init->proto;
175 clp->net = get_net(cl_init->net); 178 clp->cl_net = get_net(cl_init->net);
176 179
177#ifdef CONFIG_NFS_V4 180#ifdef CONFIG_NFS_V4
178 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); 181 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
182 spin_lock_init(&clp->cl_lock); 185 spin_lock_init(&clp->cl_lock);
183 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); 186 INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
184 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 187 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
185 clp->cl_boot_time = CURRENT_TIME;
186 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 188 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
187 clp->cl_minorversion = cl_init->minorversion; 189 clp->cl_minorversion = cl_init->minorversion;
188 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; 190 clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
207 if (nfs4_has_session(clp)) { 209 if (nfs4_has_session(clp)) {
208 nfs4_deviceid_purge_client(clp); 210 nfs4_deviceid_purge_client(clp);
209 nfs4_destroy_session(clp->cl_session); 211 nfs4_destroy_session(clp->cl_session);
212 nfs4_destroy_clientid(clp);
210 } 213 }
211 214
212} 215}
@@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
235 nfs_idmap_delete(clp); 238 nfs_idmap_delete(clp);
236 239
237 rpc_destroy_wait_queue(&clp->cl_rpcwaitq); 240 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
241 kfree(clp->cl_serverowner);
242 kfree(clp->cl_serverscope);
243 kfree(clp->cl_implid);
238} 244}
239 245
240/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ 246/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
248/* nfs_client_lock held */ 254/* nfs_client_lock held */
249static void nfs_cb_idr_remove_locked(struct nfs_client *clp) 255static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
250{ 256{
251 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 257 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
252 258
253 if (clp->cl_cb_ident) 259 if (clp->cl_cb_ident)
254 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); 260 idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)
301 if (clp->cl_machine_cred != NULL) 307 if (clp->cl_machine_cred != NULL)
302 put_rpccred(clp->cl_machine_cred); 308 put_rpccred(clp->cl_machine_cred);
303 309
304 put_net(clp->net); 310 put_net(clp->cl_net);
305 kfree(clp->cl_hostname); 311 kfree(clp->cl_hostname);
306 kfree(clp->server_scope);
307 kfree(clp->impl_id);
308 kfree(clp); 312 kfree(clp);
309 313
310 dprintk("<-- nfs_free_client()\n"); 314 dprintk("<-- nfs_free_client()\n");
@@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)
321 return; 325 return;
322 326
323 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); 327 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
324 nn = net_generic(clp->net, nfs_net_id); 328 nn = net_generic(clp->cl_net, nfs_net_id);
325 329
326 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { 330 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
327 list_del(&clp->cl_share_link); 331 list_del(&clp->cl_share_link);
@@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
456 clp->cl_cons_state == NFS_CS_SESSION_INITING)) 460 clp->cl_cons_state == NFS_CS_SESSION_INITING))
457 return false; 461 return false;
458 462
463 smp_rmb();
464
459 /* Match the version and minorversion */ 465 /* Match the version and minorversion */
460 if (clp->rpc_ops->version != 4 || 466 if (clp->rpc_ops->version != 4 ||
461 clp->cl_minorversion != minorversion) 467 clp->cl_minorversion != minorversion)
@@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
504 return NULL; 510 return NULL;
505} 511}
506 512
513static bool nfs_client_init_is_complete(const struct nfs_client *clp)
514{
515 return clp->cl_cons_state != NFS_CS_INITING;
516}
517
518int nfs_wait_client_init_complete(const struct nfs_client *clp)
519{
520 return wait_event_killable(nfs_client_active_wq,
521 nfs_client_init_is_complete(clp));
522}
523
524/*
525 * Found an existing client. Make sure it's ready before returning.
526 */
527static struct nfs_client *
528nfs_found_client(const struct nfs_client_initdata *cl_init,
529 struct nfs_client *clp)
530{
531 int error;
532
533 error = nfs_wait_client_init_complete(clp);
534 if (error < 0) {
535 nfs_put_client(clp);
536 return ERR_PTR(-ERESTARTSYS);
537 }
538
539 if (clp->cl_cons_state < NFS_CS_READY) {
540 error = clp->cl_cons_state;
541 nfs_put_client(clp);
542 return ERR_PTR(error);
543 }
544
545 smp_rmb();
546
547 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
548
549 dprintk("<-- %s found nfs_client %p for %s\n",
550 __func__, clp, cl_init->hostname ?: "");
551 return clp;
552}
553
507/* 554/*
508 * Look up a client by IP address and protocol version 555 * Look up a client by IP address and protocol version
509 * - creates a new record if one doesn't yet exist 556 * - creates a new record if one doesn't yet exist
@@ -512,11 +559,9 @@ static struct nfs_client *
512nfs_get_client(const struct nfs_client_initdata *cl_init, 559nfs_get_client(const struct nfs_client_initdata *cl_init,
513 const struct rpc_timeout *timeparms, 560 const struct rpc_timeout *timeparms,
514 const char *ip_addr, 561 const char *ip_addr,
515 rpc_authflavor_t authflavour, 562 rpc_authflavor_t authflavour)
516 int noresvport)
517{ 563{
518 struct nfs_client *clp, *new = NULL; 564 struct nfs_client *clp, *new = NULL;
519 int error;
520 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 565 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
521 566
522 dprintk("--> nfs_get_client(%s,v%u)\n", 567 dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
527 spin_lock(&nn->nfs_client_lock); 572 spin_lock(&nn->nfs_client_lock);
528 573
529 clp = nfs_match_client(cl_init); 574 clp = nfs_match_client(cl_init);
530 if (clp) 575 if (clp) {
531 goto found_client; 576 spin_unlock(&nn->nfs_client_lock);
532 if (new) 577 if (new)
533 goto install_client; 578 nfs_free_client(new);
579 return nfs_found_client(cl_init, clp);
580 }
581 if (new) {
582 list_add(&new->cl_share_link, &nn->nfs_client_list);
583 spin_unlock(&nn->nfs_client_lock);
584 new->cl_flags = cl_init->init_flags;
585 return cl_init->rpc_ops->init_client(new,
586 timeparms, ip_addr,
587 authflavour);
588 }
534 589
535 spin_unlock(&nn->nfs_client_lock); 590 spin_unlock(&nn->nfs_client_lock);
536 591
537 new = nfs_alloc_client(cl_init); 592 new = nfs_alloc_client(cl_init);
538 } while (!IS_ERR(new)); 593 } while (!IS_ERR(new));
539 594
540 dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); 595 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
596 cl_init->hostname ?: "", PTR_ERR(new));
541 return new; 597 return new;
542
543 /* install a new client and return with it unready */
544install_client:
545 clp = new;
546 list_add(&clp->cl_share_link, &nn->nfs_client_list);
547 spin_unlock(&nn->nfs_client_lock);
548
549 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
550 authflavour, noresvport);
551 if (error < 0) {
552 nfs_put_client(clp);
553 return ERR_PTR(error);
554 }
555 dprintk("--> nfs_get_client() = %p [new]\n", clp);
556 return clp;
557
558 /* found an existing client
559 * - make sure it's ready before returning
560 */
561found_client:
562 spin_unlock(&nn->nfs_client_lock);
563
564 if (new)
565 nfs_free_client(new);
566
567 error = wait_event_killable(nfs_client_active_wq,
568 clp->cl_cons_state < NFS_CS_INITING);
569 if (error < 0) {
570 nfs_put_client(clp);
571 return ERR_PTR(-ERESTARTSYS);
572 }
573
574 if (clp->cl_cons_state < NFS_CS_READY) {
575 error = clp->cl_cons_state;
576 nfs_put_client(clp);
577 return ERR_PTR(error);
578 }
579
580 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
581
582 dprintk("--> nfs_get_client() = %p [share]\n", clp);
583 return clp;
584} 598}
585 599
586/* 600/*
@@ -588,27 +602,12 @@ found_client:
588 */ 602 */
589void nfs_mark_client_ready(struct nfs_client *clp, int state) 603void nfs_mark_client_ready(struct nfs_client *clp, int state)
590{ 604{
605 smp_wmb();
591 clp->cl_cons_state = state; 606 clp->cl_cons_state = state;
592 wake_up_all(&nfs_client_active_wq); 607 wake_up_all(&nfs_client_active_wq);
593} 608}
594 609
595/* 610/*
596 * With sessions, the client is not marked ready until after a
597 * successful EXCHANGE_ID and CREATE_SESSION.
598 *
599 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
600 * other versions of NFS can be tried.
601 */
602int nfs4_check_client_ready(struct nfs_client *clp)
603{
604 if (!nfs4_has_session(clp))
605 return 0;
606 if (clp->cl_cons_state < NFS_CS_READY)
607 return -EPROTONOSUPPORT;
608 return 0;
609}
610
611/*
612 * Initialise the timeout values for a connection 611 * Initialise the timeout values for a connection
613 */ 612 */
614static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, 613static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
654 */ 653 */
655static int nfs_create_rpc_client(struct nfs_client *clp, 654static int nfs_create_rpc_client(struct nfs_client *clp,
656 const struct rpc_timeout *timeparms, 655 const struct rpc_timeout *timeparms,
657 rpc_authflavor_t flavor, 656 rpc_authflavor_t flavor)
658 int discrtry, int noresvport)
659{ 657{
660 struct rpc_clnt *clnt = NULL; 658 struct rpc_clnt *clnt = NULL;
661 struct rpc_create_args args = { 659 struct rpc_create_args args = {
662 .net = clp->net, 660 .net = clp->cl_net,
663 .protocol = clp->cl_proto, 661 .protocol = clp->cl_proto,
664 .address = (struct sockaddr *)&clp->cl_addr, 662 .address = (struct sockaddr *)&clp->cl_addr,
665 .addrsize = clp->cl_addrlen, 663 .addrsize = clp->cl_addrlen,
@@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
670 .authflavor = flavor, 668 .authflavor = flavor,
671 }; 669 };
672 670
673 if (discrtry) 671 if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
674 args.flags |= RPC_CLNT_CREATE_DISCRTRY; 672 args.flags |= RPC_CLNT_CREATE_DISCRTRY;
675 if (noresvport) 673 if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
676 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 674 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
677 675
678 if (!IS_ERR(clp->cl_rpcclient)) 676 if (!IS_ERR(clp->cl_rpcclient))
@@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)
713 .nfs_version = clp->rpc_ops->version, 711 .nfs_version = clp->rpc_ops->version,
714 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 712 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
715 1 : 0, 713 1 : 0,
716 .net = clp->net, 714 .net = clp->cl_net,
717 }; 715 };
718 716
719 if (nlm_init.nfs_version > 3) 717 if (nlm_init.nfs_version > 3)
@@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
805 return 0; 803 return 0;
806} 804}
807 805
808/* 806/**
809 * Initialise an NFS2 or NFS3 client 807 * nfs_init_client - Initialise an NFS2 or NFS3 client
808 *
809 * @clp: nfs_client to initialise
810 * @timeparms: timeout parameters for underlying RPC transport
811 * @ip_addr: IP presentation address (not used)
812 * @authflavor: authentication flavor for underlying RPC transport
813 *
814 * Returns pointer to an NFS client, or an ERR_PTR value.
810 */ 815 */
811int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, 816struct nfs_client *nfs_init_client(struct nfs_client *clp,
812 const char *ip_addr, rpc_authflavor_t authflavour, 817 const struct rpc_timeout *timeparms,
813 int noresvport) 818 const char *ip_addr, rpc_authflavor_t authflavour)
814{ 819{
815 int error; 820 int error;
816 821
817 if (clp->cl_cons_state == NFS_CS_READY) { 822 if (clp->cl_cons_state == NFS_CS_READY) {
818 /* the client is already initialised */ 823 /* the client is already initialised */
819 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); 824 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
820 return 0; 825 return clp;
821 } 826 }
822 827
823 /* 828 /*
824 * Create a client RPC handle for doing FSSTAT with UNIX auth only 829 * Create a client RPC handle for doing FSSTAT with UNIX auth only
825 * - RFC 2623, sec 2.3.2 830 * - RFC 2623, sec 2.3.2
826 */ 831 */
827 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 832 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
828 0, noresvport);
829 if (error < 0) 833 if (error < 0)
830 goto error; 834 goto error;
831 nfs_mark_client_ready(clp, NFS_CS_READY); 835 nfs_mark_client_ready(clp, NFS_CS_READY);
832 return 0; 836 return clp;
833 837
834error: 838error:
835 nfs_mark_client_ready(clp, error); 839 nfs_mark_client_ready(clp, error);
840 nfs_put_client(clp);
836 dprintk("<-- nfs_init_client() = xerror %d\n", error); 841 dprintk("<-- nfs_init_client() = xerror %d\n", error);
837 return error; 842 return ERR_PTR(error);
838} 843}
839 844
840/* 845/*
@@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,
847 .hostname = data->nfs_server.hostname, 852 .hostname = data->nfs_server.hostname,
848 .addr = (const struct sockaddr *)&data->nfs_server.address, 853 .addr = (const struct sockaddr *)&data->nfs_server.address,
849 .addrlen = data->nfs_server.addrlen, 854 .addrlen = data->nfs_server.addrlen,
850 .rpc_ops = &nfs_v2_clientops, 855 .rpc_ops = NULL,
851 .proto = data->nfs_server.protocol, 856 .proto = data->nfs_server.protocol,
852 .net = data->net, 857 .net = data->net,
853 }; 858 };
@@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,
857 862
858 dprintk("--> nfs_init_server()\n"); 863 dprintk("--> nfs_init_server()\n");
859 864
865 switch (data->version) {
866#ifdef CONFIG_NFS_V2
867 case 2:
868 cl_init.rpc_ops = &nfs_v2_clientops;
869 break;
870#endif
860#ifdef CONFIG_NFS_V3 871#ifdef CONFIG_NFS_V3
861 if (data->version == 3) 872 case 3:
862 cl_init.rpc_ops = &nfs_v3_clientops; 873 cl_init.rpc_ops = &nfs_v3_clientops;
874 break;
863#endif 875#endif
876 default:
877 return -EPROTONOSUPPORT;
878 }
864 879
865 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 880 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
866 data->timeo, data->retrans); 881 data->timeo, data->retrans);
882 if (data->flags & NFS_MOUNT_NORESVPORT)
883 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
867 884
868 /* Allocate or find a client reference we can use */ 885 /* Allocate or find a client reference we can use */
869 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, 886 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
870 data->flags & NFS_MOUNT_NORESVPORT);
871 if (IS_ERR(clp)) { 887 if (IS_ERR(clp)) {
872 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 888 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
873 return PTR_ERR(clp); 889 return PTR_ERR(clp);
@@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,
880 server->options = data->options; 896 server->options = data->options;
881 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| 897 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
882 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| 898 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
883 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; 899 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
884 900
885 if (data->rsize) 901 if (data->rsize)
886 server->rsize = nfs_block_size(data->rsize, NULL); 902 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
1048static void nfs_server_insert_lists(struct nfs_server *server) 1064static void nfs_server_insert_lists(struct nfs_server *server)
1049{ 1065{
1050 struct nfs_client *clp = server->nfs_client; 1066 struct nfs_client *clp = server->nfs_client;
1051 struct nfs_net *nn = net_generic(clp->net, nfs_net_id); 1067 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1052 1068
1053 spin_lock(&nn->nfs_client_lock); 1069 spin_lock(&nn->nfs_client_lock);
1054 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1070 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
1065 1081
1066 if (clp == NULL) 1082 if (clp == NULL)
1067 return; 1083 return;
1068 nn = net_generic(clp->net, nfs_net_id); 1084 nn = net_generic(clp->cl_net, nfs_net_id);
1069 spin_lock(&nn->nfs_client_lock); 1085 spin_lock(&nn->nfs_client_lock);
1070 list_del_rcu(&server->client_link); 1086 list_del_rcu(&server->client_link);
1071 if (list_empty(&clp->cl_superblocks)) 1087 if (list_empty(&clp->cl_superblocks))
@@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1333 * so that the client back channel can find the 1349 * so that the client back channel can find the
1334 * nfs_client struct 1350 * nfs_client struct
1335 */ 1351 */
1336 clp->cl_cons_state = NFS_CS_SESSION_INITING; 1352 nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
1337 } 1353 }
1338#endif /* CONFIG_NFS_V4_1 */ 1354#endif /* CONFIG_NFS_V4_1 */
1339 1355
1340 return nfs4_init_callback(clp); 1356 return nfs4_init_callback(clp);
1341} 1357}
1342 1358
1343/* 1359/**
1344 * Initialise an NFS4 client record 1360 * nfs4_init_client - Initialise an NFS4 client record
1361 *
1362 * @clp: nfs_client to initialise
1363 * @timeparms: timeout parameters for underlying RPC transport
1364 * @ip_addr: callback IP address in presentation format
1365 * @authflavor: authentication flavor for underlying RPC transport
1366 *
1367 * Returns pointer to an NFS client, or an ERR_PTR value.
1345 */ 1368 */
1346int nfs4_init_client(struct nfs_client *clp, 1369struct nfs_client *nfs4_init_client(struct nfs_client *clp,
1347 const struct rpc_timeout *timeparms, 1370 const struct rpc_timeout *timeparms,
1348 const char *ip_addr, 1371 const char *ip_addr,
1349 rpc_authflavor_t authflavour, 1372 rpc_authflavor_t authflavour)
1350 int noresvport)
1351{ 1373{
1352 char buf[INET6_ADDRSTRLEN + 1]; 1374 char buf[INET6_ADDRSTRLEN + 1];
1353 int error; 1375 int error;
@@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,
1355 if (clp->cl_cons_state == NFS_CS_READY) { 1377 if (clp->cl_cons_state == NFS_CS_READY) {
1356 /* the client is initialised already */ 1378 /* the client is initialised already */
1357 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); 1379 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
1358 return 0; 1380 return clp;
1359 } 1381 }
1360 1382
1361 /* Check NFS protocol revision and initialize RPC op vector */ 1383 /* Check NFS protocol revision and initialize RPC op vector */
1362 clp->rpc_ops = &nfs_v4_clientops; 1384 clp->rpc_ops = &nfs_v4_clientops;
1363 1385
1364 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1386 __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
1365 1, noresvport); 1387 error = nfs_create_rpc_client(clp, timeparms, authflavour);
1366 if (error < 0) 1388 if (error < 0)
1367 goto error; 1389 goto error;
1368 1390
@@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,
1395 1417
1396 if (!nfs4_has_session(clp)) 1418 if (!nfs4_has_session(clp))
1397 nfs_mark_client_ready(clp, NFS_CS_READY); 1419 nfs_mark_client_ready(clp, NFS_CS_READY);
1398 return 0; 1420 return clp;
1399 1421
1400error: 1422error:
1401 nfs_mark_client_ready(clp, error); 1423 nfs_mark_client_ready(clp, error);
1424 nfs_put_client(clp);
1402 dprintk("<-- nfs4_init_client() = xerror %d\n", error); 1425 dprintk("<-- nfs4_init_client() = xerror %d\n", error);
1403 return error; 1426 return ERR_PTR(error);
1404} 1427}
1405 1428
1406/* 1429/*
@@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,
1429 1452
1430 dprintk("--> nfs4_set_client()\n"); 1453 dprintk("--> nfs4_set_client()\n");
1431 1454
1455 if (server->flags & NFS_MOUNT_NORESVPORT)
1456 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
1457
1432 /* Allocate or find a client reference we can use */ 1458 /* Allocate or find a client reference we can use */
1433 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, 1459 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
1434 server->flags & NFS_MOUNT_NORESVPORT);
1435 if (IS_ERR(clp)) { 1460 if (IS_ERR(clp)) {
1436 error = PTR_ERR(clp); 1461 error = PTR_ERR(clp);
1437 goto error; 1462 goto error;
@@ -1465,8 +1490,8 @@ error:
1465 * the MDS. 1490 * the MDS.
1466 */ 1491 */
1467struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 1492struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1468 const struct sockaddr *ds_addr, 1493 const struct sockaddr *ds_addr, int ds_addrlen,
1469 int ds_addrlen, int ds_proto) 1494 int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
1470{ 1495{
1471 struct nfs_client_initdata cl_init = { 1496 struct nfs_client_initdata cl_init = {
1472 .addr = ds_addr, 1497 .addr = ds_addr,
@@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1474 .rpc_ops = &nfs_v4_clientops, 1499 .rpc_ops = &nfs_v4_clientops,
1475 .proto = ds_proto, 1500 .proto = ds_proto,
1476 .minorversion = mds_clp->cl_minorversion, 1501 .minorversion = mds_clp->cl_minorversion,
1477 .net = mds_clp->net, 1502 .net = mds_clp->cl_net,
1478 };
1479 struct rpc_timeout ds_timeout = {
1480 .to_initval = 15 * HZ,
1481 .to_maxval = 15 * HZ,
1482 .to_retries = 1,
1483 .to_exponential = 1,
1484 }; 1503 };
1504 struct rpc_timeout ds_timeout;
1485 struct nfs_client *clp; 1505 struct nfs_client *clp;
1486 1506
1487 /* 1507 /*
@@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1489 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS 1509 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1490 * (section 13.1 RFC 5661). 1510 * (section 13.1 RFC 5661).
1491 */ 1511 */
1512 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
1492 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 1513 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1493 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); 1514 mds_clp->cl_rpcclient->cl_auth->au_flavor);
1494 1515
1495 dprintk("<-- %s %p\n", __func__, clp); 1516 dprintk("<-- %s %p\n", __func__, clp);
1496 return clp; 1517 return clp;
@@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1701 rpc_protocol(parent_server->client), 1722 rpc_protocol(parent_server->client),
1702 parent_server->client->cl_timeout, 1723 parent_server->client->cl_timeout,
1703 parent_client->cl_mvops->minor_version, 1724 parent_client->cl_mvops->minor_version,
1704 parent_client->net); 1725 parent_client->cl_net);
1705 if (error < 0) 1726 if (error < 0)
1706 goto error; 1727 goto error;
1707 1728
@@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)
1805 idr_init(&nn->cb_ident_idr); 1826 idr_init(&nn->cb_ident_idr);
1806#endif 1827#endif
1807 spin_lock_init(&nn->nfs_client_lock); 1828 spin_lock_init(&nn->nfs_client_lock);
1829 nn->boot_time = CURRENT_TIME;
1808} 1830}
1809 1831
1810#ifdef CONFIG_PROC_FS 1832#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 89af1d269274..bd3a9601d32d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -316,6 +316,10 @@ out:
316 * nfs_client_return_marked_delegations - return previously marked delegations 316 * nfs_client_return_marked_delegations - return previously marked delegations
317 * @clp: nfs_client to process 317 * @clp: nfs_client to process
318 * 318 *
319 * Note that this function is designed to be called by the state
320 * manager thread. For this reason, it cannot flush the dirty data,
321 * since that could deadlock in case of a state recovery error.
322 *
319 * Returns zero on success, or a negative errno value. 323 * Returns zero on success, or a negative errno value.
320 */ 324 */
321int nfs_client_return_marked_delegations(struct nfs_client *clp) 325int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -340,11 +344,9 @@ restart:
340 server); 344 server);
341 rcu_read_unlock(); 345 rcu_read_unlock();
342 346
343 if (delegation != NULL) { 347 if (delegation != NULL)
344 filemap_flush(inode->i_mapping);
345 err = __nfs_inode_return_delegation(inode, 348 err = __nfs_inode_return_delegation(inode,
346 delegation, 0); 349 delegation, 0);
347 }
348 iput(inode); 350 iput(inode);
349 if (!err) 351 if (!err)
350 goto restart; 352 goto restart;
@@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
380 * nfs_inode_return_delegation - synchronously return a delegation 382 * nfs_inode_return_delegation - synchronously return a delegation
381 * @inode: inode to process 383 * @inode: inode to process
382 * 384 *
385 * This routine will always flush any dirty data to disk on the
386 * assumption that if we need to return the delegation, then
387 * we should stop caching.
388 *
383 * Returns zero on success, or a negative errno value. 389 * Returns zero on success, or a negative errno value.
384 */ 390 */
385int nfs_inode_return_delegation(struct inode *inode) 391int nfs_inode_return_delegation(struct inode *inode)
@@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
389 struct nfs_delegation *delegation; 395 struct nfs_delegation *delegation;
390 int err = 0; 396 int err = 0;
391 397
398 nfs_wb_all(inode);
392 if (rcu_access_pointer(nfsi->delegation) != NULL) { 399 if (rcu_access_pointer(nfsi->delegation) != NULL) {
393 delegation = nfs_detach_delegation(nfsi, server); 400 delegation = nfs_detach_delegation(nfsi, server);
394 if (delegation != NULL) { 401 if (delegation != NULL) {
395 nfs_wb_all(inode);
396 err = __nfs_inode_return_delegation(inode, delegation, 1); 402 err = __nfs_inode_return_delegation(inode, delegation, 1);
397 } 403 }
398 } 404 }
@@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,
538 struct nfs_client *clp = server->nfs_client; 544 struct nfs_client *clp = server->nfs_client;
539 struct nfs_delegation *delegation; 545 struct nfs_delegation *delegation;
540 546
547 filemap_flush(inode->i_mapping);
548
541 rcu_read_lock(); 549 rcu_read_lock();
542 delegation = rcu_dereference(NFS_I(inode)->delegation); 550 delegation = rcu_dereference(NFS_I(inode)->delegation);
543 551
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index cd6a7a8dadae..72709c4193fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
66 66
67static inline int nfs_inode_return_delegation(struct inode *inode) 67static inline int nfs_inode_return_delegation(struct inode *inode)
68{ 68{
69 nfs_wb_all(inode);
69 return 0; 70 return 0;
70} 71}
71#endif 72#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eedd24d0ad2e..0989a2099688 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -475,6 +475,29 @@ different:
475} 475}
476 476
477static 477static
478bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
479{
480 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
481 return false;
482 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
483 return true;
484 if (filp->f_pos == 0)
485 return true;
486 return false;
487}
488
489/*
490 * This function is called by the lookup code to request the use of
491 * readdirplus to accelerate any future lookups in the same
492 * directory.
493 */
494static
495void nfs_advise_use_readdirplus(struct inode *dir)
496{
497 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
498}
499
500static
478void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 501void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
479{ 502{
480 struct qstr filename = QSTR_INIT(entry->name, entry->len); 503 struct qstr filename = QSTR_INIT(entry->name, entry->len);
@@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
871 desc->file = filp; 894 desc->file = filp;
872 desc->dir_cookie = &dir_ctx->dir_cookie; 895 desc->dir_cookie = &dir_ctx->dir_cookie;
873 desc->decode = NFS_PROTO(inode)->decode_dirent; 896 desc->decode = NFS_PROTO(inode)->decode_dirent;
874 desc->plus = NFS_USE_READDIRPLUS(inode); 897 desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
875 898
876 nfs_block_sillyrename(dentry); 899 nfs_block_sillyrename(dentry);
877 res = nfs_revalidate_mapping(inode, filp->f_mapping); 900 res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1111 if (!inode) { 1134 if (!inode) {
1112 if (nfs_neg_need_reval(dir, dentry, nd)) 1135 if (nfs_neg_need_reval(dir, dentry, nd))
1113 goto out_bad; 1136 goto out_bad;
1114 goto out_valid; 1137 goto out_valid_noent;
1115 } 1138 }
1116 1139
1117 if (is_bad_inode(inode)) { 1140 if (is_bad_inode(inode)) {
@@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1140 if (fhandle == NULL || fattr == NULL) 1163 if (fhandle == NULL || fattr == NULL)
1141 goto out_error; 1164 goto out_error;
1142 1165
1143 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1166 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1144 if (error) 1167 if (error)
1145 goto out_bad; 1168 goto out_bad;
1146 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1169 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1153out_set_verifier: 1176out_set_verifier:
1154 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1177 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1155 out_valid: 1178 out_valid:
1179 /* Success: notify readdir to use READDIRPLUS */
1180 nfs_advise_use_readdirplus(dir);
1181 out_valid_noent:
1156 dput(parent); 1182 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", 1183 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
1158 __func__, dentry->d_parent->d_name.name, 1184 __func__, dentry->d_parent->d_name.name,
@@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1296 parent = dentry->d_parent; 1322 parent = dentry->d_parent;
1297 /* Protect against concurrent sillydeletes */ 1323 /* Protect against concurrent sillydeletes */
1298 nfs_block_sillyrename(parent); 1324 nfs_block_sillyrename(parent);
1299 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1325 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1300 if (error == -ENOENT) 1326 if (error == -ENOENT)
1301 goto no_entry; 1327 goto no_entry;
1302 if (error < 0) { 1328 if (error < 0) {
@@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1308 if (IS_ERR(res)) 1334 if (IS_ERR(res))
1309 goto out_unblock_sillyrename; 1335 goto out_unblock_sillyrename;
1310 1336
1337 /* Success: notify readdir to use READDIRPLUS */
1338 nfs_advise_use_readdirplus(dir);
1339
1311no_entry: 1340no_entry:
1312 res = d_materialise_unique(dentry, inode); 1341 res = d_materialise_unique(dentry, inode);
1313 if (res != NULL) { 1342 if (res != NULL) {
@@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1643 if (dentry->d_inode) 1672 if (dentry->d_inode)
1644 goto out; 1673 goto out;
1645 if (fhandle->size == 0) { 1674 if (fhandle->size == 0) {
1646 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); 1675 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1647 if (error) 1676 if (error)
1648 goto out_error; 1677 goto out_error;
1649 } 1678 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f7bdd3..23d170bc44f4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
56 56
57#include "internal.h" 57#include "internal.h"
58#include "iostat.h" 58#include "iostat.h"
59#include "pnfs.h"
59 60
60#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
61 62
@@ -81,16 +82,19 @@ struct nfs_direct_req {
81 struct completion completion; /* wait for i/o completion */ 82 struct completion completion; /* wait for i/o completion */
82 83
83 /* commit state */ 84 /* commit state */
84 struct list_head rewrite_list; /* saved nfs_write_data structs */ 85 struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
85 struct nfs_write_data * commit_data; /* special write_data for commits */ 86 struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
87 struct work_struct work;
86 int flags; 88 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 89#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
88#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 90#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
89 struct nfs_writeverf verf; /* unstable write verifier */ 91 struct nfs_writeverf verf; /* unstable write verifier */
90}; 92};
91 93
94static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
95static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
92static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); 96static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
93static const struct rpc_call_ops nfs_write_direct_ops; 97static void nfs_direct_write_schedule_work(struct work_struct *work);
94 98
95static inline void get_dreq(struct nfs_direct_req *dreq) 99static inline void get_dreq(struct nfs_direct_req *dreq)
96{ 100{
@@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124 return -EINVAL; 128 return -EINVAL;
125} 129}
126 130
127static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
128{
129 unsigned int npages;
130 unsigned int i;
131
132 if (count == 0)
133 return;
134 pages += (pgbase >> PAGE_SHIFT);
135 npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136 for (i = 0; i < npages; i++) {
137 struct page *page = pages[i];
138 if (!PageCompound(page))
139 set_page_dirty(page);
140 }
141}
142
143static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 131static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
144{ 132{
145 unsigned int i; 133 unsigned int i;
@@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
147 page_cache_release(pages[i]); 135 page_cache_release(pages[i]);
148} 136}
149 137
138void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
139 struct nfs_direct_req *dreq)
140{
141 cinfo->lock = &dreq->lock;
142 cinfo->mds = &dreq->mds_cinfo;
143 cinfo->ds = &dreq->ds_cinfo;
144 cinfo->dreq = dreq;
145 cinfo->completion_ops = &nfs_direct_commit_completion_ops;
146}
147
150static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 148static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
151{ 149{
152 struct nfs_direct_req *dreq; 150 struct nfs_direct_req *dreq;
153 151
154 dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); 152 dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
155 if (!dreq) 153 if (!dreq)
156 return NULL; 154 return NULL;
157 155
158 kref_init(&dreq->kref); 156 kref_init(&dreq->kref);
159 kref_get(&dreq->kref); 157 kref_get(&dreq->kref);
160 init_completion(&dreq->completion); 158 init_completion(&dreq->completion);
161 INIT_LIST_HEAD(&dreq->rewrite_list); 159 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
162 dreq->iocb = NULL; 160 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
163 dreq->ctx = NULL;
164 dreq->l_ctx = NULL;
165 spin_lock_init(&dreq->lock); 161 spin_lock_init(&dreq->lock);
166 atomic_set(&dreq->io_count, 0);
167 dreq->count = 0;
168 dreq->error = 0;
169 dreq->flags = 0;
170 162
171 return dreq; 163 return dreq;
172} 164}
@@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226 nfs_direct_req_release(dreq); 218 nfs_direct_req_release(dreq);
227} 219}
228 220
229/* 221static void nfs_direct_readpage_release(struct nfs_page *req)
230 * We must hold a reference to all the pages in this direct read request
231 * until the RPCs complete. This could be long *after* we are woken up in
232 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233 */
234static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
235{ 222{
236 struct nfs_read_data *data = calldata; 223 dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
237 224 req->wb_context->dentry->d_inode->i_sb->s_id,
238 nfs_readpage_result(task, data); 225 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
226 req->wb_bytes,
227 (long long)req_offset(req));
228 nfs_release_request(req);
239} 229}
240 230
241static void nfs_direct_read_release(void *calldata) 231static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
242{ 232{
233 unsigned long bytes = 0;
234 struct nfs_direct_req *dreq = hdr->dreq;
243 235
244 struct nfs_read_data *data = calldata; 236 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
245 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 237 goto out_put;
246 int status = data->task.tk_status;
247 238
248 spin_lock(&dreq->lock); 239 spin_lock(&dreq->lock);
249 if (unlikely(status < 0)) { 240 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
250 dreq->error = status; 241 dreq->error = hdr->error;
251 spin_unlock(&dreq->lock); 242 else
252 } else { 243 dreq->count += hdr->good_bytes;
253 dreq->count += data->res.count; 244 spin_unlock(&dreq->lock);
254 spin_unlock(&dreq->lock);
255 nfs_direct_dirty_pages(data->pagevec,
256 data->args.pgbase,
257 data->res.count);
258 }
259 nfs_direct_release_pages(data->pagevec, data->npages);
260 245
246 while (!list_empty(&hdr->pages)) {
247 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
248 struct page *page = req->wb_page;
249
250 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
251 if (bytes > hdr->good_bytes)
252 zero_user(page, 0, PAGE_SIZE);
253 else if (hdr->good_bytes - bytes < PAGE_SIZE)
254 zero_user_segment(page,
255 hdr->good_bytes & ~PAGE_MASK,
256 PAGE_SIZE);
257 }
258 if (!PageCompound(page)) {
259 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
260 if (bytes < hdr->good_bytes)
261 set_page_dirty(page);
262 } else
263 set_page_dirty(page);
264 }
265 bytes += req->wb_bytes;
266 nfs_list_remove_request(req);
267 nfs_direct_readpage_release(req);
268 }
269out_put:
261 if (put_dreq(dreq)) 270 if (put_dreq(dreq))
262 nfs_direct_complete(dreq); 271 nfs_direct_complete(dreq);
263 nfs_readdata_free(data); 272 hdr->release(hdr);
273}
274
275static void nfs_read_sync_pgio_error(struct list_head *head)
276{
277 struct nfs_page *req;
278
279 while (!list_empty(head)) {
280 req = nfs_list_entry(head->next);
281 nfs_list_remove_request(req);
282 nfs_release_request(req);
283 }
264} 284}
265 285
266static const struct rpc_call_ops nfs_read_direct_ops = { 286static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
267 .rpc_call_prepare = nfs_read_prepare, 287{
268 .rpc_call_done = nfs_direct_read_result, 288 get_dreq(hdr->dreq);
269 .rpc_release = nfs_direct_read_release, 289}
290
291static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
292 .error_cleanup = nfs_read_sync_pgio_error,
293 .init_hdr = nfs_direct_pgio_init,
294 .completion = nfs_direct_read_completion,
270}; 295};
271 296
272/* 297/*
@@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
276 * handled automatically by nfs_direct_read_result(). Otherwise, if 301 * handled automatically by nfs_direct_read_result(). Otherwise, if
277 * no requests have been sent, just return an error. 302 * no requests have been sent, just return an error.
278 */ 303 */
279static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, 304static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
280 const struct iovec *iov, 305 const struct iovec *iov,
281 loff_t pos) 306 loff_t pos)
282{ 307{
308 struct nfs_direct_req *dreq = desc->pg_dreq;
283 struct nfs_open_context *ctx = dreq->ctx; 309 struct nfs_open_context *ctx = dreq->ctx;
284 struct inode *inode = ctx->dentry->d_inode; 310 struct inode *inode = ctx->dentry->d_inode;
285 unsigned long user_addr = (unsigned long)iov->iov_base; 311 unsigned long user_addr = (unsigned long)iov->iov_base;
286 size_t count = iov->iov_len; 312 size_t count = iov->iov_len;
287 size_t rsize = NFS_SERVER(inode)->rsize; 313 size_t rsize = NFS_SERVER(inode)->rsize;
288 struct rpc_task *task;
289 struct rpc_message msg = {
290 .rpc_cred = ctx->cred,
291 };
292 struct rpc_task_setup task_setup_data = {
293 .rpc_client = NFS_CLIENT(inode),
294 .rpc_message = &msg,
295 .callback_ops = &nfs_read_direct_ops,
296 .workqueue = nfsiod_workqueue,
297 .flags = RPC_TASK_ASYNC,
298 };
299 unsigned int pgbase; 314 unsigned int pgbase;
300 int result; 315 int result;
301 ssize_t started = 0; 316 ssize_t started = 0;
317 struct page **pagevec = NULL;
318 unsigned int npages;
302 319
303 do { 320 do {
304 struct nfs_read_data *data;
305 size_t bytes; 321 size_t bytes;
322 int i;
306 323
307 pgbase = user_addr & ~PAGE_MASK; 324 pgbase = user_addr & ~PAGE_MASK;
308 bytes = min(rsize,count); 325 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
309 326
310 result = -ENOMEM; 327 result = -ENOMEM;
311 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); 328 npages = nfs_page_array_len(pgbase, bytes);
312 if (unlikely(!data)) 329 if (!pagevec)
330 pagevec = kmalloc(npages * sizeof(struct page *),
331 GFP_KERNEL);
332 if (!pagevec)
313 break; 333 break;
314
315 down_read(&current->mm->mmap_sem); 334 down_read(&current->mm->mmap_sem);
316 result = get_user_pages(current, current->mm, user_addr, 335 result = get_user_pages(current, current->mm, user_addr,
317 data->npages, 1, 0, data->pagevec, NULL); 336 npages, 1, 0, pagevec, NULL);
318 up_read(&current->mm->mmap_sem); 337 up_read(&current->mm->mmap_sem);
319 if (result < 0) { 338 if (result < 0)
320 nfs_readdata_free(data);
321 break; 339 break;
322 } 340 if ((unsigned)result < npages) {
323 if ((unsigned)result < data->npages) {
324 bytes = result * PAGE_SIZE; 341 bytes = result * PAGE_SIZE;
325 if (bytes <= pgbase) { 342 if (bytes <= pgbase) {
326 nfs_direct_release_pages(data->pagevec, result); 343 nfs_direct_release_pages(pagevec, result);
327 nfs_readdata_free(data);
328 break; 344 break;
329 } 345 }
330 bytes -= pgbase; 346 bytes -= pgbase;
331 data->npages = result; 347 npages = result;
332 } 348 }
333 349
334 get_dreq(dreq); 350 for (i = 0; i < npages; i++) {
335 351 struct nfs_page *req;
336 data->req = (struct nfs_page *) dreq; 352 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
337 data->inode = inode; 353 /* XXX do we need to do the eof zeroing found in async_filler? */
338 data->cred = msg.rpc_cred; 354 req = nfs_create_request(dreq->ctx, dreq->inode,
339 data->args.fh = NFS_FH(inode); 355 pagevec[i],
340 data->args.context = ctx; 356 pgbase, req_len);
341 data->args.lock_context = dreq->l_ctx; 357 if (IS_ERR(req)) {
342 data->args.offset = pos; 358 result = PTR_ERR(req);
343 data->args.pgbase = pgbase; 359 break;
344 data->args.pages = data->pagevec; 360 }
345 data->args.count = bytes; 361 req->wb_index = pos >> PAGE_SHIFT;
346 data->res.fattr = &data->fattr; 362 req->wb_offset = pos & ~PAGE_MASK;
347 data->res.eof = 0; 363 if (!nfs_pageio_add_request(desc, req)) {
348 data->res.count = bytes; 364 result = desc->pg_error;
349 nfs_fattr_init(&data->fattr); 365 nfs_release_request(req);
350 msg.rpc_argp = &data->args; 366 break;
351 msg.rpc_resp = &data->res; 367 }
352 368 pgbase = 0;
353 task_setup_data.task = &data->task; 369 bytes -= req_len;
354 task_setup_data.callback_data = data; 370 started += req_len;
355 NFS_PROTO(inode)->read_setup(data, &msg); 371 user_addr += req_len;
356 372 pos += req_len;
357 task = rpc_run_task(&task_setup_data); 373 count -= req_len;
358 if (IS_ERR(task)) 374 }
359 break; 375 /* The nfs_page now hold references to these pages */
360 rpc_put_task(task); 376 nfs_direct_release_pages(pagevec, npages);
361 377 } while (count != 0 && result >= 0);
362 dprintk("NFS: %5u initiated direct read call " 378
363 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 379 kfree(pagevec);
364 data->task.tk_pid,
365 inode->i_sb->s_id,
366 (long long)NFS_FILEID(inode),
367 bytes,
368 (unsigned long long)data->args.offset);
369
370 started += bytes;
371 user_addr += bytes;
372 pos += bytes;
373 /* FIXME: Remove this unnecessary math from final patch */
374 pgbase += bytes;
375 pgbase &= ~PAGE_MASK;
376 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
377
378 count -= bytes;
379 } while (count != 0);
380 380
381 if (started) 381 if (started)
382 return started; 382 return started;
@@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
388 unsigned long nr_segs, 388 unsigned long nr_segs,
389 loff_t pos) 389 loff_t pos)
390{ 390{
391 struct nfs_pageio_descriptor desc;
391 ssize_t result = -EINVAL; 392 ssize_t result = -EINVAL;
392 size_t requested_bytes = 0; 393 size_t requested_bytes = 0;
393 unsigned long seg; 394 unsigned long seg;
394 395
396 nfs_pageio_init_read(&desc, dreq->inode,
397 &nfs_direct_read_completion_ops);
395 get_dreq(dreq); 398 get_dreq(dreq);
399 desc.pg_dreq = dreq;
396 400
397 for (seg = 0; seg < nr_segs; seg++) { 401 for (seg = 0; seg < nr_segs; seg++) {
398 const struct iovec *vec = &iov[seg]; 402 const struct iovec *vec = &iov[seg];
399 result = nfs_direct_read_schedule_segment(dreq, vec, pos); 403 result = nfs_direct_read_schedule_segment(&desc, vec, pos);
400 if (result < 0) 404 if (result < 0)
401 break; 405 break;
402 requested_bytes += result; 406 requested_bytes += result;
@@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
405 pos += vec->iov_len; 409 pos += vec->iov_len;
406 } 410 }
407 411
412 nfs_pageio_complete(&desc);
413
408 /* 414 /*
409 * If no bytes were started, return the error, and let the 415 * If no bytes were started, return the error, and let the
410 * generic layer handle the completion. 416 * generic layer handle the completion.
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
441 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 447 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
442 if (!result) 448 if (!result)
443 result = nfs_direct_wait(dreq); 449 result = nfs_direct_wait(dreq);
450 NFS_I(inode)->read_io += result;
444out_release: 451out_release:
445 nfs_direct_req_release(dreq); 452 nfs_direct_req_release(dreq);
446out: 453out:
447 return result; 454 return result;
448} 455}
449 456
450static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
451{
452 while (!list_empty(&dreq->rewrite_list)) {
453 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
454 list_del(&data->pages);
455 nfs_direct_release_pages(data->pagevec, data->npages);
456 nfs_writedata_free(data);
457 }
458}
459
460#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 457#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
461static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 458static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
462{ 459{
463 struct inode *inode = dreq->inode; 460 struct nfs_pageio_descriptor desc;
464 struct list_head *p; 461 struct nfs_page *req, *tmp;
465 struct nfs_write_data *data; 462 LIST_HEAD(reqs);
466 struct rpc_task *task; 463 struct nfs_commit_info cinfo;
467 struct rpc_message msg = { 464 LIST_HEAD(failed);
468 .rpc_cred = dreq->ctx->cred, 465
469 }; 466 nfs_init_cinfo_from_dreq(&cinfo, dreq);
470 struct rpc_task_setup task_setup_data = { 467 pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
471 .rpc_client = NFS_CLIENT(inode), 468 spin_lock(cinfo.lock);
472 .rpc_message = &msg, 469 nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
473 .callback_ops = &nfs_write_direct_ops, 470 spin_unlock(cinfo.lock);
474 .workqueue = nfsiod_workqueue,
475 .flags = RPC_TASK_ASYNC,
476 };
477 471
478 dreq->count = 0; 472 dreq->count = 0;
479 get_dreq(dreq); 473 get_dreq(dreq);
480 474
481 list_for_each(p, &dreq->rewrite_list) { 475 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
482 data = list_entry(p, struct nfs_write_data, pages); 476 &nfs_direct_write_completion_ops);
483 477 desc.pg_dreq = dreq;
484 get_dreq(dreq); 478
485 479 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 /* Use stable writes */ 480 if (!nfs_pageio_add_request(&desc, req)) {
487 data->args.stable = NFS_FILE_SYNC; 481 nfs_list_add_request(req, &failed);
488 482 spin_lock(cinfo.lock);
489 /* 483 dreq->flags = 0;
490 * Reset data->res. 484 dreq->error = -EIO;
491 */ 485 spin_unlock(cinfo.lock);
492 nfs_fattr_init(&data->fattr); 486 }
493 data->res.count = data->args.count;
494 memset(&data->verf, 0, sizeof(data->verf));
495
496 /*
497 * Reuse data->task; data->args should not have changed
498 * since the original request was sent.
499 */
500 task_setup_data.task = &data->task;
501 task_setup_data.callback_data = data;
502 msg.rpc_argp = &data->args;
503 msg.rpc_resp = &data->res;
504 NFS_PROTO(inode)->write_setup(data, &msg);
505
506 /*
507 * We're called via an RPC callback, so BKL is already held.
508 */
509 task = rpc_run_task(&task_setup_data);
510 if (!IS_ERR(task))
511 rpc_put_task(task);
512
513 dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
514 data->task.tk_pid,
515 inode->i_sb->s_id,
516 (long long)NFS_FILEID(inode),
517 data->args.count,
518 (unsigned long long)data->args.offset);
519 } 487 }
488 nfs_pageio_complete(&desc);
520 489
521 if (put_dreq(dreq)) 490 while (!list_empty(&failed))
522 nfs_direct_write_complete(dreq, inode); 491 nfs_unlock_and_release_request(req);
523}
524
525static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
526{
527 struct nfs_write_data *data = calldata;
528 492
529 /* Call the NFS version-specific code */ 493 if (put_dreq(dreq))
530 NFS_PROTO(data->inode)->commit_done(task, data); 494 nfs_direct_write_complete(dreq, dreq->inode);
531} 495}
532 496
533static void nfs_direct_commit_release(void *calldata) 497static void nfs_direct_commit_complete(struct nfs_commit_data *data)
534{ 498{
535 struct nfs_write_data *data = calldata; 499 struct nfs_direct_req *dreq = data->dreq;
536 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 500 struct nfs_commit_info cinfo;
501 struct nfs_page *req;
537 int status = data->task.tk_status; 502 int status = data->task.tk_status;
538 503
504 nfs_init_cinfo_from_dreq(&cinfo, dreq);
539 if (status < 0) { 505 if (status < 0) {
540 dprintk("NFS: %5u commit failed with error %d.\n", 506 dprintk("NFS: %5u commit failed with error %d.\n",
541 data->task.tk_pid, status); 507 data->task.tk_pid, status);
542 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 508 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
543 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 509 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
544 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 510 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)
546 } 512 }
547 513
548 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 514 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
549 nfs_direct_write_complete(dreq, data->inode); 515 while (!list_empty(&data->pages)) {
550 nfs_commit_free(data); 516 req = nfs_list_entry(data->pages.next);
517 nfs_list_remove_request(req);
518 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
519 /* Note the rewrite will go through mds */
520 kref_get(&req->wb_kref);
521 nfs_mark_request_commit(req, NULL, &cinfo);
522 }
523 nfs_unlock_and_release_request(req);
524 }
525
526 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
527 nfs_direct_write_complete(dreq, data->inode);
528}
529
530static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
531{
532 /* There is no lock to clear */
551} 533}
552 534
553static const struct rpc_call_ops nfs_commit_direct_ops = { 535static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
554 .rpc_call_prepare = nfs_write_prepare, 536 .completion = nfs_direct_commit_complete,
555 .rpc_call_done = nfs_direct_commit_result, 537 .error_cleanup = nfs_direct_error_cleanup,
556 .rpc_release = nfs_direct_commit_release,
557}; 538};
558 539
559static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 540static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
560{ 541{
561 struct nfs_write_data *data = dreq->commit_data; 542 int res;
562 struct rpc_task *task; 543 struct nfs_commit_info cinfo;
563 struct rpc_message msg = { 544 LIST_HEAD(mds_list);
564 .rpc_argp = &data->args, 545
565 .rpc_resp = &data->res, 546 nfs_init_cinfo_from_dreq(&cinfo, dreq);
566 .rpc_cred = dreq->ctx->cred, 547 nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
567 }; 548 res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
568 struct rpc_task_setup task_setup_data = { 549 if (res < 0) /* res == -ENOMEM */
569 .task = &data->task, 550 nfs_direct_write_reschedule(dreq);
570 .rpc_client = NFS_CLIENT(dreq->inode),
571 .rpc_message = &msg,
572 .callback_ops = &nfs_commit_direct_ops,
573 .callback_data = data,
574 .workqueue = nfsiod_workqueue,
575 .flags = RPC_TASK_ASYNC,
576 };
577
578 data->inode = dreq->inode;
579 data->cred = msg.rpc_cred;
580
581 data->args.fh = NFS_FH(data->inode);
582 data->args.offset = 0;
583 data->args.count = 0;
584 data->args.context = dreq->ctx;
585 data->args.lock_context = dreq->l_ctx;
586 data->res.count = 0;
587 data->res.fattr = &data->fattr;
588 data->res.verf = &data->verf;
589 nfs_fattr_init(&data->fattr);
590
591 NFS_PROTO(data->inode)->commit_setup(data, &msg);
592
593 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
594 dreq->commit_data = NULL;
595
596 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
597
598 task = rpc_run_task(&task_setup_data);
599 if (!IS_ERR(task))
600 rpc_put_task(task);
601} 551}
602 552
603static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 553static void nfs_direct_write_schedule_work(struct work_struct *work)
604{ 554{
555 struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
605 int flags = dreq->flags; 556 int flags = dreq->flags;
606 557
607 dreq->flags = 0; 558 dreq->flags = 0;
@@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
613 nfs_direct_write_reschedule(dreq); 564 nfs_direct_write_reschedule(dreq);
614 break; 565 break;
615 default: 566 default:
616 if (dreq->commit_data != NULL) 567 nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
617 nfs_commit_free(dreq->commit_data);
618 nfs_direct_free_writedata(dreq);
619 nfs_zap_mapping(inode, inode->i_mapping);
620 nfs_direct_complete(dreq); 568 nfs_direct_complete(dreq);
621 } 569 }
622} 570}
623 571
624static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 572static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
625{ 573{
626 dreq->commit_data = nfs_commitdata_alloc(); 574 schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
627 if (dreq->commit_data != NULL)
628 dreq->commit_data->req = (struct nfs_page *) dreq;
629} 575}
576
630#else 577#else
631static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 578static void nfs_direct_write_schedule_work(struct work_struct *work)
632{ 579{
633 dreq->commit_data = NULL;
634} 580}
635 581
636static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 582static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
637{ 583{
638 nfs_direct_free_writedata(dreq);
639 nfs_zap_mapping(inode, inode->i_mapping); 584 nfs_zap_mapping(inode, inode->i_mapping);
640 nfs_direct_complete(dreq); 585 nfs_direct_complete(dreq);
641} 586}
642#endif 587#endif
643 588
644static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
645{
646 struct nfs_write_data *data = calldata;
647
648 nfs_writeback_done(task, data);
649}
650
651/* 589/*
652 * NB: Return the value of the first error return code. Subsequent 590 * NB: Return the value of the first error return code. Subsequent
653 * errors after the first one are ignored. 591 * errors after the first one are ignored.
654 */ 592 */
655static void nfs_direct_write_release(void *calldata)
656{
657 struct nfs_write_data *data = calldata;
658 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
659 int status = data->task.tk_status;
660
661 spin_lock(&dreq->lock);
662
663 if (unlikely(status < 0)) {
664 /* An error has occurred, so we should not commit */
665 dreq->flags = 0;
666 dreq->error = status;
667 }
668 if (unlikely(dreq->error != 0))
669 goto out_unlock;
670
671 dreq->count += data->res.count;
672
673 if (data->res.verf->committed != NFS_FILE_SYNC) {
674 switch (dreq->flags) {
675 case 0:
676 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
677 dreq->flags = NFS_ODIRECT_DO_COMMIT;
678 break;
679 case NFS_ODIRECT_DO_COMMIT:
680 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
681 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
682 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
683 }
684 }
685 }
686out_unlock:
687 spin_unlock(&dreq->lock);
688
689 if (put_dreq(dreq))
690 nfs_direct_write_complete(dreq, data->inode);
691}
692
693static const struct rpc_call_ops nfs_write_direct_ops = {
694 .rpc_call_prepare = nfs_write_prepare,
695 .rpc_call_done = nfs_direct_write_result,
696 .rpc_release = nfs_direct_write_release,
697};
698
699/* 593/*
700 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 594 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
701 * operation. If nfs_writedata_alloc() or get_user_pages() fails, 595 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
703 * handled automatically by nfs_direct_write_result(). Otherwise, if 597 * handled automatically by nfs_direct_write_result(). Otherwise, if
704 * no requests have been sent, just return an error. 598 * no requests have been sent, just return an error.
705 */ 599 */
706static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, 600static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
707 const struct iovec *iov, 601 const struct iovec *iov,
708 loff_t pos, int sync) 602 loff_t pos)
709{ 603{
604 struct nfs_direct_req *dreq = desc->pg_dreq;
710 struct nfs_open_context *ctx = dreq->ctx; 605 struct nfs_open_context *ctx = dreq->ctx;
711 struct inode *inode = ctx->dentry->d_inode; 606 struct inode *inode = ctx->dentry->d_inode;
712 unsigned long user_addr = (unsigned long)iov->iov_base; 607 unsigned long user_addr = (unsigned long)iov->iov_base;
713 size_t count = iov->iov_len; 608 size_t count = iov->iov_len;
714 struct rpc_task *task;
715 struct rpc_message msg = {
716 .rpc_cred = ctx->cred,
717 };
718 struct rpc_task_setup task_setup_data = {
719 .rpc_client = NFS_CLIENT(inode),
720 .rpc_message = &msg,
721 .callback_ops = &nfs_write_direct_ops,
722 .workqueue = nfsiod_workqueue,
723 .flags = RPC_TASK_ASYNC,
724 };
725 size_t wsize = NFS_SERVER(inode)->wsize; 609 size_t wsize = NFS_SERVER(inode)->wsize;
726 unsigned int pgbase; 610 unsigned int pgbase;
727 int result; 611 int result;
728 ssize_t started = 0; 612 ssize_t started = 0;
613 struct page **pagevec = NULL;
614 unsigned int npages;
729 615
730 do { 616 do {
731 struct nfs_write_data *data;
732 size_t bytes; 617 size_t bytes;
618 int i;
733 619
734 pgbase = user_addr & ~PAGE_MASK; 620 pgbase = user_addr & ~PAGE_MASK;
735 bytes = min(wsize,count); 621 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
736 622
737 result = -ENOMEM; 623 result = -ENOMEM;
738 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); 624 npages = nfs_page_array_len(pgbase, bytes);
739 if (unlikely(!data)) 625 if (!pagevec)
626 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
627 if (!pagevec)
740 break; 628 break;
741 629
742 down_read(&current->mm->mmap_sem); 630 down_read(&current->mm->mmap_sem);
743 result = get_user_pages(current, current->mm, user_addr, 631 result = get_user_pages(current, current->mm, user_addr,
744 data->npages, 0, 0, data->pagevec, NULL); 632 npages, 0, 0, pagevec, NULL);
745 up_read(&current->mm->mmap_sem); 633 up_read(&current->mm->mmap_sem);
746 if (result < 0) { 634 if (result < 0)
747 nfs_writedata_free(data);
748 break; 635 break;
749 } 636
750 if ((unsigned)result < data->npages) { 637 if ((unsigned)result < npages) {
751 bytes = result * PAGE_SIZE; 638 bytes = result * PAGE_SIZE;
752 if (bytes <= pgbase) { 639 if (bytes <= pgbase) {
753 nfs_direct_release_pages(data->pagevec, result); 640 nfs_direct_release_pages(pagevec, result);
754 nfs_writedata_free(data);
755 break; 641 break;
756 } 642 }
757 bytes -= pgbase; 643 bytes -= pgbase;
758 data->npages = result; 644 npages = result;
759 } 645 }
760 646
761 get_dreq(dreq); 647 for (i = 0; i < npages; i++) {
762 648 struct nfs_page *req;
763 list_move_tail(&data->pages, &dreq->rewrite_list); 649 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
764
765 data->req = (struct nfs_page *) dreq;
766 data->inode = inode;
767 data->cred = msg.rpc_cred;
768 data->args.fh = NFS_FH(inode);
769 data->args.context = ctx;
770 data->args.lock_context = dreq->l_ctx;
771 data->args.offset = pos;
772 data->args.pgbase = pgbase;
773 data->args.pages = data->pagevec;
774 data->args.count = bytes;
775 data->args.stable = sync;
776 data->res.fattr = &data->fattr;
777 data->res.count = bytes;
778 data->res.verf = &data->verf;
779 nfs_fattr_init(&data->fattr);
780
781 task_setup_data.task = &data->task;
782 task_setup_data.callback_data = data;
783 msg.rpc_argp = &data->args;
784 msg.rpc_resp = &data->res;
785 NFS_PROTO(inode)->write_setup(data, &msg);
786
787 task = rpc_run_task(&task_setup_data);
788 if (IS_ERR(task))
789 break;
790 rpc_put_task(task);
791
792 dprintk("NFS: %5u initiated direct write call "
793 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
794 data->task.tk_pid,
795 inode->i_sb->s_id,
796 (long long)NFS_FILEID(inode),
797 bytes,
798 (unsigned long long)data->args.offset);
799 650
800 started += bytes; 651 req = nfs_create_request(dreq->ctx, dreq->inode,
801 user_addr += bytes; 652 pagevec[i],
802 pos += bytes; 653 pgbase, req_len);
803 654 if (IS_ERR(req)) {
804 /* FIXME: Remove this useless math from the final patch */ 655 result = PTR_ERR(req);
805 pgbase += bytes; 656 break;
806 pgbase &= ~PAGE_MASK; 657 }
807 BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); 658 nfs_lock_request(req);
659 req->wb_index = pos >> PAGE_SHIFT;
660 req->wb_offset = pos & ~PAGE_MASK;
661 if (!nfs_pageio_add_request(desc, req)) {
662 result = desc->pg_error;
663 nfs_unlock_and_release_request(req);
664 break;
665 }
666 pgbase = 0;
667 bytes -= req_len;
668 started += req_len;
669 user_addr += req_len;
670 pos += req_len;
671 count -= req_len;
672 }
673 /* The nfs_page now hold references to these pages */
674 nfs_direct_release_pages(pagevec, npages);
675 } while (count != 0 && result >= 0);
808 676
809 count -= bytes; 677 kfree(pagevec);
810 } while (count != 0);
811 678
812 if (started) 679 if (started)
813 return started; 680 return started;
814 return result < 0 ? (ssize_t) result : -EFAULT; 681 return result < 0 ? (ssize_t) result : -EFAULT;
815} 682}
816 683
684static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
685{
686 struct nfs_direct_req *dreq = hdr->dreq;
687 struct nfs_commit_info cinfo;
688 int bit = -1;
689 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
690
691 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
692 goto out_put;
693
694 nfs_init_cinfo_from_dreq(&cinfo, dreq);
695
696 spin_lock(&dreq->lock);
697
698 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
699 dreq->flags = 0;
700 dreq->error = hdr->error;
701 }
702 if (dreq->error != 0)
703 bit = NFS_IOHDR_ERROR;
704 else {
705 dreq->count += hdr->good_bytes;
706 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
707 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
708 bit = NFS_IOHDR_NEED_RESCHED;
709 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
710 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
711 bit = NFS_IOHDR_NEED_RESCHED;
712 else if (dreq->flags == 0) {
713 memcpy(&dreq->verf, &req->wb_verf,
714 sizeof(dreq->verf));
715 bit = NFS_IOHDR_NEED_COMMIT;
716 dreq->flags = NFS_ODIRECT_DO_COMMIT;
717 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
718 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
719 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
720 bit = NFS_IOHDR_NEED_RESCHED;
721 } else
722 bit = NFS_IOHDR_NEED_COMMIT;
723 }
724 }
725 }
726 spin_unlock(&dreq->lock);
727
728 while (!list_empty(&hdr->pages)) {
729 req = nfs_list_entry(hdr->pages.next);
730 nfs_list_remove_request(req);
731 switch (bit) {
732 case NFS_IOHDR_NEED_RESCHED:
733 case NFS_IOHDR_NEED_COMMIT:
734 kref_get(&req->wb_kref);
735 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
736 }
737 nfs_unlock_and_release_request(req);
738 }
739
740out_put:
741 if (put_dreq(dreq))
742 nfs_direct_write_complete(dreq, hdr->inode);
743 hdr->release(hdr);
744}
745
746static void nfs_write_sync_pgio_error(struct list_head *head)
747{
748 struct nfs_page *req;
749
750 while (!list_empty(head)) {
751 req = nfs_list_entry(head->next);
752 nfs_list_remove_request(req);
753 nfs_unlock_and_release_request(req);
754 }
755}
756
757static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
758 .error_cleanup = nfs_write_sync_pgio_error,
759 .init_hdr = nfs_direct_pgio_init,
760 .completion = nfs_direct_write_completion,
761};
762
817static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 763static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
818 const struct iovec *iov, 764 const struct iovec *iov,
819 unsigned long nr_segs, 765 unsigned long nr_segs,
820 loff_t pos, int sync) 766 loff_t pos)
821{ 767{
768 struct nfs_pageio_descriptor desc;
822 ssize_t result = 0; 769 ssize_t result = 0;
823 size_t requested_bytes = 0; 770 size_t requested_bytes = 0;
824 unsigned long seg; 771 unsigned long seg;
825 772
773 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
774 &nfs_direct_write_completion_ops);
775 desc.pg_dreq = dreq;
826 get_dreq(dreq); 776 get_dreq(dreq);
827 777
828 for (seg = 0; seg < nr_segs; seg++) { 778 for (seg = 0; seg < nr_segs; seg++) {
829 const struct iovec *vec = &iov[seg]; 779 const struct iovec *vec = &iov[seg];
830 result = nfs_direct_write_schedule_segment(dreq, vec, 780 result = nfs_direct_write_schedule_segment(&desc, vec, pos);
831 pos, sync);
832 if (result < 0) 781 if (result < 0)
833 break; 782 break;
834 requested_bytes += result; 783 requested_bytes += result;
@@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
836 break; 785 break;
837 pos += vec->iov_len; 786 pos += vec->iov_len;
838 } 787 }
788 nfs_pageio_complete(&desc);
789 NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
839 790
840 /* 791 /*
841 * If no bytes were started, return the error, and let the 792 * If no bytes were started, return the error, and let the
@@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
858 ssize_t result = -ENOMEM; 809 ssize_t result = -ENOMEM;
859 struct inode *inode = iocb->ki_filp->f_mapping->host; 810 struct inode *inode = iocb->ki_filp->f_mapping->host;
860 struct nfs_direct_req *dreq; 811 struct nfs_direct_req *dreq;
861 size_t wsize = NFS_SERVER(inode)->wsize;
862 int sync = NFS_UNSTABLE;
863 812
864 dreq = nfs_direct_req_alloc(); 813 dreq = nfs_direct_req_alloc();
865 if (!dreq) 814 if (!dreq)
866 goto out; 815 goto out;
867 nfs_alloc_commit_data(dreq);
868
869 if (dreq->commit_data == NULL || count <= wsize)
870 sync = NFS_FILE_SYNC;
871 816
872 dreq->inode = inode; 817 dreq->inode = inode;
873 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 818 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
877 if (!is_sync_kiocb(iocb)) 822 if (!is_sync_kiocb(iocb))
878 dreq->iocb = iocb; 823 dreq->iocb = iocb;
879 824
880 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 825 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
881 if (!result) 826 if (!result)
882 result = nfs_direct_wait(dreq); 827 result = nfs_direct_wait(dreq);
883out_release: 828out_release:
@@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
997 task_io_account_write(count); 942 task_io_account_write(count);
998 943
999 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 944 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
945 if (retval > 0) {
946 struct inode *inode = mapping->host;
1000 947
1001 if (retval > 0)
1002 iocb->ki_pos = pos + retval; 948 iocb->ki_pos = pos + retval;
1003 949 spin_lock(&inode->i_lock);
950 if (i_size_read(inode) < iocb->ki_pos)
951 i_size_write(inode, iocb->ki_pos);
952 spin_unlock(&inode->i_lock);
953 }
1004out: 954out:
1005 return retval; 955 return retval;
1006} 956}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709fd328..56311ca5f9f8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
174 if ((file->f_mode & FMODE_WRITE) == 0) 174 if ((file->f_mode & FMODE_WRITE) == 0)
175 return 0; 175 return 0;
176 176
177 /*
178 * If we're holding a write delegation, then just start the i/o
179 * but don't wait for completion (or send a commit).
180 */
181 if (nfs_have_delegation(inode, FMODE_WRITE))
182 return filemap_fdatawrite(file->f_mapping);
183
177 /* Flush writes to the server and return any errors */ 184 /* Flush writes to the server and return any errors */
178 return vfs_fsync(file, 0); 185 return vfs_fsync(file, 0);
179} 186}
@@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
417 424
418 if (status < 0) 425 if (status < 0)
419 return status; 426 return status;
427 NFS_I(mapping->host)->write_io += copied;
420 return copied; 428 return copied;
421} 429}
422 430
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ae65c16b3670..c817787fbdb4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent 64 * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
65 * superblock across an automount point of some nature. 65 * superblock across an automount point of some nature.
66 */ 66 */
67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, 67void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
68 struct nfs_clone_mount *mntdata)
69{ 68{
70 struct nfs_fscache_key *key, *xkey; 69 struct nfs_fscache_key *key, *xkey;
71 struct nfs_server *nfss = NFS_SB(sb); 70 struct nfs_server *nfss = NFS_SB(sb);
72 struct rb_node **p, *parent; 71 struct rb_node **p, *parent;
73 int diff, ulen; 72 int diff;
74
75 if (uniq) {
76 ulen = strlen(uniq);
77 } else if (mntdata) {
78 struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
79 if (mnt_s->fscache_key) {
80 uniq = mnt_s->fscache_key->key.uniquifier;
81 ulen = mnt_s->fscache_key->key.uniq_len;
82 }
83 }
84 73
85 if (!uniq) { 74 if (!uniq) {
86 uniq = ""; 75 uniq = "";
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d0679f..c5b11b53ff33 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
73extern void nfs_fscache_get_client_cookie(struct nfs_client *); 73extern void nfs_fscache_get_client_cookie(struct nfs_client *);
74extern void nfs_fscache_release_client_cookie(struct nfs_client *); 74extern void nfs_fscache_release_client_cookie(struct nfs_client *);
75 75
76extern void nfs_fscache_get_super_cookie(struct super_block *, 76extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
77 const char *,
78 struct nfs_clone_mount *);
79extern void nfs_fscache_release_super_cookie(struct super_block *); 77extern void nfs_fscache_release_super_cookie(struct super_block *);
80 78
81extern void nfs_fscache_init_inode_cookie(struct inode *); 79extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
172static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} 170static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
173static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} 171static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
174 172
175static inline void nfs_fscache_get_super_cookie(
176 struct super_block *sb,
177 const char *uniq,
178 struct nfs_clone_mount *mntdata)
179{
180}
181static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} 173static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
182 174
183static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} 175static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4ca6f5c8038e..8abfb19bd3aa 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
150 goto out; 150 goto out;
151 151
152 /* Start by getting the root filehandle from the server */ 152 /* Start by getting the root filehandle from the server */
153 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 153 ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
154 if (ret < 0) { 154 if (ret < 0) {
155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); 155 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
156 goto out; 156 goto out;
@@ -178,87 +178,4 @@ out:
178 return ret; 178 return ret;
179} 179}
180 180
181/*
182 * get an NFS4 root dentry from the root filehandle
183 */
184struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
185 const char *devname)
186{
187 struct nfs_server *server = NFS_SB(sb);
188 struct nfs_fattr *fattr = NULL;
189 struct dentry *ret;
190 struct inode *inode;
191 void *name = kstrdup(devname, GFP_KERNEL);
192 int error;
193
194 dprintk("--> nfs4_get_root()\n");
195
196 if (!name)
197 return ERR_PTR(-ENOMEM);
198
199 /* get the info about the server and filesystem */
200 error = nfs4_server_capabilities(server, mntfh);
201 if (error < 0) {
202 dprintk("nfs_get_root: getcaps error = %d\n",
203 -error);
204 kfree(name);
205 return ERR_PTR(error);
206 }
207
208 fattr = nfs_alloc_fattr();
209 if (fattr == NULL) {
210 kfree(name);
211 return ERR_PTR(-ENOMEM);
212 }
213
214 /* get the actual root for this mount */
215 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
216 if (error < 0) {
217 dprintk("nfs_get_root: getattr error = %d\n", -error);
218 ret = ERR_PTR(error);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
223 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
224 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
225
226 inode = nfs_fhget(sb, mntfh, fattr);
227 if (IS_ERR(inode)) {
228 dprintk("nfs_get_root: get root inode failed\n");
229 ret = ERR_CAST(inode);
230 goto out;
231 }
232
233 error = nfs_superblock_set_dummy_root(sb, inode);
234 if (error != 0) {
235 ret = ERR_PTR(error);
236 goto out;
237 }
238
239 /* root dentries normally start off anonymous and get spliced in later
240 * if the dentry tree reaches them; however if the dentry already
241 * exists, we'll pick it up at this point and use it as the root
242 */
243 ret = d_obtain_alias(inode);
244 if (IS_ERR(ret)) {
245 dprintk("nfs_get_root: get root dentry failed\n");
246 goto out;
247 }
248
249 security_d_instantiate(ret, inode);
250 spin_lock(&ret->d_lock);
251 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
252 ret->d_fsdata = name;
253 name = NULL;
254 }
255 spin_unlock(&ret->d_lock);
256out:
257 if (name)
258 kfree(name);
259 nfs_free_fattr(fattr);
260 dprintk("<-- nfs4_get_root()\n");
261 return ret;
262}
263
264#endif /* CONFIG_NFS_V4 */ 181#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ba3019f5934c..b5b86a05059c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
415static void nfs_idmap_unregister(struct nfs_client *clp, 415static void nfs_idmap_unregister(struct nfs_client *clp,
416 struct rpc_pipe *pipe) 416 struct rpc_pipe *pipe)
417{ 417{
418 struct net *net = clp->net; 418 struct net *net = clp->cl_net;
419 struct super_block *pipefs_sb; 419 struct super_block *pipefs_sb;
420 420
421 pipefs_sb = rpc_get_sb_net(net); 421 pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
429 struct idmap *idmap, 429 struct idmap *idmap,
430 struct rpc_pipe *pipe) 430 struct rpc_pipe *pipe)
431{ 431{
432 struct net *net = clp->net; 432 struct net *net = clp->cl_net;
433 struct super_block *pipefs_sb; 433 struct super_block *pipefs_sb;
434 int err = 0; 434 int err = 0;
435 435
@@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
530 struct nfs_net *nn = net_generic(net, nfs_net_id); 530 struct nfs_net *nn = net_generic(net, nfs_net_id);
531 struct dentry *cl_dentry; 531 struct dentry *cl_dentry;
532 struct nfs_client *clp; 532 struct nfs_client *clp;
533 int err;
533 534
535restart:
534 spin_lock(&nn->nfs_client_lock); 536 spin_lock(&nn->nfs_client_lock);
535 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { 537 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
538 /* Wait for initialisation to finish */
539 if (clp->cl_cons_state == NFS_CS_INITING) {
540 atomic_inc(&clp->cl_count);
541 spin_unlock(&nn->nfs_client_lock);
542 err = nfs_wait_client_init_complete(clp);
543 nfs_put_client(clp);
544 if (err)
545 return NULL;
546 goto restart;
547 }
548 /* Skip nfs_clients that failed to initialise */
549 if (clp->cl_cons_state < 0)
550 continue;
551 smp_rmb();
536 if (clp->rpc_ops != &nfs_v4_clientops) 552 if (clp->rpc_ops != &nfs_v4_clientops)
537 continue; 553 continue;
538 cl_dentry = clp->cl_idmap->idmap_pipe->dentry; 554 cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
@@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
640 struct idmap_msg *im; 656 struct idmap_msg *im;
641 struct idmap *idmap = (struct idmap *)aux; 657 struct idmap *idmap = (struct idmap *)aux;
642 struct key *key = cons->key; 658 struct key *key = cons->key;
643 int ret; 659 int ret = -ENOMEM;
644 660
645 /* msg and im are freed in idmap_pipe_destroy_msg */ 661 /* msg and im are freed in idmap_pipe_destroy_msg */
646 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 662 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
647 if (IS_ERR(msg)) { 663 if (!msg)
648 ret = PTR_ERR(msg);
649 goto out0; 664 goto out0;
650 }
651 665
652 im = kmalloc(sizeof(*im), GFP_KERNEL); 666 im = kmalloc(sizeof(*im), GFP_KERNEL);
653 if (IS_ERR(im)) { 667 if (!im)
654 ret = PTR_ERR(im);
655 goto out1; 668 goto out1;
656 }
657 669
658 ret = nfs_idmap_prepare_message(key->description, im, msg); 670 ret = nfs_idmap_prepare_message(key->description, im, msg);
659 if (ret < 0) 671 if (ret < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5b3500..2f6f78c4b42d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -121,7 +121,7 @@ static void nfs_clear_inode(struct inode *inode)
121void nfs_evict_inode(struct inode *inode) 121void nfs_evict_inode(struct inode *inode)
122{ 122{
123 truncate_inode_pages(&inode->i_data, 0); 123 truncate_inode_pages(&inode->i_data, 0);
124 end_writeback(inode); 124 clear_inode(inode);
125 nfs_clear_inode(inode); 125 nfs_clear_inode(inode);
126} 126}
127 127
@@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
285 inode->i_mode = fattr->mode; 285 inode->i_mode = fattr->mode;
286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 286 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
287 && nfs_server_capable(inode, NFS_CAP_MODE)) 287 && nfs_server_capable(inode, NFS_CAP_MODE))
288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 288 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
289 | NFS_INO_INVALID_ACCESS
290 | NFS_INO_INVALID_ACL;
291 /* Why so? Because we want revalidate for devices/FIFOs, and 289 /* Why so? Because we want revalidate for devices/FIFOs, and
292 * that's precisely what we have in nfs_file_inode_operations. 290 * that's precisely what we have in nfs_file_inode_operations.
293 */ 291 */
@@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
300 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 298 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
301 inode->i_fop = &nfs_dir_operations; 299 inode->i_fop = &nfs_dir_operations;
302 inode->i_data.a_ops = &nfs_dir_aops; 300 inode->i_data.a_ops = &nfs_dir_aops;
303 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
304 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
305 /* Deal with crossing mountpoints */ 301 /* Deal with crossing mountpoints */
306 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || 302 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
307 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { 303 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
327 inode->i_gid = -2; 323 inode->i_gid = -2;
328 inode->i_blocks = 0; 324 inode->i_blocks = 0;
329 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 325 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
326 nfsi->write_io = 0;
327 nfsi->read_io = 0;
330 328
331 nfsi->read_cache_jiffies = fattr->time_start; 329 nfsi->read_cache_jiffies = fattr->time_start;
332 nfsi->attr_gencount = fattr->gencount; 330 nfsi->attr_gencount = fattr->gencount;
@@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
337 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 335 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
338 inode->i_mtime = fattr->mtime; 336 inode->i_mtime = fattr->mtime;
339 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 337 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 338 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
341 | NFS_INO_INVALID_DATA;
342 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 339 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
343 inode->i_ctime = fattr->ctime; 340 inode->i_ctime = fattr->ctime;
344 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 341 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
345 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 342 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
346 | NFS_INO_INVALID_ACCESS
347 | NFS_INO_INVALID_ACL;
348 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 343 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
349 inode->i_version = fattr->change_attr; 344 inode->i_version = fattr->change_attr;
350 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 345 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
351 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 346 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
352 | NFS_INO_INVALID_DATA;
353 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 347 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
354 inode->i_size = nfs_size_to_loff_t(fattr->size); 348 inode->i_size = nfs_size_to_loff_t(fattr->size);
355 else 349 else
356 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 350 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
357 | NFS_INO_INVALID_DATA
358 | NFS_INO_REVAL_PAGECACHE; 351 | NFS_INO_REVAL_PAGECACHE;
359 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 352 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
360 set_nlink(inode, fattr->nlink); 353 set_nlink(inode, fattr->nlink);
@@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
363 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 356 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
364 inode->i_uid = fattr->uid; 357 inode->i_uid = fattr->uid;
365 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 358 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 359 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
367 | NFS_INO_INVALID_ACCESS
368 | NFS_INO_INVALID_ACL;
369 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 360 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
370 inode->i_gid = fattr->gid; 361 inode->i_gid = fattr->gid;
371 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 362 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 363 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
373 | NFS_INO_INVALID_ACCESS
374 | NFS_INO_INVALID_ACL;
375 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 364 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
376 inode->i_blocks = fattr->du.nfs2.blocks; 365 inode->i_blocks = fattr->du.nfs2.blocks;
377 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 366 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
654 nfs_init_lock_context(&ctx->lock_context); 643 nfs_init_lock_context(&ctx->lock_context);
655 ctx->lock_context.open_context = ctx; 644 ctx->lock_context.open_context = ctx;
656 INIT_LIST_HEAD(&ctx->list); 645 INIT_LIST_HEAD(&ctx->list);
646 ctx->mdsthreshold = NULL;
657 return ctx; 647 return ctx;
658} 648}
659 649
@@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
682 put_rpccred(ctx->cred); 672 put_rpccred(ctx->cred);
683 dput(ctx->dentry); 673 dput(ctx->dentry);
684 nfs_sb_deactive(sb); 674 nfs_sb_deactive(sb);
675 kfree(ctx->mdsthreshold);
685 kfree(ctx); 676 kfree(ctx);
686} 677}
687 678
@@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
870 return 0; 861 return 0;
871} 862}
872 863
864static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
865{
866 if (nfs_have_delegated_attributes(inode))
867 return false;
868 return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
869 || nfs_attribute_timeout(inode)
870 || NFS_STALE(inode);
871}
872
873/** 873/**
874 * nfs_revalidate_mapping - Revalidate the pagecache 874 * nfs_revalidate_mapping - Revalidate the pagecache
875 * @inode - pointer to host inode 875 * @inode - pointer to host inode
@@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
880 struct nfs_inode *nfsi = NFS_I(inode); 880 struct nfs_inode *nfsi = NFS_I(inode);
881 int ret = 0; 881 int ret = 0;
882 882
883 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 883 if (nfs_mapping_need_revalidate_inode(inode)) {
884 || nfs_attribute_cache_expired(inode)
885 || NFS_STALE(inode)) {
886 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 884 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
887 if (ret < 0) 885 if (ret < 0)
888 goto out; 886 goto out;
@@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
948 unsigned long invalid = 0; 946 unsigned long invalid = 0;
949 947
950 948
949 if (nfs_have_delegated_attributes(inode))
950 return 0;
951 /* Has the inode gone and changed behind our back? */ 951 /* Has the inode gone and changed behind our back? */
952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 952 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
953 return -EIO; 953 return -EIO;
@@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
960 960
961 /* Verify a few of the more important attributes */ 961 /* Verify a few of the more important attributes */
962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) 962 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
963 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 963 invalid |= NFS_INO_INVALID_ATTR;
964 964
965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 965 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
966 cur_size = i_size_read(inode); 966 cur_size = i_size_read(inode);
@@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1279 nfs_display_fhandle_hash(NFS_FH(inode)), 1279 nfs_display_fhandle_hash(NFS_FH(inode)),
1280 atomic_read(&inode->i_count), fattr->valid); 1280 atomic_read(&inode->i_count), fattr->valid);
1281 1281
1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1282 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
1283 goto out_fileid; 1283 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1284 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1285 NFS_SERVER(inode)->nfs_client->cl_hostname,
1286 inode->i_sb->s_id, (long long)nfsi->fileid,
1287 (long long)fattr->fileid);
1288 goto out_err;
1289 }
1284 1290
1285 /* 1291 /*
1286 * Make sure the inode's type hasn't changed. 1292 * Make sure the inode's type hasn't changed.
1287 */ 1293 */
1288 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1294 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1289 goto out_changed; 1295 /*
1296 * Big trouble! The inode has become a different object.
1297 */
1298 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1299 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1300 goto out_err;
1301 }
1290 1302
1291 server = NFS_SERVER(inode); 1303 server = NFS_SERVER(inode);
1292 /* Update the fsid? */ 1304 /* Update the fsid? */
@@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1314 if (inode->i_version != fattr->change_attr) { 1326 if (inode->i_version != fattr->change_attr) {
1315 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1327 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1316 inode->i_sb->s_id, inode->i_ino); 1328 inode->i_sb->s_id, inode->i_ino);
1317 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1329 invalid |= NFS_INO_INVALID_ATTR
1330 | NFS_INO_INVALID_DATA
1331 | NFS_INO_INVALID_ACCESS
1332 | NFS_INO_INVALID_ACL
1333 | NFS_INO_REVAL_PAGECACHE;
1318 if (S_ISDIR(inode->i_mode)) 1334 if (S_ISDIR(inode->i_mode))
1319 nfs_force_lookup_revalidate(inode); 1335 nfs_force_lookup_revalidate(inode);
1320 inode->i_version = fattr->change_attr; 1336 inode->i_version = fattr->change_attr;
@@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1323 invalid |= save_cache_validity; 1339 invalid |= save_cache_validity;
1324 1340
1325 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1341 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1326 /* NFSv2/v3: Check if the mtime agrees */ 1342 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1327 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1328 dprintk("NFS: mtime change on server for file %s/%ld\n",
1329 inode->i_sb->s_id, inode->i_ino);
1330 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1331 if (S_ISDIR(inode->i_mode))
1332 nfs_force_lookup_revalidate(inode);
1333 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1334 }
1335 } else if (server->caps & NFS_CAP_MTIME) 1343 } else if (server->caps & NFS_CAP_MTIME)
1336 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1344 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1337 | NFS_INO_INVALID_DATA
1338 | NFS_INO_REVAL_PAGECACHE
1339 | NFS_INO_REVAL_FORCED); 1345 | NFS_INO_REVAL_FORCED);
1340 1346
1341 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1347 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1342 /* If ctime has changed we should definitely clear access+acl caches */ 1348 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1343 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1344 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1345 /* and probably clear data for a directory too as utimes can cause
1346 * havoc with our cache.
1347 */
1348 if (S_ISDIR(inode->i_mode)) {
1349 invalid |= NFS_INO_INVALID_DATA;
1350 nfs_force_lookup_revalidate(inode);
1351 }
1352 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1353 }
1354 } else if (server->caps & NFS_CAP_CTIME) 1349 } else if (server->caps & NFS_CAP_CTIME)
1355 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1350 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1356 | NFS_INO_INVALID_ACCESS
1357 | NFS_INO_INVALID_ACL
1358 | NFS_INO_REVAL_FORCED); 1351 | NFS_INO_REVAL_FORCED);
1359 1352
1360 /* Check if our cached file size is stale */ 1353 /* Check if our cached file size is stale */
@@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1466 nfsi->cache_validity |= invalid; 1459 nfsi->cache_validity |= invalid;
1467 1460
1468 return 0; 1461 return 0;
1469 out_changed:
1470 /*
1471 * Big trouble! The inode has become a different object.
1472 */
1473 printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
1474 __func__, inode->i_ino, inode->i_mode, fattr->mode);
1475 out_err: 1462 out_err:
1476 /* 1463 /*
1477 * No need to worry about unhashing the dentry, as the 1464 * No need to worry about unhashing the dentry, as the
@@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1480 */ 1467 */
1481 nfs_invalidate_inode(inode); 1468 nfs_invalidate_inode(inode);
1482 return -ESTALE; 1469 return -ESTALE;
1483
1484 out_fileid:
1485 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1486 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1487 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1488 (long long)nfsi->fileid, (long long)fattr->fileid);
1489 goto out_err;
1490} 1470}
1491 1471
1492 1472
@@ -1500,7 +1480,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1500void nfs4_evict_inode(struct inode *inode) 1480void nfs4_evict_inode(struct inode *inode)
1501{ 1481{
1502 truncate_inode_pages(&inode->i_data, 0); 1482 truncate_inode_pages(&inode->i_data, 0);
1503 end_writeback(inode); 1483 clear_inode(inode);
1504 pnfs_return_layout(inode); 1484 pnfs_return_layout(inode);
1505 pnfs_destroy_layout(NFS_I(inode)); 1485 pnfs_destroy_layout(NFS_I(inode));
1506 /* If we are holding a delegation, return it! */ 1486 /* If we are holding a delegation, return it! */
@@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1547 nfsi->delegation_state = 0; 1527 nfsi->delegation_state = 0;
1548 init_rwsem(&nfsi->rwsem); 1528 init_rwsem(&nfsi->rwsem);
1549 nfsi->layout = NULL; 1529 nfsi->layout = NULL;
1550 atomic_set(&nfsi->commits_outstanding, 0); 1530 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1551#endif 1531#endif
1552} 1532}
1553 1533
@@ -1559,9 +1539,9 @@ static void init_once(void *foo)
1559 INIT_LIST_HEAD(&nfsi->open_files); 1539 INIT_LIST_HEAD(&nfsi->open_files);
1560 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1540 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1561 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1541 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1562 INIT_LIST_HEAD(&nfsi->commit_list); 1542 INIT_LIST_HEAD(&nfsi->commit_info.list);
1563 nfsi->npages = 0; 1543 nfsi->npages = 0;
1564 nfsi->ncommit = 0; 1544 nfsi->commit_info.ncommit = 0;
1565 atomic_set(&nfsi->silly_count, 1); 1545 atomic_set(&nfsi->silly_count, 1);
1566 INIT_HLIST_HEAD(&nfsi->silly_list); 1546 INIT_HLIST_HEAD(&nfsi->silly_list);
1567 init_waitqueue_head(&nfsi->waitqueue); 1547 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..1848a7275592 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
103 unsigned int version; 103 unsigned int version;
104 unsigned int minorversion; 104 unsigned int minorversion;
105 char *fscache_uniq; 105 char *fscache_uniq;
106 bool need_mount;
106 107
107 struct { 108 struct {
108 struct sockaddr_storage address; 109 struct sockaddr_storage address;
@@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
167 struct nfs_fh *, 168 struct nfs_fh *,
168 struct nfs_fattr *, 169 struct nfs_fattr *,
169 rpc_authflavor_t); 170 rpc_authflavor_t);
171extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
170extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 172extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
171extern int nfs4_check_client_ready(struct nfs_client *clp);
172extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 173extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
173 const struct sockaddr *ds_addr, 174 const struct sockaddr *ds_addr,
174 int ds_addrlen, int ds_proto); 175 int ds_addrlen, int ds_proto,
176 unsigned int ds_timeo,
177 unsigned int ds_retrans);
175#ifdef CONFIG_PROC_FS 178#ifdef CONFIG_PROC_FS
176extern int __init nfs_fs_proc_init(void); 179extern int __init nfs_fs_proc_init(void);
177extern void nfs_fs_proc_exit(void); 180extern void nfs_fs_proc_exit(void);
@@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
185} 188}
186#endif 189#endif
187 190
188/* nfs4namespace.c */
189#ifdef CONFIG_NFS_V4
190extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
191#else
192static inline
193struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
194{
195 return ERR_PTR(-ENOENT);
196}
197#endif
198
199/* callback_xdr.c */ 191/* callback_xdr.c */
200extern struct svc_version nfs4_callback_version1; 192extern struct svc_version nfs4_callback_version1;
201extern struct svc_version nfs4_callback_version4; 193extern struct svc_version nfs4_callback_version4;
202 194
195struct nfs_pageio_descriptor;
203/* pagelist.c */ 196/* pagelist.c */
204extern int __init nfs_init_nfspagecache(void); 197extern int __init nfs_init_nfspagecache(void);
205extern void nfs_destroy_nfspagecache(void); 198extern void nfs_destroy_nfspagecache(void);
@@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
210 203
211extern int __init nfs_init_directcache(void); 204extern int __init nfs_init_directcache(void);
212extern void nfs_destroy_directcache(void); 205extern void nfs_destroy_directcache(void);
206extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
207extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
208 struct nfs_pgio_header *hdr,
209 void (*release)(struct nfs_pgio_header *hdr));
210void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
213 211
214/* nfs2xdr.c */ 212/* nfs2xdr.c */
215extern int nfs_stat_to_errno(enum nfs_stat);
216extern struct rpc_procinfo nfs_procedures[]; 213extern struct rpc_procinfo nfs_procedures[];
217extern int nfs2_decode_dirent(struct xdr_stream *, 214extern int nfs2_decode_dirent(struct xdr_stream *,
218 struct nfs_entry *, int); 215 struct nfs_entry *, int);
@@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;
237extern struct rpc_procinfo nfs4_procedures[]; 234extern struct rpc_procinfo nfs4_procedures[];
238#endif 235#endif
239 236
240extern int nfs4_init_ds_session(struct nfs_client *clp); 237extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
241 238
242/* proc.c */ 239/* proc.c */
243void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 240void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
244extern int nfs_init_client(struct nfs_client *clp, 241extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
245 const struct rpc_timeout *timeparms, 242 const struct rpc_timeout *timeparms,
246 const char *ip_addr, rpc_authflavor_t authflavour, 243 const char *ip_addr, rpc_authflavor_t authflavour);
247 int noresvport);
248 244
249/* dir.c */ 245/* dir.c */
250extern int nfs_access_cache_shrinker(struct shrinker *shrink, 246extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
280extern char *nfs_path(char **p, struct dentry *dentry, 276extern char *nfs_path(char **p, struct dentry *dentry,
281 char *buffer, ssize_t buflen); 277 char *buffer, ssize_t buflen);
282extern struct vfsmount *nfs_d_automount(struct path *path); 278extern struct vfsmount *nfs_d_automount(struct path *path);
283#ifdef CONFIG_NFS_V4 279struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
284rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 280 struct nfs_fh *, struct nfs_fattr *);
285#endif 281struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
282 struct nfs_fattr *, rpc_authflavor_t);
286 283
287/* getroot.c */ 284/* getroot.c */
288extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 285extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
294extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 291extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
295#endif 292#endif
296 293
297struct nfs_pageio_descriptor; 294struct nfs_pgio_completion_ops;
298/* read.c */ 295/* read.c */
299extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296extern struct nfs_read_header *nfs_readhdr_alloc(void);
300 const struct rpc_call_ops *call_ops); 297extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
298extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
299 struct inode *inode,
300 const struct nfs_pgio_completion_ops *compl_ops);
301extern int nfs_initiate_read(struct rpc_clnt *clnt,
302 struct nfs_read_data *data,
303 const struct rpc_call_ops *call_ops, int flags);
301extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 304extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
302extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 305extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
303 struct list_head *head); 306 struct nfs_pgio_header *hdr);
304
305extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 307extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
306 struct inode *inode); 308 struct inode *inode,
309 const struct nfs_pgio_completion_ops *compl_ops);
307extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 310extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
308extern void nfs_readdata_release(struct nfs_read_data *rdata); 311extern void nfs_readdata_release(struct nfs_read_data *rdata);
309 312
310/* write.c */ 313/* write.c */
314extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
315 struct inode *inode, int ioflags,
316 const struct nfs_pgio_completion_ops *compl_ops);
317extern struct nfs_write_header *nfs_writehdr_alloc(void);
318extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
311extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 319extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
312 struct list_head *head); 320 struct nfs_pgio_header *hdr);
313extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 321extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
314 struct inode *inode, int ioflags); 322 struct inode *inode, int ioflags,
323 const struct nfs_pgio_completion_ops *compl_ops);
315extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 324extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
316extern void nfs_writedata_release(struct nfs_write_data *wdata); 325extern void nfs_writedata_release(struct nfs_write_data *wdata);
317extern void nfs_commit_free(struct nfs_write_data *p); 326extern void nfs_commit_free(struct nfs_commit_data *p);
318extern int nfs_initiate_write(struct nfs_write_data *data, 327extern int nfs_initiate_write(struct rpc_clnt *clnt,
319 struct rpc_clnt *clnt, 328 struct nfs_write_data *data,
320 const struct rpc_call_ops *call_ops, 329 const struct rpc_call_ops *call_ops,
321 int how); 330 int how, int flags);
322extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 331extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
323extern int nfs_initiate_commit(struct nfs_write_data *data, 332extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
324 struct rpc_clnt *clnt, 333extern int nfs_initiate_commit(struct rpc_clnt *clnt,
334 struct nfs_commit_data *data,
325 const struct rpc_call_ops *call_ops, 335 const struct rpc_call_ops *call_ops,
326 int how); 336 int how, int flags);
327extern void nfs_init_commit(struct nfs_write_data *data, 337extern void nfs_init_commit(struct nfs_commit_data *data,
328 struct list_head *head, 338 struct list_head *head,
329 struct pnfs_layout_segment *lseg); 339 struct pnfs_layout_segment *lseg,
340 struct nfs_commit_info *cinfo);
341int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
342 struct nfs_commit_info *cinfo, int max);
343int nfs_scan_commit(struct inode *inode, struct list_head *dst,
344 struct nfs_commit_info *cinfo);
345void nfs_mark_request_commit(struct nfs_page *req,
346 struct pnfs_layout_segment *lseg,
347 struct nfs_commit_info *cinfo);
348int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
349 int how, struct nfs_commit_info *cinfo);
330void nfs_retry_commit(struct list_head *page_list, 350void nfs_retry_commit(struct list_head *page_list,
331 struct pnfs_layout_segment *lseg); 351 struct pnfs_layout_segment *lseg,
332void nfs_commit_clear_lock(struct nfs_inode *nfsi); 352 struct nfs_commit_info *cinfo);
333void nfs_commitdata_release(void *data); 353void nfs_commitdata_release(struct nfs_commit_data *data);
334void nfs_commit_release_pages(struct nfs_write_data *data); 354void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
335void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); 355 struct nfs_commit_info *cinfo);
336void nfs_request_remove_commit_list(struct nfs_page *req); 356void nfs_request_remove_commit_list(struct nfs_page *req,
357 struct nfs_commit_info *cinfo);
358void nfs_init_cinfo(struct nfs_commit_info *cinfo,
359 struct inode *inode,
360 struct nfs_direct_req *dreq);
337 361
338#ifdef CONFIG_MIGRATION 362#ifdef CONFIG_MIGRATION
339extern int nfs_migrate_page(struct address_space *, 363extern int nfs_migrate_page(struct address_space *,
@@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,
342#define nfs_migrate_page NULL 366#define nfs_migrate_page NULL
343#endif 367#endif
344 368
369/* direct.c */
370void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
371 struct nfs_direct_req *dreq);
372
345/* nfs4proc.c */ 373/* nfs4proc.c */
346extern void __nfs4_read_done_cb(struct nfs_read_data *); 374extern void __nfs4_read_done_cb(struct nfs_read_data *);
347extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); 375extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
348extern int nfs4_init_client(struct nfs_client *clp,
349 const struct rpc_timeout *timeparms, 376 const struct rpc_timeout *timeparms,
350 const char *ip_addr, 377 const char *ip_addr,
351 rpc_authflavor_t authflavour, 378 rpc_authflavor_t authflavour);
352 int noresvport);
353extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
354extern int _nfs4_call_sync(struct rpc_clnt *clnt, 379extern int _nfs4_call_sync(struct rpc_clnt *clnt,
355 struct nfs_server *server, 380 struct nfs_server *server,
356 struct rpc_message *msg, 381 struct rpc_message *msg,
@@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
466 PAGE_SIZE - 1) >> PAGE_SHIFT; 491 PAGE_SIZE - 1) >> PAGE_SHIFT;
467} 492}
468 493
494/*
495 * Convert a struct timespec into a 64-bit change attribute
496 *
497 * This does approximately the same thing as timespec_to_ns(),
498 * but for calculation efficiency, we multiply the seconds by
499 * 1024*1024*1024.
500 */
501static inline
502u64 nfs_timespec_to_change_attr(const struct timespec *ts)
503{
504 return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
505}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..08b9c93675da 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
27int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
28 28
29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
30 struct nfs_fh *fh,
31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
33
34/* 29/*
35 * nfs_path - reconstruct the path given an arbitrary dentry 30 * nfs_path - reconstruct the path given an arbitrary dentry
36 * @base - used to return pointer to the end of devname part of path 31 * @base - used to return pointer to the end of devname part of path
@@ -118,64 +113,6 @@ Elong:
118 return ERR_PTR(-ENAMETOOLONG); 113 return ERR_PTR(-ENAMETOOLONG);
119} 114}
120 115
121#ifdef CONFIG_NFS_V4
122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
152 struct qstr *name,
153 struct nfs_fh *fh,
154 struct nfs_fattr *fattr)
155{
156 int err;
157
158 if (NFS_PROTO(dir)->version == 4)
159 return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
160
161 err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
162 if (err)
163 return ERR_PTR(err);
164 return rpc_clone_client(NFS_SERVER(dir)->client);
165}
166#else /* CONFIG_NFS_V4 */
167static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
168 struct qstr *name,
169 struct nfs_fh *fh,
170 struct nfs_fattr *fattr)
171{
172 int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
173 if (err)
174 return ERR_PTR(err);
175 return rpc_clone_client(NFS_SERVER(dir)->client);
176}
177#endif /* CONFIG_NFS_V4 */
178
179/* 116/*
180 * nfs_d_automount - Handle crossing a mountpoint on the server 117 * nfs_d_automount - Handle crossing a mountpoint on the server
181 * @path - The mountpoint 118 * @path - The mountpoint
@@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
191struct vfsmount *nfs_d_automount(struct path *path) 128struct vfsmount *nfs_d_automount(struct path *path)
192{ 129{
193 struct vfsmount *mnt; 130 struct vfsmount *mnt;
194 struct dentry *parent; 131 struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
195 struct nfs_fh *fh = NULL; 132 struct nfs_fh *fh = NULL;
196 struct nfs_fattr *fattr = NULL; 133 struct nfs_fattr *fattr = NULL;
197 struct rpc_clnt *client;
198 134
199 dprintk("--> nfs_d_automount()\n"); 135 dprintk("--> nfs_d_automount()\n");
200 136
@@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
210 146
211 dprintk("%s: enter\n", __func__); 147 dprintk("%s: enter\n", __func__);
212 148
213 /* Look it up again to get its attributes */ 149 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
214 parent = dget_parent(path->dentry);
215 client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
216 dput(parent);
217 if (IS_ERR(client)) {
218 mnt = ERR_CAST(client);
219 goto out;
220 }
221
222 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
223 mnt = nfs_do_refmount(client, path->dentry);
224 else
225 mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
226 rpc_shutdown_client(client);
227
228 if (IS_ERR(mnt)) 150 if (IS_ERR(mnt))
229 goto out; 151 goto out;
230 152
@@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
297 * @authflavor - security flavor to use when performing the mount 219 * @authflavor - security flavor to use when performing the mount
298 * 220 *
299 */ 221 */
300static struct vfsmount *nfs_do_submount(struct dentry *dentry, 222struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
301 struct nfs_fh *fh, 223 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
302 struct nfs_fattr *fattr,
303 rpc_authflavor_t authflavor)
304{ 224{
305 struct nfs_clone_mount mountdata = { 225 struct nfs_clone_mount mountdata = {
306 .sb = dentry->d_sb, 226 .sb = dentry->d_sb,
@@ -333,3 +253,18 @@ out:
333 dprintk("<-- nfs_do_submount() = %p\n", mnt); 253 dprintk("<-- nfs_do_submount() = %p\n", mnt);
334 return mnt; 254 return mnt;
335} 255}
256
257struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
258 struct nfs_fh *fh, struct nfs_fattr *fattr)
259{
260 int err;
261 struct dentry *parent = dget_parent(dentry);
262
263 /* Look it up again to get its attributes */
264 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
265 dput(parent);
266 if (err != 0)
267 return ERR_PTR(err);
268
269 return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
270}
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index aa14ec303e94..8a6394edb8b0 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -1,3 +1,7 @@
1/*
2 * NFS-private data for each "struct net". Accessed with net_generic().
3 */
4
1#ifndef __NFS_NETNS_H__ 5#ifndef __NFS_NETNS_H__
2#define __NFS_NETNS_H__ 6#define __NFS_NETNS_H__
3 7
@@ -20,6 +24,7 @@ struct nfs_net {
20 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ 24 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
21#endif 25#endif
22 spinlock_t nfs_client_lock; 26 spinlock_t nfs_client_lock;
27 struct timespec boot_time;
23}; 28};
24 29
25extern int nfs_net_id; 30extern int nfs_net_id;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000fabbd..baf759bccd05 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
61#define NFS_readdirres_sz (1) 61#define NFS_readdirres_sz (1)
62#define NFS_statfsres_sz (1+NFS_info_sz) 62#define NFS_statfsres_sz (1+NFS_info_sz)
63 63
64static int nfs_stat_to_errno(enum nfs_stat);
64 65
65/* 66/*
66 * While encoding arguments, set up the reply buffer in advance to 67 * While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 p = xdr_decode_time(p, &fattr->atime); 314 p = xdr_decode_time(p, &fattr->atime);
314 p = xdr_decode_time(p, &fattr->mtime); 315 p = xdr_decode_time(p, &fattr->mtime);
315 xdr_decode_time(p, &fattr->ctime); 316 xdr_decode_time(p, &fattr->ctime);
317 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
318
316 return 0; 319 return 0;
317out_overflow: 320out_overflow:
318 print_overflow_msg(__func__, xdr); 321 print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
1109 * Returns a local errno value, or -EIO if the NFS status code is 1112 * Returns a local errno value, or -EIO if the NFS status code is
1110 * not recognized. This function is used jointly by NFSv2 and NFSv3. 1113 * not recognized. This function is used jointly by NFSv2 and NFSv3.
1111 */ 1114 */
1112int nfs_stat_to_errno(enum nfs_stat status) 1115static int nfs_stat_to_errno(enum nfs_stat status)
1113{ 1116{
1114 int i; 1117 int i;
1115 1118
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 75c68299358e..2292a0fd2bff 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
142} 142}
143 143
144static int 144static int
145nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 145nfs3_proc_lookup(struct inode *dir, struct qstr *name,
146 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 146 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
147{ 147{
148 struct nfs3_diropargs arg = { 148 struct nfs3_diropargs arg = {
@@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
810 810
811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) 811static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
812{ 812{
813 if (nfs3_async_handle_jukebox(task, data->inode)) 813 struct inode *inode = data->header->inode;
814
815 if (nfs3_async_handle_jukebox(task, inode))
814 return -EAGAIN; 816 return -EAGAIN;
815 817
816 nfs_invalidate_atime(data->inode); 818 nfs_invalidate_atime(inode);
817 nfs_refresh_inode(data->inode, &data->fattr); 819 nfs_refresh_inode(inode, &data->fattr);
818 return 0; 820 return 0;
819} 821}
820 822
@@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
830 832
831static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) 833static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
832{ 834{
833 if (nfs3_async_handle_jukebox(task, data->inode)) 835 struct inode *inode = data->header->inode;
836
837 if (nfs3_async_handle_jukebox(task, inode))
834 return -EAGAIN; 838 return -EAGAIN;
835 if (task->tk_status >= 0) 839 if (task->tk_status >= 0)
836 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 840 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
837 return 0; 841 return 0;
838} 842}
839 843
@@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
847 rpc_call_start(task); 851 rpc_call_start(task);
848} 852}
849 853
850static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) 854static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
855{
856 rpc_call_start(task);
857}
858
859static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
851{ 860{
852 if (nfs3_async_handle_jukebox(task, data->inode)) 861 if (nfs3_async_handle_jukebox(task, data->inode))
853 return -EAGAIN; 862 return -EAGAIN;
@@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
855 return 0; 864 return 0;
856} 865}
857 866
858static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 867static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
859{ 868{
860 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 869 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
861} 870}
@@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
875 .file_inode_ops = &nfs3_file_inode_operations, 884 .file_inode_ops = &nfs3_file_inode_operations,
876 .file_ops = &nfs_file_operations, 885 .file_ops = &nfs_file_operations,
877 .getroot = nfs3_proc_get_root, 886 .getroot = nfs3_proc_get_root,
887 .submount = nfs_submount,
878 .getattr = nfs3_proc_getattr, 888 .getattr = nfs3_proc_getattr,
879 .setattr = nfs3_proc_setattr, 889 .setattr = nfs3_proc_setattr,
880 .lookup = nfs3_proc_lookup, 890 .lookup = nfs3_proc_lookup,
@@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
906 .write_rpc_prepare = nfs3_proc_write_rpc_prepare, 916 .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
907 .write_done = nfs3_write_done, 917 .write_done = nfs3_write_done,
908 .commit_setup = nfs3_proc_commit_setup, 918 .commit_setup = nfs3_proc_commit_setup,
919 .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
909 .commit_done = nfs3_commit_done, 920 .commit_done = nfs3_commit_done,
910 .lock = nfs3_proc_lock, 921 .lock = nfs3_proc_lock,
911 .clear_acl_cache = nfs3_forget_cached_acls, 922 .clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a3ce55..902de489ec9b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) 86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) 87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
88 88
89static int nfs3_stat_to_errno(enum nfs_stat);
90
89/* 91/*
90 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
91 */ 93 */
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 p = xdr_decode_nfstime3(p, &fattr->atime); 677 p = xdr_decode_nfstime3(p, &fattr->atime);
676 p = xdr_decode_nfstime3(p, &fattr->mtime); 678 p = xdr_decode_nfstime3(p, &fattr->mtime);
677 xdr_decode_nfstime3(p, &fattr->ctime); 679 xdr_decode_nfstime3(p, &fattr->ctime);
680 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
678 681
679 fattr->valid |= NFS_ATTR_FATTR_V3; 682 fattr->valid |= NFS_ATTR_FATTR_V3;
680 return 0; 683 return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
725 goto out_overflow; 728 goto out_overflow;
726 729
727 fattr->valid |= NFS_ATTR_FATTR_PRESIZE 730 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
731 | NFS_ATTR_FATTR_PRECHANGE
728 | NFS_ATTR_FATTR_PREMTIME 732 | NFS_ATTR_FATTR_PREMTIME
729 | NFS_ATTR_FATTR_PRECTIME; 733 | NFS_ATTR_FATTR_PRECTIME;
730 734
731 p = xdr_decode_size3(p, &fattr->pre_size); 735 p = xdr_decode_size3(p, &fattr->pre_size);
732 p = xdr_decode_nfstime3(p, &fattr->pre_mtime); 736 p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
733 xdr_decode_nfstime3(p, &fattr->pre_ctime); 737 xdr_decode_nfstime3(p, &fattr->pre_ctime);
738 fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
734 739
735 return 0; 740 return 0;
736out_overflow: 741out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
1287 * }; 1292 * };
1288 */ 1293 */
1289static void encode_commit3args(struct xdr_stream *xdr, 1294static void encode_commit3args(struct xdr_stream *xdr,
1290 const struct nfs_writeargs *args) 1295 const struct nfs_commitargs *args)
1291{ 1296{
1292 __be32 *p; 1297 __be32 *p;
1293 1298
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
1300 1305
1301static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, 1306static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
1302 struct xdr_stream *xdr, 1307 struct xdr_stream *xdr,
1303 const struct nfs_writeargs *args) 1308 const struct nfs_commitargs *args)
1304{ 1309{
1305 encode_commit3args(xdr, args); 1310 encode_commit3args(xdr, args);
1306} 1311}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
1385out: 1390out:
1386 return error; 1391 return error;
1387out_default: 1392out_default:
1388 return nfs_stat_to_errno(status); 1393 return nfs3_stat_to_errno(status);
1389} 1394}
1390 1395
1391/* 1396/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
1424out: 1429out:
1425 return error; 1430 return error;
1426out_status: 1431out_status:
1427 return nfs_stat_to_errno(status); 1432 return nfs3_stat_to_errno(status);
1428} 1433}
1429 1434
1430/* 1435/*
@@ -1472,7 +1477,7 @@ out_default:
1472 error = decode_post_op_attr(xdr, result->dir_attr); 1477 error = decode_post_op_attr(xdr, result->dir_attr);
1473 if (unlikely(error)) 1478 if (unlikely(error))
1474 goto out; 1479 goto out;
1475 return nfs_stat_to_errno(status); 1480 return nfs3_stat_to_errno(status);
1476} 1481}
1477 1482
1478/* 1483/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
1513out: 1518out:
1514 return error; 1519 return error;
1515out_default: 1520out_default:
1516 return nfs_stat_to_errno(status); 1521 return nfs3_stat_to_errno(status);
1517} 1522}
1518 1523
1519/* 1524/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
1554out: 1559out:
1555 return error; 1560 return error;
1556out_default: 1561out_default:
1557 return nfs_stat_to_errno(status); 1562 return nfs3_stat_to_errno(status);
1558} 1563}
1559 1564
1560/* 1565/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1636out: 1641out:
1637 return error; 1642 return error;
1638out_status: 1643out_status:
1639 return nfs_stat_to_errno(status); 1644 return nfs3_stat_to_errno(status);
1640} 1645}
1641 1646
1642/* 1647/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1706out: 1711out:
1707 return error; 1712 return error;
1708out_status: 1713out_status:
1709 return nfs_stat_to_errno(status); 1714 return nfs3_stat_to_errno(status);
1710} 1715}
1711 1716
1712/* 1717/*
@@ -1770,7 +1775,7 @@ out_default:
1770 error = decode_wcc_data(xdr, result->dir_attr); 1775 error = decode_wcc_data(xdr, result->dir_attr);
1771 if (unlikely(error)) 1776 if (unlikely(error))
1772 goto out; 1777 goto out;
1773 return nfs_stat_to_errno(status); 1778 return nfs3_stat_to_errno(status);
1774} 1779}
1775 1780
1776/* 1781/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
1809out: 1814out:
1810 return error; 1815 return error;
1811out_status: 1816out_status:
1812 return nfs_stat_to_errno(status); 1817 return nfs3_stat_to_errno(status);
1813} 1818}
1814 1819
1815/* 1820/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
1853out: 1858out:
1854 return error; 1859 return error;
1855out_status: 1860out_status:
1856 return nfs_stat_to_errno(status); 1861 return nfs3_stat_to_errno(status);
1857} 1862}
1858 1863
1859/* 1864/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
1896out: 1901out:
1897 return error; 1902 return error;
1898out_status: 1903out_status:
1899 return nfs_stat_to_errno(status); 1904 return nfs3_stat_to_errno(status);
1900} 1905}
1901 1906
1902/** 1907/**
@@ -2088,7 +2093,7 @@ out_default:
2088 error = decode_post_op_attr(xdr, result->dir_attr); 2093 error = decode_post_op_attr(xdr, result->dir_attr);
2089 if (unlikely(error)) 2094 if (unlikely(error))
2090 goto out; 2095 goto out;
2091 return nfs_stat_to_errno(status); 2096 return nfs3_stat_to_errno(status);
2092} 2097}
2093 2098
2094/* 2099/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
2156out: 2161out:
2157 return error; 2162 return error;
2158out_status: 2163out_status:
2159 return nfs_stat_to_errno(status); 2164 return nfs3_stat_to_errno(status);
2160} 2165}
2161 2166
2162/* 2167/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
2232out: 2237out:
2233 return error; 2238 return error;
2234out_status: 2239out_status:
2235 return nfs_stat_to_errno(status); 2240 return nfs3_stat_to_errno(status);
2236} 2241}
2237 2242
2238/* 2243/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
2295out: 2300out:
2296 return error; 2301 return error;
2297out_status: 2302out_status:
2298 return nfs_stat_to_errno(status); 2303 return nfs3_stat_to_errno(status);
2299} 2304}
2300 2305
2301/* 2306/*
@@ -2319,7 +2324,7 @@ out_status:
2319 */ 2324 */
2320static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, 2325static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2321 struct xdr_stream *xdr, 2326 struct xdr_stream *xdr,
2322 struct nfs_writeres *result) 2327 struct nfs_commitres *result)
2323{ 2328{
2324 enum nfs_stat status; 2329 enum nfs_stat status;
2325 int error; 2330 int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
2336out: 2341out:
2337 return error; 2342 return error;
2338out_status: 2343out_status:
2339 return nfs_stat_to_errno(status); 2344 return nfs3_stat_to_errno(status);
2340} 2345}
2341 2346
2342#ifdef CONFIG_NFS_V3_ACL 2347#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
2401out: 2406out:
2402 return error; 2407 return error;
2403out_default: 2408out_default:
2404 return nfs_stat_to_errno(status); 2409 return nfs3_stat_to_errno(status);
2405} 2410}
2406 2411
2407static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, 2412static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
2420out: 2425out:
2421 return error; 2426 return error;
2422out_default: 2427out_default:
2423 return nfs_stat_to_errno(status); 2428 return nfs3_stat_to_errno(status);
2424} 2429}
2425 2430
2426#endif /* CONFIG_NFS_V3_ACL */ 2431#endif /* CONFIG_NFS_V3_ACL */
2427 2432
2433
2434/*
2435 * We need to translate between nfs status return values and
2436 * the local errno values which may not be the same.
2437 */
2438static const struct {
2439 int stat;
2440 int errno;
2441} nfs_errtbl[] = {
2442 { NFS_OK, 0 },
2443 { NFSERR_PERM, -EPERM },
2444 { NFSERR_NOENT, -ENOENT },
2445 { NFSERR_IO, -errno_NFSERR_IO},
2446 { NFSERR_NXIO, -ENXIO },
2447/* { NFSERR_EAGAIN, -EAGAIN }, */
2448 { NFSERR_ACCES, -EACCES },
2449 { NFSERR_EXIST, -EEXIST },
2450 { NFSERR_XDEV, -EXDEV },
2451 { NFSERR_NODEV, -ENODEV },
2452 { NFSERR_NOTDIR, -ENOTDIR },
2453 { NFSERR_ISDIR, -EISDIR },
2454 { NFSERR_INVAL, -EINVAL },
2455 { NFSERR_FBIG, -EFBIG },
2456 { NFSERR_NOSPC, -ENOSPC },
2457 { NFSERR_ROFS, -EROFS },
2458 { NFSERR_MLINK, -EMLINK },
2459 { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
2460 { NFSERR_NOTEMPTY, -ENOTEMPTY },
2461 { NFSERR_DQUOT, -EDQUOT },
2462 { NFSERR_STALE, -ESTALE },
2463 { NFSERR_REMOTE, -EREMOTE },
2464#ifdef EWFLUSH
2465 { NFSERR_WFLUSH, -EWFLUSH },
2466#endif
2467 { NFSERR_BADHANDLE, -EBADHANDLE },
2468 { NFSERR_NOT_SYNC, -ENOTSYNC },
2469 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
2470 { NFSERR_NOTSUPP, -ENOTSUPP },
2471 { NFSERR_TOOSMALL, -ETOOSMALL },
2472 { NFSERR_SERVERFAULT, -EREMOTEIO },
2473 { NFSERR_BADTYPE, -EBADTYPE },
2474 { NFSERR_JUKEBOX, -EJUKEBOX },
2475 { -1, -EIO }
2476};
2477
2478/**
2479 * nfs3_stat_to_errno - convert an NFS status code to a local errno
2480 * @status: NFS status code to convert
2481 *
2482 * Returns a local errno value, or -EIO if the NFS status code is
2483 * not recognized. This function is used jointly by NFSv2 and NFSv3.
2484 */
2485static int nfs3_stat_to_errno(enum nfs_stat status)
2486{
2487 int i;
2488
2489 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
2490 if (nfs_errtbl[i].stat == (int)status)
2491 return nfs_errtbl[i].errno;
2492 }
2493 dprintk("NFS: Unrecognized nfs status value: %u\n", status);
2494 return nfs_errtbl[i].errno;
2495}
2496
2497
2428#define PROC(proc, argtype, restype, timer) \ 2498#define PROC(proc, argtype, restype, timer) \
2429[NFS3PROC_##proc] = { \ 2499[NFS3PROC_##proc] = { \
2430 .p_proc = NFS3PROC_##proc, \ 2500 .p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021020b3..c6827f93ab57 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -24,6 +24,8 @@ enum nfs4_client_state {
24 NFS4CLNT_RECALL_SLOT, 24 NFS4CLNT_RECALL_SLOT,
25 NFS4CLNT_LEASE_CONFIRM, 25 NFS4CLNT_LEASE_CONFIRM,
26 NFS4CLNT_SERVER_SCOPE_MISMATCH, 26 NFS4CLNT_SERVER_SCOPE_MISMATCH,
27 NFS4CLNT_PURGE_STATE,
28 NFS4CLNT_BIND_CONN_TO_SESSION,
27}; 29};
28 30
29enum nfs4_session_state { 31enum nfs4_session_state {
@@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {
52 const struct nfs4_state_maintenance_ops *state_renewal_ops; 54 const struct nfs4_state_maintenance_ops *state_renewal_ops;
53}; 55};
54 56
55struct nfs_unique_id {
56 struct rb_node rb_node;
57 __u64 id;
58};
59
60#define NFS_SEQID_CONFIRMED 1 57#define NFS_SEQID_CONFIRMED 1
61struct nfs_seqid_counter { 58struct nfs_seqid_counter {
62 ktime_t create_time; 59 ktime_t create_time;
@@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;
206extern const struct inode_operations nfs4_dir_inode_operations; 203extern const struct inode_operations nfs4_dir_inode_operations;
207 204
208/* nfs4namespace.c */ 205/* nfs4namespace.c */
206rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
209struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 207struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
208struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
209 struct nfs_fh *, struct nfs_fattr *);
210 210
211/* nfs4proc.c */ 211/* nfs4proc.c */
212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 212extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 213extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
214extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
215extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
214extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 216extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
217extern int nfs4_destroy_clientid(struct nfs_client *clp);
215extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 218extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 219extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 220extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,
239 struct rpc_task *task); 242 struct rpc_task *task);
240extern void nfs4_destroy_session(struct nfs4_session *session); 243extern void nfs4_destroy_session(struct nfs4_session *session);
241extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 244extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
242extern int nfs4_proc_create_session(struct nfs_client *); 245extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
243extern int nfs4_proc_destroy_session(struct nfs4_session *); 246extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
244extern int nfs4_init_session(struct nfs_server *server); 247extern int nfs4_init_session(struct nfs_server *server);
245extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 248extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
246 struct nfs_fsinfo *fsinfo); 249 struct nfs_fsinfo *fsinfo);
@@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
310#if defined(CONFIG_NFS_V4_1) 313#if defined(CONFIG_NFS_V4_1)
311struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 314struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
312struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 315struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
313extern void nfs4_schedule_session_recovery(struct nfs4_session *); 316extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
314#else 317#else
315static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) 318static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
316{ 319{
317} 320}
318#endif /* CONFIG_NFS_V4_1 */ 321#endif /* CONFIG_NFS_V4_1 */
@@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
334extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 337extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
335extern void nfs41_handle_recall_slot(struct nfs_client *clp); 338extern void nfs41_handle_recall_slot(struct nfs_client *clp);
336extern void nfs41_handle_server_scope(struct nfs_client *, 339extern void nfs41_handle_server_scope(struct nfs_client *,
337 struct server_scope **); 340 struct nfs41_server_scope **);
338extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 341extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
339extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 342extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
340extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, 343extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9ea8a31..e1340293872c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
82 BUG(); 82 BUG();
83} 83}
84 84
85static void filelayout_reset_write(struct nfs_write_data *data)
86{
87 struct nfs_pgio_header *hdr = data->header;
88 struct rpc_task *task = &data->task;
89
90 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
91 dprintk("%s Reset task %5u for i/o through MDS "
92 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
93 data->task.tk_pid,
94 hdr->inode->i_sb->s_id,
95 (long long)NFS_FILEID(hdr->inode),
96 data->args.count,
97 (unsigned long long)data->args.offset);
98
99 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
100 &hdr->pages,
101 hdr->completion_ops);
102 }
103}
104
105static void filelayout_reset_read(struct nfs_read_data *data)
106{
107 struct nfs_pgio_header *hdr = data->header;
108 struct rpc_task *task = &data->task;
109
110 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
111 dprintk("%s Reset task %5u for i/o through MDS "
112 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
113 data->task.tk_pid,
114 hdr->inode->i_sb->s_id,
115 (long long)NFS_FILEID(hdr->inode),
116 data->args.count,
117 (unsigned long long)data->args.offset);
118
119 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
120 &hdr->pages,
121 hdr->completion_ops);
122 }
123}
124
85static int filelayout_async_handle_error(struct rpc_task *task, 125static int filelayout_async_handle_error(struct rpc_task *task,
86 struct nfs4_state *state, 126 struct nfs4_state *state,
87 struct nfs_client *clp, 127 struct nfs_client *clp,
88 int *reset) 128 struct pnfs_layout_segment *lseg)
89{ 129{
90 struct nfs_server *mds_server = NFS_SERVER(state->inode); 130 struct inode *inode = lseg->pls_layout->plh_inode;
131 struct nfs_server *mds_server = NFS_SERVER(inode);
132 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
91 struct nfs_client *mds_client = mds_server->nfs_client; 133 struct nfs_client *mds_client = mds_server->nfs_client;
134 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
92 135
93 if (task->tk_status >= 0) 136 if (task->tk_status >= 0)
94 return 0; 137 return 0;
95 *reset = 0;
96 138
97 switch (task->tk_status) { 139 switch (task->tk_status) {
98 /* MDS state errors */ 140 /* MDS state errors */
99 case -NFS4ERR_DELEG_REVOKED: 141 case -NFS4ERR_DELEG_REVOKED:
100 case -NFS4ERR_ADMIN_REVOKED: 142 case -NFS4ERR_ADMIN_REVOKED:
101 case -NFS4ERR_BAD_STATEID: 143 case -NFS4ERR_BAD_STATEID:
144 if (state == NULL)
145 break;
102 nfs_remove_bad_delegation(state->inode); 146 nfs_remove_bad_delegation(state->inode);
103 case -NFS4ERR_OPENMODE: 147 case -NFS4ERR_OPENMODE:
148 if (state == NULL)
149 break;
104 nfs4_schedule_stateid_recovery(mds_server, state); 150 nfs4_schedule_stateid_recovery(mds_server, state);
105 goto wait_on_recovery; 151 goto wait_on_recovery;
106 case -NFS4ERR_EXPIRED: 152 case -NFS4ERR_EXPIRED:
107 nfs4_schedule_stateid_recovery(mds_server, state); 153 if (state != NULL)
154 nfs4_schedule_stateid_recovery(mds_server, state);
108 nfs4_schedule_lease_recovery(mds_client); 155 nfs4_schedule_lease_recovery(mds_client);
109 goto wait_on_recovery; 156 goto wait_on_recovery;
110 /* DS session errors */ 157 /* DS session errors */
@@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
118 dprintk("%s ERROR %d, Reset session. Exchangeid " 165 dprintk("%s ERROR %d, Reset session. Exchangeid "
119 "flags 0x%x\n", __func__, task->tk_status, 166 "flags 0x%x\n", __func__, task->tk_status,
120 clp->cl_exchange_flags); 167 clp->cl_exchange_flags);
121 nfs4_schedule_session_recovery(clp->cl_session); 168 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
122 break; 169 break;
123 case -NFS4ERR_DELAY: 170 case -NFS4ERR_DELAY:
124 case -NFS4ERR_GRACE: 171 case -NFS4ERR_GRACE:
@@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,
127 break; 174 break;
128 case -NFS4ERR_RETRY_UNCACHED_REP: 175 case -NFS4ERR_RETRY_UNCACHED_REP:
129 break; 176 break;
177 /* Invalidate Layout errors */
178 case -NFS4ERR_PNFS_NO_LAYOUT:
179 case -ESTALE: /* mapped NFS4ERR_STALE */
180 case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
181 case -EISDIR: /* mapped NFS4ERR_ISDIR */
182 case -NFS4ERR_FHEXPIRED:
183 case -NFS4ERR_WRONG_TYPE:
184 dprintk("%s Invalid layout error %d\n", __func__,
185 task->tk_status);
186 /*
187 * Destroy layout so new i/o will get a new layout.
188 * Layout will not be destroyed until all current lseg
189 * references are put. Mark layout as invalid to resend failed
190 * i/o and all i/o waiting on the slot table to the MDS until
191 * layout is destroyed and a new valid layout is obtained.
192 */
193 set_bit(NFS_LAYOUT_INVALID,
194 &NFS_I(inode)->layout->plh_flags);
195 pnfs_destroy_layout(NFS_I(inode));
196 rpc_wake_up(&tbl->slot_tbl_waitq);
197 goto reset;
198 /* RPC connection errors */
199 case -ECONNREFUSED:
200 case -EHOSTDOWN:
201 case -EHOSTUNREACH:
202 case -ENETUNREACH:
203 case -EIO:
204 case -ETIMEDOUT:
205 case -EPIPE:
206 dprintk("%s DS connection error %d\n", __func__,
207 task->tk_status);
208 if (!filelayout_test_devid_invalid(devid))
209 _pnfs_return_layout(inode);
210 filelayout_mark_devid_invalid(devid);
211 rpc_wake_up(&tbl->slot_tbl_waitq);
212 nfs4_ds_disconnect(clp);
213 /* fall through */
130 default: 214 default:
131 dprintk("%s DS error. Retry through MDS %d\n", __func__, 215reset:
216 dprintk("%s Retry through MDS. Error %d\n", __func__,
132 task->tk_status); 217 task->tk_status);
133 *reset = 1; 218 return -NFS4ERR_RESET_TO_MDS;
134 break;
135 } 219 }
136out: 220out:
137 task->tk_status = 0; 221 task->tk_status = 0;
@@ -148,18 +232,17 @@ wait_on_recovery:
148static int filelayout_read_done_cb(struct rpc_task *task, 232static int filelayout_read_done_cb(struct rpc_task *task,
149 struct nfs_read_data *data) 233 struct nfs_read_data *data)
150{ 234{
151 int reset = 0; 235 struct nfs_pgio_header *hdr = data->header;
236 int err;
152 237
153 dprintk("%s DS read\n", __func__); 238 err = filelayout_async_handle_error(task, data->args.context->state,
239 data->ds_clp, hdr->lseg);
154 240
155 if (filelayout_async_handle_error(task, data->args.context->state, 241 switch (err) {
156 data->ds_clp, &reset) == -EAGAIN) { 242 case -NFS4ERR_RESET_TO_MDS:
157 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 243 filelayout_reset_read(data);
158 __func__, data->ds_clp, data->ds_clp->cl_session); 244 return task->tk_status;
159 if (reset) { 245 case -EAGAIN:
160 pnfs_set_lo_fail(data->lseg);
161 nfs4_reset_read(task, data);
162 }
163 rpc_restart_call_prepare(task); 246 rpc_restart_call_prepare(task);
164 return -EAGAIN; 247 return -EAGAIN;
165 } 248 }
@@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
175static void 258static void
176filelayout_set_layoutcommit(struct nfs_write_data *wdata) 259filelayout_set_layoutcommit(struct nfs_write_data *wdata)
177{ 260{
178 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || 261 struct nfs_pgio_header *hdr = wdata->header;
262
263 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
179 wdata->res.verf->committed == NFS_FILE_SYNC) 264 wdata->res.verf->committed == NFS_FILE_SYNC)
180 return; 265 return;
181 266
182 pnfs_set_layoutcommit(wdata); 267 pnfs_set_layoutcommit(wdata);
183 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 268 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
184 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); 269 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
185} 270}
186 271
187/* 272/*
@@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
191 */ 276 */
192static void filelayout_read_prepare(struct rpc_task *task, void *data) 277static void filelayout_read_prepare(struct rpc_task *task, void *data)
193{ 278{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 279 struct nfs_read_data *rdata = data;
195 280
281 if (filelayout_reset_to_mds(rdata->header->lseg)) {
282 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
283 filelayout_reset_read(rdata);
284 rpc_exit(task, 0);
285 return;
286 }
196 rdata->read_done_cb = filelayout_read_done_cb; 287 rdata->read_done_cb = filelayout_read_done_cb;
197 288
198 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 289 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
@@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
205 296
206static void filelayout_read_call_done(struct rpc_task *task, void *data) 297static void filelayout_read_call_done(struct rpc_task *task, void *data)
207{ 298{
208 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 299 struct nfs_read_data *rdata = data;
209 300
210 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 301 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
211 302
303 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
304 task->tk_status == 0)
305 return;
306
212 /* Note this may cause RPC to be resent */ 307 /* Note this may cause RPC to be resent */
213 rdata->mds_ops->rpc_call_done(task, data); 308 rdata->header->mds_ops->rpc_call_done(task, data);
214} 309}
215 310
216static void filelayout_read_count_stats(struct rpc_task *task, void *data) 311static void filelayout_read_count_stats(struct rpc_task *task, void *data)
217{ 312{
218 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 313 struct nfs_read_data *rdata = data;
219 314
220 rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); 315 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
221} 316}
222 317
223static void filelayout_read_release(void *data) 318static void filelayout_read_release(void *data)
224{ 319{
225 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 320 struct nfs_read_data *rdata = data;
226 321
227 put_lseg(rdata->lseg); 322 nfs_put_client(rdata->ds_clp);
228 rdata->mds_ops->rpc_release(data); 323 rdata->header->mds_ops->rpc_release(data);
229} 324}
230 325
231static int filelayout_write_done_cb(struct rpc_task *task, 326static int filelayout_write_done_cb(struct rpc_task *task,
232 struct nfs_write_data *data) 327 struct nfs_write_data *data)
233{ 328{
234 int reset = 0; 329 struct nfs_pgio_header *hdr = data->header;
235 330 int err;
236 if (filelayout_async_handle_error(task, data->args.context->state, 331
237 data->ds_clp, &reset) == -EAGAIN) { 332 err = filelayout_async_handle_error(task, data->args.context->state,
238 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 333 data->ds_clp, hdr->lseg);
239 __func__, data->ds_clp, data->ds_clp->cl_session); 334
240 if (reset) { 335 switch (err) {
241 pnfs_set_lo_fail(data->lseg); 336 case -NFS4ERR_RESET_TO_MDS:
242 nfs4_reset_write(task, data); 337 filelayout_reset_write(data);
243 } 338 return task->tk_status;
339 case -EAGAIN:
244 rpc_restart_call_prepare(task); 340 rpc_restart_call_prepare(task);
245 return -EAGAIN; 341 return -EAGAIN;
246 } 342 }
@@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
250} 346}
251 347
252/* Fake up some data that will cause nfs_commit_release to retry the writes. */ 348/* Fake up some data that will cause nfs_commit_release to retry the writes. */
253static void prepare_to_resend_writes(struct nfs_write_data *data) 349static void prepare_to_resend_writes(struct nfs_commit_data *data)
254{ 350{
255 struct nfs_page *first = nfs_list_entry(data->pages.next); 351 struct nfs_page *first = nfs_list_entry(data->pages.next);
256 352
@@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
261} 357}
262 358
263static int filelayout_commit_done_cb(struct rpc_task *task, 359static int filelayout_commit_done_cb(struct rpc_task *task,
264 struct nfs_write_data *data) 360 struct nfs_commit_data *data)
265{ 361{
266 int reset = 0; 362 int err;
267 363
268 if (filelayout_async_handle_error(task, data->args.context->state, 364 err = filelayout_async_handle_error(task, NULL, data->ds_clp,
269 data->ds_clp, &reset) == -EAGAIN) { 365 data->lseg);
270 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 366
271 __func__, data->ds_clp, data->ds_clp->cl_session); 367 switch (err) {
272 if (reset) { 368 case -NFS4ERR_RESET_TO_MDS:
273 prepare_to_resend_writes(data); 369 prepare_to_resend_writes(data);
274 pnfs_set_lo_fail(data->lseg); 370 return -EAGAIN;
275 } else 371 case -EAGAIN:
276 rpc_restart_call_prepare(task); 372 rpc_restart_call_prepare(task);
277 return -EAGAIN; 373 return -EAGAIN;
278 } 374 }
279 375
@@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
282 378
283static void filelayout_write_prepare(struct rpc_task *task, void *data) 379static void filelayout_write_prepare(struct rpc_task *task, void *data)
284{ 380{
285 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 381 struct nfs_write_data *wdata = data;
286 382
383 if (filelayout_reset_to_mds(wdata->header->lseg)) {
384 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
385 filelayout_reset_write(wdata);
386 rpc_exit(task, 0);
387 return;
388 }
287 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 389 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
288 &wdata->args.seq_args, &wdata->res.seq_res, 390 &wdata->args.seq_args, &wdata->res.seq_res,
289 task)) 391 task))
@@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
294 396
295static void filelayout_write_call_done(struct rpc_task *task, void *data) 397static void filelayout_write_call_done(struct rpc_task *task, void *data)
296{ 398{
297 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 399 struct nfs_write_data *wdata = data;
400
401 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
402 task->tk_status == 0)
403 return;
298 404
299 /* Note this may cause RPC to be resent */ 405 /* Note this may cause RPC to be resent */
300 wdata->mds_ops->rpc_call_done(task, data); 406 wdata->header->mds_ops->rpc_call_done(task, data);
301} 407}
302 408
303static void filelayout_write_count_stats(struct rpc_task *task, void *data) 409static void filelayout_write_count_stats(struct rpc_task *task, void *data)
304{ 410{
305 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 411 struct nfs_write_data *wdata = data;
306 412
307 rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); 413 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
308} 414}
309 415
310static void filelayout_write_release(void *data) 416static void filelayout_write_release(void *data)
311{ 417{
312 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 418 struct nfs_write_data *wdata = data;
419
420 nfs_put_client(wdata->ds_clp);
421 wdata->header->mds_ops->rpc_release(data);
422}
423
424static void filelayout_commit_prepare(struct rpc_task *task, void *data)
425{
426 struct nfs_commit_data *wdata = data;
313 427
314 put_lseg(wdata->lseg); 428 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
315 wdata->mds_ops->rpc_release(data); 429 &wdata->args.seq_args, &wdata->res.seq_res,
430 task))
431 return;
432
433 rpc_call_start(task);
434}
435
436static void filelayout_write_commit_done(struct rpc_task *task, void *data)
437{
438 struct nfs_commit_data *wdata = data;
439
440 /* Note this may cause RPC to be resent */
441 wdata->mds_ops->rpc_call_done(task, data);
442}
443
444static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
445{
446 struct nfs_commit_data *cdata = data;
447
448 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
316} 449}
317 450
318static void filelayout_commit_release(void *data) 451static void filelayout_commit_release(void *calldata)
319{ 452{
320 struct nfs_write_data *wdata = (struct nfs_write_data *)data; 453 struct nfs_commit_data *data = calldata;
321 454
322 nfs_commit_release_pages(wdata); 455 data->completion_ops->completion(data);
323 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) 456 put_lseg(data->lseg);
324 nfs_commit_clear_lock(NFS_I(wdata->inode)); 457 nfs_put_client(data->ds_clp);
325 put_lseg(wdata->lseg); 458 nfs_commitdata_release(data);
326 nfs_commitdata_release(wdata);
327} 459}
328 460
329static const struct rpc_call_ops filelayout_read_call_ops = { 461static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
341}; 473};
342 474
343static const struct rpc_call_ops filelayout_commit_call_ops = { 475static const struct rpc_call_ops filelayout_commit_call_ops = {
344 .rpc_call_prepare = filelayout_write_prepare, 476 .rpc_call_prepare = filelayout_commit_prepare,
345 .rpc_call_done = filelayout_write_call_done, 477 .rpc_call_done = filelayout_write_commit_done,
346 .rpc_count_stats = filelayout_write_count_stats, 478 .rpc_count_stats = filelayout_commit_count_stats,
347 .rpc_release = filelayout_commit_release, 479 .rpc_release = filelayout_commit_release,
348}; 480};
349 481
350static enum pnfs_try_status 482static enum pnfs_try_status
351filelayout_read_pagelist(struct nfs_read_data *data) 483filelayout_read_pagelist(struct nfs_read_data *data)
352{ 484{
353 struct pnfs_layout_segment *lseg = data->lseg; 485 struct nfs_pgio_header *hdr = data->header;
486 struct pnfs_layout_segment *lseg = hdr->lseg;
354 struct nfs4_pnfs_ds *ds; 487 struct nfs4_pnfs_ds *ds;
355 loff_t offset = data->args.offset; 488 loff_t offset = data->args.offset;
356 u32 j, idx; 489 u32 j, idx;
@@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
358 int status; 491 int status;
359 492
360 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 493 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
361 __func__, data->inode->i_ino, 494 __func__, hdr->inode->i_ino,
362 data->args.pgbase, (size_t)data->args.count, offset); 495 data->args.pgbase, (size_t)data->args.count, offset);
363 496
364 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
365 return PNFS_NOT_ATTEMPTED;
366
367 /* Retrieve the correct rpc_client for the byte range */ 497 /* Retrieve the correct rpc_client for the byte range */
368 j = nfs4_fl_calc_j_index(lseg, offset); 498 j = nfs4_fl_calc_j_index(lseg, offset);
369 idx = nfs4_fl_calc_ds_index(lseg, j); 499 idx = nfs4_fl_calc_ds_index(lseg, j);
370 ds = nfs4_fl_prepare_ds(lseg, idx); 500 ds = nfs4_fl_prepare_ds(lseg, idx);
371 if (!ds) { 501 if (!ds)
372 /* Either layout fh index faulty, or ds connect failed */
373 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
374 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
375 return PNFS_NOT_ATTEMPTED; 502 return PNFS_NOT_ATTEMPTED;
376 } 503 dprintk("%s USE DS: %s cl_count %d\n", __func__,
377 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 504 ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
378 505
379 /* No multipath support. Use first DS */ 506 /* No multipath support. Use first DS */
507 atomic_inc(&ds->ds_clp->cl_count);
380 data->ds_clp = ds->ds_clp; 508 data->ds_clp = ds->ds_clp;
381 fh = nfs4_fl_select_ds_fh(lseg, j); 509 fh = nfs4_fl_select_ds_fh(lseg, j);
382 if (fh) 510 if (fh)
@@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
386 data->mds_offset = offset; 514 data->mds_offset = offset;
387 515
388 /* Perform an asynchronous read to ds */ 516 /* Perform an asynchronous read to ds */
389 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, 517 status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
390 &filelayout_read_call_ops); 518 &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
391 BUG_ON(status != 0); 519 BUG_ON(status != 0);
392 return PNFS_ATTEMPTED; 520 return PNFS_ATTEMPTED;
393} 521}
@@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
396static enum pnfs_try_status 524static enum pnfs_try_status
397filelayout_write_pagelist(struct nfs_write_data *data, int sync) 525filelayout_write_pagelist(struct nfs_write_data *data, int sync)
398{ 526{
399 struct pnfs_layout_segment *lseg = data->lseg; 527 struct nfs_pgio_header *hdr = data->header;
528 struct pnfs_layout_segment *lseg = hdr->lseg;
400 struct nfs4_pnfs_ds *ds; 529 struct nfs4_pnfs_ds *ds;
401 loff_t offset = data->args.offset; 530 loff_t offset = data->args.offset;
402 u32 j, idx; 531 u32 j, idx;
403 struct nfs_fh *fh; 532 struct nfs_fh *fh;
404 int status; 533 int status;
405 534
406 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
407 return PNFS_NOT_ATTEMPTED;
408
409 /* Retrieve the correct rpc_client for the byte range */ 535 /* Retrieve the correct rpc_client for the byte range */
410 j = nfs4_fl_calc_j_index(lseg, offset); 536 j = nfs4_fl_calc_j_index(lseg, offset);
411 idx = nfs4_fl_calc_ds_index(lseg, j); 537 idx = nfs4_fl_calc_ds_index(lseg, j);
412 ds = nfs4_fl_prepare_ds(lseg, idx); 538 ds = nfs4_fl_prepare_ds(lseg, idx);
413 if (!ds) { 539 if (!ds)
414 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
415 __func__);
416 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
417 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
418 return PNFS_NOT_ATTEMPTED; 540 return PNFS_NOT_ATTEMPTED;
419 } 541 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
420 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 542 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
421 data->inode->i_ino, sync, (size_t) data->args.count, offset, 543 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
422 ds->ds_remotestr);
423 544
424 data->write_done_cb = filelayout_write_done_cb; 545 data->write_done_cb = filelayout_write_done_cb;
546 atomic_inc(&ds->ds_clp->cl_count);
425 data->ds_clp = ds->ds_clp; 547 data->ds_clp = ds->ds_clp;
426 fh = nfs4_fl_select_ds_fh(lseg, j); 548 fh = nfs4_fl_select_ds_fh(lseg, j);
427 if (fh) 549 if (fh)
@@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
433 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 555 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
434 556
435 /* Perform an asynchronous write */ 557 /* Perform an asynchronous write */
436 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, 558 status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
437 &filelayout_write_call_ops, sync); 559 &filelayout_write_call_ops, sync,
560 RPC_TASK_SOFTCONN);
438 BUG_ON(status != 0); 561 BUG_ON(status != 0);
439 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
440} 563}
@@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
650 773
651 dprintk("--> %s\n", __func__); 774 dprintk("--> %s\n", __func__);
652 nfs4_fl_put_deviceid(fl->dsaddr); 775 nfs4_fl_put_deviceid(fl->dsaddr);
653 kfree(fl->commit_buckets); 776 /* This assumes a single RW lseg */
777 if (lseg->pls_range.iomode == IOMODE_RW) {
778 struct nfs4_filelayout *flo;
779
780 flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
781 flo->commit_info.nbuckets = 0;
782 kfree(flo->commit_info.buckets);
783 flo->commit_info.buckets = NULL;
784 }
654 _filelayout_free_lseg(fl); 785 _filelayout_free_lseg(fl);
655} 786}
656 787
788static int
789filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
790 struct nfs_commit_info *cinfo,
791 gfp_t gfp_flags)
792{
793 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
794 struct pnfs_commit_bucket *buckets;
795 int size;
796
797 if (fl->commit_through_mds)
798 return 0;
799 if (cinfo->ds->nbuckets != 0) {
800 /* This assumes there is only one IOMODE_RW lseg. What
801 * we really want to do is have a layout_hdr level
802 * dictionary of <multipath_list4, fh> keys, each
803 * associated with a struct list_head, populated by calls
804 * to filelayout_write_pagelist().
805 * */
806 return 0;
807 }
808
809 size = (fl->stripe_type == STRIPE_SPARSE) ?
810 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
811
812 buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
813 gfp_flags);
814 if (!buckets)
815 return -ENOMEM;
816 else {
817 int i;
818
819 spin_lock(cinfo->lock);
820 if (cinfo->ds->nbuckets != 0)
821 kfree(buckets);
822 else {
823 cinfo->ds->buckets = buckets;
824 cinfo->ds->nbuckets = size;
825 for (i = 0; i < size; i++) {
826 INIT_LIST_HEAD(&buckets[i].written);
827 INIT_LIST_HEAD(&buckets[i].committing);
828 }
829 }
830 spin_unlock(cinfo->lock);
831 return 0;
832 }
833}
834
657static struct pnfs_layout_segment * 835static struct pnfs_layout_segment *
658filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 836filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 struct nfs4_layoutget_res *lgr, 837 struct nfs4_layoutget_res *lgr,
@@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
673 _filelayout_free_lseg(fl); 851 _filelayout_free_lseg(fl);
674 return NULL; 852 return NULL;
675 } 853 }
676
677 /* This assumes there is only one IOMODE_RW lseg. What
678 * we really want to do is have a layout_hdr level
679 * dictionary of <multipath_list4, fh> keys, each
680 * associated with a struct list_head, populated by calls
681 * to filelayout_write_pagelist().
682 * */
683 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
684 int i;
685 int size = (fl->stripe_type == STRIPE_SPARSE) ?
686 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
687
688 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
689 if (!fl->commit_buckets) {
690 filelayout_free_lseg(&fl->generic_hdr);
691 return NULL;
692 }
693 fl->number_of_buckets = size;
694 for (i = 0; i < size; i++) {
695 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
696 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
697 }
698 }
699 return &fl->generic_hdr; 854 return &fl->generic_hdr;
700} 855}
701 856
@@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
716 !nfs_generic_pg_test(pgio, prev, req)) 871 !nfs_generic_pg_test(pgio, prev, req))
717 return false; 872 return false;
718 873
719 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 874 p_stripe = (u64)req_offset(prev);
720 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 875 r_stripe = (u64)req_offset(req);
721 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 876 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
722 877
723 do_div(p_stripe, stripe_unit); 878 do_div(p_stripe, stripe_unit);
@@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
732{ 887{
733 BUG_ON(pgio->pg_lseg != NULL); 888 BUG_ON(pgio->pg_lseg != NULL);
734 889
890 if (req->wb_offset != req->wb_pgbase) {
891 /*
892 * Handling unaligned pages is difficult, because have to
893 * somehow split a req in two in certain cases in the
894 * pg.test code. Avoid this by just not using pnfs
895 * in this case.
896 */
897 nfs_pageio_reset_read_mds(pgio);
898 return;
899 }
735 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
736 req->wb_context, 901 req->wb_context,
737 0, 902 0,
@@ -747,8 +912,13 @@ static void
747filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 912filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
748 struct nfs_page *req) 913 struct nfs_page *req)
749{ 914{
915 struct nfs_commit_info cinfo;
916 int status;
917
750 BUG_ON(pgio->pg_lseg != NULL); 918 BUG_ON(pgio->pg_lseg != NULL);
751 919
920 if (req->wb_offset != req->wb_pgbase)
921 goto out_mds;
752 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
753 req->wb_context, 923 req->wb_context,
754 0, 924 0,
@@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
757 GFP_NOFS); 927 GFP_NOFS);
758 /* If no lseg, fall back to write through mds */ 928 /* If no lseg, fall back to write through mds */
759 if (pgio->pg_lseg == NULL) 929 if (pgio->pg_lseg == NULL)
760 nfs_pageio_reset_write_mds(pgio); 930 goto out_mds;
931 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
932 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
933 if (status < 0) {
934 put_lseg(pgio->pg_lseg);
935 pgio->pg_lseg = NULL;
936 goto out_mds;
937 }
938 return;
939out_mds:
940 nfs_pageio_reset_write_mds(pgio);
761} 941}
762 942
763static const struct nfs_pageio_ops filelayout_pg_read_ops = { 943static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
784 * If this will make the bucket empty, it will need to put the lseg reference. 964 * If this will make the bucket empty, it will need to put the lseg reference.
785 */ 965 */
786static void 966static void
787filelayout_clear_request_commit(struct nfs_page *req) 967filelayout_clear_request_commit(struct nfs_page *req,
968 struct nfs_commit_info *cinfo)
788{ 969{
789 struct pnfs_layout_segment *freeme = NULL; 970 struct pnfs_layout_segment *freeme = NULL;
790 struct inode *inode = req->wb_context->dentry->d_inode;
791 971
792 spin_lock(&inode->i_lock); 972 spin_lock(cinfo->lock);
793 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 973 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
794 goto out; 974 goto out;
975 cinfo->ds->nwritten--;
795 if (list_is_singular(&req->wb_list)) { 976 if (list_is_singular(&req->wb_list)) {
796 struct pnfs_layout_segment *lseg; 977 struct pnfs_commit_bucket *bucket;
797 978
798 /* From here we can find the bucket, but for the moment, 979 bucket = list_first_entry(&req->wb_list,
799 * since there is only one relevant lseg... 980 struct pnfs_commit_bucket,
800 */ 981 written);
801 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 982 freeme = bucket->wlseg;
802 if (lseg->pls_range.iomode == IOMODE_RW) { 983 bucket->wlseg = NULL;
803 freeme = lseg;
804 break;
805 }
806 }
807 } 984 }
808out: 985out:
809 nfs_request_remove_commit_list(req); 986 nfs_request_remove_commit_list(req, cinfo);
810 spin_unlock(&inode->i_lock); 987 spin_unlock(cinfo->lock);
811 put_lseg(freeme); 988 put_lseg(freeme);
812} 989}
813 990
814static struct list_head * 991static struct list_head *
815filelayout_choose_commit_list(struct nfs_page *req, 992filelayout_choose_commit_list(struct nfs_page *req,
816 struct pnfs_layout_segment *lseg) 993 struct pnfs_layout_segment *lseg,
994 struct nfs_commit_info *cinfo)
817{ 995{
818 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 996 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
819 u32 i, j; 997 u32 i, j;
820 struct list_head *list; 998 struct list_head *list;
999 struct pnfs_commit_bucket *buckets;
821 1000
822 if (fl->commit_through_mds) 1001 if (fl->commit_through_mds)
823 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; 1002 return &cinfo->mds->list;
824 1003
825 /* Note that we are calling nfs4_fl_calc_j_index on each page 1004 /* Note that we are calling nfs4_fl_calc_j_index on each page
826 * that ends up being committed to a data server. An attractive 1005 * that ends up being committed to a data server. An attractive
@@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,
828 * to store the value calculated in filelayout_write_pagelist 1007 * to store the value calculated in filelayout_write_pagelist
829 * and just use that here. 1008 * and just use that here.
830 */ 1009 */
831 j = nfs4_fl_calc_j_index(lseg, 1010 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
832 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
833 i = select_bucket_index(fl, j); 1011 i = select_bucket_index(fl, j);
834 list = &fl->commit_buckets[i].written; 1012 buckets = cinfo->ds->buckets;
1013 list = &buckets[i].written;
835 if (list_empty(list)) { 1014 if (list_empty(list)) {
836 /* Non-empty buckets hold a reference on the lseg. That ref 1015 /* Non-empty buckets hold a reference on the lseg. That ref
837 * is normally transferred to the COMMIT call and released 1016 * is normally transferred to the COMMIT call and released
838 * there. It could also be released if the last req is pulled 1017 * there. It could also be released if the last req is pulled
839 * off due to a rewrite, in which case it will be done in 1018 * off due to a rewrite, in which case it will be done in
840 * filelayout_remove_commit_req 1019 * filelayout_clear_request_commit
841 */ 1020 */
842 get_lseg(lseg); 1021 buckets[i].wlseg = get_lseg(lseg);
843 } 1022 }
844 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1023 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1024 cinfo->ds->nwritten++;
845 return list; 1025 return list;
846} 1026}
847 1027
848static void 1028static void
849filelayout_mark_request_commit(struct nfs_page *req, 1029filelayout_mark_request_commit(struct nfs_page *req,
850 struct pnfs_layout_segment *lseg) 1030 struct pnfs_layout_segment *lseg,
1031 struct nfs_commit_info *cinfo)
851{ 1032{
852 struct list_head *list; 1033 struct list_head *list;
853 1034
854 list = filelayout_choose_commit_list(req, lseg); 1035 list = filelayout_choose_commit_list(req, lseg, cinfo);
855 nfs_request_add_commit_list(req, list); 1036 nfs_request_add_commit_list(req, list, cinfo);
856} 1037}
857 1038
858static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1039static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
880 return flseg->fh_array[i]; 1061 return flseg->fh_array[i];
881} 1062}
882 1063
883static int filelayout_initiate_commit(struct nfs_write_data *data, int how) 1064static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
884{ 1065{
885 struct pnfs_layout_segment *lseg = data->lseg; 1066 struct pnfs_layout_segment *lseg = data->lseg;
886 struct nfs4_pnfs_ds *ds; 1067 struct nfs4_pnfs_ds *ds;
@@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
890 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1071 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
891 ds = nfs4_fl_prepare_ds(lseg, idx); 1072 ds = nfs4_fl_prepare_ds(lseg, idx);
892 if (!ds) { 1073 if (!ds) {
893 printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
894 __func__);
895 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
896 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
897 prepare_to_resend_writes(data); 1074 prepare_to_resend_writes(data);
898 filelayout_commit_release(data); 1075 filelayout_commit_release(data);
899 return -EAGAIN; 1076 return -EAGAIN;
900 } 1077 }
901 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); 1078 dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
902 data->write_done_cb = filelayout_commit_done_cb; 1079 data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
1080 data->commit_done_cb = filelayout_commit_done_cb;
1081 atomic_inc(&ds->ds_clp->cl_count);
903 data->ds_clp = ds->ds_clp; 1082 data->ds_clp = ds->ds_clp;
904 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1083 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
905 if (fh) 1084 if (fh)
906 data->args.fh = fh; 1085 data->args.fh = fh;
907 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, 1086 return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
908 &filelayout_commit_call_ops, how); 1087 &filelayout_commit_call_ops, how,
909} 1088 RPC_TASK_SOFTCONN);
910
911/*
912 * This is only useful while we are using whole file layouts.
913 */
914static struct pnfs_layout_segment *
915find_only_write_lseg_locked(struct inode *inode)
916{
917 struct pnfs_layout_segment *lseg;
918
919 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
920 if (lseg->pls_range.iomode == IOMODE_RW)
921 return lseg;
922 return NULL;
923}
924
925static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
926{
927 struct pnfs_layout_segment *rv;
928
929 spin_lock(&inode->i_lock);
930 rv = find_only_write_lseg_locked(inode);
931 if (rv)
932 get_lseg(rv);
933 spin_unlock(&inode->i_lock);
934 return rv;
935} 1089}
936 1090
937static int 1091static int
938filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, 1092transfer_commit_list(struct list_head *src, struct list_head *dst,
939 spinlock_t *lock) 1093 struct nfs_commit_info *cinfo, int max)
940{ 1094{
941 struct list_head *src = &bucket->written;
942 struct list_head *dst = &bucket->committing;
943 struct nfs_page *req, *tmp; 1095 struct nfs_page *req, *tmp;
944 int ret = 0; 1096 int ret = 0;
945 1097
946 list_for_each_entry_safe(req, tmp, src, wb_list) { 1098 list_for_each_entry_safe(req, tmp, src, wb_list) {
947 if (!nfs_lock_request(req)) 1099 if (!nfs_lock_request(req))
948 continue; 1100 continue;
949 if (cond_resched_lock(lock)) 1101 kref_get(&req->wb_kref);
1102 if (cond_resched_lock(cinfo->lock))
950 list_safe_reset_next(req, tmp, wb_list); 1103 list_safe_reset_next(req, tmp, wb_list);
951 nfs_request_remove_commit_list(req); 1104 nfs_request_remove_commit_list(req, cinfo);
952 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1105 clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
953 nfs_list_add_request(req, dst); 1106 nfs_list_add_request(req, dst);
954 ret++; 1107 ret++;
955 if (ret == max) 1108 if ((ret == max) && !cinfo->dreq)
956 break; 1109 break;
957 } 1110 }
958 return ret; 1111 return ret;
959} 1112}
960 1113
1114static int
1115filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1116 struct nfs_commit_info *cinfo,
1117 int max)
1118{
1119 struct list_head *src = &bucket->written;
1120 struct list_head *dst = &bucket->committing;
1121 int ret;
1122
1123 ret = transfer_commit_list(src, dst, cinfo, max);
1124 if (ret) {
1125 cinfo->ds->nwritten -= ret;
1126 cinfo->ds->ncommitting += ret;
1127 bucket->clseg = bucket->wlseg;
1128 if (list_empty(src))
1129 bucket->wlseg = NULL;
1130 else
1131 get_lseg(bucket->clseg);
1132 }
1133 return ret;
1134}
1135
961/* Move reqs from written to committing lists, returning count of number moved. 1136/* Move reqs from written to committing lists, returning count of number moved.
962 * Note called with i_lock held. 1137 * Note called with cinfo->lock held.
963 */ 1138 */
964static int filelayout_scan_commit_lists(struct inode *inode, int max, 1139static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
965 spinlock_t *lock) 1140 int max)
966{ 1141{
967 struct pnfs_layout_segment *lseg;
968 struct nfs4_filelayout_segment *fl;
969 int i, rv = 0, cnt; 1142 int i, rv = 0, cnt;
970 1143
971 lseg = find_only_write_lseg_locked(inode); 1144 for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
972 if (!lseg) 1145 cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
973 goto out_done; 1146 cinfo, max);
974 fl = FILELAYOUT_LSEG(lseg);
975 if (fl->commit_through_mds)
976 goto out_done;
977 for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
978 cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
979 max, lock);
980 max -= cnt; 1147 max -= cnt;
981 rv += cnt; 1148 rv += cnt;
982 } 1149 }
983out_done:
984 return rv; 1150 return rv;
985} 1151}
986 1152
1153/* Pull everything off the committing lists and dump into @dst */
1154static void filelayout_recover_commit_reqs(struct list_head *dst,
1155 struct nfs_commit_info *cinfo)
1156{
1157 struct pnfs_commit_bucket *b;
1158 int i;
1159
1160 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1161 * only called on single thread per dreq.
1162 * Can't take the lock because need to do put_lseg
1163 */
1164 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1165 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1166 BUG_ON(!list_empty(&b->written));
1167 put_lseg(b->wlseg);
1168 b->wlseg = NULL;
1169 }
1170 }
1171 cinfo->ds->nwritten = 0;
1172}
1173
987static unsigned int 1174static unsigned int
988alloc_ds_commits(struct inode *inode, struct list_head *list) 1175alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
989{ 1176{
990 struct pnfs_layout_segment *lseg; 1177 struct pnfs_ds_commit_info *fl_cinfo;
991 struct nfs4_filelayout_segment *fl; 1178 struct pnfs_commit_bucket *bucket;
992 struct nfs_write_data *data; 1179 struct nfs_commit_data *data;
993 int i, j; 1180 int i, j;
994 unsigned int nreq = 0; 1181 unsigned int nreq = 0;
995 1182
996 /* Won't need this when non-whole file layout segments are supported 1183 fl_cinfo = cinfo->ds;
997 * instead we will use a pnfs_layout_hdr structure */ 1184 bucket = fl_cinfo->buckets;
998 lseg = find_only_write_lseg(inode); 1185 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
999 if (!lseg) 1186 if (list_empty(&bucket->committing))
1000 return 0;
1001 fl = FILELAYOUT_LSEG(lseg);
1002 for (i = 0; i < fl->number_of_buckets; i++) {
1003 if (list_empty(&fl->commit_buckets[i].committing))
1004 continue; 1187 continue;
1005 data = nfs_commitdata_alloc(); 1188 data = nfs_commitdata_alloc();
1006 if (!data) 1189 if (!data)
1007 break; 1190 break;
1008 data->ds_commit_index = i; 1191 data->ds_commit_index = i;
1009 data->lseg = lseg; 1192 data->lseg = bucket->clseg;
1193 bucket->clseg = NULL;
1010 list_add(&data->pages, list); 1194 list_add(&data->pages, list);
1011 nreq++; 1195 nreq++;
1012 } 1196 }
1013 1197
1014 /* Clean up on error */ 1198 /* Clean up on error */
1015 for (j = i; j < fl->number_of_buckets; j++) { 1199 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1016 if (list_empty(&fl->commit_buckets[i].committing)) 1200 if (list_empty(&bucket->committing))
1017 continue; 1201 continue;
1018 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); 1202 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1019 put_lseg(lseg); /* associated with emptying bucket */ 1203 put_lseg(bucket->clseg);
1204 bucket->clseg = NULL;
1020 } 1205 }
1021 put_lseg(lseg);
1022 /* Caller will clean up entries put on list */ 1206 /* Caller will clean up entries put on list */
1023 return nreq; 1207 return nreq;
1024} 1208}
@@ -1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
1026/* This follows nfs_commit_list pretty closely */ 1210/* This follows nfs_commit_list pretty closely */
1027static int 1211static int
1028filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1212filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1029 int how) 1213 int how, struct nfs_commit_info *cinfo)
1030{ 1214{
1031 struct nfs_write_data *data, *tmp; 1215 struct nfs_commit_data *data, *tmp;
1032 LIST_HEAD(list); 1216 LIST_HEAD(list);
1033 unsigned int nreq = 0; 1217 unsigned int nreq = 0;
1034 1218
@@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1039 list_add(&data->pages, &list); 1223 list_add(&data->pages, &list);
1040 nreq++; 1224 nreq++;
1041 } else 1225 } else
1042 nfs_retry_commit(mds_pages, NULL); 1226 nfs_retry_commit(mds_pages, NULL, cinfo);
1043 } 1227 }
1044 1228
1045 nreq += alloc_ds_commits(inode, &list); 1229 nreq += alloc_ds_commits(cinfo, &list);
1046 1230
1047 if (nreq == 0) { 1231 if (nreq == 0) {
1048 nfs_commit_clear_lock(NFS_I(inode)); 1232 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1049 goto out; 1233 goto out;
1050 } 1234 }
1051 1235
1052 atomic_add(nreq, &NFS_I(inode)->commits_outstanding); 1236 atomic_add(nreq, &cinfo->mds->rpcs_out);
1053 1237
1054 list_for_each_entry_safe(data, tmp, &list, pages) { 1238 list_for_each_entry_safe(data, tmp, &list, pages) {
1055 list_del_init(&data->pages); 1239 list_del_init(&data->pages);
1056 if (!data->lseg) { 1240 if (!data->lseg) {
1057 nfs_init_commit(data, mds_pages, NULL); 1241 nfs_init_commit(data, mds_pages, NULL, cinfo);
1058 nfs_initiate_commit(data, NFS_CLIENT(inode), 1242 nfs_initiate_commit(NFS_CLIENT(inode), data,
1059 data->mds_ops, how); 1243 data->mds_ops, how, 0);
1060 } else { 1244 } else {
1061 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); 1245 struct pnfs_commit_bucket *buckets;
1246
1247 buckets = cinfo->ds->buckets;
1248 nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
1062 filelayout_initiate_commit(data, how); 1249 filelayout_initiate_commit(data, how);
1063 } 1250 }
1064 } 1251 }
1065out: 1252out:
1253 cinfo->ds->ncommitting = 0;
1066 return PNFS_ATTEMPTED; 1254 return PNFS_ATTEMPTED;
1067} 1255}
1068 1256
@@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
1072 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1260 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
1073} 1261}
1074 1262
1263static struct pnfs_layout_hdr *
1264filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
1265{
1266 struct nfs4_filelayout *flo;
1267
1268 flo = kzalloc(sizeof(*flo), gfp_flags);
1269 return &flo->generic_hdr;
1270}
1271
1272static void
1273filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
1274{
1275 kfree(FILELAYOUT_FROM_HDR(lo));
1276}
1277
1278static struct pnfs_ds_commit_info *
1279filelayout_get_ds_info(struct inode *inode)
1280{
1281 struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1282
1283 if (layout == NULL)
1284 return NULL;
1285 else
1286 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
1287}
1288
1075static struct pnfs_layoutdriver_type filelayout_type = { 1289static struct pnfs_layoutdriver_type filelayout_type = {
1076 .id = LAYOUT_NFSV4_1_FILES, 1290 .id = LAYOUT_NFSV4_1_FILES,
1077 .name = "LAYOUT_NFSV4_1_FILES", 1291 .name = "LAYOUT_NFSV4_1_FILES",
1078 .owner = THIS_MODULE, 1292 .owner = THIS_MODULE,
1293 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1294 .free_layout_hdr = filelayout_free_layout_hdr,
1079 .alloc_lseg = filelayout_alloc_lseg, 1295 .alloc_lseg = filelayout_alloc_lseg,
1080 .free_lseg = filelayout_free_lseg, 1296 .free_lseg = filelayout_free_lseg,
1081 .pg_read_ops = &filelayout_pg_read_ops, 1297 .pg_read_ops = &filelayout_pg_read_ops,
1082 .pg_write_ops = &filelayout_pg_write_ops, 1298 .pg_write_ops = &filelayout_pg_write_ops,
1299 .get_ds_info = &filelayout_get_ds_info,
1083 .mark_request_commit = filelayout_mark_request_commit, 1300 .mark_request_commit = filelayout_mark_request_commit,
1084 .clear_request_commit = filelayout_clear_request_commit, 1301 .clear_request_commit = filelayout_clear_request_commit,
1085 .scan_commit_lists = filelayout_scan_commit_lists, 1302 .scan_commit_lists = filelayout_scan_commit_lists,
1303 .recover_commit_reqs = filelayout_recover_commit_reqs,
1086 .commit_pagelist = filelayout_commit_pagelist, 1304 .commit_pagelist = filelayout_commit_pagelist,
1087 .read_pagelist = filelayout_read_pagelist, 1305 .read_pagelist = filelayout_read_pagelist,
1088 .write_pagelist = filelayout_write_pagelist, 1306 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb1f5e3..43fe802dd678 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Default data server connection timeout and retrans vaules.
37 * Set by module paramters dataserver_timeo and dataserver_retrans.
38 */
39#define NFS4_DEF_DS_TIMEO 60
40#define NFS4_DEF_DS_RETRANS 5
41
42/*
36 * Field testing shows we need to support up to 4096 stripe indices. 43 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 44 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 45 * reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096 48#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ 49#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43 50
51/* error codes for internal use */
52#define NFS4ERR_RESET_TO_MDS 12001
53
44enum stripetype4 { 54enum stripetype4 {
45 STRIPE_SPARSE = 1, 55 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2 56 STRIPE_DENSE = 2
@@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {
62 atomic_t ds_count; 72 atomic_t ds_count;
63}; 73};
64 74
65/* nfs4_file_layout_dsaddr flags */
66#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
67
68struct nfs4_file_layout_dsaddr { 75struct nfs4_file_layout_dsaddr {
69 struct nfs4_deviceid_node id_node; 76 struct nfs4_deviceid_node id_node;
70 unsigned long flags;
71 u32 stripe_count; 77 u32 stripe_count;
72 u8 *stripe_indices; 78 u8 *stripe_indices;
73 u32 ds_num; 79 u32 ds_num;
74 struct nfs4_pnfs_ds *ds_list[1]; 80 struct nfs4_pnfs_ds *ds_list[1];
75}; 81};
76 82
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
82struct nfs4_filelayout_segment { 83struct nfs4_filelayout_segment {
83 struct pnfs_layout_segment generic_hdr; 84 struct pnfs_layout_segment generic_hdr;
84 u32 stripe_type; 85 u32 stripe_type;
@@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {
89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 90 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
90 unsigned int num_fh; 91 unsigned int num_fh;
91 struct nfs_fh **fh_array; 92 struct nfs_fh **fh_array;
92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
93 int number_of_buckets;
94}; 93};
95 94
95struct nfs4_filelayout {
96 struct pnfs_layout_hdr generic_hdr;
97 struct pnfs_ds_commit_info commit_info;
98};
99
100static inline struct nfs4_filelayout *
101FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
102{
103 return container_of(lo, struct nfs4_filelayout, generic_hdr);
104}
105
96static inline struct nfs4_filelayout_segment * 106static inline struct nfs4_filelayout_segment *
97FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) 107FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
98{ 108{
@@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
107 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 117 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
108} 118}
109 119
120static inline void
121filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
122{
123 u32 *p = (u32 *)&node->deviceid;
124
125 printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
126 p[0], p[1], p[2], p[3]);
127
128 set_bit(NFS_DEVICEID_INVALID, &node->flags);
129}
130
131static inline bool
132filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
133{
134 return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
135}
136
137static inline bool
138filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
139{
140 return test_bit(NFS_DEVICEID_INVALID, &node->flags);
141}
142
143static inline bool
144filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
145{
146 return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
147 filelayout_test_layout_invalid(lseg->pls_layout);
148}
149
110extern struct nfs_fh * 150extern struct nfs_fh *
111nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 151nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
112 152
@@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
119extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 159extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
120struct nfs4_file_layout_dsaddr * 160struct nfs4_file_layout_dsaddr *
121get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 161get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
162void nfs4_ds_disconnect(struct nfs_client *clp);
122 163
123#endif /* FS_NFS_NFS4FILELAYOUT_H */ 164#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index c9cff9adb2d3..a1fab8da7f03 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
30 30
31#include <linux/nfs_fs.h> 31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/module.h>
33 34
34#include "internal.h" 35#include "internal.h"
35#include "nfs4filelayout.h" 36#include "nfs4filelayout.h"
36 37
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 38#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 39
40static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
41static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
42
39/* 43/*
40 * Data server cache 44 * Data server cache
41 * 45 *
@@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
145} 149}
146 150
147/* 151/*
152 * Lookup DS by nfs_client pointer. Zero data server client pointer
153 */
154void nfs4_ds_disconnect(struct nfs_client *clp)
155{
156 struct nfs4_pnfs_ds *ds;
157 struct nfs_client *found = NULL;
158
159 dprintk("%s clp %p\n", __func__, clp);
160 spin_lock(&nfs4_ds_cache_lock);
161 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
162 if (ds->ds_clp && ds->ds_clp == clp) {
163 found = ds->ds_clp;
164 ds->ds_clp = NULL;
165 }
166 spin_unlock(&nfs4_ds_cache_lock);
167 if (found) {
168 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
169 nfs_put_client(clp);
170 }
171}
172
173/*
148 * Create an rpc connection to the nfs4_pnfs_ds data server 174 * Create an rpc connection to the nfs4_pnfs_ds data server
149 * Currently only supports IPv4 and IPv6 addresses 175 * Currently only supports IPv4 and IPv6 addresses
150 */ 176 */
@@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
165 __func__, ds->ds_remotestr, da->da_remotestr); 191 __func__, ds->ds_remotestr, da->da_remotestr);
166 192
167 clp = nfs4_set_ds_client(mds_srv->nfs_client, 193 clp = nfs4_set_ds_client(mds_srv->nfs_client,
168 (struct sockaddr *)&da->da_addr, 194 (struct sockaddr *)&da->da_addr,
169 da->da_addrlen, IPPROTO_TCP); 195 da->da_addrlen, IPPROTO_TCP,
196 dataserver_timeo, dataserver_retrans);
170 if (!IS_ERR(clp)) 197 if (!IS_ERR(clp))
171 break; 198 break;
172 } 199 }
@@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
176 goto out; 203 goto out;
177 } 204 }
178 205
179 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { 206 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
180 if (!is_ds_client(clp)) {
181 status = -ENODEV;
182 goto out_put;
183 }
184 ds->ds_clp = clp;
185 dprintk("%s [existing] server=%s\n", __func__,
186 ds->ds_remotestr);
187 goto out;
188 }
189
190 /*
191 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
192 * be equal to the MDS lease. Renewal is scheduled in create_session.
193 */
194 spin_lock(&mds_srv->nfs_client->cl_lock);
195 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
196 spin_unlock(&mds_srv->nfs_client->cl_lock);
197 clp->cl_last_renewal = jiffies;
198
199 /* New nfs_client */
200 status = nfs4_init_ds_session(clp);
201 if (status) 207 if (status)
202 goto out_put; 208 goto out_put;
203 209
@@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
602 608
603 mp_count = be32_to_cpup(p); /* multipath count */ 609 mp_count = be32_to_cpup(p); /* multipath count */
604 for (j = 0; j < mp_count; j++) { 610 for (j = 0; j < mp_count; j++) {
605 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, 611 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
606 &stream, gfp_flags); 612 &stream, gfp_flags);
607 if (da) 613 if (da)
608 list_add_tail(&da->da_node, &dsaddrs); 614 list_add_tail(&da->da_node, &dsaddrs);
@@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
791 return flseg->fh_array[i]; 797 return flseg->fh_array[i];
792} 798}
793 799
794static void
795filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
796 int err, const char *ds_remotestr)
797{
798 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
799
800 printk(KERN_ERR "NFS: data server %s connection error %d."
801 " Deviceid [%x%x%x%x] marked out of use.\n",
802 ds_remotestr, err, p[0], p[1], p[2], p[3]);
803
804 spin_lock(&nfs4_ds_cache_lock);
805 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
806 spin_unlock(&nfs4_ds_cache_lock);
807}
808
809struct nfs4_pnfs_ds * 800struct nfs4_pnfs_ds *
810nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 801nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
811{ 802{
812 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 803 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
813 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 804 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
805 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
806
807 if (filelayout_test_devid_invalid(devid))
808 return NULL;
814 809
815 if (ds == NULL) { 810 if (ds == NULL) {
816 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 811 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
817 __func__, ds_idx); 812 __func__, ds_idx);
818 return NULL; 813 goto mark_dev_invalid;
819 } 814 }
820 815
821 if (!ds->ds_clp) { 816 if (!ds->ds_clp) {
822 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 817 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
823 int err; 818 int err;
824 819
825 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
826 /* Already tried to connect, don't try again */
827 dprintk("%s Deviceid marked out of use\n", __func__);
828 return NULL;
829 }
830 err = nfs4_ds_connect(s, ds); 820 err = nfs4_ds_connect(s, ds);
831 if (err) { 821 if (err)
832 filelayout_mark_devid_negative(dsaddr, err, 822 goto mark_dev_invalid;
833 ds->ds_remotestr);
834 return NULL;
835 }
836 } 823 }
837 return ds; 824 return ds;
825
826mark_dev_invalid:
827 filelayout_mark_devid_invalid(devid);
828 return NULL;
838} 829}
830
831module_param(dataserver_retrans, uint, 0644);
832MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
833 "retries a request before it attempts further "
834 " recovery action.");
835module_param(dataserver_timeo, uint, 0644);
836MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
837 "NFSv4.1 client waits for a response from a "
838 " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3dedc4ec7..017b4b01a69c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
132 return ret; 132 return ret;
133} 133}
134 134
135rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
136{
137 struct gss_api_mech *mech;
138 struct xdr_netobj oid;
139 int i;
140 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
141
142 for (i = 0; i < flavors->num_flavors; i++) {
143 struct nfs4_secinfo_flavor *flavor;
144 flavor = &flavors->flavors[i];
145
146 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
147 pseudoflavor = flavor->flavor;
148 break;
149 } else if (flavor->flavor == RPC_AUTH_GSS) {
150 oid.len = flavor->gss.sec_oid4.len;
151 oid.data = flavor->gss.sec_oid4.data;
152 mech = gss_mech_get_by_OID(&oid);
153 if (!mech)
154 continue;
155 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
156 gss_mech_put(mech);
157 break;
158 }
159 }
160
161 return pseudoflavor;
162}
163
135static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 164static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
136{ 165{
137 struct page *page; 166 struct page *page;
@@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino
168 rpc_authflavor_t flavor; 197 rpc_authflavor_t flavor;
169 198
170 flavor = nfs4_negotiate_security(inode, name); 199 flavor = nfs4_negotiate_security(inode, name);
171 if (flavor < 0) 200 if ((int)flavor < 0)
172 return ERR_PTR(flavor); 201 return ERR_PTR(flavor);
173 202
174 clone = rpc_clone_client(clnt); 203 clone = rpc_clone_client(clnt);
@@ -300,7 +329,7 @@ out:
300 * @dentry - dentry of referral 329 * @dentry - dentry of referral
301 * 330 *
302 */ 331 */
303struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) 332static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
304{ 333{
305 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 334 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
306 struct dentry *parent; 335 struct dentry *parent;
@@ -341,3 +370,25 @@ out:
341 dprintk("%s: done\n", __func__); 370 dprintk("%s: done\n", __func__);
342 return mnt; 371 return mnt;
343} 372}
373
374struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
375 struct nfs_fh *fh, struct nfs_fattr *fattr)
376{
377 struct dentry *parent = dget_parent(dentry);
378 struct rpc_clnt *client;
379 struct vfsmount *mnt;
380
381 /* Look it up again to get its attributes and sec flavor */
382 client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
383 dput(parent);
384 if (IS_ERR(client))
385 return ERR_CAST(client);
386
387 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
388 mnt = nfs_do_refmount(client, dentry);
389 else
390 mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
391
392 rpc_shutdown_client(client);
393 return mnt;
394}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ab985f6f0da8..d48dbefa0e71 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64#include "iostat.h" 64#include "iostat.h"
65#include "callback.h" 65#include "callback.h"
66#include "pnfs.h" 66#include "pnfs.h"
67#include "netns.h"
67 68
68#define NFSDBG_FACILITY NFSDBG_PROC 69#define NFSDBG_FACILITY NFSDBG_PROC
69 70
@@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
80static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 81static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
81static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 82static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
82static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 83static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
84static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
83static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 85static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
84static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 86static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
85 struct nfs_fattr *fattr, struct iattr *sattr, 87 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)
101 case -NFS4ERR_BADOWNER: 103 case -NFS4ERR_BADOWNER:
102 case -NFS4ERR_BADNAME: 104 case -NFS4ERR_BADNAME:
103 return -EINVAL; 105 return -EINVAL;
106 case -NFS4ERR_SHARE_DENIED:
107 return -EACCES;
104 default: 108 default:
105 dprintk("%s could not handle NFSv4 error %d\n", 109 dprintk("%s could not handle NFSv4 error %d\n",
106 __func__, -err); 110 __func__, -err);
@@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
304 case -NFS4ERR_SEQ_MISORDERED: 308 case -NFS4ERR_SEQ_MISORDERED:
305 dprintk("%s ERROR: %d Reset session\n", __func__, 309 dprintk("%s ERROR: %d Reset session\n", __func__,
306 errorcode); 310 errorcode);
307 nfs4_schedule_session_recovery(clp->cl_session); 311 nfs4_schedule_session_recovery(clp->cl_session, errorcode);
308 exception->retry = 1; 312 exception->retry = 1;
309 break; 313 break;
310#endif /* defined(CONFIG_NFS_V4_1) */ 314#endif /* defined(CONFIG_NFS_V4_1) */
@@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
772 struct nfs_inode *nfsi = NFS_I(dir); 776 struct nfs_inode *nfsi = NFS_I(dir);
773 777
774 spin_lock(&dir->i_lock); 778 spin_lock(&dir->i_lock);
775 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; 779 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
776 if (!cinfo->atomic || cinfo->before != dir->i_version) 780 if (!cinfo->atomic || cinfo->before != dir->i_version)
777 nfs_force_lookup_revalidate(dir); 781 nfs_force_lookup_revalidate(dir);
778 dir->i_version = cinfo->after; 782 dir->i_version = cinfo->after;
@@ -788,7 +792,6 @@ struct nfs4_opendata {
788 struct nfs4_string owner_name; 792 struct nfs4_string owner_name;
789 struct nfs4_string group_name; 793 struct nfs4_string group_name;
790 struct nfs_fattr f_attr; 794 struct nfs_fattr f_attr;
791 struct nfs_fattr dir_attr;
792 struct dentry *dir; 795 struct dentry *dir;
793 struct dentry *dentry; 796 struct dentry *dentry;
794 struct nfs4_state_owner *owner; 797 struct nfs4_state_owner *owner;
@@ -804,12 +807,10 @@ struct nfs4_opendata {
804static void nfs4_init_opendata_res(struct nfs4_opendata *p) 807static void nfs4_init_opendata_res(struct nfs4_opendata *p)
805{ 808{
806 p->o_res.f_attr = &p->f_attr; 809 p->o_res.f_attr = &p->f_attr;
807 p->o_res.dir_attr = &p->dir_attr;
808 p->o_res.seqid = p->o_arg.seqid; 810 p->o_res.seqid = p->o_arg.seqid;
809 p->c_res.seqid = p->c_arg.seqid; 811 p->c_res.seqid = p->c_arg.seqid;
810 p->o_res.server = p->o_arg.server; 812 p->o_res.server = p->o_arg.server;
811 nfs_fattr_init(&p->f_attr); 813 nfs_fattr_init(&p->f_attr);
812 nfs_fattr_init(&p->dir_attr);
813 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); 814 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
814} 815}
815 816
@@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
843 p->o_arg.name = &dentry->d_name; 844 p->o_arg.name = &dentry->d_name;
844 p->o_arg.server = server; 845 p->o_arg.server = server;
845 p->o_arg.bitmask = server->attr_bitmask; 846 p->o_arg.bitmask = server->attr_bitmask;
846 p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
848 if (attrs != NULL && attrs->ia_valid != 0) { 848 if (attrs != NULL && attrs->ia_valid != 0) {
849 __be32 verf[2]; 849 __be32 verf[2];
@@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1332 case -NFS4ERR_BAD_HIGH_SLOT: 1332 case -NFS4ERR_BAD_HIGH_SLOT:
1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1333 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1334 case -NFS4ERR_DEADSESSION: 1334 case -NFS4ERR_DEADSESSION:
1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 1335 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1336 goto out; 1336 goto out;
1337 case -NFS4ERR_STALE_CLIENTID: 1337 case -NFS4ERR_STALE_CLIENTID:
1338 case -NFS4ERR_STALE_STATEID: 1338 case -NFS4ERR_STALE_STATEID:
@@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1611 1611
1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); 1612 nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
1613 1613
1614 nfs_refresh_inode(dir, o_res->dir_attr);
1615
1616 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1614 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1617 status = _nfs4_proc_open_confirm(data); 1615 status = _nfs4_proc_open_confirm(data);
1618 if (status != 0) 1616 if (status != 0)
@@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1645 1643
1646 nfs_fattr_map_and_free_names(server, &data->f_attr); 1644 nfs_fattr_map_and_free_names(server, &data->f_attr);
1647 1645
1648 if (o_arg->open_flags & O_CREAT) { 1646 if (o_arg->open_flags & O_CREAT)
1649 update_changeattr(dir, &o_res->cinfo); 1647 update_changeattr(dir, &o_res->cinfo);
1650 nfs_post_op_update_inode(dir, o_res->dir_attr);
1651 } else
1652 nfs_refresh_inode(dir, o_res->dir_attr);
1653 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 1648 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1654 server->caps &= ~NFS_CAP_POSIX_LOCK; 1649 server->caps &= ~NFS_CAP_POSIX_LOCK;
1655 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1650 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1789/* 1784/*
1790 * Returns a referenced nfs4_state 1785 * Returns a referenced nfs4_state
1791 */ 1786 */
1792static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1787static int _nfs4_do_open(struct inode *dir,
1788 struct dentry *dentry,
1789 fmode_t fmode,
1790 int flags,
1791 struct iattr *sattr,
1792 struct rpc_cred *cred,
1793 struct nfs4_state **res,
1794 struct nfs4_threshold **ctx_th)
1793{ 1795{
1794 struct nfs4_state_owner *sp; 1796 struct nfs4_state_owner *sp;
1795 struct nfs4_state *state = NULL; 1797 struct nfs4_state *state = NULL;
@@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1814 if (opendata == NULL) 1816 if (opendata == NULL)
1815 goto err_put_state_owner; 1817 goto err_put_state_owner;
1816 1818
1819 if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
1820 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
1821 if (!opendata->f_attr.mdsthreshold)
1822 goto err_opendata_put;
1823 }
1817 if (dentry->d_inode != NULL) 1824 if (dentry->d_inode != NULL)
1818 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1825 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1819 1826
@@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
1839 nfs_setattr_update_inode(state->inode, sattr); 1846 nfs_setattr_update_inode(state->inode, sattr);
1840 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); 1847 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1841 } 1848 }
1849
1850 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
1851 *ctx_th = opendata->f_attr.mdsthreshold;
1852 else
1853 kfree(opendata->f_attr.mdsthreshold);
1854 opendata->f_attr.mdsthreshold = NULL;
1855
1842 nfs4_opendata_put(opendata); 1856 nfs4_opendata_put(opendata);
1843 nfs4_put_state_owner(sp); 1857 nfs4_put_state_owner(sp);
1844 *res = state; 1858 *res = state;
1845 return 0; 1859 return 0;
1846err_opendata_put: 1860err_opendata_put:
1861 kfree(opendata->f_attr.mdsthreshold);
1847 nfs4_opendata_put(opendata); 1862 nfs4_opendata_put(opendata);
1848err_put_state_owner: 1863err_put_state_owner:
1849 nfs4_put_state_owner(sp); 1864 nfs4_put_state_owner(sp);
@@ -1853,14 +1868,21 @@ out_err:
1853} 1868}
1854 1869
1855 1870
1856static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) 1871static struct nfs4_state *nfs4_do_open(struct inode *dir,
1872 struct dentry *dentry,
1873 fmode_t fmode,
1874 int flags,
1875 struct iattr *sattr,
1876 struct rpc_cred *cred,
1877 struct nfs4_threshold **ctx_th)
1857{ 1878{
1858 struct nfs4_exception exception = { }; 1879 struct nfs4_exception exception = { };
1859 struct nfs4_state *res; 1880 struct nfs4_state *res;
1860 int status; 1881 int status;
1861 1882
1862 do { 1883 do {
1863 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); 1884 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
1885 &res, ctx_th);
1864 if (status == 0) 1886 if (status == 0)
1865 break; 1887 break;
1866 /* NOTE: BAD_SEQID means the server and client disagree about the 1888 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2184 struct nfs4_state *state; 2206 struct nfs4_state *state;
2185 2207
2186 /* Protect against concurrent sillydeletes */ 2208 /* Protect against concurrent sillydeletes */
2187 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); 2209 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
2210 ctx->cred, &ctx->mdsthreshold);
2188 if (IS_ERR(state)) 2211 if (IS_ERR(state))
2189 return ERR_CAST(state); 2212 return ERR_CAST(state);
2190 ctx->state = state; 2213 ctx->state = state;
@@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2354/* 2377/*
2355 * get the file handle for the "/" directory on the server 2378 * get the file handle for the "/" directory on the server
2356 */ 2379 */
2357static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2380int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
2358 struct nfs_fsinfo *info) 2381 struct nfs_fsinfo *info)
2359{ 2382{
2360 int minor_version = server->nfs_client->cl_minorversion; 2383 int minor_version = server->nfs_client->cl_minorversion;
2361 int status = nfs4_lookup_root(server, fhandle, info); 2384 int status = nfs4_lookup_root(server, fhandle, info);
@@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2372 return nfs4_map_errors(status); 2395 return nfs4_map_errors(status);
2373} 2396}
2374 2397
2398static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
2399 struct nfs_fsinfo *info)
2400{
2401 int error;
2402 struct nfs_fattr *fattr = info->fattr;
2403
2404 error = nfs4_server_capabilities(server, mntfh);
2405 if (error < 0) {
2406 dprintk("nfs4_get_root: getcaps error = %d\n", -error);
2407 return error;
2408 }
2409
2410 error = nfs4_proc_getattr(server, mntfh, fattr);
2411 if (error < 0) {
2412 dprintk("nfs4_get_root: getattr error = %d\n", -error);
2413 return error;
2414 }
2415
2416 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
2417 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
2418 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
2419
2420 return error;
2421}
2422
2375/* 2423/*
2376 * Get locations and (maybe) other attributes of a referral. 2424 * Get locations and (maybe) other attributes of a referral.
2377 * Note that we'll actually follow the referral later when 2425 * Note that we'll actually follow the referral later when
@@ -2578,7 +2626,7 @@ out:
2578 return err; 2626 return err;
2579} 2627}
2580 2628
2581static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 2629static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
2582 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2630 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2583{ 2631{
2584 int status; 2632 int status;
@@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2761 fmode = ctx->mode; 2809 fmode = ctx->mode;
2762 } 2810 }
2763 sattr->ia_mode &= ~current_umask(); 2811 sattr->ia_mode &= ~current_umask();
2764 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); 2812 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
2765 d_drop(dentry); 2813 d_drop(dentry);
2766 if (IS_ERR(state)) { 2814 if (IS_ERR(state)) {
2767 status = PTR_ERR(state); 2815 status = PTR_ERR(state);
@@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2783 struct nfs_removeargs args = { 2831 struct nfs_removeargs args = {
2784 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2785 .name = *name, 2833 .name = *name,
2786 .bitmask = server->attr_bitmask,
2787 }; 2834 };
2788 struct nfs_removeres res = { 2835 struct nfs_removeres res = {
2789 .server = server, 2836 .server = server,
@@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2793 .rpc_argp = &args, 2840 .rpc_argp = &args,
2794 .rpc_resp = &res, 2841 .rpc_resp = &res,
2795 }; 2842 };
2796 int status = -ENOMEM; 2843 int status;
2797
2798 res.dir_attr = nfs_alloc_fattr();
2799 if (res.dir_attr == NULL)
2800 goto out;
2801 2844
2802 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); 2845 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2803 if (status == 0) { 2846 if (status == 0)
2804 update_changeattr(dir, &res.cinfo); 2847 update_changeattr(dir, &res.cinfo);
2805 nfs_post_op_update_inode(dir, res.dir_attr);
2806 }
2807 nfs_free_fattr(res.dir_attr);
2808out:
2809 return status; 2848 return status;
2810} 2849}
2811 2850
@@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2827 struct nfs_removeargs *args = msg->rpc_argp; 2866 struct nfs_removeargs *args = msg->rpc_argp;
2828 struct nfs_removeres *res = msg->rpc_resp; 2867 struct nfs_removeres *res = msg->rpc_resp;
2829 2868
2830 args->bitmask = server->cache_consistency_bitmask;
2831 res->server = server; 2869 res->server = server;
2832 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2870 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2833 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); 2871 nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2852 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2890 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2853 return 0; 2891 return 0;
2854 update_changeattr(dir, &res->cinfo); 2892 update_changeattr(dir, &res->cinfo);
2855 nfs_post_op_update_inode(dir, res->dir_attr);
2856 return 1; 2893 return 1;
2857} 2894}
2858 2895
@@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2863 struct nfs_renameres *res = msg->rpc_resp; 2900 struct nfs_renameres *res = msg->rpc_resp;
2864 2901
2865 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 2902 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2866 arg->bitmask = server->attr_bitmask;
2867 res->server = server; 2903 res->server = server;
2868 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); 2904 nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
2869} 2905}
@@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2889 return 0; 2925 return 0;
2890 2926
2891 update_changeattr(old_dir, &res->old_cinfo); 2927 update_changeattr(old_dir, &res->old_cinfo);
2892 nfs_post_op_update_inode(old_dir, res->old_fattr);
2893 update_changeattr(new_dir, &res->new_cinfo); 2928 update_changeattr(new_dir, &res->new_cinfo);
2894 nfs_post_op_update_inode(new_dir, res->new_fattr);
2895 return 1; 2929 return 1;
2896} 2930}
2897 2931
@@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2904 .new_dir = NFS_FH(new_dir), 2938 .new_dir = NFS_FH(new_dir),
2905 .old_name = old_name, 2939 .old_name = old_name,
2906 .new_name = new_name, 2940 .new_name = new_name,
2907 .bitmask = server->attr_bitmask,
2908 }; 2941 };
2909 struct nfs_renameres res = { 2942 struct nfs_renameres res = {
2910 .server = server, 2943 .server = server,
@@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2916 }; 2949 };
2917 int status = -ENOMEM; 2950 int status = -ENOMEM;
2918 2951
2919 res.old_fattr = nfs_alloc_fattr();
2920 res.new_fattr = nfs_alloc_fattr();
2921 if (res.old_fattr == NULL || res.new_fattr == NULL)
2922 goto out;
2923
2924 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2952 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2925 if (!status) { 2953 if (!status) {
2926 update_changeattr(old_dir, &res.old_cinfo); 2954 update_changeattr(old_dir, &res.old_cinfo);
2927 nfs_post_op_update_inode(old_dir, res.old_fattr);
2928 update_changeattr(new_dir, &res.new_cinfo); 2955 update_changeattr(new_dir, &res.new_cinfo);
2929 nfs_post_op_update_inode(new_dir, res.new_fattr);
2930 } 2956 }
2931out:
2932 nfs_free_fattr(res.new_fattr);
2933 nfs_free_fattr(res.old_fattr);
2934 return status; 2957 return status;
2935} 2958}
2936 2959
@@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2968 int status = -ENOMEM; 2991 int status = -ENOMEM;
2969 2992
2970 res.fattr = nfs_alloc_fattr(); 2993 res.fattr = nfs_alloc_fattr();
2971 res.dir_attr = nfs_alloc_fattr(); 2994 if (res.fattr == NULL)
2972 if (res.fattr == NULL || res.dir_attr == NULL)
2973 goto out; 2995 goto out;
2974 2996
2975 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 2997 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2976 if (!status) { 2998 if (!status) {
2977 update_changeattr(dir, &res.cinfo); 2999 update_changeattr(dir, &res.cinfo);
2978 nfs_post_op_update_inode(dir, res.dir_attr);
2979 nfs_post_op_update_inode(inode, res.fattr); 3000 nfs_post_op_update_inode(inode, res.fattr);
2980 } 3001 }
2981out: 3002out:
2982 nfs_free_fattr(res.dir_attr);
2983 nfs_free_fattr(res.fattr); 3003 nfs_free_fattr(res.fattr);
2984 return status; 3004 return status;
2985} 3005}
@@ -3002,7 +3022,6 @@ struct nfs4_createdata {
3002 struct nfs4_create_res res; 3022 struct nfs4_create_res res;
3003 struct nfs_fh fh; 3023 struct nfs_fh fh;
3004 struct nfs_fattr fattr; 3024 struct nfs_fattr fattr;
3005 struct nfs_fattr dir_fattr;
3006}; 3025};
3007 3026
3008static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, 3027static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
3026 data->res.server = server; 3045 data->res.server = server;
3027 data->res.fh = &data->fh; 3046 data->res.fh = &data->fh;
3028 data->res.fattr = &data->fattr; 3047 data->res.fattr = &data->fattr;
3029 data->res.dir_fattr = &data->dir_fattr;
3030 nfs_fattr_init(data->res.fattr); 3048 nfs_fattr_init(data->res.fattr);
3031 nfs_fattr_init(data->res.dir_fattr);
3032 } 3049 }
3033 return data; 3050 return data;
3034} 3051}
@@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
3039 &data->arg.seq_args, &data->res.seq_res, 1); 3056 &data->arg.seq_args, &data->res.seq_res, 1);
3040 if (status == 0) { 3057 if (status == 0) {
3041 update_changeattr(dir, &data->res.dir_cinfo); 3058 update_changeattr(dir, &data->res.dir_cinfo);
3042 nfs_post_op_update_inode(dir, data->res.dir_fattr);
3043 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); 3059 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
3044 } 3060 }
3045 return status; 3061 return status;
@@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3335 3351
3336void __nfs4_read_done_cb(struct nfs_read_data *data) 3352void __nfs4_read_done_cb(struct nfs_read_data *data)
3337{ 3353{
3338 nfs_invalidate_atime(data->inode); 3354 nfs_invalidate_atime(data->header->inode);
3339} 3355}
3340 3356
3341static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 3357static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3342{ 3358{
3343 struct nfs_server *server = NFS_SERVER(data->inode); 3359 struct nfs_server *server = NFS_SERVER(data->header->inode);
3344 3360
3345 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3361 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3346 rpc_restart_call_prepare(task); 3362 rpc_restart_call_prepare(task);
@@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
3375 3391
3376static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) 3392static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
3377{ 3393{
3378 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3394 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3379 &data->args.seq_args, 3395 &data->args.seq_args,
3380 &data->res.seq_res, 3396 &data->res.seq_res,
3381 task)) 3397 task))
@@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
3383 rpc_call_start(task); 3399 rpc_call_start(task);
3384} 3400}
3385 3401
3386/* Reset the the nfs_read_data to send the read to the MDS. */
3387void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3388{
3389 dprintk("%s Reset task for i/o through\n", __func__);
3390 put_lseg(data->lseg);
3391 data->lseg = NULL;
3392 /* offsets will differ in the dense stripe case */
3393 data->args.offset = data->mds_offset;
3394 data->ds_clp = NULL;
3395 data->args.fh = NFS_FH(data->inode);
3396 data->read_done_cb = nfs4_read_done_cb;
3397 task->tk_ops = data->mds_ops;
3398 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3399}
3400EXPORT_SYMBOL_GPL(nfs4_reset_read);
3401
3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3402static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3403{ 3403{
3404 struct inode *inode = data->inode; 3404 struct inode *inode = data->header->inode;
3405 3405
3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3406 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3407 rpc_restart_call_prepare(task); 3407 rpc_restart_call_prepare(task);
@@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3409 } 3409 }
3410 if (task->tk_status >= 0) { 3410 if (task->tk_status >= 0) {
3411 renew_lease(NFS_SERVER(inode), data->timestamp); 3411 renew_lease(NFS_SERVER(inode), data->timestamp);
3412 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 3412 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
3413 } 3413 }
3414 return 0; 3414 return 0;
3415} 3415}
@@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3422 nfs4_write_done_cb(task, data); 3422 nfs4_write_done_cb(task, data);
3423} 3423}
3424 3424
3425/* Reset the the nfs_write_data to send the write to the MDS. */ 3425static
3426void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) 3426bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
3427{ 3427{
3428 dprintk("%s Reset task for i/o through\n", __func__); 3428 const struct nfs_pgio_header *hdr = data->header;
3429 put_lseg(data->lseg); 3429
3430 data->lseg = NULL; 3430 /* Don't request attributes for pNFS or O_DIRECT writes */
3431 data->ds_clp = NULL; 3431 if (data->ds_clp != NULL || hdr->dreq != NULL)
3432 data->write_done_cb = nfs4_write_done_cb; 3432 return false;
3433 data->args.fh = NFS_FH(data->inode); 3433 /* Otherwise, request attributes if and only if we don't hold
3434 data->args.bitmask = data->res.server->cache_consistency_bitmask; 3434 * a delegation
3435 data->args.offset = data->mds_offset; 3435 */
3436 data->res.fattr = &data->fattr; 3436 return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
3437 task->tk_ops = data->mds_ops;
3438 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3439} 3437}
3440EXPORT_SYMBOL_GPL(nfs4_reset_write);
3441 3438
3442static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3439static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3443{ 3440{
3444 struct nfs_server *server = NFS_SERVER(data->inode); 3441 struct nfs_server *server = NFS_SERVER(data->header->inode);
3445 3442
3446 if (data->lseg) { 3443 if (!nfs4_write_need_cache_consistency_data(data)) {
3447 data->args.bitmask = NULL; 3444 data->args.bitmask = NULL;
3448 data->res.fattr = NULL; 3445 data->res.fattr = NULL;
3449 } else 3446 } else
3450 data->args.bitmask = server->cache_consistency_bitmask; 3447 data->args.bitmask = server->cache_consistency_bitmask;
3448
3451 if (!data->write_done_cb) 3449 if (!data->write_done_cb)
3452 data->write_done_cb = nfs4_write_done_cb; 3450 data->write_done_cb = nfs4_write_done_cb;
3453 data->res.server = server; 3451 data->res.server = server;
@@ -3459,6 +3457,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3459 3457
3460static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) 3458static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
3461{ 3459{
3460 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
3461 &data->args.seq_args,
3462 &data->res.seq_res,
3463 task))
3464 return;
3465 rpc_call_start(task);
3466}
3467
3468static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
3469{
3462 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 3470 if (nfs4_setup_sequence(NFS_SERVER(data->inode),
3463 &data->args.seq_args, 3471 &data->args.seq_args,
3464 &data->res.seq_res, 3472 &data->res.seq_res,
@@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
3467 rpc_call_start(task); 3475 rpc_call_start(task);
3468} 3476}
3469 3477
3470static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) 3478static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
3471{ 3479{
3472 struct inode *inode = data->inode; 3480 struct inode *inode = data->inode;
3473 3481
@@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
3475 rpc_restart_call_prepare(task); 3483 rpc_restart_call_prepare(task);
3476 return -EAGAIN; 3484 return -EAGAIN;
3477 } 3485 }
3478 nfs_refresh_inode(inode, data->res.fattr);
3479 return 0; 3486 return 0;
3480} 3487}
3481 3488
3482static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3489static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
3483{ 3490{
3484 if (!nfs4_sequence_done(task, &data->res.seq_res)) 3491 if (!nfs4_sequence_done(task, &data->res.seq_res))
3485 return -EAGAIN; 3492 return -EAGAIN;
3486 return data->write_done_cb(task, data); 3493 return data->commit_done_cb(task, data);
3487} 3494}
3488 3495
3489static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3496static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
3490{ 3497{
3491 struct nfs_server *server = NFS_SERVER(data->inode); 3498 struct nfs_server *server = NFS_SERVER(data->inode);
3492 3499
3493 if (data->lseg) { 3500 if (data->commit_done_cb == NULL)
3494 data->args.bitmask = NULL; 3501 data->commit_done_cb = nfs4_commit_done_cb;
3495 data->res.fattr = NULL;
3496 } else
3497 data->args.bitmask = server->cache_consistency_bitmask;
3498 if (!data->write_done_cb)
3499 data->write_done_cb = nfs4_commit_done_cb;
3500 data->res.server = server; 3502 data->res.server = server;
3501 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3503 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3502 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 3504 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3905 case -NFS4ERR_SEQ_MISORDERED: 3907 case -NFS4ERR_SEQ_MISORDERED:
3906 dprintk("%s ERROR %d, Reset session\n", __func__, 3908 dprintk("%s ERROR %d, Reset session\n", __func__,
3907 task->tk_status); 3909 task->tk_status);
3908 nfs4_schedule_session_recovery(clp->cl_session); 3910 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
3909 task->tk_status = 0; 3911 task->tk_status = 0;
3910 return -EAGAIN; 3912 return -EAGAIN;
3911#endif /* CONFIG_NFS_V4_1 */ 3913#endif /* CONFIG_NFS_V4_1 */
@@ -3931,13 +3933,21 @@ wait_on_recovery:
3931 return -EAGAIN; 3933 return -EAGAIN;
3932} 3934}
3933 3935
3934static void nfs4_construct_boot_verifier(struct nfs_client *clp, 3936static void nfs4_init_boot_verifier(const struct nfs_client *clp,
3935 nfs4_verifier *bootverf) 3937 nfs4_verifier *bootverf)
3936{ 3938{
3937 __be32 verf[2]; 3939 __be32 verf[2];
3938 3940
3939 verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); 3941 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
3940 verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); 3942 /* An impossible timestamp guarantees this value
3943 * will never match a generated boot time. */
3944 verf[0] = 0;
3945 verf[1] = (__be32)(NSEC_PER_SEC + 1);
3946 } else {
3947 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
3948 verf[0] = (__be32)nn->boot_time.tv_sec;
3949 verf[1] = (__be32)nn->boot_time.tv_nsec;
3950 }
3941 memcpy(bootverf->data, verf, sizeof(bootverf->data)); 3951 memcpy(bootverf->data, verf, sizeof(bootverf->data));
3942} 3952}
3943 3953
@@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3960 int loop = 0; 3970 int loop = 0;
3961 int status; 3971 int status;
3962 3972
3963 nfs4_construct_boot_verifier(clp, &sc_verifier); 3973 nfs4_init_boot_verifier(clp, &sc_verifier);
3964 3974
3965 for(;;) { 3975 for(;;) {
3966 rcu_read_lock(); 3976 rcu_read_lock();
@@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4104 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4114 nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
4105 data->args.fhandle = &data->fh; 4115 data->args.fhandle = &data->fh;
4106 data->args.stateid = &data->stateid; 4116 data->args.stateid = &data->stateid;
4107 data->args.bitmask = server->attr_bitmask; 4117 data->args.bitmask = server->cache_consistency_bitmask;
4108 nfs_copy_fh(&data->fh, NFS_FH(inode)); 4118 nfs_copy_fh(&data->fh, NFS_FH(inode));
4109 nfs4_stateid_copy(&data->stateid, stateid); 4119 nfs4_stateid_copy(&data->stateid, stateid);
4110 data->res.fattr = &data->fattr; 4120 data->res.fattr = &data->fattr;
@@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
4125 if (status != 0) 4135 if (status != 0)
4126 goto out; 4136 goto out;
4127 status = data->rpc_status; 4137 status = data->rpc_status;
4128 if (status != 0) 4138 if (status == 0)
4129 goto out; 4139 nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
4130 nfs_refresh_inode(inode, &data->fattr); 4140 else
4141 nfs_refresh_inode(inode, &data->fattr);
4131out: 4142out:
4132 rpc_put_task(task); 4143 rpc_put_task(task);
4133 return status; 4144 return status;
@@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4837 case -NFS4ERR_BAD_HIGH_SLOT: 4848 case -NFS4ERR_BAD_HIGH_SLOT:
4838 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4849 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4839 case -NFS4ERR_DEADSESSION: 4850 case -NFS4ERR_DEADSESSION:
4840 nfs4_schedule_session_recovery(server->nfs_client->cl_session); 4851 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
4841 goto out; 4852 goto out;
4842 case -ERESTARTSYS: 4853 case -ERESTARTSYS:
4843 /* 4854 /*
@@ -5079,7 +5090,8 @@ out_inval:
5079} 5090}
5080 5091
5081static bool 5092static bool
5082nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) 5093nfs41_same_server_scope(struct nfs41_server_scope *a,
5094 struct nfs41_server_scope *b)
5083{ 5095{
5084 if (a->server_scope_sz == b->server_scope_sz && 5096 if (a->server_scope_sz == b->server_scope_sz &&
5085 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) 5097 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5089,6 +5101,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
5089} 5101}
5090 5102
5091/* 5103/*
5104 * nfs4_proc_bind_conn_to_session()
5105 *
5106 * The 4.1 client currently uses the same TCP connection for the
5107 * fore and backchannel.
5108 */
5109int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
5110{
5111 int status;
5112 struct nfs41_bind_conn_to_session_res res;
5113 struct rpc_message msg = {
5114 .rpc_proc =
5115 &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
5116 .rpc_argp = clp,
5117 .rpc_resp = &res,
5118 .rpc_cred = cred,
5119 };
5120
5121 dprintk("--> %s\n", __func__);
5122 BUG_ON(clp == NULL);
5123
5124 res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
5125 if (unlikely(res.session == NULL)) {
5126 status = -ENOMEM;
5127 goto out;
5128 }
5129
5130 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5131 if (status == 0) {
5132 if (memcmp(res.session->sess_id.data,
5133 clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
5134 dprintk("NFS: %s: Session ID mismatch\n", __func__);
5135 status = -EIO;
5136 goto out_session;
5137 }
5138 if (res.dir != NFS4_CDFS4_BOTH) {
5139 dprintk("NFS: %s: Unexpected direction from server\n",
5140 __func__);
5141 status = -EIO;
5142 goto out_session;
5143 }
5144 if (res.use_conn_in_rdma_mode) {
5145 dprintk("NFS: %s: Server returned RDMA mode = true\n",
5146 __func__);
5147 status = -EIO;
5148 goto out_session;
5149 }
5150 }
5151out_session:
5152 kfree(res.session);
5153out:
5154 dprintk("<-- %s status= %d\n", __func__, status);
5155 return status;
5156}
5157
5158/*
5092 * nfs4_proc_exchange_id() 5159 * nfs4_proc_exchange_id()
5093 * 5160 *
5094 * Since the clientid has expired, all compounds using sessions 5161 * Since the clientid has expired, all compounds using sessions
@@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5105 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, 5172 .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
5106 }; 5173 };
5107 struct nfs41_exchange_id_res res = { 5174 struct nfs41_exchange_id_res res = {
5108 .client = clp, 5175 0
5109 }; 5176 };
5110 int status; 5177 int status;
5111 struct rpc_message msg = { 5178 struct rpc_message msg = {
@@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5118 dprintk("--> %s\n", __func__); 5185 dprintk("--> %s\n", __func__);
5119 BUG_ON(clp == NULL); 5186 BUG_ON(clp == NULL);
5120 5187
5121 nfs4_construct_boot_verifier(clp, &verifier); 5188 nfs4_init_boot_verifier(clp, &verifier);
5122 5189
5123 args.id_len = scnprintf(args.id, sizeof(args.id), 5190 args.id_len = scnprintf(args.id, sizeof(args.id),
5124 "%s/%s/%u", 5191 "%s/%s/%u",
@@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5126 clp->cl_rpcclient->cl_nodename, 5193 clp->cl_rpcclient->cl_nodename,
5127 clp->cl_rpcclient->cl_auth->au_flavor); 5194 clp->cl_rpcclient->cl_auth->au_flavor);
5128 5195
5129 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 5196 res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
5130 if (unlikely(!res.server_scope)) { 5197 GFP_NOFS);
5198 if (unlikely(res.server_owner == NULL)) {
5131 status = -ENOMEM; 5199 status = -ENOMEM;
5132 goto out; 5200 goto out;
5133 } 5201 }
5134 5202
5135 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); 5203 res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
5136 if (unlikely(!res.impl_id)) { 5204 GFP_NOFS);
5205 if (unlikely(res.server_scope == NULL)) {
5206 status = -ENOMEM;
5207 goto out_server_owner;
5208 }
5209
5210 res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
5211 if (unlikely(res.impl_id == NULL)) {
5137 status = -ENOMEM; 5212 status = -ENOMEM;
5138 goto out_server_scope; 5213 goto out_server_scope;
5139 } 5214 }
5140 5215
5141 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5216 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5142 if (!status) 5217 if (status == 0)
5143 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 5218 status = nfs4_check_cl_exchange_flags(res.flags);
5219
5220 if (status == 0) {
5221 clp->cl_clientid = res.clientid;
5222 clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
5223 if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
5224 clp->cl_seqid = res.seqid;
5225
5226 kfree(clp->cl_serverowner);
5227 clp->cl_serverowner = res.server_owner;
5228 res.server_owner = NULL;
5144 5229
5145 if (!status) {
5146 /* use the most recent implementation id */ 5230 /* use the most recent implementation id */
5147 kfree(clp->impl_id); 5231 kfree(clp->cl_implid);
5148 clp->impl_id = res.impl_id; 5232 clp->cl_implid = res.impl_id;
5149 } else
5150 kfree(res.impl_id);
5151 5233
5152 if (!status) { 5234 if (clp->cl_serverscope != NULL &&
5153 if (clp->server_scope && 5235 !nfs41_same_server_scope(clp->cl_serverscope,
5154 !nfs41_same_server_scope(clp->server_scope,
5155 res.server_scope)) { 5236 res.server_scope)) {
5156 dprintk("%s: server_scope mismatch detected\n", 5237 dprintk("%s: server_scope mismatch detected\n",
5157 __func__); 5238 __func__);
5158 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); 5239 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
5159 kfree(clp->server_scope); 5240 kfree(clp->cl_serverscope);
5160 clp->server_scope = NULL; 5241 clp->cl_serverscope = NULL;
5161 } 5242 }
5162 5243
5163 if (!clp->server_scope) { 5244 if (clp->cl_serverscope == NULL) {
5164 clp->server_scope = res.server_scope; 5245 clp->cl_serverscope = res.server_scope;
5165 goto out; 5246 goto out;
5166 } 5247 }
5167 } 5248 } else
5249 kfree(res.impl_id);
5168 5250
5251out_server_owner:
5252 kfree(res.server_owner);
5169out_server_scope: 5253out_server_scope:
5170 kfree(res.server_scope); 5254 kfree(res.server_scope);
5171out: 5255out:
5172 if (clp->impl_id) 5256 if (clp->cl_implid != NULL)
5173 dprintk("%s: Server Implementation ID: " 5257 dprintk("%s: Server Implementation ID: "
5174 "domain: %s, name: %s, date: %llu,%u\n", 5258 "domain: %s, name: %s, date: %llu,%u\n",
5175 __func__, clp->impl_id->domain, clp->impl_id->name, 5259 __func__, clp->cl_implid->domain, clp->cl_implid->name,
5176 clp->impl_id->date.seconds, 5260 clp->cl_implid->date.seconds,
5177 clp->impl_id->date.nseconds); 5261 clp->cl_implid->date.nseconds);
5178 dprintk("<-- %s status= %d\n", __func__, status); 5262 dprintk("<-- %s status= %d\n", __func__, status);
5179 return status; 5263 return status;
5180} 5264}
5181 5265
5266static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
5267 struct rpc_cred *cred)
5268{
5269 struct rpc_message msg = {
5270 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
5271 .rpc_argp = clp,
5272 .rpc_cred = cred,
5273 };
5274 int status;
5275
5276 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5277 if (status)
5278 pr_warn("NFS: Got error %d from the server %s on "
5279 "DESTROY_CLIENTID.", status, clp->cl_hostname);
5280 return status;
5281}
5282
5283static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
5284 struct rpc_cred *cred)
5285{
5286 unsigned int loop;
5287 int ret;
5288
5289 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
5290 ret = _nfs4_proc_destroy_clientid(clp, cred);
5291 switch (ret) {
5292 case -NFS4ERR_DELAY:
5293 case -NFS4ERR_CLIENTID_BUSY:
5294 ssleep(1);
5295 break;
5296 default:
5297 return ret;
5298 }
5299 }
5300 return 0;
5301}
5302
5303int nfs4_destroy_clientid(struct nfs_client *clp)
5304{
5305 struct rpc_cred *cred;
5306 int ret = 0;
5307
5308 if (clp->cl_mvops->minor_version < 1)
5309 goto out;
5310 if (clp->cl_exchange_flags == 0)
5311 goto out;
5312 cred = nfs4_get_exchange_id_cred(clp);
5313 ret = nfs4_proc_destroy_clientid(clp, cred);
5314 if (cred)
5315 put_rpccred(cred);
5316 switch (ret) {
5317 case 0:
5318 case -NFS4ERR_STALE_CLIENTID:
5319 clp->cl_exchange_flags = 0;
5320 }
5321out:
5322 return ret;
5323}
5324
5182struct nfs4_get_lease_time_data { 5325struct nfs4_get_lease_time_data {
5183 struct nfs4_get_lease_time_args *args; 5326 struct nfs4_get_lease_time_args *args;
5184 struct nfs4_get_lease_time_res *res; 5327 struct nfs4_get_lease_time_res *res;
@@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
5399void nfs4_destroy_session(struct nfs4_session *session) 5542void nfs4_destroy_session(struct nfs4_session *session)
5400{ 5543{
5401 struct rpc_xprt *xprt; 5544 struct rpc_xprt *xprt;
5545 struct rpc_cred *cred;
5402 5546
5403 nfs4_proc_destroy_session(session); 5547 cred = nfs4_get_exchange_id_cred(session->clp);
5548 nfs4_proc_destroy_session(session, cred);
5549 if (cred)
5550 put_rpccred(cred);
5404 5551
5405 rcu_read_lock(); 5552 rcu_read_lock();
5406 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); 5553 xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
@@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
5510 return nfs4_verify_back_channel_attrs(args, session); 5657 return nfs4_verify_back_channel_attrs(args, session);
5511} 5658}
5512 5659
5513static int _nfs4_proc_create_session(struct nfs_client *clp) 5660static int _nfs4_proc_create_session(struct nfs_client *clp,
5661 struct rpc_cred *cred)
5514{ 5662{
5515 struct nfs4_session *session = clp->cl_session; 5663 struct nfs4_session *session = clp->cl_session;
5516 struct nfs41_create_session_args args = { 5664 struct nfs41_create_session_args args = {
@@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5524 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], 5672 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
5525 .rpc_argp = &args, 5673 .rpc_argp = &args,
5526 .rpc_resp = &res, 5674 .rpc_resp = &res,
5675 .rpc_cred = cred,
5527 }; 5676 };
5528 int status; 5677 int status;
5529 5678
@@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5548 * It is the responsibility of the caller to verify the session is 5697 * It is the responsibility of the caller to verify the session is
5549 * expired before calling this routine. 5698 * expired before calling this routine.
5550 */ 5699 */
5551int nfs4_proc_create_session(struct nfs_client *clp) 5700int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
5552{ 5701{
5553 int status; 5702 int status;
5554 unsigned *ptr; 5703 unsigned *ptr;
@@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5556 5705
5557 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5706 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5558 5707
5559 status = _nfs4_proc_create_session(clp); 5708 status = _nfs4_proc_create_session(clp, cred);
5560 if (status) 5709 if (status)
5561 goto out; 5710 goto out;
5562 5711
@@ -5578,10 +5727,15 @@ out:
5578 * Issue the over-the-wire RPC DESTROY_SESSION. 5727 * Issue the over-the-wire RPC DESTROY_SESSION.
5579 * The caller must serialize access to this routine. 5728 * The caller must serialize access to this routine.
5580 */ 5729 */
5581int nfs4_proc_destroy_session(struct nfs4_session *session) 5730int nfs4_proc_destroy_session(struct nfs4_session *session,
5731 struct rpc_cred *cred)
5582{ 5732{
5733 struct rpc_message msg = {
5734 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
5735 .rpc_argp = session,
5736 .rpc_cred = cred,
5737 };
5583 int status = 0; 5738 int status = 0;
5584 struct rpc_message msg;
5585 5739
5586 dprintk("--> nfs4_proc_destroy_session\n"); 5740 dprintk("--> nfs4_proc_destroy_session\n");
5587 5741
@@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5589 if (session->clp->cl_cons_state != NFS_CS_READY) 5743 if (session->clp->cl_cons_state != NFS_CS_READY)
5590 return status; 5744 return status;
5591 5745
5592 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
5593 msg.rpc_argp = session;
5594 msg.rpc_resp = NULL;
5595 msg.rpc_cred = NULL;
5596 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5746 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5597 5747
5598 if (status) 5748 if (status)
@@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5604 return status; 5754 return status;
5605} 5755}
5606 5756
5757/*
5758 * With sessions, the client is not marked ready until after a
5759 * successful EXCHANGE_ID and CREATE_SESSION.
5760 *
5761 * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
5762 * other versions of NFS can be tried.
5763 */
5764static int nfs41_check_session_ready(struct nfs_client *clp)
5765{
5766 int ret;
5767
5768 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
5769 ret = nfs4_client_recover_expired_lease(clp);
5770 if (ret)
5771 return ret;
5772 }
5773 if (clp->cl_cons_state < NFS_CS_READY)
5774 return -EPROTONOSUPPORT;
5775 smp_rmb();
5776 return 0;
5777}
5778
5607int nfs4_init_session(struct nfs_server *server) 5779int nfs4_init_session(struct nfs_server *server)
5608{ 5780{
5609 struct nfs_client *clp = server->nfs_client; 5781 struct nfs_client *clp = server->nfs_client;
5610 struct nfs4_session *session; 5782 struct nfs4_session *session;
5611 unsigned int rsize, wsize; 5783 unsigned int rsize, wsize;
5612 int ret;
5613 5784
5614 if (!nfs4_has_session(clp)) 5785 if (!nfs4_has_session(clp))
5615 return 0; 5786 return 0;
5616 5787
5617 session = clp->cl_session; 5788 session = clp->cl_session;
5618 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5789 spin_lock(&clp->cl_lock);
5619 return 0; 5790 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5620 5791
5621 rsize = server->rsize; 5792 rsize = server->rsize;
5622 if (rsize == 0) 5793 if (rsize == 0)
5623 rsize = NFS_MAX_FILE_IO_SIZE; 5794 rsize = NFS_MAX_FILE_IO_SIZE;
5624 wsize = server->wsize; 5795 wsize = server->wsize;
5625 if (wsize == 0) 5796 if (wsize == 0)
5626 wsize = NFS_MAX_FILE_IO_SIZE; 5797 wsize = NFS_MAX_FILE_IO_SIZE;
5627 5798
5628 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; 5799 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
5629 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; 5800 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
5801 }
5802 spin_unlock(&clp->cl_lock);
5630 5803
5631 ret = nfs4_recover_expired_lease(server); 5804 return nfs41_check_session_ready(clp);
5632 if (!ret)
5633 ret = nfs4_check_client_ready(clp);
5634 return ret;
5635} 5805}
5636 5806
5637int nfs4_init_ds_session(struct nfs_client *clp) 5807int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
5638{ 5808{
5639 struct nfs4_session *session = clp->cl_session; 5809 struct nfs4_session *session = clp->cl_session;
5640 int ret; 5810 int ret;
5641 5811
5642 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) 5812 spin_lock(&clp->cl_lock);
5643 return 0; 5813 if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
5644 5814 /*
5645 ret = nfs4_client_recover_expired_lease(clp); 5815 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the
5646 if (!ret) 5816 * DS lease to be equal to the MDS lease.
5647 /* Test for the DS role */ 5817 */
5648 if (!is_ds_client(clp)) 5818 clp->cl_lease_time = lease_time;
5649 ret = -ENODEV; 5819 clp->cl_last_renewal = jiffies;
5650 if (!ret) 5820 }
5651 ret = nfs4_check_client_ready(clp); 5821 spin_unlock(&clp->cl_lock);
5652 return ret;
5653 5822
5823 ret = nfs41_check_session_ready(clp);
5824 if (ret)
5825 return ret;
5826 /* Test for the DS role */
5827 if (!is_ds_client(clp))
5828 return -ENODEV;
5829 return 0;
5654} 5830}
5655EXPORT_SYMBOL_GPL(nfs4_init_ds_session); 5831EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5656 5832
@@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6557 .file_inode_ops = &nfs4_file_inode_operations, 6733 .file_inode_ops = &nfs4_file_inode_operations,
6558 .file_ops = &nfs4_file_operations, 6734 .file_ops = &nfs4_file_operations,
6559 .getroot = nfs4_proc_get_root, 6735 .getroot = nfs4_proc_get_root,
6736 .submount = nfs4_submount,
6560 .getattr = nfs4_proc_getattr, 6737 .getattr = nfs4_proc_getattr,
6561 .setattr = nfs4_proc_setattr, 6738 .setattr = nfs4_proc_setattr,
6562 .lookup = nfs4_proc_lookup, 6739 .lookup = nfs4_proc_lookup,
@@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6589 .write_rpc_prepare = nfs4_proc_write_rpc_prepare, 6766 .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
6590 .write_done = nfs4_write_done, 6767 .write_done = nfs4_write_done,
6591 .commit_setup = nfs4_proc_commit_setup, 6768 .commit_setup = nfs4_proc_commit_setup,
6769 .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
6592 .commit_done = nfs4_commit_done, 6770 .commit_done = nfs4_commit_done,
6593 .lock = nfs4_proc_lock, 6771 .lock = nfs4_proc_lock,
6594 .clear_acl_cache = nfs4_zap_acl_attr, 6772 .clear_acl_cache = nfs4_zap_acl_attr,
6595 .close_context = nfs4_close_context, 6773 .close_context = nfs4_close_context,
6596 .open_context = nfs4_atomic_open, 6774 .open_context = nfs4_atomic_open,
6597 .init_client = nfs4_init_client, 6775 .init_client = nfs4_init_client,
6598 .secinfo = nfs4_proc_secinfo,
6599}; 6776};
6600 6777
6601static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 6778static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0eae7f..6930bec91bca 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
49#include "nfs4_fs.h" 49#include "nfs4_fs.h"
50#include "delegation.h" 50#include "delegation.h"
51 51
52#define NFSDBG_FACILITY NFSDBG_PROC 52#define NFSDBG_FACILITY NFSDBG_STATE
53 53
54void 54void
55nfs4_renew_state(struct work_struct *work) 55nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7f0fcfc1fe9d..c679b9ecef63 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,8 @@
57#include "internal.h" 57#include "internal.h"
58#include "pnfs.h" 58#include "pnfs.h"
59 59
60#define NFSDBG_FACILITY NFSDBG_STATE
61
60#define OPENOWNER_POOL_SIZE 8 62#define OPENOWNER_POOL_SIZE 8
61 63
62const nfs4_stateid zero_stateid; 64const nfs4_stateid zero_stateid;
@@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
254 goto out; 256 goto out;
255 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 257 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
256do_confirm: 258do_confirm:
257 status = nfs4_proc_create_session(clp); 259 status = nfs4_proc_create_session(clp, cred);
258 if (status != 0) 260 if (status != 0)
259 goto out; 261 goto out;
260 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 262 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1106 return; 1108 return;
1107 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1109 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1108 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1110 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1111 dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1112 clp->cl_hostname);
1109 nfs4_schedule_state_manager(clp); 1113 nfs4_schedule_state_manager(clp);
1110} 1114}
1111EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); 1115EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
@@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1122{ 1126{
1123 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1127 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1124 nfs_expire_all_delegations(clp); 1128 nfs_expire_all_delegations(clp);
1129 dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1130 clp->cl_hostname);
1125} 1131}
1126 1132
1127void nfs4_schedule_path_down_recovery(struct nfs_client *clp) 1133void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
@@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1158 struct nfs_client *clp = server->nfs_client; 1164 struct nfs_client *clp = server->nfs_client;
1159 1165
1160 nfs4_state_mark_reclaim_nograce(clp, state); 1166 nfs4_state_mark_reclaim_nograce(clp, state);
1167 dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1168 clp->cl_hostname);
1161 nfs4_schedule_state_manager(clp); 1169 nfs4_schedule_state_manager(clp);
1162} 1170}
1163EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); 1171EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1491 case -NFS4ERR_BADSLOT: 1499 case -NFS4ERR_BADSLOT:
1492 case -NFS4ERR_BAD_HIGH_SLOT: 1500 case -NFS4ERR_BAD_HIGH_SLOT:
1493 case -NFS4ERR_DEADSESSION: 1501 case -NFS4ERR_DEADSESSION:
1494 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1495 case -NFS4ERR_SEQ_FALSE_RETRY: 1502 case -NFS4ERR_SEQ_FALSE_RETRY:
1496 case -NFS4ERR_SEQ_MISORDERED: 1503 case -NFS4ERR_SEQ_MISORDERED:
1497 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1504 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1498 /* Zero session reset errors */ 1505 /* Zero session reset errors */
1499 break; 1506 break;
1507 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1508 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1509 break;
1500 case -EKEYEXPIRED: 1510 case -EKEYEXPIRED:
1501 /* Nothing we can do */ 1511 /* Nothing we can do */
1502 nfs4_warn_keyexpired(clp->cl_hostname); 1512 nfs4_warn_keyexpired(clp->cl_hostname);
1503 break; 1513 break;
1504 default: 1514 default:
1515 dprintk("%s: failed to handle error %d for server %s\n",
1516 __func__, error, clp->cl_hostname);
1505 return error; 1517 return error;
1506 } 1518 }
1519 dprintk("%s: handled error %d for server %s\n", __func__, error,
1520 clp->cl_hostname);
1507 return 0; 1521 return 0;
1508} 1522}
1509 1523
@@ -1572,34 +1586,82 @@ out:
1572 return nfs4_recovery_handle_error(clp, status); 1586 return nfs4_recovery_handle_error(clp, status);
1573} 1587}
1574 1588
1589/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1590 * on EXCHANGE_ID for v4.1
1591 */
1592static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1593{
1594 switch (status) {
1595 case -NFS4ERR_SEQ_MISORDERED:
1596 if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1597 return -ESERVERFAULT;
1598 /* Lease confirmation error: retry after purging the lease */
1599 ssleep(1);
1600 case -NFS4ERR_CLID_INUSE:
1601 case -NFS4ERR_STALE_CLIENTID:
1602 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1603 break;
1604 case -EACCES:
1605 if (clp->cl_machine_cred == NULL)
1606 return -EACCES;
1607 /* Handle case where the user hasn't set up machine creds */
1608 nfs4_clear_machine_cred(clp);
1609 case -NFS4ERR_DELAY:
1610 case -ETIMEDOUT:
1611 case -EAGAIN:
1612 ssleep(1);
1613 break;
1614
1615 case -NFS4ERR_MINOR_VERS_MISMATCH:
1616 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1617 nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
1618 dprintk("%s: exit with error %d for server %s\n",
1619 __func__, -EPROTONOSUPPORT, clp->cl_hostname);
1620 return -EPROTONOSUPPORT;
1621 case -EKEYEXPIRED:
1622 nfs4_warn_keyexpired(clp->cl_hostname);
1623 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1624 * in nfs4_exchange_id */
1625 default:
1626 dprintk("%s: exit with error %d for server %s\n", __func__,
1627 status, clp->cl_hostname);
1628 return status;
1629 }
1630 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1631 dprintk("%s: handled error %d for server %s\n", __func__, status,
1632 clp->cl_hostname);
1633 return 0;
1634}
1635
1575static int nfs4_reclaim_lease(struct nfs_client *clp) 1636static int nfs4_reclaim_lease(struct nfs_client *clp)
1576{ 1637{
1577 struct rpc_cred *cred; 1638 struct rpc_cred *cred;
1578 const struct nfs4_state_recovery_ops *ops = 1639 const struct nfs4_state_recovery_ops *ops =
1579 clp->cl_mvops->reboot_recovery_ops; 1640 clp->cl_mvops->reboot_recovery_ops;
1580 int status = -ENOENT; 1641 int status;
1581 1642
1582 cred = ops->get_clid_cred(clp); 1643 cred = ops->get_clid_cred(clp);
1583 if (cred != NULL) { 1644 if (cred == NULL)
1584 status = ops->establish_clid(clp, cred); 1645 return -ENOENT;
1585 put_rpccred(cred); 1646 status = ops->establish_clid(clp, cred);
1586 /* Handle case where the user hasn't set up machine creds */ 1647 put_rpccred(cred);
1587 if (status == -EACCES && cred == clp->cl_machine_cred) { 1648 if (status != 0)
1588 nfs4_clear_machine_cred(clp); 1649 return nfs4_handle_reclaim_lease_error(clp, status);
1589 status = -EAGAIN; 1650 return 0;
1590 }
1591 if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
1592 status = -EPROTONOSUPPORT;
1593 }
1594 return status;
1595} 1651}
1596 1652
1597#ifdef CONFIG_NFS_V4_1 1653#ifdef CONFIG_NFS_V4_1
1598void nfs4_schedule_session_recovery(struct nfs4_session *session) 1654void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
1599{ 1655{
1600 struct nfs_client *clp = session->clp; 1656 struct nfs_client *clp = session->clp;
1601 1657
1602 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1658 switch (err) {
1659 default:
1660 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1661 break;
1662 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1663 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1664 }
1603 nfs4_schedule_lease_recovery(clp); 1665 nfs4_schedule_lease_recovery(clp);
1604} 1666}
1605EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); 1667EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1607void nfs41_handle_recall_slot(struct nfs_client *clp) 1669void nfs41_handle_recall_slot(struct nfs_client *clp)
1608{ 1670{
1609 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1671 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1672 dprintk("%s: scheduling slot recall for server %s\n", __func__,
1673 clp->cl_hostname);
1610 nfs4_schedule_state_manager(clp); 1674 nfs4_schedule_state_manager(clp);
1611} 1675}
1612 1676
1613static void nfs4_reset_all_state(struct nfs_client *clp) 1677static void nfs4_reset_all_state(struct nfs_client *clp)
1614{ 1678{
1615 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1679 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1616 clp->cl_boot_time = CURRENT_TIME; 1680 set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1681 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1617 nfs4_state_start_reclaim_nograce(clp); 1682 nfs4_state_start_reclaim_nograce(clp);
1683 dprintk("%s: scheduling reset of all state for server %s!\n",
1684 __func__, clp->cl_hostname);
1618 nfs4_schedule_state_manager(clp); 1685 nfs4_schedule_state_manager(clp);
1619 } 1686 }
1620} 1687}
@@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1623{ 1690{
1624 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1691 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1625 nfs4_state_start_reclaim_reboot(clp); 1692 nfs4_state_start_reclaim_reboot(clp);
1693 dprintk("%s: server %s rebooted!\n", __func__,
1694 clp->cl_hostname);
1626 nfs4_schedule_state_manager(clp); 1695 nfs4_schedule_state_manager(clp);
1627 } 1696 }
1628} 1697}
1629 1698
1630static void nfs41_handle_state_revoked(struct nfs_client *clp) 1699static void nfs41_handle_state_revoked(struct nfs_client *clp)
1631{ 1700{
1632 /* Temporary */
1633 nfs4_reset_all_state(clp); 1701 nfs4_reset_all_state(clp);
1702 dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
1634} 1703}
1635 1704
1636static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) 1705static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
1637{ 1706{
1638 /* This will need to handle layouts too */ 1707 /* This will need to handle layouts too */
1639 nfs_expire_all_delegations(clp); 1708 nfs_expire_all_delegations(clp);
1709 dprintk("%s: Recallable state revoked on server %s!\n", __func__,
1710 clp->cl_hostname);
1640} 1711}
1641 1712
1642static void nfs41_handle_cb_path_down(struct nfs_client *clp) 1713static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
1643{ 1714{
1644 nfs_expire_all_delegations(clp); 1715 nfs_expire_all_delegations(clp);
1645 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1716 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1646 nfs4_schedule_state_manager(clp); 1717 nfs4_schedule_state_manager(clp);
1718 dprintk("%s: server %s declared a backchannel fault\n", __func__,
1719 clp->cl_hostname);
1720}
1721
1722static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1723{
1724 if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1725 &clp->cl_state) == 0)
1726 nfs4_schedule_state_manager(clp);
1647} 1727}
1648 1728
1649void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1729void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1650{ 1730{
1651 if (!flags) 1731 if (!flags)
1652 return; 1732 return;
1733
1734 dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
1735 __func__, clp->cl_hostname, clp->cl_clientid, flags);
1736
1653 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1737 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1654 nfs41_handle_server_reboot(clp); 1738 nfs41_handle_server_reboot(clp);
1655 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1739 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1659 nfs41_handle_state_revoked(clp); 1743 nfs41_handle_state_revoked(clp);
1660 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1744 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1661 nfs41_handle_recallable_state_revoked(clp); 1745 nfs41_handle_recallable_state_revoked(clp);
1662 if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1746 if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
1663 SEQ4_STATUS_BACKCHANNEL_FAULT | 1747 nfs41_handle_backchannel_fault(clp);
1664 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1748 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1749 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1665 nfs41_handle_cb_path_down(clp); 1750 nfs41_handle_cb_path_down(clp);
1666} 1751}
1667 1752
1668static int nfs4_reset_session(struct nfs_client *clp) 1753static int nfs4_reset_session(struct nfs_client *clp)
1669{ 1754{
1755 struct rpc_cred *cred;
1670 int status; 1756 int status;
1671 1757
1672 nfs4_begin_drain_session(clp); 1758 nfs4_begin_drain_session(clp);
1673 status = nfs4_proc_destroy_session(clp->cl_session); 1759 cred = nfs4_get_exchange_id_cred(clp);
1760 status = nfs4_proc_destroy_session(clp->cl_session, cred);
1674 if (status && status != -NFS4ERR_BADSESSION && 1761 if (status && status != -NFS4ERR_BADSESSION &&
1675 status != -NFS4ERR_DEADSESSION) { 1762 status != -NFS4ERR_DEADSESSION) {
1676 status = nfs4_recovery_handle_error(clp, status); 1763 status = nfs4_recovery_handle_error(clp, status);
@@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)
1678 } 1765 }
1679 1766
1680 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); 1767 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1681 status = nfs4_proc_create_session(clp); 1768 status = nfs4_proc_create_session(clp, cred);
1682 if (status) { 1769 if (status) {
1683 status = nfs4_recovery_handle_error(clp, status); 1770 dprintk("%s: session reset failed with status %d for server %s!\n",
1771 __func__, status, clp->cl_hostname);
1772 status = nfs4_handle_reclaim_lease_error(clp, status);
1684 goto out; 1773 goto out;
1685 } 1774 }
1686 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1775 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1687 /* create_session negotiated new slot table */ 1776 /* create_session negotiated new slot table */
1688 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1777 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1778 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1779 dprintk("%s: session reset was successful for server %s!\n",
1780 __func__, clp->cl_hostname);
1689 1781
1690 /* Let the state manager reestablish state */ 1782 /* Let the state manager reestablish state */
1691 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1783 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1692 nfs41_setup_state_renewal(clp); 1784 nfs41_setup_state_renewal(clp);
1693out: 1785out:
1786 if (cred)
1787 put_rpccred(cred);
1694 return status; 1788 return status;
1695} 1789}
1696 1790
@@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
1722 return 0; 1816 return 0;
1723} 1817}
1724 1818
1725#else /* CONFIG_NFS_V4_1 */ 1819static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1726static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1727static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1728static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1729#endif /* CONFIG_NFS_V4_1 */
1730
1731/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1732 * on EXCHANGE_ID for v4.1
1733 */
1734static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1735{ 1820{
1736 switch (status) { 1821 struct rpc_cred *cred;
1737 case -NFS4ERR_CLID_INUSE: 1822 int ret;
1738 case -NFS4ERR_STALE_CLIENTID: 1823
1739 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 1824 nfs4_begin_drain_session(clp);
1825 cred = nfs4_get_exchange_id_cred(clp);
1826 ret = nfs4_proc_bind_conn_to_session(clp, cred);
1827 if (cred)
1828 put_rpccred(cred);
1829 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1830 switch (ret) {
1831 case 0:
1832 dprintk("%s: bind_conn_to_session was successful for server %s!\n",
1833 __func__, clp->cl_hostname);
1740 break; 1834 break;
1741 case -NFS4ERR_DELAY: 1835 case -NFS4ERR_DELAY:
1742 case -ETIMEDOUT:
1743 case -EAGAIN:
1744 ssleep(1); 1836 ssleep(1);
1837 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1745 break; 1838 break;
1746
1747 case -EKEYEXPIRED:
1748 nfs4_warn_keyexpired(clp->cl_hostname);
1749 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1750 * in nfs4_exchange_id */
1751 default: 1839 default:
1752 return; 1840 return nfs4_recovery_handle_error(clp, ret);
1753 } 1841 }
1754 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1842 return 0;
1755} 1843}
1844#else /* CONFIG_NFS_V4_1 */
1845static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1846static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1847static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1848
1849static int nfs4_bind_conn_to_session(struct nfs_client *clp)
1850{
1851 return 0;
1852}
1853#endif /* CONFIG_NFS_V4_1 */
1756 1854
1757static void nfs4_state_manager(struct nfs_client *clp) 1855static void nfs4_state_manager(struct nfs_client *clp)
1758{ 1856{
@@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
1760 1858
1761 /* Ensure exclusive access to NFSv4 state */ 1859 /* Ensure exclusive access to NFSv4 state */
1762 do { 1860 do {
1861 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
1862 status = nfs4_reclaim_lease(clp);
1863 if (status < 0)
1864 goto out_error;
1865 clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1866 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1867 }
1868
1763 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1869 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1764 /* We're going to have to re-establish a clientid */ 1870 /* We're going to have to re-establish a clientid */
1765 status = nfs4_reclaim_lease(clp); 1871 status = nfs4_reclaim_lease(clp);
1766 if (status) { 1872 if (status < 0)
1767 nfs4_set_lease_expired(clp, status);
1768 if (test_bit(NFS4CLNT_LEASE_EXPIRED,
1769 &clp->cl_state))
1770 continue;
1771 if (clp->cl_cons_state ==
1772 NFS_CS_SESSION_INITING)
1773 nfs_mark_client_ready(clp, status);
1774 goto out_error; 1873 goto out_error;
1775 } 1874 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1875 continue;
1776 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1876 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1777 1877
1778 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, 1878 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
1803 goto out_error; 1903 goto out_error;
1804 } 1904 }
1805 1905
1906 /* Send BIND_CONN_TO_SESSION */
1907 if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1908 &clp->cl_state) && nfs4_has_session(clp)) {
1909 status = nfs4_bind_conn_to_session(clp);
1910 if (status < 0)
1911 goto out_error;
1912 continue;
1913 }
1914
1806 /* First recover reboot state... */ 1915 /* First recover reboot state... */
1807 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1916 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1808 status = nfs4_do_reclaim(clp, 1917 status = nfs4_do_reclaim(clp,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..ee4a74db95d0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -53,9 +53,11 @@
53#include <linux/nfs4.h> 53#include <linux/nfs4.h>
54#include <linux/nfs_fs.h> 54#include <linux/nfs_fs.h>
55#include <linux/nfs_idmap.h> 55#include <linux/nfs_idmap.h>
56
56#include "nfs4_fs.h" 57#include "nfs4_fs.h"
57#include "internal.h" 58#include "internal.h"
58#include "pnfs.h" 59#include "pnfs.h"
60#include "netns.h"
59 61
60#define NFSDBG_FACILITY NFSDBG_XDR 62#define NFSDBG_FACILITY NFSDBG_XDR
61 63
@@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);
99#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 101#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
100#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 102#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
101#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) 103#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
104/* We support only one layout type per file system */
105#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
102/* This is based on getfattr, which uses the most attributes: */ 106/* This is based on getfattr, which uses the most attributes: */
103#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 107#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
104 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) 108 3 + 3 + 3 + nfs4_owner_maxsz + \
109 nfs4_group_maxsz + decode_mdsthreshold_maxsz))
105#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ 110#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
106 nfs4_fattr_value_maxsz) 111 nfs4_fattr_value_maxsz)
107#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) 112#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);
321 1 /* csr_flags */ + \ 326 1 /* csr_flags */ + \
322 decode_channel_attrs_maxsz + \ 327 decode_channel_attrs_maxsz + \
323 decode_channel_attrs_maxsz) 328 decode_channel_attrs_maxsz)
329#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \
330 /* bctsa_sessid */ \
331 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
332 1 /* bctsa_dir */ + \
333 1 /* bctsa_use_conn_in_rdma_mode */)
334#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \
335 /* bctsr_sessid */ \
336 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
337 1 /* bctsr_dir */ + \
338 1 /* bctsr_use_conn_in_rdma_mode */)
324#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) 339#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
325#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) 340#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
341#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2)
342#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz)
326#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ 343#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
327 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) 344 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
328#define decode_sequence_maxsz (op_decode_hdr_maxsz + \ 345#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
@@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);
421#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 438#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
422 encode_sequence_maxsz + \ 439 encode_sequence_maxsz + \
423 encode_putfh_maxsz + \ 440 encode_putfh_maxsz + \
424 encode_commit_maxsz + \ 441 encode_commit_maxsz)
425 encode_getattr_maxsz)
426#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 442#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
427 decode_sequence_maxsz + \ 443 decode_sequence_maxsz + \
428 decode_putfh_maxsz + \ 444 decode_putfh_maxsz + \
429 decode_commit_maxsz + \ 445 decode_commit_maxsz)
430 decode_getattr_maxsz)
431#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 446#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
432 encode_sequence_maxsz + \ 447 encode_sequence_maxsz + \
433 encode_putfh_maxsz + \ 448 encode_putfh_maxsz + \
434 encode_savefh_maxsz + \
435 encode_open_maxsz + \ 449 encode_open_maxsz + \
436 encode_getfh_maxsz + \ 450 encode_getfh_maxsz + \
437 encode_getattr_maxsz + \
438 encode_restorefh_maxsz + \
439 encode_getattr_maxsz) 451 encode_getattr_maxsz)
440#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 452#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
441 decode_sequence_maxsz + \ 453 decode_sequence_maxsz + \
442 decode_putfh_maxsz + \ 454 decode_putfh_maxsz + \
443 decode_savefh_maxsz + \
444 decode_open_maxsz + \ 455 decode_open_maxsz + \
445 decode_getfh_maxsz + \ 456 decode_getfh_maxsz + \
446 decode_getattr_maxsz + \
447 decode_restorefh_maxsz + \
448 decode_getattr_maxsz) 457 decode_getattr_maxsz)
449#define NFS4_enc_open_confirm_sz \ 458#define NFS4_enc_open_confirm_sz \
450 (compound_encode_hdr_maxsz + \ 459 (compound_encode_hdr_maxsz + \
@@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);
595#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 604#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
596 encode_sequence_maxsz + \ 605 encode_sequence_maxsz + \
597 encode_putfh_maxsz + \ 606 encode_putfh_maxsz + \
598 encode_remove_maxsz + \ 607 encode_remove_maxsz)
599 encode_getattr_maxsz)
600#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 608#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
601 decode_sequence_maxsz + \ 609 decode_sequence_maxsz + \
602 decode_putfh_maxsz + \ 610 decode_putfh_maxsz + \
603 decode_remove_maxsz + \ 611 decode_remove_maxsz)
604 decode_getattr_maxsz)
605#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 612#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
606 encode_sequence_maxsz + \ 613 encode_sequence_maxsz + \
607 encode_putfh_maxsz + \ 614 encode_putfh_maxsz + \
608 encode_savefh_maxsz + \ 615 encode_savefh_maxsz + \
609 encode_putfh_maxsz + \ 616 encode_putfh_maxsz + \
610 encode_rename_maxsz + \ 617 encode_rename_maxsz)
611 encode_getattr_maxsz + \
612 encode_restorefh_maxsz + \
613 encode_getattr_maxsz)
614#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 618#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
615 decode_sequence_maxsz + \ 619 decode_sequence_maxsz + \
616 decode_putfh_maxsz + \ 620 decode_putfh_maxsz + \
617 decode_savefh_maxsz + \ 621 decode_savefh_maxsz + \
618 decode_putfh_maxsz + \ 622 decode_putfh_maxsz + \
619 decode_rename_maxsz + \ 623 decode_rename_maxsz)
620 decode_getattr_maxsz + \
621 decode_restorefh_maxsz + \
622 decode_getattr_maxsz)
623#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 624#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
624 encode_sequence_maxsz + \ 625 encode_sequence_maxsz + \
625 encode_putfh_maxsz + \ 626 encode_putfh_maxsz + \
626 encode_savefh_maxsz + \ 627 encode_savefh_maxsz + \
627 encode_putfh_maxsz + \ 628 encode_putfh_maxsz + \
628 encode_link_maxsz + \ 629 encode_link_maxsz + \
629 decode_getattr_maxsz + \
630 encode_restorefh_maxsz + \ 630 encode_restorefh_maxsz + \
631 decode_getattr_maxsz) 631 encode_getattr_maxsz)
632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 632#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
633 decode_sequence_maxsz + \ 633 decode_sequence_maxsz + \
634 decode_putfh_maxsz + \ 634 decode_putfh_maxsz + \
635 decode_savefh_maxsz + \ 635 decode_savefh_maxsz + \
636 decode_putfh_maxsz + \ 636 decode_putfh_maxsz + \
637 decode_link_maxsz + \ 637 decode_link_maxsz + \
638 decode_getattr_maxsz + \
639 decode_restorefh_maxsz + \ 638 decode_restorefh_maxsz + \
640 decode_getattr_maxsz) 639 decode_getattr_maxsz)
641#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 640#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);
653#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 652#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
654 encode_sequence_maxsz + \ 653 encode_sequence_maxsz + \
655 encode_putfh_maxsz + \ 654 encode_putfh_maxsz + \
656 encode_savefh_maxsz + \
657 encode_create_maxsz + \ 655 encode_create_maxsz + \
658 encode_getfh_maxsz + \ 656 encode_getfh_maxsz + \
659 encode_getattr_maxsz + \
660 encode_restorefh_maxsz + \
661 encode_getattr_maxsz) 657 encode_getattr_maxsz)
662#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 658#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
663 decode_sequence_maxsz + \ 659 decode_sequence_maxsz + \
664 decode_putfh_maxsz + \ 660 decode_putfh_maxsz + \
665 decode_savefh_maxsz + \
666 decode_create_maxsz + \ 661 decode_create_maxsz + \
667 decode_getfh_maxsz + \ 662 decode_getfh_maxsz + \
668 decode_getattr_maxsz + \
669 decode_restorefh_maxsz + \
670 decode_getattr_maxsz) 663 decode_getattr_maxsz)
671#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 664#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
672 encode_sequence_maxsz + \ 665 encode_sequence_maxsz + \
@@ -738,6 +731,12 @@ static int nfs4_stat_to_errno(int);
738 decode_putfh_maxsz + \ 731 decode_putfh_maxsz + \
739 decode_secinfo_maxsz) 732 decode_secinfo_maxsz)
740#if defined(CONFIG_NFS_V4_1) 733#if defined(CONFIG_NFS_V4_1)
734#define NFS4_enc_bind_conn_to_session_sz \
735 (compound_encode_hdr_maxsz + \
736 encode_bind_conn_to_session_maxsz)
737#define NFS4_dec_bind_conn_to_session_sz \
738 (compound_decode_hdr_maxsz + \
739 decode_bind_conn_to_session_maxsz)
741#define NFS4_enc_exchange_id_sz \ 740#define NFS4_enc_exchange_id_sz \
742 (compound_encode_hdr_maxsz + \ 741 (compound_encode_hdr_maxsz + \
743 encode_exchange_id_maxsz) 742 encode_exchange_id_maxsz)
@@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);
754 encode_destroy_session_maxsz) 753 encode_destroy_session_maxsz)
755#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ 754#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
756 decode_destroy_session_maxsz) 755 decode_destroy_session_maxsz)
756#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \
757 encode_destroy_clientid_maxsz)
758#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \
759 decode_destroy_clientid_maxsz)
757#define NFS4_enc_sequence_sz \ 760#define NFS4_enc_sequence_sz \
758 (compound_decode_hdr_maxsz + \ 761 (compound_decode_hdr_maxsz + \
759 encode_sequence_maxsz) 762 encode_sequence_maxsz)
@@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
1103 encode_nfs4_stateid(xdr, arg->stateid); 1106 encode_nfs4_stateid(xdr, arg->stateid);
1104} 1107}
1105 1108
1106static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1109static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
1107{ 1110{
1108 __be32 *p; 1111 __be32 *p;
1109 1112
@@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
1194 bitmask[1] & nfs4_fattr_bitmap[1], hdr); 1197 bitmask[1] & nfs4_fattr_bitmap[1], hdr);
1195} 1198}
1196 1199
1200static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
1201 struct compound_hdr *hdr)
1202{
1203 encode_getattr_three(xdr,
1204 bitmask[0] & nfs4_fattr_bitmap[0],
1205 bitmask[1] & nfs4_fattr_bitmap[1],
1206 bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
1207 hdr);
1208}
1209
1197static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1210static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
1198{ 1211{
1199 encode_getattr_three(xdr, 1212 encode_getattr_three(xdr,
@@ -1678,6 +1691,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
1678 1691
1679#if defined(CONFIG_NFS_V4_1) 1692#if defined(CONFIG_NFS_V4_1)
1680/* NFSv4.1 operations */ 1693/* NFSv4.1 operations */
1694static void encode_bind_conn_to_session(struct xdr_stream *xdr,
1695 struct nfs4_session *session,
1696 struct compound_hdr *hdr)
1697{
1698 __be32 *p;
1699
1700 encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
1701 decode_bind_conn_to_session_maxsz, hdr);
1702 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1703 p = xdr_reserve_space(xdr, 8);
1704 *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
1705 *p = 0; /* use_conn_in_rdma_mode = False */
1706}
1707
1681static void encode_exchange_id(struct xdr_stream *xdr, 1708static void encode_exchange_id(struct xdr_stream *xdr,
1682 struct nfs41_exchange_id_args *args, 1709 struct nfs41_exchange_id_args *args,
1683 struct compound_hdr *hdr) 1710 struct compound_hdr *hdr)
@@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1726 char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; 1753 char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
1727 uint32_t len; 1754 uint32_t len;
1728 struct nfs_client *clp = args->client; 1755 struct nfs_client *clp = args->client;
1756 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
1729 u32 max_resp_sz_cached; 1757 u32 max_resp_sz_cached;
1730 1758
1731 /* 1759 /*
@@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1767 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ 1795 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1768 1796
1769 /* authsys_parms rfc1831 */ 1797 /* authsys_parms rfc1831 */
1770 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1798 *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
1771 p = xdr_encode_opaque(p, machine_name, len); 1799 p = xdr_encode_opaque(p, machine_name, len);
1772 *p++ = cpu_to_be32(0); /* UID */ 1800 *p++ = cpu_to_be32(0); /* UID */
1773 *p++ = cpu_to_be32(0); /* GID */ 1801 *p++ = cpu_to_be32(0); /* GID */
@@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1782 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1810 encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1783} 1811}
1784 1812
1813static void encode_destroy_clientid(struct xdr_stream *xdr,
1814 uint64_t clientid,
1815 struct compound_hdr *hdr)
1816{
1817 encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
1818 encode_uint64(xdr, clientid);
1819}
1820
1785static void encode_reclaim_complete(struct xdr_stream *xdr, 1821static void encode_reclaim_complete(struct xdr_stream *xdr,
1786 struct nfs41_reclaim_complete_args *args, 1822 struct nfs41_reclaim_complete_args *args,
1787 struct compound_hdr *hdr) 1823 struct compound_hdr *hdr)
@@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
2064 encode_sequence(xdr, &args->seq_args, &hdr); 2100 encode_sequence(xdr, &args->seq_args, &hdr);
2065 encode_putfh(xdr, args->fh, &hdr); 2101 encode_putfh(xdr, args->fh, &hdr);
2066 encode_remove(xdr, &args->name, &hdr); 2102 encode_remove(xdr, &args->name, &hdr);
2067 encode_getfattr(xdr, args->bitmask, &hdr);
2068 encode_nops(&hdr); 2103 encode_nops(&hdr);
2069} 2104}
2070 2105
@@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
2084 encode_savefh(xdr, &hdr); 2119 encode_savefh(xdr, &hdr);
2085 encode_putfh(xdr, args->new_dir, &hdr); 2120 encode_putfh(xdr, args->new_dir, &hdr);
2086 encode_rename(xdr, args->old_name, args->new_name, &hdr); 2121 encode_rename(xdr, args->old_name, args->new_name, &hdr);
2087 encode_getfattr(xdr, args->bitmask, &hdr);
2088 encode_restorefh(xdr, &hdr);
2089 encode_getfattr(xdr, args->bitmask, &hdr);
2090 encode_nops(&hdr); 2122 encode_nops(&hdr);
2091} 2123}
2092 2124
@@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
2106 encode_savefh(xdr, &hdr); 2138 encode_savefh(xdr, &hdr);
2107 encode_putfh(xdr, args->dir_fh, &hdr); 2139 encode_putfh(xdr, args->dir_fh, &hdr);
2108 encode_link(xdr, args->name, &hdr); 2140 encode_link(xdr, args->name, &hdr);
2109 encode_getfattr(xdr, args->bitmask, &hdr);
2110 encode_restorefh(xdr, &hdr); 2141 encode_restorefh(xdr, &hdr);
2111 encode_getfattr(xdr, args->bitmask, &hdr); 2142 encode_getfattr(xdr, args->bitmask, &hdr);
2112 encode_nops(&hdr); 2143 encode_nops(&hdr);
@@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
2125 encode_compound_hdr(xdr, req, &hdr); 2156 encode_compound_hdr(xdr, req, &hdr);
2126 encode_sequence(xdr, &args->seq_args, &hdr); 2157 encode_sequence(xdr, &args->seq_args, &hdr);
2127 encode_putfh(xdr, args->dir_fh, &hdr); 2158 encode_putfh(xdr, args->dir_fh, &hdr);
2128 encode_savefh(xdr, &hdr);
2129 encode_create(xdr, args, &hdr); 2159 encode_create(xdr, args, &hdr);
2130 encode_getfh(xdr, &hdr); 2160 encode_getfh(xdr, &hdr);
2131 encode_getfattr(xdr, args->bitmask, &hdr); 2161 encode_getfattr(xdr, args->bitmask, &hdr);
2132 encode_restorefh(xdr, &hdr);
2133 encode_getfattr(xdr, args->bitmask, &hdr);
2134 encode_nops(&hdr); 2162 encode_nops(&hdr);
2135} 2163}
2136 2164
@@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2191 encode_compound_hdr(xdr, req, &hdr); 2219 encode_compound_hdr(xdr, req, &hdr);
2192 encode_sequence(xdr, &args->seq_args, &hdr); 2220 encode_sequence(xdr, &args->seq_args, &hdr);
2193 encode_putfh(xdr, args->fh, &hdr); 2221 encode_putfh(xdr, args->fh, &hdr);
2194 encode_savefh(xdr, &hdr);
2195 encode_open(xdr, args, &hdr); 2222 encode_open(xdr, args, &hdr);
2196 encode_getfh(xdr, &hdr); 2223 encode_getfh(xdr, &hdr);
2197 encode_getfattr(xdr, args->bitmask, &hdr); 2224 encode_getfattr_open(xdr, args->bitmask, &hdr);
2198 encode_restorefh(xdr, &hdr);
2199 encode_getfattr(xdr, args->dir_bitmask, &hdr);
2200 encode_nops(&hdr); 2225 encode_nops(&hdr);
2201} 2226}
2202 2227
@@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2448 * a COMMIT request 2473 * a COMMIT request
2449 */ 2474 */
2450static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, 2475static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2451 struct nfs_writeargs *args) 2476 struct nfs_commitargs *args)
2452{ 2477{
2453 struct compound_hdr hdr = { 2478 struct compound_hdr hdr = {
2454 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2479 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2458 encode_sequence(xdr, &args->seq_args, &hdr); 2483 encode_sequence(xdr, &args->seq_args, &hdr);
2459 encode_putfh(xdr, args->fh, &hdr); 2484 encode_putfh(xdr, args->fh, &hdr);
2460 encode_commit(xdr, args, &hdr); 2485 encode_commit(xdr, args, &hdr);
2461 if (args->bitmask)
2462 encode_getfattr(xdr, args->bitmask, &hdr);
2463 encode_nops(&hdr); 2486 encode_nops(&hdr);
2464} 2487}
2465 2488
@@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
2602 encode_compound_hdr(xdr, req, &hdr); 2625 encode_compound_hdr(xdr, req, &hdr);
2603 encode_sequence(xdr, &args->seq_args, &hdr); 2626 encode_sequence(xdr, &args->seq_args, &hdr);
2604 encode_putfh(xdr, args->fhandle, &hdr); 2627 encode_putfh(xdr, args->fhandle, &hdr);
2605 encode_delegreturn(xdr, args->stateid, &hdr);
2606 encode_getfattr(xdr, args->bitmask, &hdr); 2628 encode_getfattr(xdr, args->bitmask, &hdr);
2629 encode_delegreturn(xdr, args->stateid, &hdr);
2607 encode_nops(&hdr); 2630 encode_nops(&hdr);
2608} 2631}
2609 2632
@@ -2651,6 +2674,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2651 2674
2652#if defined(CONFIG_NFS_V4_1) 2675#if defined(CONFIG_NFS_V4_1)
2653/* 2676/*
2677 * BIND_CONN_TO_SESSION request
2678 */
2679static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
2680 struct xdr_stream *xdr,
2681 struct nfs_client *clp)
2682{
2683 struct compound_hdr hdr = {
2684 .minorversion = clp->cl_mvops->minor_version,
2685 };
2686
2687 encode_compound_hdr(xdr, req, &hdr);
2688 encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
2689 encode_nops(&hdr);
2690}
2691
2692/*
2654 * EXCHANGE_ID request 2693 * EXCHANGE_ID request
2655 */ 2694 */
2656static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, 2695static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
@@ -2699,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
2699} 2738}
2700 2739
2701/* 2740/*
2741 * a DESTROY_CLIENTID request
2742 */
2743static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
2744 struct xdr_stream *xdr,
2745 struct nfs_client *clp)
2746{
2747 struct compound_hdr hdr = {
2748 .minorversion = clp->cl_mvops->minor_version,
2749 };
2750
2751 encode_compound_hdr(xdr, req, &hdr);
2752 encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
2753 encode_nops(&hdr);
2754}
2755
2756/*
2702 * a SEQUENCE request 2757 * a SEQUENCE request
2703 */ 2758 */
2704static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, 2759static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
4102 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); 4157 return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
4103} 4158}
4104 4159
4105static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 4160static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
4106{ 4161{
4107 int status; 4162 int status;
4108 4163
@@ -4220,6 +4275,110 @@ xdr_error:
4220 return status; 4275 return status;
4221} 4276}
4222 4277
4278static int decode_threshold_hint(struct xdr_stream *xdr,
4279 uint32_t *bitmap,
4280 uint64_t *res,
4281 uint32_t hint_bit)
4282{
4283 __be32 *p;
4284
4285 *res = 0;
4286 if (likely(bitmap[0] & hint_bit)) {
4287 p = xdr_inline_decode(xdr, 8);
4288 if (unlikely(!p))
4289 goto out_overflow;
4290 xdr_decode_hyper(p, res);
4291 }
4292 return 0;
4293out_overflow:
4294 print_overflow_msg(__func__, xdr);
4295 return -EIO;
4296}
4297
4298static int decode_first_threshold_item4(struct xdr_stream *xdr,
4299 struct nfs4_threshold *res)
4300{
4301 __be32 *p, *savep;
4302 uint32_t bitmap[3] = {0,}, attrlen;
4303 int status;
4304
4305 /* layout type */
4306 p = xdr_inline_decode(xdr, 4);
4307 if (unlikely(!p)) {
4308 print_overflow_msg(__func__, xdr);
4309 return -EIO;
4310 }
4311 res->l_type = be32_to_cpup(p);
4312
4313 /* thi_hintset bitmap */
4314 status = decode_attr_bitmap(xdr, bitmap);
4315 if (status < 0)
4316 goto xdr_error;
4317
4318 /* thi_hintlist length */
4319 status = decode_attr_length(xdr, &attrlen, &savep);
4320 if (status < 0)
4321 goto xdr_error;
4322 /* thi_hintlist */
4323 status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
4324 if (status < 0)
4325 goto xdr_error;
4326 status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
4327 if (status < 0)
4328 goto xdr_error;
4329 status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
4330 THRESHOLD_RD_IO);
4331 if (status < 0)
4332 goto xdr_error;
4333 status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
4334 THRESHOLD_WR_IO);
4335 if (status < 0)
4336 goto xdr_error;
4337
4338 status = verify_attr_len(xdr, savep, attrlen);
4339 res->bm = bitmap[0];
4340
4341 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
4342 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
4343 res->wr_io_sz);
4344xdr_error:
4345 dprintk("%s ret=%d!\n", __func__, status);
4346 return status;
4347}
4348
4349/*
4350 * Thresholds on pNFS direct I/O vrs MDS I/O
4351 */
4352static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
4353 uint32_t *bitmap,
4354 struct nfs4_threshold *res)
4355{
4356 __be32 *p;
4357 int status = 0;
4358 uint32_t num;
4359
4360 if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
4361 return -EIO;
4362 if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
4363 p = xdr_inline_decode(xdr, 4);
4364 if (unlikely(!p))
4365 goto out_overflow;
4366 num = be32_to_cpup(p);
4367 if (num == 0)
4368 return 0;
4369 if (num > 1)
4370 printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
4371 "drivers per filesystem not supported\n",
4372 __func__);
4373
4374 status = decode_first_threshold_item4(xdr, res);
4375 }
4376 return status;
4377out_overflow:
4378 print_overflow_msg(__func__, xdr);
4379 return -EIO;
4380}
4381
4223static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, 4382static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4224 struct nfs_fattr *fattr, struct nfs_fh *fh, 4383 struct nfs_fattr *fattr, struct nfs_fh *fh,
4225 struct nfs4_fs_locations *fs_loc, 4384 struct nfs4_fs_locations *fs_loc,
@@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4326 goto xdr_error; 4485 goto xdr_error;
4327 fattr->valid |= status; 4486 fattr->valid |= status;
4328 4487
4488 status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
4489 if (status < 0)
4490 goto xdr_error;
4491
4329xdr_error: 4492xdr_error:
4330 dprintk("%s: xdr returned %d\n", __func__, -status); 4493 dprintk("%s: xdr returned %d\n", __func__, -status);
4331 return status; 4494 return status;
@@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5156 uint32_t dummy; 5319 uint32_t dummy;
5157 char *dummy_str; 5320 char *dummy_str;
5158 int status; 5321 int status;
5159 struct nfs_client *clp = res->client;
5160 uint32_t impl_id_count; 5322 uint32_t impl_id_count;
5161 5323
5162 status = decode_op_hdr(xdr, OP_EXCHANGE_ID); 5324 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,
5166 p = xdr_inline_decode(xdr, 8); 5328 p = xdr_inline_decode(xdr, 8);
5167 if (unlikely(!p)) 5329 if (unlikely(!p))
5168 goto out_overflow; 5330 goto out_overflow;
5169 xdr_decode_hyper(p, &clp->cl_clientid); 5331 xdr_decode_hyper(p, &res->clientid);
5170 p = xdr_inline_decode(xdr, 12); 5332 p = xdr_inline_decode(xdr, 12);
5171 if (unlikely(!p)) 5333 if (unlikely(!p))
5172 goto out_overflow; 5334 goto out_overflow;
5173 clp->cl_seqid = be32_to_cpup(p++); 5335 res->seqid = be32_to_cpup(p++);
5174 clp->cl_exchange_flags = be32_to_cpup(p++); 5336 res->flags = be32_to_cpup(p++);
5175 5337
5176 /* We ask for SP4_NONE */ 5338 /* We ask for SP4_NONE */
5177 dummy = be32_to_cpup(p); 5339 dummy = be32_to_cpup(p);
5178 if (dummy != SP4_NONE) 5340 if (dummy != SP4_NONE)
5179 return -EIO; 5341 return -EIO;
5180 5342
5181 /* Throw away minor_id */ 5343 /* server_owner4.so_minor_id */
5182 p = xdr_inline_decode(xdr, 8); 5344 p = xdr_inline_decode(xdr, 8);
5183 if (unlikely(!p)) 5345 if (unlikely(!p))
5184 goto out_overflow; 5346 goto out_overflow;
5347 p = xdr_decode_hyper(p, &res->server_owner->minor_id);
5185 5348
5186 /* Throw away Major id */ 5349 /* server_owner4.so_major_id */
5187 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5350 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5188 if (unlikely(status)) 5351 if (unlikely(status))
5189 return status; 5352 return status;
5353 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5354 return -EIO;
5355 memcpy(res->server_owner->major_id, dummy_str, dummy);
5356 res->server_owner->major_id_sz = dummy;
5190 5357
5191 /* Save server_scope */ 5358 /* server_scope4 */
5192 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5359 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
5193 if (unlikely(status)) 5360 if (unlikely(status))
5194 return status; 5361 return status;
5195
5196 if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) 5362 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5197 return -EIO; 5363 return -EIO;
5198
5199 memcpy(res->server_scope->server_scope, dummy_str, dummy); 5364 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5200 res->server_scope->server_scope_sz = dummy; 5365 res->server_scope->server_scope_sz = dummy;
5201 5366
@@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
5276 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN); 5441 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
5277} 5442}
5278 5443
5444static int decode_bind_conn_to_session(struct xdr_stream *xdr,
5445 struct nfs41_bind_conn_to_session_res *res)
5446{
5447 __be32 *p;
5448 int status;
5449
5450 status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
5451 if (!status)
5452 status = decode_sessionid(xdr, &res->session->sess_id);
5453 if (unlikely(status))
5454 return status;
5455
5456 /* dir flags, rdma mode bool */
5457 p = xdr_inline_decode(xdr, 8);
5458 if (unlikely(!p))
5459 goto out_overflow;
5460
5461 res->dir = be32_to_cpup(p++);
5462 if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
5463 return -EIO;
5464 if (be32_to_cpup(p) == 0)
5465 res->use_conn_in_rdma_mode = false;
5466 else
5467 res->use_conn_in_rdma_mode = true;
5468
5469 return 0;
5470out_overflow:
5471 print_overflow_msg(__func__, xdr);
5472 return -EIO;
5473}
5474
5279static int decode_create_session(struct xdr_stream *xdr, 5475static int decode_create_session(struct xdr_stream *xdr,
5280 struct nfs41_create_session_res *res) 5476 struct nfs41_create_session_res *res)
5281{ 5477{
@@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
5312 return decode_op_hdr(xdr, OP_DESTROY_SESSION); 5508 return decode_op_hdr(xdr, OP_DESTROY_SESSION);
5313} 5509}
5314 5510
5511static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
5512{
5513 return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
5514}
5515
5315static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) 5516static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
5316{ 5517{
5317 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); 5518 return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5800 if (status) 6001 if (status)
5801 goto out; 6002 goto out;
5802 status = decode_remove(xdr, &res->cinfo); 6003 status = decode_remove(xdr, &res->cinfo);
5803 if (status)
5804 goto out;
5805 decode_getfattr(xdr, res->dir_attr, res->server);
5806out: 6004out:
5807 return status; 6005 return status;
5808} 6006}
@@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5832 if (status) 6030 if (status)
5833 goto out; 6031 goto out;
5834 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); 6032 status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
5835 if (status)
5836 goto out;
5837 /* Current FH is target directory */
5838 if (decode_getfattr(xdr, res->new_fattr, res->server))
5839 goto out;
5840 status = decode_restorefh(xdr);
5841 if (status)
5842 goto out;
5843 decode_getfattr(xdr, res->old_fattr, res->server);
5844out: 6033out:
5845 return status; 6034 return status;
5846} 6035}
@@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5876 * Note order: OP_LINK leaves the directory as the current 6065 * Note order: OP_LINK leaves the directory as the current
5877 * filehandle. 6066 * filehandle.
5878 */ 6067 */
5879 if (decode_getfattr(xdr, res->dir_attr, res->server))
5880 goto out;
5881 status = decode_restorefh(xdr); 6068 status = decode_restorefh(xdr);
5882 if (status) 6069 if (status)
5883 goto out; 6070 goto out;
@@ -5904,21 +6091,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5904 status = decode_putfh(xdr); 6091 status = decode_putfh(xdr);
5905 if (status) 6092 if (status)
5906 goto out; 6093 goto out;
5907 status = decode_savefh(xdr);
5908 if (status)
5909 goto out;
5910 status = decode_create(xdr, &res->dir_cinfo); 6094 status = decode_create(xdr, &res->dir_cinfo);
5911 if (status) 6095 if (status)
5912 goto out; 6096 goto out;
5913 status = decode_getfh(xdr, res->fh); 6097 status = decode_getfh(xdr, res->fh);
5914 if (status) 6098 if (status)
5915 goto out; 6099 goto out;
5916 if (decode_getfattr(xdr, res->fattr, res->server)) 6100 decode_getfattr(xdr, res->fattr, res->server);
5917 goto out;
5918 status = decode_restorefh(xdr);
5919 if (status)
5920 goto out;
5921 decode_getfattr(xdr, res->dir_fattr, res->server);
5922out: 6101out:
5923 return status; 6102 return status;
5924} 6103}
@@ -6075,19 +6254,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6075 status = decode_putfh(xdr); 6254 status = decode_putfh(xdr);
6076 if (status) 6255 if (status)
6077 goto out; 6256 goto out;
6078 status = decode_savefh(xdr);
6079 if (status)
6080 goto out;
6081 status = decode_open(xdr, res); 6257 status = decode_open(xdr, res);
6082 if (status) 6258 if (status)
6083 goto out; 6259 goto out;
6084 if (decode_getfh(xdr, &res->fh) != 0) 6260 if (decode_getfh(xdr, &res->fh) != 0)
6085 goto out; 6261 goto out;
6086 if (decode_getfattr(xdr, res->f_attr, res->server) != 0) 6262 decode_getfattr(xdr, res->f_attr, res->server);
6087 goto out;
6088 if (decode_restorefh(xdr) != 0)
6089 goto out;
6090 decode_getfattr(xdr, res->dir_attr, res->server);
6091out: 6263out:
6092 return status; 6264 return status;
6093} 6265}
@@ -6353,7 +6525,7 @@ out:
6353 * Decode COMMIT response 6525 * Decode COMMIT response
6354 */ 6526 */
6355static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 6527static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6356 struct nfs_writeres *res) 6528 struct nfs_commitres *res)
6357{ 6529{
6358 struct compound_hdr hdr; 6530 struct compound_hdr hdr;
6359 int status; 6531 int status;
@@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6368 if (status) 6540 if (status)
6369 goto out; 6541 goto out;
6370 status = decode_commit(xdr, res); 6542 status = decode_commit(xdr, res);
6371 if (status)
6372 goto out;
6373 if (res->fattr)
6374 decode_getfattr(xdr, res->fattr, res->server);
6375out: 6543out:
6376 return status; 6544 return status;
6377} 6545}
@@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
6527 status = decode_putfh(xdr); 6695 status = decode_putfh(xdr);
6528 if (status != 0) 6696 if (status != 0)
6529 goto out; 6697 goto out;
6530 status = decode_delegreturn(xdr); 6698 status = decode_getfattr(xdr, res->fattr, res->server);
6531 if (status != 0) 6699 if (status != 0)
6532 goto out; 6700 goto out;
6533 decode_getfattr(xdr, res->fattr, res->server); 6701 status = decode_delegreturn(xdr);
6534out: 6702out:
6535 return status; 6703 return status;
6536} 6704}
@@ -6591,6 +6759,22 @@ out:
6591 6759
6592#if defined(CONFIG_NFS_V4_1) 6760#if defined(CONFIG_NFS_V4_1)
6593/* 6761/*
6762 * Decode BIND_CONN_TO_SESSION response
6763 */
6764static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
6765 struct xdr_stream *xdr,
6766 void *res)
6767{
6768 struct compound_hdr hdr;
6769 int status;
6770
6771 status = decode_compound_hdr(xdr, &hdr);
6772 if (!status)
6773 status = decode_bind_conn_to_session(xdr, res);
6774 return status;
6775}
6776
6777/*
6594 * Decode EXCHANGE_ID response 6778 * Decode EXCHANGE_ID response
6595 */ 6779 */
6596static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, 6780static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
@@ -6639,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
6639} 6823}
6640 6824
6641/* 6825/*
6826 * Decode DESTROY_CLIENTID response
6827 */
6828static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
6829 struct xdr_stream *xdr,
6830 void *res)
6831{
6832 struct compound_hdr hdr;
6833 int status;
6834
6835 status = decode_compound_hdr(xdr, &hdr);
6836 if (!status)
6837 status = decode_destroy_clientid(xdr, res);
6838 return status;
6839}
6840
6841/*
6642 * Decode SEQUENCE response 6842 * Decode SEQUENCE response
6643 */ 6843 */
6644static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, 6844static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
@@ -7085,6 +7285,9 @@ struct rpc_procinfo nfs4_procedures[] = {
7085 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), 7285 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
7086 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), 7286 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
7087 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), 7287 PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
7288 PROC(BIND_CONN_TO_SESSION,
7289 enc_bind_conn_to_session, dec_bind_conn_to_session),
7290 PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
7088#endif /* CONFIG_NFS_V4_1 */ 7291#endif /* CONFIG_NFS_V4_1 */
7089}; 7292};
7090 7293
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..b47277baebab 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); 211 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
212} 212}
213 213
214int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, 214static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
215 struct objio_segment **pseg) 215 struct objio_segment **pseg)
216{ 216{
217/* This is the in memory structure of the objio_segment 217/* This is the in memory structure of the objio_segment
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
440 440
441int objio_read_pagelist(struct nfs_read_data *rdata) 441int objio_read_pagelist(struct nfs_read_data *rdata)
442{ 442{
443 struct nfs_pgio_header *hdr = rdata->header;
443 struct objio_state *objios; 444 struct objio_state *objios;
444 int ret; 445 int ret;
445 446
446 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
447 rdata->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, rdata->args.pages, rdata->args.pgbase,
448 rdata->args.offset, rdata->args.count, rdata, 449 rdata->args.offset, rdata->args.count, rdata,
449 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
450 if (unlikely(ret)) 451 if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
483{ 484{
484 struct objio_state *objios = priv; 485 struct objio_state *objios = priv;
485 struct nfs_write_data *wdata = objios->oir.rpcdata; 486 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping;
486 pgoff_t index = offset / PAGE_SIZE; 488 pgoff_t index = offset / PAGE_SIZE;
487 struct page *page = find_get_page(wdata->inode->i_mapping, index); 489 struct page *page = find_get_page(mapping, index);
488 490
489 if (!page) { 491 if (!page) {
490 page = find_or_create_page(wdata->inode->i_mapping, 492 page = find_or_create_page(mapping, index, GFP_NOFS);
491 index, GFP_NOFS);
492 if (unlikely(!page)) { 493 if (unlikely(!page)) {
493 dprintk("%s: grab_cache_page Failed index=0x%lx\n", 494 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
494 __func__, index); 495 __func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
518 519
519int objio_write_pagelist(struct nfs_write_data *wdata, int how) 520int objio_write_pagelist(struct nfs_write_data *wdata, int how)
520{ 521{
522 struct nfs_pgio_header *hdr = wdata->header;
521 struct objio_state *objios; 523 struct objio_state *objios;
522 int ret; 524 int ret;
523 525
524 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, 526 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
525 wdata->lseg, wdata->args.pages, wdata->args.pgbase, 527 hdr->lseg, wdata->args.pages, wdata->args.pgbase,
526 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 528 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
527 &objios); 529 &objios);
528 if (unlikely(ret)) 530 if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc21a19..874613545301 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
258 if (status >= 0) 258 if (status >= 0)
259 rdata->res.count = status; 259 rdata->res.count = status;
260 else 260 else
261 rdata->pnfs_error = status; 261 rdata->header->pnfs_error = status;
262 objlayout_iodone(oir); 262 objlayout_iodone(oir);
263 /* must not use oir after this point */ 263 /* must not use oir after this point */
264 264
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
279enum pnfs_try_status 279enum pnfs_try_status
280objlayout_read_pagelist(struct nfs_read_data *rdata) 280objlayout_read_pagelist(struct nfs_read_data *rdata)
281{ 281{
282 struct nfs_pgio_header *hdr = rdata->header;
283 struct inode *inode = hdr->inode;
282 loff_t offset = rdata->args.offset; 284 loff_t offset = rdata->args.offset;
283 size_t count = rdata->args.count; 285 size_t count = rdata->args.count;
284 int err; 286 int err;
285 loff_t eof; 287 loff_t eof;
286 288
287 eof = i_size_read(rdata->inode); 289 eof = i_size_read(inode);
288 if (unlikely(offset + count > eof)) { 290 if (unlikely(offset + count > eof)) {
289 if (offset >= eof) { 291 if (offset >= eof) {
290 err = 0; 292 err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
297 } 299 }
298 300
299 rdata->res.eof = (offset + count) >= eof; 301 rdata->res.eof = (offset + count) >= eof;
300 _fix_verify_io_params(rdata->lseg, &rdata->args.pages, 302 _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
301 &rdata->args.pgbase, 303 &rdata->args.pgbase,
302 rdata->args.offset, rdata->args.count); 304 rdata->args.offset, rdata->args.count);
303 305
304 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 306 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
305 __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); 307 __func__, inode->i_ino, offset, count, rdata->res.eof);
306 308
307 err = objio_read_pagelist(rdata); 309 err = objio_read_pagelist(rdata);
308 out: 310 out:
309 if (unlikely(err)) { 311 if (unlikely(err)) {
310 rdata->pnfs_error = err; 312 hdr->pnfs_error = err;
311 dprintk("%s: Returned Error %d\n", __func__, err); 313 dprintk("%s: Returned Error %d\n", __func__, err);
312 return PNFS_NOT_ATTEMPTED; 314 return PNFS_NOT_ATTEMPTED;
313 } 315 }
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
340 wdata->res.count = status; 342 wdata->res.count = status;
341 wdata->verf.committed = oir->committed; 343 wdata->verf.committed = oir->committed;
342 } else { 344 } else {
343 wdata->pnfs_error = status; 345 wdata->header->pnfs_error = status;
344 } 346 }
345 objlayout_iodone(oir); 347 objlayout_iodone(oir);
346 /* must not use oir after this point */ 348 /* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
363objlayout_write_pagelist(struct nfs_write_data *wdata, 365objlayout_write_pagelist(struct nfs_write_data *wdata,
364 int how) 366 int how)
365{ 367{
368 struct nfs_pgio_header *hdr = wdata->header;
366 int err; 369 int err;
367 370
368 _fix_verify_io_params(wdata->lseg, &wdata->args.pages, 371 _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
369 &wdata->args.pgbase, 372 &wdata->args.pgbase,
370 wdata->args.offset, wdata->args.count); 373 wdata->args.offset, wdata->args.count);
371 374
372 err = objio_write_pagelist(wdata, how); 375 err = objio_write_pagelist(wdata, how);
373 if (unlikely(err)) { 376 if (unlikely(err)) {
374 wdata->pnfs_error = err; 377 hdr->pnfs_error = err;
375 dprintk("%s: Returned Error %d\n", __func__, err); 378 dprintk("%s: Returned Error %d\n", __func__, err);
376 return PNFS_NOT_ATTEMPTED; 379 return PNFS_NOT_ATTEMPTED;
377 } 380 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fceaa9f62..aed913c833f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,47 @@
26 26
27static struct kmem_cache *nfs_page_cachep; 27static struct kmem_cache *nfs_page_cachep;
28 28
29bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
30{
31 p->npages = pagecount;
32 if (pagecount <= ARRAY_SIZE(p->page_array))
33 p->pagevec = p->page_array;
34 else {
35 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
36 if (!p->pagevec)
37 p->npages = 0;
38 }
39 return p->pagevec != NULL;
40}
41
42void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
43 struct nfs_pgio_header *hdr,
44 void (*release)(struct nfs_pgio_header *hdr))
45{
46 hdr->req = nfs_list_entry(desc->pg_list.next);
47 hdr->inode = desc->pg_inode;
48 hdr->cred = hdr->req->wb_context->cred;
49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq;
52 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr)
55 hdr->completion_ops->init_hdr(hdr);
56}
57
58void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
59{
60 spin_lock(&hdr->lock);
61 if (pos < hdr->io_start + hdr->good_bytes) {
62 set_bit(NFS_IOHDR_ERROR, &hdr->flags);
63 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
64 hdr->good_bytes = pos - hdr->io_start;
65 hdr->error = error;
66 }
67 spin_unlock(&hdr->lock);
68}
69
29static inline struct nfs_page * 70static inline struct nfs_page *
30nfs_page_alloc(void) 71nfs_page_alloc(void)
31{ 72{
@@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
76 * long write-back delay. This will be adjusted in 117 * long write-back delay. This will be adjusted in
77 * update_nfs_request below if the region is not locked. */ 118 * update_nfs_request below if the region is not locked. */
78 req->wb_page = page; 119 req->wb_page = page;
79 atomic_set(&req->wb_complete, 0);
80 req->wb_index = page->index; 120 req->wb_index = page->index;
81 page_cache_get(page); 121 page_cache_get(page);
82 BUG_ON(PagePrivate(page));
83 BUG_ON(!PageLocked(page));
84 BUG_ON(page->mapping->host != inode);
85 req->wb_offset = offset; 122 req->wb_offset = offset;
86 req->wb_pgbase = offset; 123 req->wb_pgbase = offset;
87 req->wb_bytes = count; 124 req->wb_bytes = count;
@@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)
104 clear_bit(PG_BUSY, &req->wb_flags); 141 clear_bit(PG_BUSY, &req->wb_flags);
105 smp_mb__after_clear_bit(); 142 smp_mb__after_clear_bit();
106 wake_up_bit(&req->wb_flags, PG_BUSY); 143 wake_up_bit(&req->wb_flags, PG_BUSY);
144}
145
146/**
147 * nfs_unlock_and_release_request - Unlock request and release the nfs_page
148 * @req:
149 */
150void nfs_unlock_and_release_request(struct nfs_page *req)
151{
152 nfs_unlock_request(req);
107 nfs_release_request(req); 153 nfs_release_request(req);
108} 154}
109 155
@@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
203void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 249void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
204 struct inode *inode, 250 struct inode *inode,
205 const struct nfs_pageio_ops *pg_ops, 251 const struct nfs_pageio_ops *pg_ops,
252 const struct nfs_pgio_completion_ops *compl_ops,
206 size_t bsize, 253 size_t bsize,
207 int io_flags) 254 int io_flags)
208{ 255{
@@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 desc->pg_recoalesce = 0; 262 desc->pg_recoalesce = 0;
216 desc->pg_inode = inode; 263 desc->pg_inode = inode;
217 desc->pg_ops = pg_ops; 264 desc->pg_ops = pg_ops;
265 desc->pg_completion_ops = compl_ops;
218 desc->pg_ioflags = io_flags; 266 desc->pg_ioflags = io_flags;
219 desc->pg_error = 0; 267 desc->pg_error = 0;
220 desc->pg_lseg = NULL; 268 desc->pg_lseg = NULL;
269 desc->pg_dreq = NULL;
221} 270}
222 271
223/** 272/**
@@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
241 return false; 290 return false;
242 if (req->wb_context->state != prev->wb_context->state) 291 if (req->wb_context->state != prev->wb_context->state)
243 return false; 292 return false;
244 if (req->wb_index != (prev->wb_index + 1))
245 return false;
246 if (req->wb_pgbase != 0) 293 if (req->wb_pgbase != 0)
247 return false; 294 return false;
248 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 295 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
249 return false; 296 return false;
297 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
298 return false;
250 return pgio->pg_ops->pg_test(pgio, prev, req); 299 return pgio->pg_ops->pg_test(pgio, prev, req);
251} 300}
252 301
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..b8323aa7b543 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
395 dprintk("%s:Begin lo %p\n", __func__, lo); 395 dprintk("%s:Begin lo %p\n", __func__, lo);
396 396
397 if (list_empty(&lo->plh_segs)) { 397 if (list_empty(&lo->plh_segs)) {
398 /* Reset MDS Threshold I/O counters */
399 NFS_I(lo->plh_inode)->write_io = 0;
400 NFS_I(lo->plh_inode)->read_io = 0;
398 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) 401 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
399 put_layout_hdr_locked(lo); 402 put_layout_hdr_locked(lo);
400 return 0; 403 return 0;
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
455 spin_unlock(&nfsi->vfs_inode.i_lock); 458 spin_unlock(&nfsi->vfs_inode.i_lock);
456 pnfs_free_lseg_list(&tmp_list); 459 pnfs_free_lseg_list(&tmp_list);
457} 460}
461EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
458 462
459/* 463/*
460 * Called by the state manger to remove all layouts established under an 464 * Called by the state manger to remove all layouts established under an
@@ -692,6 +696,7 @@ out:
692 dprintk("<-- %s status: %d\n", __func__, status); 696 dprintk("<-- %s status: %d\n", __func__, status);
693 return status; 697 return status;
694} 698}
699EXPORT_SYMBOL_GPL(_pnfs_return_layout);
695 700
696bool pnfs_roc(struct inode *ino) 701bool pnfs_roc(struct inode *ino)
697{ 702{
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
931} 936}
932 937
933/* 938/*
939 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
940 * to the MDS or over pNFS
941 *
942 * The nfs_inode read_io and write_io fields are cumulative counters reset
943 * when there are no layout segments. Note that in pnfs_update_layout iomode
944 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
945 * WRITE request.
946 *
947 * A return of true means use MDS I/O.
948 *
949 * From rfc 5661:
950 * If a file's size is smaller than the file size threshold, data accesses
951 * SHOULD be sent to the metadata server. If an I/O request has a length that
952 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
953 * server. If both file size and I/O size are provided, the client SHOULD
954 * reach or exceed both thresholds before sending its read or write
955 * requests to the data server.
956 */
957static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
958 struct inode *ino, int iomode)
959{
960 struct nfs4_threshold *t = ctx->mdsthreshold;
961 struct nfs_inode *nfsi = NFS_I(ino);
962 loff_t fsize = i_size_read(ino);
963 bool size = false, size_set = false, io = false, io_set = false, ret = false;
964
965 if (t == NULL)
966 return ret;
967
968 dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
969 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
970
971 switch (iomode) {
972 case IOMODE_READ:
973 if (t->bm & THRESHOLD_RD) {
974 dprintk("%s fsize %llu\n", __func__, fsize);
975 size_set = true;
976 if (fsize < t->rd_sz)
977 size = true;
978 }
979 if (t->bm & THRESHOLD_RD_IO) {
980 dprintk("%s nfsi->read_io %llu\n", __func__,
981 nfsi->read_io);
982 io_set = true;
983 if (nfsi->read_io < t->rd_io_sz)
984 io = true;
985 }
986 break;
987 case IOMODE_RW:
988 if (t->bm & THRESHOLD_WR) {
989 dprintk("%s fsize %llu\n", __func__, fsize);
990 size_set = true;
991 if (fsize < t->wr_sz)
992 size = true;
993 }
994 if (t->bm & THRESHOLD_WR_IO) {
995 dprintk("%s nfsi->write_io %llu\n", __func__,
996 nfsi->write_io);
997 io_set = true;
998 if (nfsi->write_io < t->wr_io_sz)
999 io = true;
1000 }
1001 break;
1002 }
1003 if (size_set && io_set) {
1004 if (size && io)
1005 ret = true;
1006 } else if (size || io)
1007 ret = true;
1008
1009 dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1010 return ret;
1011}
1012
1013/*
934 * Layout segment is retreived from the server if not cached. 1014 * Layout segment is retreived from the server if not cached.
935 * The appropriate layout segment is referenced and returned to the caller. 1015 * The appropriate layout segment is referenced and returned to the caller.
936 */ 1016 */
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,
957 1037
958 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1038 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
959 return NULL; 1039 return NULL;
1040
1041 if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1042 return NULL;
1043
960 spin_lock(&ino->i_lock); 1044 spin_lock(&ino->i_lock);
961 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1045 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
962 if (lo == NULL) { 1046 if (lo == NULL) {
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1082{ 1166{
1083 BUG_ON(pgio->pg_lseg != NULL); 1167 BUG_ON(pgio->pg_lseg != NULL);
1084 1168
1169 if (req->wb_offset != req->wb_pgbase) {
1170 nfs_pageio_reset_read_mds(pgio);
1171 return;
1172 }
1085 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1173 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1086 req->wb_context, 1174 req->wb_context,
1087 req_offset(req), 1175 req_offset(req),
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1100{ 1188{
1101 BUG_ON(pgio->pg_lseg != NULL); 1189 BUG_ON(pgio->pg_lseg != NULL);
1102 1190
1191 if (req->wb_offset != req->wb_pgbase) {
1192 nfs_pageio_reset_write_mds(pgio);
1193 return;
1194 }
1103 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1195 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1104 req->wb_context, 1196 req->wb_context,
1105 req_offset(req), 1197 req_offset(req),
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1113EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1205EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1114 1206
1115bool 1207bool
1116pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1208pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1209 const struct nfs_pgio_completion_ops *compl_ops)
1117{ 1210{
1118 struct nfs_server *server = NFS_SERVER(inode); 1211 struct nfs_server *server = NFS_SERVER(inode);
1119 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1212 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1120 1213
1121 if (ld == NULL) 1214 if (ld == NULL)
1122 return false; 1215 return false;
1123 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1216 nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
1217 server->rsize, 0);
1124 return true; 1218 return true;
1125} 1219}
1126 1220
1127bool 1221bool
1128pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1222pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
1223 int ioflags,
1224 const struct nfs_pgio_completion_ops *compl_ops)
1129{ 1225{
1130 struct nfs_server *server = NFS_SERVER(inode); 1226 struct nfs_server *server = NFS_SERVER(inode);
1131 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1227 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1132 1228
1133 if (ld == NULL) 1229 if (ld == NULL)
1134 return false; 1230 return false;
1135 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1231 nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
1232 server->wsize, ioflags);
1136 return true; 1233 return true;
1137} 1234}
1138 1235
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1162} 1259}
1163EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1260EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1164 1261
1165static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) 1262int pnfs_write_done_resend_to_mds(struct inode *inode,
1263 struct list_head *head,
1264 const struct nfs_pgio_completion_ops *compl_ops)
1166{ 1265{
1167 struct nfs_pageio_descriptor pgio; 1266 struct nfs_pageio_descriptor pgio;
1168 LIST_HEAD(failed); 1267 LIST_HEAD(failed);
1169 1268
1170 /* Resend all requests through the MDS */ 1269 /* Resend all requests through the MDS */
1171 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); 1270 nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
1172 while (!list_empty(head)) { 1271 while (!list_empty(head)) {
1173 struct nfs_page *req = nfs_list_entry(head->next); 1272 struct nfs_page *req = nfs_list_entry(head->next);
1174 1273
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
1188 } 1287 }
1189 return 0; 1288 return 0;
1190} 1289}
1290EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1291
1292static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1293{
1294 struct nfs_pgio_header *hdr = data->header;
1295
1296 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1297 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1298 PNFS_LAYOUTRET_ON_ERROR) {
1299 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1300 pnfs_return_layout(hdr->inode);
1301 }
1302 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1303 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
1304 &hdr->pages,
1305 hdr->completion_ops);
1306}
1191 1307
1192/* 1308/*
1193 * Called by non rpc-based layout drivers 1309 * Called by non rpc-based layout drivers
1194 */ 1310 */
1195void pnfs_ld_write_done(struct nfs_write_data *data) 1311void pnfs_ld_write_done(struct nfs_write_data *data)
1196{ 1312{
1197 if (likely(!data->pnfs_error)) { 1313 struct nfs_pgio_header *hdr = data->header;
1314
1315 if (!hdr->pnfs_error) {
1198 pnfs_set_layoutcommit(data); 1316 pnfs_set_layoutcommit(data);
1199 data->mds_ops->rpc_call_done(&data->task, data); 1317 hdr->mds_ops->rpc_call_done(&data->task, data);
1200 } else { 1318 } else
1201 dprintk("pnfs write error = %d\n", data->pnfs_error); 1319 pnfs_ld_handle_write_error(data);
1202 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1320 hdr->mds_ops->rpc_release(data);
1203 PNFS_LAYOUTRET_ON_ERROR) {
1204 /* Don't lo_commit on error, Server will needs to
1205 * preform a file recovery.
1206 */
1207 clear_bit(NFS_INO_LAYOUTCOMMIT,
1208 &NFS_I(data->inode)->flags);
1209 pnfs_return_layout(data->inode);
1210 }
1211 data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
1212 }
1213 put_lseg(data->lseg);
1214 data->mds_ops->rpc_release(data);
1215} 1321}
1216EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1322EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1217 1323
@@ -1219,12 +1325,13 @@ static void
1219pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1325pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1220 struct nfs_write_data *data) 1326 struct nfs_write_data *data)
1221{ 1327{
1222 list_splice_tail_init(&data->pages, &desc->pg_list); 1328 struct nfs_pgio_header *hdr = data->header;
1223 if (data->req && list_empty(&data->req->wb_list)) 1329
1224 nfs_list_add_request(data->req, &desc->pg_list); 1330 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1225 nfs_pageio_reset_write_mds(desc); 1331 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1226 desc->pg_recoalesce = 1; 1332 nfs_pageio_reset_write_mds(desc);
1227 put_lseg(data->lseg); 1333 desc->pg_recoalesce = 1;
1334 }
1228 nfs_writedata_release(data); 1335 nfs_writedata_release(data);
1229} 1336}
1230 1337
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1234 struct pnfs_layout_segment *lseg, 1341 struct pnfs_layout_segment *lseg,
1235 int how) 1342 int how)
1236{ 1343{
1237 struct inode *inode = wdata->inode; 1344 struct nfs_pgio_header *hdr = wdata->header;
1345 struct inode *inode = hdr->inode;
1238 enum pnfs_try_status trypnfs; 1346 enum pnfs_try_status trypnfs;
1239 struct nfs_server *nfss = NFS_SERVER(inode); 1347 struct nfs_server *nfss = NFS_SERVER(inode);
1240 1348
1241 wdata->mds_ops = call_ops; 1349 hdr->mds_ops = call_ops;
1242 wdata->lseg = get_lseg(lseg);
1243 1350
1244 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1351 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1245 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1352 inode->i_ino, wdata->args.count, wdata->args.offset, how);
1246
1247 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1353 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
1248 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1354 if (trypnfs != PNFS_NOT_ATTEMPTED)
1249 put_lseg(wdata->lseg);
1250 wdata->lseg = NULL;
1251 } else
1252 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1355 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1253
1254 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1356 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1255 return trypnfs; 1357 return trypnfs;
1256} 1358}
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1266 while (!list_empty(head)) { 1368 while (!list_empty(head)) {
1267 enum pnfs_try_status trypnfs; 1369 enum pnfs_try_status trypnfs;
1268 1370
1269 data = list_entry(head->next, struct nfs_write_data, list); 1371 data = list_first_entry(head, struct nfs_write_data, list);
1270 list_del_init(&data->list); 1372 list_del_init(&data->list);
1271 1373
1272 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1374 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1276 put_lseg(lseg); 1378 put_lseg(lseg);
1277} 1379}
1278 1380
1381static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1382{
1383 put_lseg(hdr->lseg);
1384 nfs_writehdr_free(hdr);
1385}
1386
1279int 1387int
1280pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1388pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1281{ 1389{
1282 LIST_HEAD(head); 1390 struct nfs_write_header *whdr;
1391 struct nfs_pgio_header *hdr;
1283 int ret; 1392 int ret;
1284 1393
1285 ret = nfs_generic_flush(desc, &head); 1394 whdr = nfs_writehdr_alloc();
1286 if (ret != 0) { 1395 if (!whdr) {
1396 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1287 put_lseg(desc->pg_lseg); 1397 put_lseg(desc->pg_lseg);
1288 desc->pg_lseg = NULL; 1398 desc->pg_lseg = NULL;
1289 return ret; 1399 return -ENOMEM;
1290 } 1400 }
1291 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1401 hdr = &whdr->header;
1292 return 0; 1402 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1403 hdr->lseg = get_lseg(desc->pg_lseg);
1404 atomic_inc(&hdr->refcnt);
1405 ret = nfs_generic_flush(desc, hdr);
1406 if (ret != 0) {
1407 put_lseg(desc->pg_lseg);
1408 desc->pg_lseg = NULL;
1409 } else
1410 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
1411 if (atomic_dec_and_test(&hdr->refcnt))
1412 hdr->completion_ops->completion(hdr);
1413 return ret;
1293} 1414}
1294EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1415EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1295 1416
1296static void pnfs_ld_handle_read_error(struct nfs_read_data *data) 1417int pnfs_read_done_resend_to_mds(struct inode *inode,
1418 struct list_head *head,
1419 const struct nfs_pgio_completion_ops *compl_ops)
1297{ 1420{
1298 struct nfs_pageio_descriptor pgio; 1421 struct nfs_pageio_descriptor pgio;
1422 LIST_HEAD(failed);
1299 1423
1300 put_lseg(data->lseg); 1424 /* Resend all requests through the MDS */
1301 data->lseg = NULL; 1425 nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
1302 dprintk("pnfs write error = %d\n", data->pnfs_error); 1426 while (!list_empty(head)) {
1303 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & 1427 struct nfs_page *req = nfs_list_entry(head->next);
1304 PNFS_LAYOUTRET_ON_ERROR)
1305 pnfs_return_layout(data->inode);
1306
1307 nfs_pageio_init_read_mds(&pgio, data->inode);
1308
1309 while (!list_empty(&data->pages)) {
1310 struct nfs_page *req = nfs_list_entry(data->pages.next);
1311 1428
1312 nfs_list_remove_request(req); 1429 nfs_list_remove_request(req);
1313 nfs_pageio_add_request(&pgio, req); 1430 if (!nfs_pageio_add_request(&pgio, req))
1431 nfs_list_add_request(req, &failed);
1314 } 1432 }
1315 nfs_pageio_complete(&pgio); 1433 nfs_pageio_complete(&pgio);
1434
1435 if (!list_empty(&failed)) {
1436 list_move(&failed, head);
1437 return -EIO;
1438 }
1439 return 0;
1440}
1441EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1442
1443static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1444{
1445 struct nfs_pgio_header *hdr = data->header;
1446
1447 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1448 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1449 PNFS_LAYOUTRET_ON_ERROR) {
1450 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1451 pnfs_return_layout(hdr->inode);
1452 }
1453 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1454 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1455 &hdr->pages,
1456 hdr->completion_ops);
1316} 1457}
1317 1458
1318/* 1459/*
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1320 */ 1461 */
1321void pnfs_ld_read_done(struct nfs_read_data *data) 1462void pnfs_ld_read_done(struct nfs_read_data *data)
1322{ 1463{
1323 if (likely(!data->pnfs_error)) { 1464 struct nfs_pgio_header *hdr = data->header;
1465
1466 if (likely(!hdr->pnfs_error)) {
1324 __nfs4_read_done_cb(data); 1467 __nfs4_read_done_cb(data);
1325 data->mds_ops->rpc_call_done(&data->task, data); 1468 hdr->mds_ops->rpc_call_done(&data->task, data);
1326 } else 1469 } else
1327 pnfs_ld_handle_read_error(data); 1470 pnfs_ld_handle_read_error(data);
1328 put_lseg(data->lseg); 1471 hdr->mds_ops->rpc_release(data);
1329 data->mds_ops->rpc_release(data);
1330} 1472}
1331EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1473EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1332 1474
@@ -1334,11 +1476,13 @@ static void
1334pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1476pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1335 struct nfs_read_data *data) 1477 struct nfs_read_data *data)
1336{ 1478{
1337 list_splice_tail_init(&data->pages, &desc->pg_list); 1479 struct nfs_pgio_header *hdr = data->header;
1338 if (data->req && list_empty(&data->req->wb_list)) 1480
1339 nfs_list_add_request(data->req, &desc->pg_list); 1481 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1340 nfs_pageio_reset_read_mds(desc); 1482 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1341 desc->pg_recoalesce = 1; 1483 nfs_pageio_reset_read_mds(desc);
1484 desc->pg_recoalesce = 1;
1485 }
1342 nfs_readdata_release(data); 1486 nfs_readdata_release(data);
1343} 1487}
1344 1488
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1350 const struct rpc_call_ops *call_ops, 1494 const struct rpc_call_ops *call_ops,
1351 struct pnfs_layout_segment *lseg) 1495 struct pnfs_layout_segment *lseg)
1352{ 1496{
1353 struct inode *inode = rdata->inode; 1497 struct nfs_pgio_header *hdr = rdata->header;
1498 struct inode *inode = hdr->inode;
1354 struct nfs_server *nfss = NFS_SERVER(inode); 1499 struct nfs_server *nfss = NFS_SERVER(inode);
1355 enum pnfs_try_status trypnfs; 1500 enum pnfs_try_status trypnfs;
1356 1501
1357 rdata->mds_ops = call_ops; 1502 hdr->mds_ops = call_ops;
1358 rdata->lseg = get_lseg(lseg);
1359 1503
1360 dprintk("%s: Reading ino:%lu %u@%llu\n", 1504 dprintk("%s: Reading ino:%lu %u@%llu\n",
1361 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1505 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1362 1506
1363 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1507 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1364 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1508 if (trypnfs != PNFS_NOT_ATTEMPTED)
1365 put_lseg(rdata->lseg);
1366 rdata->lseg = NULL;
1367 } else {
1368 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1509 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1369 }
1370 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1510 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1371 return trypnfs; 1511 return trypnfs;
1372} 1512}
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1382 while (!list_empty(head)) { 1522 while (!list_empty(head)) {
1383 enum pnfs_try_status trypnfs; 1523 enum pnfs_try_status trypnfs;
1384 1524
1385 data = list_entry(head->next, struct nfs_read_data, list); 1525 data = list_first_entry(head, struct nfs_read_data, list);
1386 list_del_init(&data->list); 1526 list_del_init(&data->list);
1387 1527
1388 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1528 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1392 put_lseg(lseg); 1532 put_lseg(lseg);
1393} 1533}
1394 1534
1535static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1536{
1537 put_lseg(hdr->lseg);
1538 nfs_readhdr_free(hdr);
1539}
1540
1395int 1541int
1396pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1542pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1397{ 1543{
1398 LIST_HEAD(head); 1544 struct nfs_read_header *rhdr;
1545 struct nfs_pgio_header *hdr;
1399 int ret; 1546 int ret;
1400 1547
1401 ret = nfs_generic_pagein(desc, &head); 1548 rhdr = nfs_readhdr_alloc();
1402 if (ret != 0) { 1549 if (!rhdr) {
1550 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1551 ret = -ENOMEM;
1403 put_lseg(desc->pg_lseg); 1552 put_lseg(desc->pg_lseg);
1404 desc->pg_lseg = NULL; 1553 desc->pg_lseg = NULL;
1405 return ret; 1554 return ret;
1406 } 1555 }
1407 pnfs_do_multiple_reads(desc, &head); 1556 hdr = &rhdr->header;
1408 return 0; 1557 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1558 hdr->lseg = get_lseg(desc->pg_lseg);
1559 atomic_inc(&hdr->refcnt);
1560 ret = nfs_generic_pagein(desc, hdr);
1561 if (ret != 0) {
1562 put_lseg(desc->pg_lseg);
1563 desc->pg_lseg = NULL;
1564 } else
1565 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1566 if (atomic_dec_and_test(&hdr->refcnt))
1567 hdr->completion_ops->completion(hdr);
1568 return ret;
1409} 1569}
1410EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1570EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1411 1571
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1438void 1598void
1439pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1599pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1440{ 1600{
1441 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1601 struct nfs_pgio_header *hdr = wdata->header;
1602 struct inode *inode = hdr->inode;
1603 struct nfs_inode *nfsi = NFS_I(inode);
1442 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1604 loff_t end_pos = wdata->mds_offset + wdata->res.count;
1443 bool mark_as_dirty = false; 1605 bool mark_as_dirty = false;
1444 1606
1445 spin_lock(&nfsi->vfs_inode.i_lock); 1607 spin_lock(&inode->i_lock);
1446 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1608 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1447 mark_as_dirty = true; 1609 mark_as_dirty = true;
1448 dprintk("%s: Set layoutcommit for inode %lu ", 1610 dprintk("%s: Set layoutcommit for inode %lu ",
1449 __func__, wdata->inode->i_ino); 1611 __func__, inode->i_ino);
1450 } 1612 }
1451 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { 1613 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1452 /* references matched in nfs4_layoutcommit_release */ 1614 /* references matched in nfs4_layoutcommit_release */
1453 get_lseg(wdata->lseg); 1615 get_lseg(hdr->lseg);
1454 } 1616 }
1455 if (end_pos > nfsi->layout->plh_lwb) 1617 if (end_pos > nfsi->layout->plh_lwb)
1456 nfsi->layout->plh_lwb = end_pos; 1618 nfsi->layout->plh_lwb = end_pos;
1457 spin_unlock(&nfsi->vfs_inode.i_lock); 1619 spin_unlock(&inode->i_lock);
1458 dprintk("%s: lseg %p end_pos %llu\n", 1620 dprintk("%s: lseg %p end_pos %llu\n",
1459 __func__, wdata->lseg, nfsi->layout->plh_lwb); 1621 __func__, hdr->lseg, nfsi->layout->plh_lwb);
1460 1622
1461 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1623 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1462 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1624 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1463 if (mark_as_dirty) 1625 if (mark_as_dirty)
1464 mark_inode_dirty_sync(wdata->inode); 1626 mark_inode_dirty_sync(inode);
1465} 1627}
1466EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1628EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1467 1629
@@ -1550,3 +1712,15 @@ out_free:
1550 kfree(data); 1712 kfree(data);
1551 goto out; 1713 goto out;
1552} 1714}
1715
1716struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
1717{
1718 struct nfs4_threshold *thp;
1719
1720 thp = kzalloc(sizeof(*thp), GFP_NOFS);
1721 if (!thp) {
1722 dprintk("%s mdsthreshold allocation failed\n", __func__);
1723 return NULL;
1724 }
1725 return thp;
1726}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..29fd23c0efdc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -63,6 +63,7 @@ enum {
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
66 NFS_LAYOUT_INVALID, /* layout is being destroyed */
66}; 67};
67 68
68enum layoutdriver_policy_flags { 69enum layoutdriver_policy_flags {
@@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 95 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 96 const struct nfs_pageio_ops *pg_write_ops;
96 97
98 struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
97 void (*mark_request_commit) (struct nfs_page *req, 99 void (*mark_request_commit) (struct nfs_page *req,
98 struct pnfs_layout_segment *lseg); 100 struct pnfs_layout_segment *lseg,
99 void (*clear_request_commit) (struct nfs_page *req); 101 struct nfs_commit_info *cinfo);
100 int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); 102 void (*clear_request_commit) (struct nfs_page *req,
101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 103 struct nfs_commit_info *cinfo);
104 int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
105 int max);
106 void (*recover_commit_reqs) (struct list_head *list,
107 struct nfs_commit_info *cinfo);
108 int (*commit_pagelist)(struct inode *inode,
109 struct list_head *mds_pages,
110 int how,
111 struct nfs_commit_info *cinfo);
102 112
103 /* 113 /*
104 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 114 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
168void get_layout_hdr(struct pnfs_layout_hdr *lo); 178void get_layout_hdr(struct pnfs_layout_hdr *lo);
169void put_lseg(struct pnfs_layout_segment *lseg); 179void put_lseg(struct pnfs_layout_segment *lseg);
170 180
171bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 181bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
172bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); 182 const struct nfs_pgio_completion_ops *);
183bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
184 int, const struct nfs_pgio_completion_ops *);
173 185
174void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 186void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
175void unset_pnfs_layoutdriver(struct nfs_server *); 187void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
211 gfp_t gfp_flags); 223 gfp_t gfp_flags);
212 224
213void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 225void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
226int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
227 const struct nfs_pgio_completion_ops *compl_ops);
228int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
229 const struct nfs_pgio_completion_ops *compl_ops);
230struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
214 231
215/* nfs4_deviceid_flags */ 232/* nfs4_deviceid_flags */
216enum { 233enum {
@@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
261} 278}
262 279
263static inline int 280static inline int
264pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 281pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
282 struct nfs_commit_info *cinfo)
265{ 283{
266 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) 284 if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
267 return PNFS_NOT_ATTEMPTED; 285 return PNFS_NOT_ATTEMPTED;
268 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); 286 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
287}
288
289static inline struct pnfs_ds_commit_info *
290pnfs_get_ds_info(struct inode *inode)
291{
292 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
293
294 if (ld == NULL || ld->get_ds_info == NULL)
295 return NULL;
296 return ld->get_ds_info(inode);
269} 297}
270 298
271static inline bool 299static inline bool
272pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 300pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
301 struct nfs_commit_info *cinfo)
273{ 302{
274 struct inode *inode = req->wb_context->dentry->d_inode; 303 struct inode *inode = req->wb_context->dentry->d_inode;
275 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 304 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
276 305
277 if (lseg == NULL || ld->mark_request_commit == NULL) 306 if (lseg == NULL || ld->mark_request_commit == NULL)
278 return false; 307 return false;
279 ld->mark_request_commit(req, lseg); 308 ld->mark_request_commit(req, lseg, cinfo);
280 return true; 309 return true;
281} 310}
282 311
283static inline bool 312static inline bool
284pnfs_clear_request_commit(struct nfs_page *req) 313pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
285{ 314{
286 struct inode *inode = req->wb_context->dentry->d_inode; 315 struct inode *inode = req->wb_context->dentry->d_inode;
287 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 316 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
288 317
289 if (ld == NULL || ld->clear_request_commit == NULL) 318 if (ld == NULL || ld->clear_request_commit == NULL)
290 return false; 319 return false;
291 ld->clear_request_commit(req); 320 ld->clear_request_commit(req, cinfo);
292 return true; 321 return true;
293} 322}
294 323
295static inline int 324static inline int
296pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 325pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
326 int max)
297{ 327{
298 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 328 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
299 int ret;
300
301 if (ld == NULL || ld->scan_commit_lists == NULL)
302 return 0; 329 return 0;
303 ret = ld->scan_commit_lists(inode, max, lock); 330 else
304 if (ret != 0) 331 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
305 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 332}
306 return ret; 333
334static inline void
335pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
336 struct nfs_commit_info *cinfo)
337{
338 if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
339 return;
340 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
307} 341}
308 342
309/* Should the pNFS client commit and return the layout upon a setattr */ 343/* Should the pNFS client commit and return the layout upon a setattr */
@@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
327 return 0; 361 return 0;
328} 362}
329 363
364static inline bool
365pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
366 struct nfs_server *nfss)
367{
368 return (dst && src && src->bm != 0 &&
369 nfss->pnfs_curr_ld->id == src->l_type);
370}
371
330#ifdef NFS_DEBUG 372#ifdef NFS_DEBUG
331void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 373void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
332#else 374#else
@@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
396{ 438{
397} 439}
398 440
399static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 441static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
442 const struct nfs_pgio_completion_ops *compl_ops)
400{ 443{
401 return false; 444 return false;
402} 445}
403 446
404static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 447static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
448 const struct nfs_pgio_completion_ops *compl_ops)
405{ 449{
406 return false; 450 return false;
407} 451}
408 452
409static inline int 453static inline int
410pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 454pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
455 struct nfs_commit_info *cinfo)
411{ 456{
412 return PNFS_NOT_ATTEMPTED; 457 return PNFS_NOT_ATTEMPTED;
413} 458}
414 459
/*
 * Stub pnfs_get_ds_info(): ignores @inode and always returns NULL, i.e.
 * reports that no data-server commit info exists.
 * NOTE(review): this appears to be the variant built when CONFIG_NFS_V4_1
 * is disabled -- confirm against the enclosing #if/#else.
 */
460static inline struct pnfs_ds_commit_info *
461pnfs_get_ds_info(struct inode *inode)
462{
463 return NULL;
464}
465
415static inline bool 466static inline bool
416pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 467pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
468 struct nfs_commit_info *cinfo)
417{ 469{
418 return false; 470 return false;
419} 471}
420 472
421static inline bool 473static inline bool
422pnfs_clear_request_commit(struct nfs_page *req) 474pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
423{ 475{
424 return false; 476 return false;
425} 477}
426 478
427static inline int 479static inline int
428pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) 480pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
481 int max)
429{ 482{
430 return 0; 483 return 0;
431} 484}
432 485
/*
 * Stub pnfs_recover_commit_reqs(): intentionally empty, so @list and
 * @cinfo are left untouched.
 * NOTE(review): this appears to be the variant built when CONFIG_NFS_V4_1
 * is disabled -- confirm against the enclosing #if/#else.
 */
486static inline void
487pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
488 struct nfs_commit_info *cinfo)
489{
490}
491
433static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 492static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
434{ 493{
435 return 0; 494 return 0;
436} 495}
437 496
/*
 * Stub pnfs_use_threshold(): ignores all arguments and always returns
 * false, so a threshold hint is never reported as usable.
 * NOTE(review): this appears to be the variant built when CONFIG_NFS_V4_1
 * is disabled -- confirm against the enclosing #if/#else.
 */
497static inline bool
498pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
499 struct nfs_server *nfss)
500{
501 return false;
502}
503
/*
 * Stub pnfs_mdsthreshold_alloc(): allocates nothing and always returns
 * NULL.
 * NOTE(review): this appears to be the variant built when CONFIG_NFS_V4_1
 * is disabled -- confirm against the enclosing #if/#else.
 */
504static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
505{
506 return NULL;
507}
508
438#endif /* CONFIG_NFS_V4_1 */ 509#endif /* CONFIG_NFS_V4_1 */
439 510
440#endif /* FS_NFS_PNFS_H */ 511#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d6408b6437de..a706b6bcc286 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
178} 178}
179 179
180static int 180static int
181nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, 181nfs_proc_lookup(struct inode *dir, struct qstr *name,
182 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 182 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
183{ 183{
184 struct nfs_diropargs arg = { 184 struct nfs_diropargs arg = {
@@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
640 640
641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 641static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
642{ 642{
643 struct inode *inode = data->header->inode;
644
643 if (nfs_async_handle_expired_key(task)) 645 if (nfs_async_handle_expired_key(task))
644 return -EAGAIN; 646 return -EAGAIN;
645 647
646 nfs_invalidate_atime(data->inode); 648 nfs_invalidate_atime(inode);
647 if (task->tk_status >= 0) { 649 if (task->tk_status >= 0) {
648 nfs_refresh_inode(data->inode, data->res.fattr); 650 nfs_refresh_inode(inode, data->res.fattr);
649 /* Emulate the eof flag, which isn't normally needed in NFSv2 651 /* Emulate the eof flag, which isn't normally needed in NFSv2
650 * as it is guaranteed to always return the file attributes 652 * as it is guaranteed to always return the file attributes
651 */ 653 */
@@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
667 669
668static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 670static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
669{ 671{
672 struct inode *inode = data->header->inode;
673
670 if (nfs_async_handle_expired_key(task)) 674 if (nfs_async_handle_expired_key(task))
671 return -EAGAIN; 675 return -EAGAIN;
672 676
673 if (task->tk_status >= 0) 677 if (task->tk_status >= 0)
674 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); 678 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
675 return 0; 679 return 0;
676} 680}
677 681
@@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
687 rpc_call_start(task); 691 rpc_call_start(task);
688} 692}
689 693
694static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
695{
696 BUG();
697}
698
690static void 699static void
691nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 700nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
692{ 701{
693 BUG(); 702 BUG();
694} 703}
@@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
732 .file_inode_ops = &nfs_file_inode_operations, 741 .file_inode_ops = &nfs_file_inode_operations,
733 .file_ops = &nfs_file_operations, 742 .file_ops = &nfs_file_operations,
734 .getroot = nfs_proc_get_root, 743 .getroot = nfs_proc_get_root,
744 .submount = nfs_submount,
735 .getattr = nfs_proc_getattr, 745 .getattr = nfs_proc_getattr,
736 .setattr = nfs_proc_setattr, 746 .setattr = nfs_proc_setattr,
737 .lookup = nfs_proc_lookup, 747 .lookup = nfs_proc_lookup,
@@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
763 .write_rpc_prepare = nfs_proc_write_rpc_prepare, 773 .write_rpc_prepare = nfs_proc_write_rpc_prepare,
764 .write_done = nfs_write_done, 774 .write_done = nfs_write_done,
765 .commit_setup = nfs_proc_commit_setup, 775 .commit_setup = nfs_proc_commit_setup,
776 .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
766 .lock = nfs_proc_lock, 777 .lock = nfs_proc_lock,
767 .lock_check_bounds = nfs_lock_check_bounds, 778 .lock_check_bounds = nfs_lock_check_bounds,
768 .close_context = nfs_close_context, 779 .close_context = nfs_close_context,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28c2ea3..86ced7836214 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,43 +30,73 @@
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 31
32static const struct nfs_pageio_ops nfs_pageio_read_ops; 32static const struct nfs_pageio_ops nfs_pageio_read_ops;
33static const struct rpc_call_ops nfs_read_partial_ops; 33static const struct rpc_call_ops nfs_read_common_ops;
34static const struct rpc_call_ops nfs_read_full_ops; 34static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
35 35
36static struct kmem_cache *nfs_rdata_cachep; 36static struct kmem_cache *nfs_rdata_cachep;
37 37
38struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 38struct nfs_read_header *nfs_readhdr_alloc(void)
39{ 39{
40 struct nfs_read_data *p; 40 struct nfs_read_header *rhdr;
41 41
42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 42 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
43 if (p) { 43 if (rhdr) {
44 INIT_LIST_HEAD(&p->pages); 44 struct nfs_pgio_header *hdr = &rhdr->header;
45 p->npages = pagecount; 45
46 if (pagecount <= ARRAY_SIZE(p->page_array)) 46 INIT_LIST_HEAD(&hdr->pages);
47 p->pagevec = p->page_array; 47 INIT_LIST_HEAD(&hdr->rpc_list);
48 else { 48 spin_lock_init(&hdr->lock);
49 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 49 atomic_set(&hdr->refcnt, 0);
50 if (!p->pagevec) { 50 }
51 kmem_cache_free(nfs_rdata_cachep, p); 51 return rhdr;
52 p = NULL; 52}
53 } 53
54 } 54static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
55 unsigned int pagecount)
56{
57 struct nfs_read_data *data, *prealloc;
58
59 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
60 if (prealloc->header == NULL)
61 data = prealloc;
62 else
63 data = kzalloc(sizeof(*data), GFP_KERNEL);
64 if (!data)
65 goto out;
66
67 if (nfs_pgarray_set(&data->pages, pagecount)) {
68 data->header = hdr;
69 atomic_inc(&hdr->refcnt);
70 } else {
71 if (data != prealloc)
72 kfree(data);
73 data = NULL;
55 } 74 }
56 return p; 75out:
76 return data;
57} 77}
58 78
59void nfs_readdata_free(struct nfs_read_data *p) 79void nfs_readhdr_free(struct nfs_pgio_header *hdr)
60{ 80{
61 if (p && (p->pagevec != &p->page_array[0])) 81 struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
62 kfree(p->pagevec); 82
63 kmem_cache_free(nfs_rdata_cachep, p); 83 kmem_cache_free(nfs_rdata_cachep, rhdr);
64} 84}
65 85
66void nfs_readdata_release(struct nfs_read_data *rdata) 86void nfs_readdata_release(struct nfs_read_data *rdata)
67{ 87{
88 struct nfs_pgio_header *hdr = rdata->header;
89 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
90
68 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
69 nfs_readdata_free(rdata); 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data)
95 kfree(rdata);
96 else
97 rdata->header = NULL;
98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr);
70} 100}
71 101
72static 102static
@@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)
78 return 0; 108 return 0;
79} 109}
80 110
81static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
82{
83 unsigned int remainder = data->args.count - data->res.count;
84 unsigned int base = data->args.pgbase + data->res.count;
85 unsigned int pglen;
86 struct page **pages;
87
88 if (data->res.eof == 0 || remainder == 0)
89 return;
90 /*
91 * Note: "remainder" can never be negative, since we check for
92 * this in the XDR code.
93 */
94 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
95 base &= ~PAGE_CACHE_MASK;
96 pglen = PAGE_CACHE_SIZE - base;
97 for (;;) {
98 if (remainder <= pglen) {
99 zero_user(*pages, base, remainder);
100 break;
101 }
102 zero_user(*pages, base, pglen);
103 pages++;
104 remainder -= pglen;
105 pglen = PAGE_CACHE_SIZE;
106 base = 0;
107 }
108}
109
110void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 111void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
111 struct inode *inode) 112 struct inode *inode,
113 const struct nfs_pgio_completion_ops *compl_ops)
112{ 114{
113 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
114 NFS_SERVER(inode)->rsize, 0); 116 NFS_SERVER(inode)->rsize, 0);
115} 117}
116 118
@@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
121} 123}
122EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 124EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
123 125
124static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 126void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
125 struct inode *inode) 127 struct inode *inode,
128 const struct nfs_pgio_completion_ops *compl_ops)
126{ 129{
127 if (!pnfs_pageio_init_read(pgio, inode)) 130 if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
128 nfs_pageio_init_read_mds(pgio, inode); 131 nfs_pageio_init_read_mds(pgio, inode, compl_ops);
129} 132}
130 133
131int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 134int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
146 if (len < PAGE_CACHE_SIZE) 149 if (len < PAGE_CACHE_SIZE)
147 zero_user_segment(page, len, PAGE_CACHE_SIZE); 150 zero_user_segment(page, len, PAGE_CACHE_SIZE);
148 151
149 nfs_pageio_init_read(&pgio, inode); 152 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
150 nfs_pageio_add_request(&pgio, new); 153 nfs_pageio_add_request(&pgio, new);
151 nfs_pageio_complete(&pgio); 154 nfs_pageio_complete(&pgio);
155 NFS_I(inode)->read_io += pgio.pg_bytes_written;
152 return 0; 156 return 0;
153} 157}
154 158
@@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)
169 nfs_release_request(req); 173 nfs_release_request(req);
170} 174}
171 175
172int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 176/* Note io was page aligned */
173 const struct rpc_call_ops *call_ops) 177static void nfs_read_completion(struct nfs_pgio_header *hdr)
178{
179 unsigned long bytes = 0;
180
181 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
182 goto out;
183 while (!list_empty(&hdr->pages)) {
184 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
185 struct page *page = req->wb_page;
186
187 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
188 if (bytes > hdr->good_bytes)
189 zero_user(page, 0, PAGE_SIZE);
190 else if (hdr->good_bytes - bytes < PAGE_SIZE)
191 zero_user_segment(page,
192 hdr->good_bytes & ~PAGE_MASK,
193 PAGE_SIZE);
194 }
195 bytes += req->wb_bytes;
196 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
197 if (bytes <= hdr->good_bytes)
198 SetPageUptodate(page);
199 } else
200 SetPageUptodate(page);
201 nfs_list_remove_request(req);
202 nfs_readpage_release(req);
203 }
204out:
205 hdr->release(hdr);
206}
207
208int nfs_initiate_read(struct rpc_clnt *clnt,
209 struct nfs_read_data *data,
210 const struct rpc_call_ops *call_ops, int flags)
174{ 211{
175 struct inode *inode = data->inode; 212 struct inode *inode = data->header->inode;
176 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 213 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
177 struct rpc_task *task; 214 struct rpc_task *task;
178 struct rpc_message msg = { 215 struct rpc_message msg = {
179 .rpc_argp = &data->args, 216 .rpc_argp = &data->args,
180 .rpc_resp = &data->res, 217 .rpc_resp = &data->res,
181 .rpc_cred = data->cred, 218 .rpc_cred = data->header->cred,
182 }; 219 };
183 struct rpc_task_setup task_setup_data = { 220 struct rpc_task_setup task_setup_data = {
184 .task = &data->task, 221 .task = &data->task,
@@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
187 .callback_ops = call_ops, 224 .callback_ops = call_ops,
188 .callback_data = data, 225 .callback_data = data,
189 .workqueue = nfsiod_workqueue, 226 .workqueue = nfsiod_workqueue,
190 .flags = RPC_TASK_ASYNC | swap_flags, 227 .flags = RPC_TASK_ASYNC | swap_flags | flags,
191 }; 228 };
192 229
193 /* Set up the initial task struct. */ 230 /* Set up the initial task struct. */
@@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
212/* 249/*
213 * Set up the NFS read request struct 250 * Set up the NFS read request struct
214 */ 251 */
215static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 252static void nfs_read_rpcsetup(struct nfs_read_data *data,
216 unsigned int count, unsigned int offset) 253 unsigned int count, unsigned int offset)
217{ 254{
218 struct inode *inode = req->wb_context->dentry->d_inode; 255 struct nfs_page *req = data->header->req;
219
220 data->req = req;
221 data->inode = inode;
222 data->cred = req->wb_context->cred;
223 256
224 data->args.fh = NFS_FH(inode); 257 data->args.fh = NFS_FH(data->header->inode);
225 data->args.offset = req_offset(req) + offset; 258 data->args.offset = req_offset(req) + offset;
226 data->args.pgbase = req->wb_pgbase + offset; 259 data->args.pgbase = req->wb_pgbase + offset;
227 data->args.pages = data->pagevec; 260 data->args.pages = data->pages.pagevec;
228 data->args.count = count; 261 data->args.count = count;
229 data->args.context = get_nfs_open_context(req->wb_context); 262 data->args.context = get_nfs_open_context(req->wb_context);
230 data->args.lock_context = req->wb_lock_context; 263 data->args.lock_context = req->wb_lock_context;
@@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
238static int nfs_do_read(struct nfs_read_data *data, 271static int nfs_do_read(struct nfs_read_data *data,
239 const struct rpc_call_ops *call_ops) 272 const struct rpc_call_ops *call_ops)
240{ 273{
241 struct inode *inode = data->args.context->dentry->d_inode; 274 struct inode *inode = data->header->inode;
242 275
243 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 276 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
244} 277}
245 278
246static int 279static int
@@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,
253 while (!list_empty(head)) { 286 while (!list_empty(head)) {
254 int ret2; 287 int ret2;
255 288
256 data = list_entry(head->next, struct nfs_read_data, list); 289 data = list_first_entry(head, struct nfs_read_data, list);
257 list_del_init(&data->list); 290 list_del_init(&data->list);
258 291
259 ret2 = nfs_do_read(data, call_ops); 292 ret2 = nfs_do_read(data, call_ops);
@@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)
275 } 308 }
276} 309}
277 310
311static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
312 .error_cleanup = nfs_async_read_error,
313 .completion = nfs_read_completion,
314};
315
316static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
317 struct nfs_pgio_header *hdr)
318{
319 set_bit(NFS_IOHDR_REDO, &hdr->flags);
320 while (!list_empty(&hdr->rpc_list)) {
321 struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
322 struct nfs_read_data, list);
323 list_del(&data->list);
324 nfs_readdata_release(data);
325 }
326 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
327}
328
278/* 329/*
279 * Generate multiple requests to fill a single page. 330 * Generate multiple requests to fill a single page.
280 * 331 *
@@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)
288 * won't see the new data until our attribute cache is updated. This is more 339 * won't see the new data until our attribute cache is updated. This is more
289 * or less conventional NFS client behavior. 340 * or less conventional NFS client behavior.
290 */ 341 */
291static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 342static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
343 struct nfs_pgio_header *hdr)
292{ 344{
293 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 345 struct nfs_page *req = hdr->req;
294 struct page *page = req->wb_page; 346 struct page *page = req->wb_page;
295 struct nfs_read_data *data; 347 struct nfs_read_data *data;
296 size_t rsize = desc->pg_bsize, nbytes; 348 size_t rsize = desc->pg_bsize, nbytes;
297 unsigned int offset; 349 unsigned int offset;
298 int requests = 0;
299 int ret = 0;
300
301 nfs_list_remove_request(req);
302 350
303 offset = 0; 351 offset = 0;
304 nbytes = desc->pg_count; 352 nbytes = desc->pg_count;
305 do { 353 do {
306 size_t len = min(nbytes,rsize); 354 size_t len = min(nbytes,rsize);
307 355
308 data = nfs_readdata_alloc(1); 356 data = nfs_readdata_alloc(hdr, 1);
309 if (!data) 357 if (!data) {
310 goto out_bad; 358 nfs_pagein_error(desc, hdr);
311 data->pagevec[0] = page; 359 return -ENOMEM;
312 nfs_read_rpcsetup(req, data, len, offset); 360 }
313 list_add(&data->list, res); 361 data->pages.pagevec[0] = page;
314 requests++; 362 nfs_read_rpcsetup(data, len, offset);
363 list_add(&data->list, &hdr->rpc_list);
315 nbytes -= len; 364 nbytes -= len;
316 offset += len; 365 offset += len;
317 } while(nbytes != 0); 366 } while (nbytes != 0);
318 atomic_set(&req->wb_complete, requests); 367
319 desc->pg_rpc_callops = &nfs_read_partial_ops; 368 nfs_list_remove_request(req);
320 return ret; 369 nfs_list_add_request(req, &hdr->pages);
321out_bad: 370 desc->pg_rpc_callops = &nfs_read_common_ops;
322 while (!list_empty(res)) { 371 return 0;
323 data = list_entry(res->next, struct nfs_read_data, list);
324 list_del(&data->list);
325 nfs_readdata_release(data);
326 }
327 nfs_readpage_release(req);
328 return -ENOMEM;
329} 372}
330 373
331static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 374static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
375 struct nfs_pgio_header *hdr)
332{ 376{
333 struct nfs_page *req; 377 struct nfs_page *req;
334 struct page **pages; 378 struct page **pages;
335 struct nfs_read_data *data; 379 struct nfs_read_data *data;
336 struct list_head *head = &desc->pg_list; 380 struct list_head *head = &desc->pg_list;
337 int ret = 0;
338 381
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 382 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 383 desc->pg_count));
341 if (!data) { 384 if (!data) {
342 nfs_async_read_error(head); 385 nfs_pagein_error(desc, hdr);
343 ret = -ENOMEM; 386 return -ENOMEM;
344 goto out;
345 } 387 }
346 388
347 pages = data->pagevec; 389 pages = data->pages.pagevec;
348 while (!list_empty(head)) { 390 while (!list_empty(head)) {
349 req = nfs_list_entry(head->next); 391 req = nfs_list_entry(head->next);
350 nfs_list_remove_request(req); 392 nfs_list_remove_request(req);
351 nfs_list_add_request(req, &data->pages); 393 nfs_list_add_request(req, &hdr->pages);
352 *pages++ = req->wb_page; 394 *pages++ = req->wb_page;
353 } 395 }
354 req = nfs_list_entry(data->pages.next);
355 396
356 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 397 nfs_read_rpcsetup(data, desc->pg_count, 0);
357 list_add(&data->list, res); 398 list_add(&data->list, &hdr->rpc_list);
358 desc->pg_rpc_callops = &nfs_read_full_ops; 399 desc->pg_rpc_callops = &nfs_read_common_ops;
359out: 400 return 0;
360 return ret;
361} 401}
362 402
363int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 403int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
404 struct nfs_pgio_header *hdr)
364{ 405{
365 if (desc->pg_bsize < PAGE_CACHE_SIZE) 406 if (desc->pg_bsize < PAGE_CACHE_SIZE)
366 return nfs_pagein_multi(desc, head); 407 return nfs_pagein_multi(desc, hdr);
367 return nfs_pagein_one(desc, head); 408 return nfs_pagein_one(desc, hdr);
368} 409}
369 410
370static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 411static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
371{ 412{
372 LIST_HEAD(head); 413 struct nfs_read_header *rhdr;
414 struct nfs_pgio_header *hdr;
373 int ret; 415 int ret;
374 416
375 ret = nfs_generic_pagein(desc, &head); 417 rhdr = nfs_readhdr_alloc();
418 if (!rhdr) {
419 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
420 return -ENOMEM;
421 }
422 hdr = &rhdr->header;
423 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
424 atomic_inc(&hdr->refcnt);
425 ret = nfs_generic_pagein(desc, hdr);
376 if (ret == 0) 426 if (ret == 0)
377 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 427 ret = nfs_do_multiple_reads(&hdr->rpc_list,
428 desc->pg_rpc_callops);
429 if (atomic_dec_and_test(&hdr->refcnt))
430 hdr->completion_ops->completion(hdr);
378 return ret; 431 return ret;
379} 432}
380 433
@@ -389,20 +442,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
389 */ 442 */
390int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 443int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
391{ 444{
445 struct inode *inode = data->header->inode;
392 int status; 446 int status;
393 447
394 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 448 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
395 task->tk_status); 449 task->tk_status);
396 450
397 status = NFS_PROTO(data->inode)->read_done(task, data); 451 status = NFS_PROTO(inode)->read_done(task, data);
398 if (status != 0) 452 if (status != 0)
399 return status; 453 return status;
400 454
401 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 455 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
402 456
403 if (task->tk_status == -ESTALE) { 457 if (task->tk_status == -ESTALE) {
404 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 458 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
405 nfs_mark_for_revalidate(data->inode); 459 nfs_mark_for_revalidate(inode);
406 } 460 }
407 return 0; 461 return 0;
408} 462}
@@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
412 struct nfs_readargs *argp = &data->args; 466 struct nfs_readargs *argp = &data->args;
413 struct nfs_readres *resp = &data->res; 467 struct nfs_readres *resp = &data->res;
414 468
415 if (resp->eof || resp->count == argp->count)
416 return;
417
418 /* This is a short read! */ 469 /* This is a short read! */
419 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 470 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
420 /* Has the server at least made some progress? */ 471 /* Has the server at least made some progress? */
421 if (resp->count == 0) 472 if (resp->count == 0) {
473 nfs_set_pgio_error(data->header, -EIO, argp->offset);
422 return; 474 return;
423 475 }
424 /* Yes, so retry the read at the end of the data */ 476 /* Yes, so retry the read at the end of the data */
425 data->mds_offset += resp->count; 477 data->mds_offset += resp->count;
426 argp->offset += resp->count; 478 argp->offset += resp->count;
@@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
429 rpc_restart_call_prepare(task); 481 rpc_restart_call_prepare(task);
430} 482}
431 483
432/* 484static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
433 * Handle a read reply that fills part of a page.
434 */
435static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
436{ 485{
437 struct nfs_read_data *data = calldata; 486 struct nfs_read_data *data = calldata;
438 487 struct nfs_pgio_header *hdr = data->header;
488
489 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
439 if (nfs_readpage_result(task, data) != 0) 490 if (nfs_readpage_result(task, data) != 0)
440 return; 491 return;
441 if (task->tk_status < 0) 492 if (task->tk_status < 0)
442 return; 493 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
443 494 else if (data->res.eof) {
444 nfs_readpage_truncate_uninitialised_page(data); 495 loff_t bound;
445 nfs_readpage_retry(task, data); 496
497 bound = data->args.offset + data->res.count;
498 spin_lock(&hdr->lock);
499 if (bound < hdr->io_start + hdr->good_bytes) {
500 set_bit(NFS_IOHDR_EOF, &hdr->flags);
501 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
502 hdr->good_bytes = bound - hdr->io_start;
503 }
504 spin_unlock(&hdr->lock);
505 } else if (data->res.count != data->args.count)
506 nfs_readpage_retry(task, data);
446} 507}
447 508
448static void nfs_readpage_release_partial(void *calldata) 509static void nfs_readpage_release_common(void *calldata)
449{ 510{
450 struct nfs_read_data *data = calldata;
451 struct nfs_page *req = data->req;
452 struct page *page = req->wb_page;
453 int status = data->task.tk_status;
454
455 if (status < 0)
456 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
457
458 if (atomic_dec_and_test(&req->wb_complete)) {
459 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
460 SetPageUptodate(page);
461 nfs_readpage_release(req);
462 }
463 nfs_readdata_release(calldata); 511 nfs_readdata_release(calldata);
464} 512}
465 513
466void nfs_read_prepare(struct rpc_task *task, void *calldata) 514void nfs_read_prepare(struct rpc_task *task, void *calldata)
467{ 515{
468 struct nfs_read_data *data = calldata; 516 struct nfs_read_data *data = calldata;
469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data); 517 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
470}
471
472static const struct rpc_call_ops nfs_read_partial_ops = {
473 .rpc_call_prepare = nfs_read_prepare,
474 .rpc_call_done = nfs_readpage_result_partial,
475 .rpc_release = nfs_readpage_release_partial,
476};
477
478static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
479{
480 unsigned int count = data->res.count;
481 unsigned int base = data->args.pgbase;
482 struct page **pages;
483
484 if (data->res.eof)
485 count = data->args.count;
486 if (unlikely(count == 0))
487 return;
488 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
489 base &= ~PAGE_CACHE_MASK;
490 count += base;
491 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
492 SetPageUptodate(*pages);
493 if (count == 0)
494 return;
495 /* Was this a short read? */
496 if (data->res.eof || data->res.count == data->args.count)
497 SetPageUptodate(*pages);
498}
499
500/*
501 * This is the callback from RPC telling us whether a reply was
502 * received or some error occurred (timeout or socket shutdown).
503 */
504static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
505{
506 struct nfs_read_data *data = calldata;
507
508 if (nfs_readpage_result(task, data) != 0)
509 return;
510 if (task->tk_status < 0)
511 return;
512 /*
513 * Note: nfs_readpage_retry may change the values of
514 * data->args. In the multi-page case, we therefore need
515 * to ensure that we call nfs_readpage_set_pages_uptodate()
516 * first.
517 */
518 nfs_readpage_truncate_uninitialised_page(data);
519 nfs_readpage_set_pages_uptodate(data);
520 nfs_readpage_retry(task, data);
521}
522
523static void nfs_readpage_release_full(void *calldata)
524{
525 struct nfs_read_data *data = calldata;
526
527 while (!list_empty(&data->pages)) {
528 struct nfs_page *req = nfs_list_entry(data->pages.next);
529
530 nfs_list_remove_request(req);
531 nfs_readpage_release(req);
532 }
533 nfs_readdata_release(calldata);
534} 518}
535 519
536static const struct rpc_call_ops nfs_read_full_ops = { 520static const struct rpc_call_ops nfs_read_common_ops = {
537 .rpc_call_prepare = nfs_read_prepare, 521 .rpc_call_prepare = nfs_read_prepare,
538 .rpc_call_done = nfs_readpage_result_full, 522 .rpc_call_done = nfs_readpage_result_common,
539 .rpc_release = nfs_readpage_release_full, 523 .rpc_release = nfs_readpage_release_common,
540}; 524};
541 525
542/* 526/*
@@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
668 if (ret == 0) 652 if (ret == 0)
669 goto read_complete; /* all pages were read */ 653 goto read_complete; /* all pages were read */
670 654
671 nfs_pageio_init_read(&pgio, inode); 655 nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
672 656
673 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 657 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
674 658
675 nfs_pageio_complete(&pgio); 659 nfs_pageio_complete(&pgio);
660 NFS_I(inode)->read_io += pgio.pg_bytes_written;
676 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 661 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
677 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 662 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
678read_complete: 663read_complete:
@@ -684,7 +669,7 @@ out:
684int __init nfs_init_readpagecache(void) 669int __init nfs_init_readpagecache(void)
685{ 670{
686 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 671 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
687 sizeof(struct nfs_read_data), 672 sizeof(struct nfs_read_header),
688 0, SLAB_HWCACHE_ALIGN, 673 0, SLAB_HWCACHE_ALIGN,
689 NULL); 674 NULL);
690 if (nfs_rdata_cachep == NULL) 675 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca7e4bf..ff656c022684 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -66,6 +66,7 @@
66#include "pnfs.h" 66#include "pnfs.h"
67 67
68#define NFSDBG_FACILITY NFSDBG_VFS 68#define NFSDBG_FACILITY NFSDBG_VFS
69#define NFS_TEXT_DATA 1
69 70
70#ifdef CONFIG_NFS_V3 71#ifdef CONFIG_NFS_V3
71#define NFS_DEFAULT_VERSION 3 72#define NFS_DEFAULT_VERSION 3
@@ -277,12 +278,22 @@ static match_table_t nfs_vers_tokens = {
277 { Opt_vers_err, NULL } 278 { Opt_vers_err, NULL }
278}; 279};
279 280
281struct nfs_mount_info {
282 void (*fill_super)(struct super_block *, struct nfs_mount_info *);
283 int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
284 struct nfs_parsed_mount_data *parsed;
285 struct nfs_clone_mount *cloned;
286 struct nfs_fh *mntfh;
287};
288
280static void nfs_umount_begin(struct super_block *); 289static void nfs_umount_begin(struct super_block *);
281static int nfs_statfs(struct dentry *, struct kstatfs *); 290static int nfs_statfs(struct dentry *, struct kstatfs *);
282static int nfs_show_options(struct seq_file *, struct dentry *); 291static int nfs_show_options(struct seq_file *, struct dentry *);
283static int nfs_show_devname(struct seq_file *, struct dentry *); 292static int nfs_show_devname(struct seq_file *, struct dentry *);
284static int nfs_show_path(struct seq_file *, struct dentry *); 293static int nfs_show_path(struct seq_file *, struct dentry *);
285static int nfs_show_stats(struct seq_file *, struct dentry *); 294static int nfs_show_stats(struct seq_file *, struct dentry *);
295static struct dentry *nfs_fs_mount_common(struct file_system_type *,
296 struct nfs_server *, int, const char *, struct nfs_mount_info *);
286static struct dentry *nfs_fs_mount(struct file_system_type *, 297static struct dentry *nfs_fs_mount(struct file_system_type *,
287 int, const char *, void *); 298 int, const char *, void *);
288static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 299static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {
323}; 334};
324 335
325#ifdef CONFIG_NFS_V4 336#ifdef CONFIG_NFS_V4
326static int nfs4_validate_text_mount_data(void *options, 337static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
338static int nfs4_validate_mount_data(void *options,
327 struct nfs_parsed_mount_data *args, const char *dev_name); 339 struct nfs_parsed_mount_data *args, const char *dev_name);
328static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 340static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
329 struct nfs_parsed_mount_data *data); 341 struct nfs_mount_info *mount_info);
330static struct dentry *nfs4_mount(struct file_system_type *fs_type,
331 int flags, const char *dev_name, void *raw_data);
332static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 342static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
333 int flags, const char *dev_name, void *raw_data); 343 int flags, const char *dev_name, void *raw_data);
334static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 344static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
@@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);
342static struct file_system_type nfs4_fs_type = { 352static struct file_system_type nfs4_fs_type = {
343 .owner = THIS_MODULE, 353 .owner = THIS_MODULE,
344 .name = "nfs4", 354 .name = "nfs4",
345 .mount = nfs4_mount, 355 .mount = nfs_fs_mount,
346 .kill_sb = nfs4_kill_super, 356 .kill_sb = nfs4_kill_super,
347 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
348}; 358};
@@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
786 796
787static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) 797static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
788{ 798{
789 if (nfss->nfs_client && nfss->nfs_client->impl_id) { 799 if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
790 struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; 800 struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
791 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," 801 seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
792 "date='%llu,%u'", 802 "date='%llu,%u'",
793 impl_id->name, impl_id->domain, 803 impl_id->name, impl_id->domain,
@@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)
938 rpc_killall_tasks(rpc); 948 rpc_killall_tasks(rpc);
939} 949}
940 950
941static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) 951static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
942{ 952{
943 struct nfs_parsed_mount_data *data; 953 struct nfs_parsed_mount_data *data;
944 954
@@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
953 data->nfs_server.protocol = XPRT_TRANSPORT_TCP; 963 data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
954 data->auth_flavors[0] = RPC_AUTH_UNIX; 964 data->auth_flavors[0] = RPC_AUTH_UNIX;
955 data->auth_flavor_len = 1; 965 data->auth_flavor_len = 1;
956 data->version = version;
957 data->minorversion = 0; 966 data->minorversion = 0;
967 data->need_mount = true;
958 data->net = current->nsproxy->net_ns; 968 data->net = current->nsproxy->net_ns;
959 security_init_mnt_opts(&data->lsm_opts); 969 security_init_mnt_opts(&data->lsm_opts);
960 } 970 }
@@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1674 * Use the remote server's MOUNT service to request the NFS file handle 1684 * Use the remote server's MOUNT service to request the NFS file handle
1675 * corresponding to the provided path. 1685 * corresponding to the provided path.
1676 */ 1686 */
1677static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1687static int nfs_request_mount(struct nfs_parsed_mount_data *args,
1678 struct nfs_fh *root_fh) 1688 struct nfs_fh *root_fh)
1679{ 1689{
1680 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; 1690 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1681 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); 1691 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
@@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1738 return nfs_walk_authlist(args, &request); 1748 return nfs_walk_authlist(args, &request);
1739} 1749}
1740 1750
1751static struct dentry *nfs_try_mount(int flags, const char *dev_name,
1752 struct nfs_mount_info *mount_info)
1753{
1754 int status;
1755 struct nfs_server *server;
1756
1757 if (mount_info->parsed->need_mount) {
1758 status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
1759 if (status)
1760 return ERR_PTR(status);
1761 }
1762
1763 /* Get a volume representation */
1764 server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
1765 if (IS_ERR(server))
1766 return ERR_CAST(server);
1767
1768 return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
1769}
1770
1741/* 1771/*
1742 * Split "dev_name" into "hostname:export_path". 1772 * Split "dev_name" into "hostname:export_path".
1743 * 1773 *
@@ -1826,10 +1856,10 @@ out_path:
1826 * + breaking back: trying proto=udp after proto=tcp, v2 after v3, 1856 * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
1827 * mountproto=tcp after mountproto=udp, and so on 1857 * mountproto=tcp after mountproto=udp, and so on
1828 */ 1858 */
1829static int nfs_validate_mount_data(void *options, 1859static int nfs23_validate_mount_data(void *options,
1830 struct nfs_parsed_mount_data *args, 1860 struct nfs_parsed_mount_data *args,
1831 struct nfs_fh *mntfh, 1861 struct nfs_fh *mntfh,
1832 const char *dev_name) 1862 const char *dev_name)
1833{ 1863{
1834 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1864 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1835 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; 1865 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
@@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,
1883 args->acregmax = data->acregmax; 1913 args->acregmax = data->acregmax;
1884 args->acdirmin = data->acdirmin; 1914 args->acdirmin = data->acdirmin;
1885 args->acdirmax = data->acdirmax; 1915 args->acdirmax = data->acdirmax;
1916 args->need_mount = false;
1886 1917
1887 memcpy(sap, &data->addr, sizeof(data->addr)); 1918 memcpy(sap, &data->addr, sizeof(data->addr));
1888 args->nfs_server.addrlen = sizeof(data->addr); 1919 args->nfs_server.addrlen = sizeof(data->addr);
@@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,
1934 } 1965 }
1935 1966
1936 break; 1967 break;
1937 default: { 1968 default:
1938 int status; 1969 return NFS_TEXT_DATA;
1939
1940 if (nfs_parse_mount_options((char *)options, args) == 0)
1941 return -EINVAL;
1942
1943 if (!nfs_verify_server_address(sap))
1944 goto out_no_address;
1945
1946 if (args->version == 4)
1947#ifdef CONFIG_NFS_V4
1948 return nfs4_validate_text_mount_data(options,
1949 args, dev_name);
1950#else
1951 goto out_v4_not_compiled;
1952#endif
1953
1954 nfs_set_port(sap, &args->nfs_server.port, 0);
1955
1956 nfs_set_mount_transport_protocol(args);
1957
1958 status = nfs_parse_devname(dev_name,
1959 &args->nfs_server.hostname,
1960 PAGE_SIZE,
1961 &args->nfs_server.export_path,
1962 NFS_MAXPATHLEN);
1963 if (!status)
1964 status = nfs_try_mount(args, mntfh);
1965
1966 kfree(args->nfs_server.export_path);
1967 args->nfs_server.export_path = NULL;
1968
1969 if (status)
1970 return status;
1971
1972 break;
1973 }
1974 } 1970 }
1975 1971
1976#ifndef CONFIG_NFS_V3 1972#ifndef CONFIG_NFS_V3
@@ -1999,12 +1995,6 @@ out_v3_not_compiled:
1999 return -EPROTONOSUPPORT; 1995 return -EPROTONOSUPPORT;
2000#endif /* !CONFIG_NFS_V3 */ 1996#endif /* !CONFIG_NFS_V3 */
2001 1997
2002#ifndef CONFIG_NFS_V4
2003out_v4_not_compiled:
2004 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2005 return -EPROTONOSUPPORT;
2006#endif /* !CONFIG_NFS_V4 */
2007
2008out_nomem: 1998out_nomem:
2009 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1999 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
2010 return -ENOMEM; 2000 return -ENOMEM;
@@ -2018,6 +2008,82 @@ out_invalid_fh:
2018 return -EINVAL; 2008 return -EINVAL;
2019} 2009}
2020 2010
2011#ifdef CONFIG_NFS_V4
2012static int nfs_validate_mount_data(struct file_system_type *fs_type,
2013 void *options,
2014 struct nfs_parsed_mount_data *args,
2015 struct nfs_fh *mntfh,
2016 const char *dev_name)
2017{
2018 if (fs_type == &nfs_fs_type)
2019 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2020 return nfs4_validate_mount_data(options, args, dev_name);
2021}
2022#else
2023static int nfs_validate_mount_data(struct file_system_type *fs_type,
2024 void *options,
2025 struct nfs_parsed_mount_data *args,
2026 struct nfs_fh *mntfh,
2027 const char *dev_name)
2028{
2029 return nfs23_validate_mount_data(options, args, mntfh, dev_name);
2030}
2031#endif
2032
2033static int nfs_validate_text_mount_data(void *options,
2034 struct nfs_parsed_mount_data *args,
2035 const char *dev_name)
2036{
2037 int port = 0;
2038 int max_namelen = PAGE_SIZE;
2039 int max_pathlen = NFS_MAXPATHLEN;
2040 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2041
2042 if (nfs_parse_mount_options((char *)options, args) == 0)
2043 return -EINVAL;
2044
2045 if (!nfs_verify_server_address(sap))
2046 goto out_no_address;
2047
2048 if (args->version == 4) {
2049#ifdef CONFIG_NFS_V4
2050 port = NFS_PORT;
2051 max_namelen = NFS4_MAXNAMLEN;
2052 max_pathlen = NFS4_MAXPATHLEN;
2053 nfs_validate_transport_protocol(args);
2054 nfs4_validate_mount_flags(args);
2055#else
2056 goto out_v4_not_compiled;
2057#endif /* CONFIG_NFS_V4 */
2058 } else
2059 nfs_set_mount_transport_protocol(args);
2060
2061 nfs_set_port(sap, &args->nfs_server.port, port);
2062
2063 if (args->auth_flavor_len > 1)
2064 goto out_bad_auth;
2065
2066 return nfs_parse_devname(dev_name,
2067 &args->nfs_server.hostname,
2068 max_namelen,
2069 &args->nfs_server.export_path,
2070 max_pathlen);
2071
2072#ifndef CONFIG_NFS_V4
2073out_v4_not_compiled:
2074 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
2075 return -EPROTONOSUPPORT;
2076#endif /* !CONFIG_NFS_V4 */
2077
2078out_no_address:
2079 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
2080 return -EINVAL;
2081
2082out_bad_auth:
2083 dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
2084 return -EINVAL;
2085}
2086
2021static int 2087static int
2022nfs_compare_remount_data(struct nfs_server *nfss, 2088nfs_compare_remount_data(struct nfs_server *nfss,
2023 struct nfs_parsed_mount_data *data) 2089 struct nfs_parsed_mount_data *data)
@@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2129 * Finish setting up an NFS2/3 superblock 2195 * Finish setting up an NFS2/3 superblock
2130 */ 2196 */
2131static void nfs_fill_super(struct super_block *sb, 2197static void nfs_fill_super(struct super_block *sb,
2132 struct nfs_parsed_mount_data *data) 2198 struct nfs_mount_info *mount_info)
2133{ 2199{
2200 struct nfs_parsed_mount_data *data = mount_info->parsed;
2134 struct nfs_server *server = NFS_SB(sb); 2201 struct nfs_server *server = NFS_SB(sb);
2135 2202
2136 sb->s_blocksize_bits = 0; 2203 sb->s_blocksize_bits = 0;
@@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,
2154 * Finish setting up a cloned NFS2/3 superblock 2221 * Finish setting up a cloned NFS2/3 superblock
2155 */ 2222 */
2156static void nfs_clone_super(struct super_block *sb, 2223static void nfs_clone_super(struct super_block *sb,
2157 const struct super_block *old_sb) 2224 struct nfs_mount_info *mount_info)
2158{ 2225{
2226 const struct super_block *old_sb = mount_info->cloned->sb;
2159 struct nfs_server *server = NFS_SB(sb); 2227 struct nfs_server *server = NFS_SB(sb);
2160 2228
2161 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2229 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
@@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)
2278 return nfs_compare_mount_options(sb, server, mntflags); 2346 return nfs_compare_mount_options(sb, server, mntflags);
2279} 2347}
2280 2348
2349#ifdef CONFIG_NFS_FSCACHE
2350static void nfs_get_cache_cookie(struct super_block *sb,
2351 struct nfs_parsed_mount_data *parsed,
2352 struct nfs_clone_mount *cloned)
2353{
2354 char *uniq = NULL;
2355 int ulen = 0;
2356
2357 if (parsed && parsed->fscache_uniq) {
2358 uniq = parsed->fscache_uniq;
2359 ulen = strlen(parsed->fscache_uniq);
2360 } else if (cloned) {
2361 struct nfs_server *mnt_s = NFS_SB(cloned->sb);
2362 if (mnt_s->fscache_key) {
2363 uniq = mnt_s->fscache_key->key.uniquifier;
2364 ulen = mnt_s->fscache_key->key.uniq_len;
2365 };
2366 }
2367
2368 nfs_fscache_get_super_cookie(sb, uniq, ulen);
2369}
2370#else
2371static void nfs_get_cache_cookie(struct super_block *sb,
2372 struct nfs_parsed_mount_data *parsed,
2373 struct nfs_clone_mount *cloned)
2374{
2375}
2376#endif
2377
2281static int nfs_bdi_register(struct nfs_server *server) 2378static int nfs_bdi_register(struct nfs_server *server)
2282{ 2379{
2283 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2380 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2284} 2381}
2285 2382
2286static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, 2383static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
2287 int flags, const char *dev_name, void *raw_data) 2384 struct nfs_mount_info *mount_info)
2385{
2386 return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
2387}
2388
2389static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
2390 struct nfs_mount_info *mount_info)
2391{
2392 /* clone any lsm security options from the parent to the new sb */
2393 security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
2394 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
2395 return -ESTALE;
2396 return 0;
2397}
2398
2399static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
2400 struct nfs_server *server,
2401 int flags, const char *dev_name,
2402 struct nfs_mount_info *mount_info)
2288{ 2403{
2289 struct nfs_server *server = NULL;
2290 struct super_block *s; 2404 struct super_block *s;
2291 struct nfs_parsed_mount_data *data;
2292 struct nfs_fh *mntfh;
2293 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2405 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2294 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2406 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2295 struct nfs_sb_mountdata sb_mntdata = { 2407 struct nfs_sb_mountdata sb_mntdata = {
2296 .mntflags = flags, 2408 .mntflags = flags,
2409 .server = server,
2297 }; 2410 };
2298 int error; 2411 int error;
2299 2412
2300 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2301 mntfh = nfs_alloc_fhandle();
2302 if (data == NULL || mntfh == NULL)
2303 goto out;
2304
2305 /* Validate the mount data */
2306 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2307 if (error < 0) {
2308 mntroot = ERR_PTR(error);
2309 goto out;
2310 }
2311
2312#ifdef CONFIG_NFS_V4
2313 if (data->version == 4) {
2314 mntroot = nfs4_try_mount(flags, dev_name, data);
2315 goto out;
2316 }
2317#endif /* CONFIG_NFS_V4 */
2318
2319 /* Get a volume representation */
2320 server = nfs_create_server(data, mntfh);
2321 if (IS_ERR(server)) {
2322 mntroot = ERR_CAST(server);
2323 goto out;
2324 }
2325 sb_mntdata.server = server;
2326
2327 if (server->flags & NFS_MOUNT_UNSHARED) 2413 if (server->flags & NFS_MOUNT_UNSHARED)
2328 compare_super = NULL; 2414 compare_super = NULL;
2329 2415
@@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2351 2437
2352 if (!s->s_root) { 2438 if (!s->s_root) {
2353 /* initial superblock/root creation */ 2439 /* initial superblock/root creation */
2354 nfs_fill_super(s, data); 2440 mount_info->fill_super(s, mount_info);
2355 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); 2441 nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
2356 } 2442 }
2357 2443
2358 mntroot = nfs_get_root(s, mntfh, dev_name); 2444 mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
2359 if (IS_ERR(mntroot)) 2445 if (IS_ERR(mntroot))
2360 goto error_splat_super; 2446 goto error_splat_super;
2361 2447
2362 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2448 error = mount_info->set_security(s, mntroot, mount_info);
2363 if (error) 2449 if (error)
2364 goto error_splat_root; 2450 goto error_splat_root;
2365 2451
2366 s->s_flags |= MS_ACTIVE; 2452 s->s_flags |= MS_ACTIVE;
2367 2453
2368out: 2454out:
2369 nfs_free_parsed_mount_data(data);
2370 nfs_free_fhandle(mntfh);
2371 return mntroot; 2455 return mntroot;
2372 2456
2373out_err_nosb: 2457out_err_nosb:
@@ -2385,6 +2469,43 @@ error_splat_bdi:
2385 goto out; 2469 goto out;
2386} 2470}
2387 2471
2472static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2473 int flags, const char *dev_name, void *raw_data)
2474{
2475 struct nfs_mount_info mount_info = {
2476 .fill_super = nfs_fill_super,
2477 .set_security = nfs_set_sb_security,
2478 };
2479 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2480 int error;
2481
2482 mount_info.parsed = nfs_alloc_parsed_mount_data();
2483 mount_info.mntfh = nfs_alloc_fhandle();
2484 if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
2485 goto out;
2486
2487 /* Validate the mount data */
2488 error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
2489 if (error == NFS_TEXT_DATA)
2490 error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
2491 if (error < 0) {
2492 mntroot = ERR_PTR(error);
2493 goto out;
2494 }
2495
2496#ifdef CONFIG_NFS_V4
2497 if (mount_info.parsed->version == 4)
2498 mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
2499 else
2500#endif /* CONFIG_NFS_V4 */
2501 mntroot = nfs_try_mount(flags, dev_name, &mount_info);
2502
2503out:
2504 nfs_free_parsed_mount_data(mount_info.parsed);
2505 nfs_free_fhandle(mount_info.mntfh);
2506 return mntroot;
2507}
2508
2388/* 2509/*
2389 * Ensure that we unregister the bdi before kill_anon_super 2510 * Ensure that we unregister the bdi before kill_anon_super
2390 * releases the device name 2511 * releases the device name
@@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)
2409} 2530}
2410 2531
2411/* 2532/*
2412 * Clone an NFS2/3 server record on xdev traversal (FSID-change) 2533 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
2413 */ 2534 */
2414static struct dentry * 2535static struct dentry *
2415nfs_xdev_mount(struct file_system_type *fs_type, int flags, 2536nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
2416 const char *dev_name, void *raw_data) 2537 const char *dev_name, struct nfs_mount_info *mount_info)
2417{ 2538{
2418 struct nfs_clone_mount *data = raw_data; 2539 struct nfs_clone_mount *data = mount_info->cloned;
2419 struct super_block *s;
2420 struct nfs_server *server; 2540 struct nfs_server *server;
2421 struct dentry *mntroot; 2541 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2422 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2423 struct nfs_sb_mountdata sb_mntdata = {
2424 .mntflags = flags,
2425 };
2426 int error; 2542 int error;
2427 2543
2428 dprintk("--> nfs_xdev_mount()\n"); 2544 dprintk("--> nfs_xdev_mount_common()\n");
2545
2546 mount_info->mntfh = data->fh;
2429 2547
2430 /* create a new volume representation */ 2548 /* create a new volume representation */
2431 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2549 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2432 if (IS_ERR(server)) { 2550 if (IS_ERR(server)) {
2433 error = PTR_ERR(server); 2551 error = PTR_ERR(server);
2434 goto out_err_noserver; 2552 goto out_err;
2435 }
2436 sb_mntdata.server = server;
2437
2438 if (server->flags & NFS_MOUNT_UNSHARED)
2439 compare_super = NULL;
2440
2441 /* -o noac implies -o sync */
2442 if (server->flags & NFS_MOUNT_NOAC)
2443 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2444
2445 /* Get a superblock - note that we may end up sharing one that already exists */
2446 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2447 if (IS_ERR(s)) {
2448 error = PTR_ERR(s);
2449 goto out_err_nosb;
2450 }
2451
2452 if (s->s_fs_info != server) {
2453 nfs_free_server(server);
2454 server = NULL;
2455 } else {
2456 error = nfs_bdi_register(server);
2457 if (error)
2458 goto error_splat_bdi;
2459 }
2460
2461 if (!s->s_root) {
2462 /* initial superblock/root creation */
2463 nfs_clone_super(s, data->sb);
2464 nfs_fscache_get_super_cookie(s, NULL, data);
2465 }
2466
2467 mntroot = nfs_get_root(s, data->fh, dev_name);
2468 if (IS_ERR(mntroot)) {
2469 error = PTR_ERR(mntroot);
2470 goto error_splat_super;
2471 }
2472 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
2473 dput(mntroot);
2474 error = -ESTALE;
2475 goto error_splat_super;
2476 } 2553 }
2477 2554
2478 s->s_flags |= MS_ACTIVE; 2555 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2479 2556 dprintk("<-- nfs_xdev_mount_common() = 0\n");
2480 /* clone any lsm security options from the parent to the new sb */ 2557out:
2481 security_sb_clone_mnt_opts(data->sb, s);
2482
2483 dprintk("<-- nfs_xdev_mount() = 0\n");
2484 return mntroot; 2558 return mntroot;
2485 2559
2486out_err_nosb: 2560out_err:
2487 nfs_free_server(server); 2561 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
2488out_err_noserver: 2562 goto out;
2489 dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); 2563}
2490 return ERR_PTR(error);
2491 2564
2492error_splat_super: 2565/*
2493 if (server && !s->s_root) 2566 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
2494 bdi_unregister(&server->backing_dev_info); 2567 */
2495error_splat_bdi: 2568static struct dentry *
2496 deactivate_locked_super(s); 2569nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2497 dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); 2570 const char *dev_name, void *raw_data)
2498 return ERR_PTR(error); 2571{
2572 struct nfs_mount_info mount_info = {
2573 .fill_super = nfs_clone_super,
2574 .set_security = nfs_clone_sb_security,
2575 .cloned = raw_data,
2576 };
2577 return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
2499} 2578}
2500 2579
2501#ifdef CONFIG_NFS_V4 2580#ifdef CONFIG_NFS_V4
@@ -2504,8 +2583,9 @@ error_splat_bdi:
2504 * Finish setting up a cloned NFS4 superblock 2583 * Finish setting up a cloned NFS4 superblock
2505 */ 2584 */
2506static void nfs4_clone_super(struct super_block *sb, 2585static void nfs4_clone_super(struct super_block *sb,
2507 const struct super_block *old_sb) 2586 struct nfs_mount_info *mount_info)
2508{ 2587{
2588 const struct super_block *old_sb = mount_info->cloned->sb;
2509 sb->s_blocksize_bits = old_sb->s_blocksize_bits; 2589 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
2510 sb->s_blocksize = old_sb->s_blocksize; 2590 sb->s_blocksize = old_sb->s_blocksize;
2511 sb->s_maxbytes = old_sb->s_maxbytes; 2591 sb->s_maxbytes = old_sb->s_maxbytes;
@@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,
2523/* 2603/*
2524 * Set up an NFS4 superblock 2604 * Set up an NFS4 superblock
2525 */ 2605 */
2526static void nfs4_fill_super(struct super_block *sb) 2606static void nfs4_fill_super(struct super_block *sb,
2607 struct nfs_mount_info *mount_info)
2527{ 2608{
2528 sb->s_time_gran = 1; 2609 sb->s_time_gran = 1;
2529 sb->s_op = &nfs4_sops; 2610 sb->s_op = &nfs4_sops;
@@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2542 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); 2623 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2543} 2624}
2544 2625
2545static int nfs4_validate_text_mount_data(void *options,
2546 struct nfs_parsed_mount_data *args,
2547 const char *dev_name)
2548{
2549 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2550
2551 nfs_set_port(sap, &args->nfs_server.port, NFS_PORT);
2552
2553 nfs_validate_transport_protocol(args);
2554
2555 nfs4_validate_mount_flags(args);
2556
2557 if (args->version != 4) {
2558 dfprintk(MOUNT,
2559 "NFS4: Illegal mount version\n");
2560 return -EINVAL;
2561 }
2562
2563 if (args->auth_flavor_len > 1) {
2564 dfprintk(MOUNT,
2565 "NFS4: Too many RPC auth flavours specified\n");
2566 return -EINVAL;
2567 }
2568
2569 return nfs_parse_devname(dev_name,
2570 &args->nfs_server.hostname,
2571 NFS4_MAXNAMLEN,
2572 &args->nfs_server.export_path,
2573 NFS4_MAXPATHLEN);
2574}
2575
2576/* 2626/*
2577 * Validate NFSv4 mount options 2627 * Validate NFSv4 mount options
2578 */ 2628 */
@@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,
2643 2693
2644 break; 2694 break;
2645 default: 2695 default:
2646 if (nfs_parse_mount_options((char *)options, args) == 0) 2696 return NFS_TEXT_DATA;
2647 return -EINVAL;
2648
2649 if (!nfs_verify_server_address(sap))
2650 return -EINVAL;
2651
2652 return nfs4_validate_text_mount_data(options, args, dev_name);
2653 } 2697 }
2654 2698
2655 return 0; 2699 return 0;
@@ -2673,91 +2717,26 @@ out_no_address:
2673 */ 2717 */
2674static struct dentry * 2718static struct dentry *
2675nfs4_remote_mount(struct file_system_type *fs_type, int flags, 2719nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2676 const char *dev_name, void *raw_data) 2720 const char *dev_name, void *info)
2677{ 2721{
2678 struct nfs_parsed_mount_data *data = raw_data; 2722 struct nfs_mount_info *mount_info = info;
2679 struct super_block *s;
2680 struct nfs_server *server; 2723 struct nfs_server *server;
2681 struct nfs_fh *mntfh; 2724 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2682 struct dentry *mntroot;
2683 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2684 struct nfs_sb_mountdata sb_mntdata = {
2685 .mntflags = flags,
2686 };
2687 int error = -ENOMEM;
2688 2725
2689 mntfh = nfs_alloc_fhandle(); 2726 mount_info->fill_super = nfs4_fill_super;
2690 if (data == NULL || mntfh == NULL) 2727 mount_info->set_security = nfs_set_sb_security;
2691 goto out;
2692 2728
2693 /* Get a volume representation */ 2729 /* Get a volume representation */
2694 server = nfs4_create_server(data, mntfh); 2730 server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
2695 if (IS_ERR(server)) { 2731 if (IS_ERR(server)) {
2696 error = PTR_ERR(server); 2732 mntroot = ERR_CAST(server);
2697 goto out; 2733 goto out;
2698 } 2734 }
2699 sb_mntdata.server = server;
2700 2735
2701 if (server->flags & NFS4_MOUNT_UNSHARED) 2736 mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
2702 compare_super = NULL;
2703
2704 /* -o noac implies -o sync */
2705 if (server->flags & NFS_MOUNT_NOAC)
2706 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2707
2708 /* Get a superblock - note that we may end up sharing one that already exists */
2709 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2710 if (IS_ERR(s)) {
2711 error = PTR_ERR(s);
2712 goto out_free;
2713 }
2714
2715 if (s->s_fs_info != server) {
2716 nfs_free_server(server);
2717 server = NULL;
2718 } else {
2719 error = nfs_bdi_register(server);
2720 if (error)
2721 goto error_splat_bdi;
2722 }
2723
2724 if (!s->s_root) {
2725 /* initial superblock/root creation */
2726 nfs4_fill_super(s);
2727 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2728 }
2729
2730 mntroot = nfs4_get_root(s, mntfh, dev_name);
2731 if (IS_ERR(mntroot)) {
2732 error = PTR_ERR(mntroot);
2733 goto error_splat_super;
2734 }
2735
2736 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2737 if (error)
2738 goto error_splat_root;
2739
2740 s->s_flags |= MS_ACTIVE;
2741
2742 nfs_free_fhandle(mntfh);
2743 return mntroot;
2744 2737
2745out: 2738out:
2746 nfs_free_fhandle(mntfh); 2739 return mntroot;
2747 return ERR_PTR(error);
2748
2749out_free:
2750 nfs_free_server(server);
2751 goto out;
2752
2753error_splat_root:
2754 dput(mntroot);
2755error_splat_super:
2756 if (server && !s->s_root)
2757 bdi_unregister(&server->backing_dev_info);
2758error_splat_bdi:
2759 deactivate_locked_super(s);
2760 goto out;
2761} 2740}
2762 2741
2763static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, 2742static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
@@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2869} 2848}
2870 2849
2871static struct dentry *nfs4_try_mount(int flags, const char *dev_name, 2850static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2872 struct nfs_parsed_mount_data *data) 2851 struct nfs_mount_info *mount_info)
2873{ 2852{
2874 char *export_path; 2853 char *export_path;
2875 struct vfsmount *root_mnt; 2854 struct vfsmount *root_mnt;
2876 struct dentry *res; 2855 struct dentry *res;
2856 struct nfs_parsed_mount_data *data = mount_info->parsed;
2877 2857
2878 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2858 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2879 2859
2880 export_path = data->nfs_server.export_path; 2860 export_path = data->nfs_server.export_path;
2881 data->nfs_server.export_path = "/"; 2861 data->nfs_server.export_path = "/";
2882 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, 2862 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
2883 data->nfs_server.hostname); 2863 data->nfs_server.hostname);
2884 data->nfs_server.export_path = export_path; 2864 data->nfs_server.export_path = export_path;
2885 2865
@@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2891 return res; 2871 return res;
2892} 2872}
2893 2873
2894/*
2895 * Get the superblock for an NFS4 mountpoint
2896 */
2897static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2898 int flags, const char *dev_name, void *raw_data)
2899{
2900 struct nfs_parsed_mount_data *data;
2901 int error = -ENOMEM;
2902 struct dentry *res = ERR_PTR(-ENOMEM);
2903
2904 data = nfs_alloc_parsed_mount_data(4);
2905 if (data == NULL)
2906 goto out;
2907
2908 /* Validate the mount data */
2909 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2910 if (error < 0) {
2911 res = ERR_PTR(error);
2912 goto out;
2913 }
2914
2915 res = nfs4_try_mount(flags, dev_name, data);
2916 if (IS_ERR(res))
2917 error = PTR_ERR(res);
2918
2919out:
2920 nfs_free_parsed_mount_data(data);
2921 dprintk("<-- nfs4_mount() = %d%s\n", error,
2922 error != 0 ? " [error]" : "");
2923 return res;
2924}
2925
2926static void nfs4_kill_super(struct super_block *sb) 2874static void nfs4_kill_super(struct super_block *sb)
2927{ 2875{
2928 struct nfs_server *server = NFS_SB(sb); 2876 struct nfs_server *server = NFS_SB(sb);
@@ -2942,181 +2890,43 @@ static struct dentry *
2942nfs4_xdev_mount(struct file_system_type *fs_type, int flags, 2890nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2943 const char *dev_name, void *raw_data) 2891 const char *dev_name, void *raw_data)
2944{ 2892{
2945 struct nfs_clone_mount *data = raw_data; 2893 struct nfs_mount_info mount_info = {
2946 struct super_block *s; 2894 .fill_super = nfs4_clone_super,
2947 struct nfs_server *server; 2895 .set_security = nfs_clone_sb_security,
2948 struct dentry *mntroot; 2896 .cloned = raw_data,
2949 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2950 struct nfs_sb_mountdata sb_mntdata = {
2951 .mntflags = flags,
2952 }; 2897 };
2953 int error; 2898 return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
2954
2955 dprintk("--> nfs4_xdev_mount()\n");
2956
2957 /* create a new volume representation */
2958 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2959 if (IS_ERR(server)) {
2960 error = PTR_ERR(server);
2961 goto out_err_noserver;
2962 }
2963 sb_mntdata.server = server;
2964
2965 if (server->flags & NFS4_MOUNT_UNSHARED)
2966 compare_super = NULL;
2967
2968 /* -o noac implies -o sync */
2969 if (server->flags & NFS_MOUNT_NOAC)
2970 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2971
2972 /* Get a superblock - note that we may end up sharing one that already exists */
2973 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2974 if (IS_ERR(s)) {
2975 error = PTR_ERR(s);
2976 goto out_err_nosb;
2977 }
2978
2979 if (s->s_fs_info != server) {
2980 nfs_free_server(server);
2981 server = NULL;
2982 } else {
2983 error = nfs_bdi_register(server);
2984 if (error)
2985 goto error_splat_bdi;
2986 }
2987
2988 if (!s->s_root) {
2989 /* initial superblock/root creation */
2990 nfs4_clone_super(s, data->sb);
2991 nfs_fscache_get_super_cookie(s, NULL, data);
2992 }
2993
2994 mntroot = nfs4_get_root(s, data->fh, dev_name);
2995 if (IS_ERR(mntroot)) {
2996 error = PTR_ERR(mntroot);
2997 goto error_splat_super;
2998 }
2999 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3000 dput(mntroot);
3001 error = -ESTALE;
3002 goto error_splat_super;
3003 }
3004
3005 s->s_flags |= MS_ACTIVE;
3006
3007 security_sb_clone_mnt_opts(data->sb, s);
3008
3009 dprintk("<-- nfs4_xdev_mount() = 0\n");
3010 return mntroot;
3011
3012out_err_nosb:
3013 nfs_free_server(server);
3014out_err_noserver:
3015 dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
3016 return ERR_PTR(error);
3017
3018error_splat_super:
3019 if (server && !s->s_root)
3020 bdi_unregister(&server->backing_dev_info);
3021error_splat_bdi:
3022 deactivate_locked_super(s);
3023 dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
3024 return ERR_PTR(error);
3025} 2899}
3026 2900
3027static struct dentry * 2901static struct dentry *
3028nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, 2902nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3029 const char *dev_name, void *raw_data) 2903 const char *dev_name, void *raw_data)
3030{ 2904{
3031 struct nfs_clone_mount *data = raw_data; 2905 struct nfs_mount_info mount_info = {
3032 struct super_block *s; 2906 .fill_super = nfs4_fill_super,
3033 struct nfs_server *server; 2907 .set_security = nfs_clone_sb_security,
3034 struct dentry *mntroot; 2908 .cloned = raw_data,
3035 struct nfs_fh *mntfh;
3036 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
3037 struct nfs_sb_mountdata sb_mntdata = {
3038 .mntflags = flags,
3039 }; 2909 };
3040 int error = -ENOMEM; 2910 struct nfs_server *server;
2911 struct dentry *mntroot = ERR_PTR(-ENOMEM);
3041 2912
3042 dprintk("--> nfs4_referral_get_sb()\n"); 2913 dprintk("--> nfs4_referral_get_sb()\n");
3043 2914
3044 mntfh = nfs_alloc_fhandle(); 2915 mount_info.mntfh = nfs_alloc_fhandle();
3045 if (mntfh == NULL) 2916 if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
3046 goto out_err_nofh; 2917 goto out;
3047 2918
3048 /* create a new volume representation */ 2919 /* create a new volume representation */
3049 server = nfs4_create_referral_server(data, mntfh); 2920 server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
3050 if (IS_ERR(server)) { 2921 if (IS_ERR(server)) {
3051 error = PTR_ERR(server); 2922 mntroot = ERR_CAST(server);
3052 goto out_err_noserver; 2923 goto out;
3053 }
3054 sb_mntdata.server = server;
3055
3056 if (server->flags & NFS4_MOUNT_UNSHARED)
3057 compare_super = NULL;
3058
3059 /* -o noac implies -o sync */
3060 if (server->flags & NFS_MOUNT_NOAC)
3061 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
3062
3063 /* Get a superblock - note that we may end up sharing one that already exists */
3064 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
3065 if (IS_ERR(s)) {
3066 error = PTR_ERR(s);
3067 goto out_err_nosb;
3068 }
3069
3070 if (s->s_fs_info != server) {
3071 nfs_free_server(server);
3072 server = NULL;
3073 } else {
3074 error = nfs_bdi_register(server);
3075 if (error)
3076 goto error_splat_bdi;
3077 }
3078
3079 if (!s->s_root) {
3080 /* initial superblock/root creation */
3081 nfs4_fill_super(s);
3082 nfs_fscache_get_super_cookie(s, NULL, data);
3083 }
3084
3085 mntroot = nfs4_get_root(s, mntfh, dev_name);
3086 if (IS_ERR(mntroot)) {
3087 error = PTR_ERR(mntroot);
3088 goto error_splat_super;
3089 }
3090 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
3091 dput(mntroot);
3092 error = -ESTALE;
3093 goto error_splat_super;
3094 } 2924 }
3095 2925
3096 s->s_flags |= MS_ACTIVE; 2926 mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
3097 2927out:
3098 security_sb_clone_mnt_opts(data->sb, s); 2928 nfs_free_fhandle(mount_info.mntfh);
3099
3100 nfs_free_fhandle(mntfh);
3101 dprintk("<-- nfs4_referral_get_sb() = 0\n");
3102 return mntroot; 2929 return mntroot;
3103
3104out_err_nosb:
3105 nfs_free_server(server);
3106out_err_noserver:
3107 nfs_free_fhandle(mntfh);
3108out_err_nofh:
3109 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
3110 return ERR_PTR(error);
3111
3112error_splat_super:
3113 if (server && !s->s_root)
3114 bdi_unregister(&server->backing_dev_info);
3115error_splat_bdi:
3116 deactivate_locked_super(s);
3117 nfs_free_fhandle(mntfh);
3118 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
3119 return ERR_PTR(error);
3120} 2930}
3121 2931
3122/* 2932/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c07462320f6b..e6fe3d69d14c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,20 +39,20 @@
39/* 39/*
40 * Local function declarations 40 * Local function declarations
41 */ 41 */
42static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
43 struct inode *inode, int ioflags);
44static void nfs_redirty_request(struct nfs_page *req); 42static void nfs_redirty_request(struct nfs_page *req);
45static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_common_ops;
46static const struct rpc_call_ops nfs_write_full_ops;
47static const struct rpc_call_ops nfs_commit_ops; 44static const struct rpc_call_ops nfs_commit_ops;
45static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
46static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48 47
49static struct kmem_cache *nfs_wdata_cachep; 48static struct kmem_cache *nfs_wdata_cachep;
50static mempool_t *nfs_wdata_mempool; 49static mempool_t *nfs_wdata_mempool;
50static struct kmem_cache *nfs_cdata_cachep;
51static mempool_t *nfs_commit_mempool; 51static mempool_t *nfs_commit_mempool;
52 52
53struct nfs_write_data *nfs_commitdata_alloc(void) 53struct nfs_commit_data *nfs_commitdata_alloc(void)
54{ 54{
55 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); 55 struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
56 56
57 if (p) { 57 if (p) {
58 memset(p, 0, sizeof(*p)); 58 memset(p, 0, sizeof(*p));
@@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
62} 62}
63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); 63EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
64 64
65void nfs_commit_free(struct nfs_write_data *p) 65void nfs_commit_free(struct nfs_commit_data *p)
66{ 66{
67 if (p && (p->pagevec != &p->page_array[0]))
68 kfree(p->pagevec);
69 mempool_free(p, nfs_commit_mempool); 67 mempool_free(p, nfs_commit_mempool);
70} 68}
71EXPORT_SYMBOL_GPL(nfs_commit_free); 69EXPORT_SYMBOL_GPL(nfs_commit_free);
72 70
73struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 71struct nfs_write_header *nfs_writehdr_alloc(void)
74{ 72{
75 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 73 struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
76 74
77 if (p) { 75 if (p) {
76 struct nfs_pgio_header *hdr = &p->header;
77
78 memset(p, 0, sizeof(*p)); 78 memset(p, 0, sizeof(*p));
79 INIT_LIST_HEAD(&p->pages); 79 INIT_LIST_HEAD(&hdr->pages);
80 p->npages = pagecount; 80 INIT_LIST_HEAD(&hdr->rpc_list);
81 if (pagecount <= ARRAY_SIZE(p->page_array)) 81 spin_lock_init(&hdr->lock);
82 p->pagevec = p->page_array; 82 atomic_set(&hdr->refcnt, 0);
83 else {
84 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
85 if (!p->pagevec) {
86 mempool_free(p, nfs_wdata_mempool);
87 p = NULL;
88 }
89 }
90 } 83 }
91 return p; 84 return p;
92} 85}
93 86
94void nfs_writedata_free(struct nfs_write_data *p) 87static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
88 unsigned int pagecount)
89{
90 struct nfs_write_data *data, *prealloc;
91
92 prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
93 if (prealloc->header == NULL)
94 data = prealloc;
95 else
96 data = kzalloc(sizeof(*data), GFP_KERNEL);
97 if (!data)
98 goto out;
99
100 if (nfs_pgarray_set(&data->pages, pagecount)) {
101 data->header = hdr;
102 atomic_inc(&hdr->refcnt);
103 } else {
104 if (data != prealloc)
105 kfree(data);
106 data = NULL;
107 }
108out:
109 return data;
110}
111
112void nfs_writehdr_free(struct nfs_pgio_header *hdr)
95{ 113{
96 if (p && (p->pagevec != &p->page_array[0])) 114 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
97 kfree(p->pagevec); 115 mempool_free(whdr, nfs_wdata_mempool);
98 mempool_free(p, nfs_wdata_mempool);
99} 116}
100 117
101void nfs_writedata_release(struct nfs_write_data *wdata) 118void nfs_writedata_release(struct nfs_write_data *wdata)
102{ 119{
120 struct nfs_pgio_header *hdr = wdata->header;
121 struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
122
103 put_nfs_open_context(wdata->args.context); 123 put_nfs_open_context(wdata->args.context);
104 nfs_writedata_free(wdata); 124 if (wdata->pages.pagevec != wdata->pages.page_array)
125 kfree(wdata->pages.pagevec);
126 if (wdata != &write_header->rpc_data)
127 kfree(wdata);
128 else
129 wdata->header = NULL;
130 if (atomic_dec_and_test(&hdr->refcnt))
131 hdr->completion_ops->completion(hdr);
105} 132}
106 133
107static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 134static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)
203 struct inode *inode = page->mapping->host; 230 struct inode *inode = page->mapping->host;
204 struct nfs_server *nfss = NFS_SERVER(inode); 231 struct nfs_server *nfss = NFS_SERVER(inode);
205 232
206 page_cache_get(page);
207 if (atomic_long_inc_return(&nfss->writeback) > 233 if (atomic_long_inc_return(&nfss->writeback) >
208 NFS_CONGESTION_ON_THRESH) { 234 NFS_CONGESTION_ON_THRESH) {
209 set_bdi_congested(&nfss->backing_dev_info, 235 set_bdi_congested(&nfss->backing_dev_info,
@@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)
219 struct nfs_server *nfss = NFS_SERVER(inode); 245 struct nfs_server *nfss = NFS_SERVER(inode);
220 246
221 end_page_writeback(page); 247 end_page_writeback(page);
222 page_cache_release(page);
223 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 248 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
224 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 249 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
225} 250}
@@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
235 req = nfs_page_find_request_locked(page); 260 req = nfs_page_find_request_locked(page);
236 if (req == NULL) 261 if (req == NULL)
237 break; 262 break;
238 if (nfs_lock_request_dontget(req)) 263 if (nfs_lock_request(req))
239 break; 264 break;
240 /* Note: If we hold the page lock, as is the case in nfs_writepage, 265 /* Note: If we hold the page lock, as is the case in nfs_writepage,
241 * then the call to nfs_lock_request_dontget() will always 266 * then the call to nfs_lock_request() will always
242 * succeed provided that someone hasn't already marked the 267 * succeed provided that someone hasn't already marked the
243 * request as dirty (in which case we don't care). 268 * request as dirty (in which case we don't care).
244 */ 269 */
@@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
310 struct nfs_pageio_descriptor pgio; 335 struct nfs_pageio_descriptor pgio;
311 int err; 336 int err;
312 337
313 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); 338 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
339 &nfs_async_write_completion_ops);
314 err = nfs_do_writepage(page, wbc, &pgio); 340 err = nfs_do_writepage(page, wbc, &pgio);
315 nfs_pageio_complete(&pgio); 341 nfs_pageio_complete(&pgio);
316 if (err < 0) 342 if (err < 0)
@@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353 379
354 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 380 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
355 381
356 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 382 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
383 &nfs_async_write_completion_ops);
357 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 384 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
358 nfs_pageio_complete(&pgio); 385 nfs_pageio_complete(&pgio);
359 386
@@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379 struct nfs_inode *nfsi = NFS_I(inode); 406 struct nfs_inode *nfsi = NFS_I(inode);
380 407
381 /* Lock the request! */ 408 /* Lock the request! */
382 nfs_lock_request_dontget(req); 409 nfs_lock_request(req);
383 410
384 spin_lock(&inode->i_lock); 411 spin_lock(&inode->i_lock);
385 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 412 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)
421/** 448/**
422 * nfs_request_add_commit_list - add request to a commit list 449 * nfs_request_add_commit_list - add request to a commit list
423 * @req: pointer to a struct nfs_page 450 * @req: pointer to a struct nfs_page
424 * @head: commit list head 451 * @dst: commit list head
452 * @cinfo: holds list lock and accounting info
425 * 453 *
426 * This sets the PG_CLEAN bit, updates the inode global count of 454 * This sets the PG_CLEAN bit, updates the cinfo count of
427 * number of outstanding requests requiring a commit as well as 455 * number of outstanding requests requiring a commit as well as
428 * the MM page stats. 456 * the MM page stats.
429 * 457 *
430 * The caller must _not_ hold the inode->i_lock, but must be 458 * The caller must _not_ hold the cinfo->lock, but must be
431 * holding the nfs_page lock. 459 * holding the nfs_page lock.
432 */ 460 */
433void 461void
434nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) 462nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
463 struct nfs_commit_info *cinfo)
435{ 464{
436 struct inode *inode = req->wb_context->dentry->d_inode;
437
438 set_bit(PG_CLEAN, &(req)->wb_flags); 465 set_bit(PG_CLEAN, &(req)->wb_flags);
439 spin_lock(&inode->i_lock); 466 spin_lock(cinfo->lock);
440 nfs_list_add_request(req, head); 467 nfs_list_add_request(req, dst);
441 NFS_I(inode)->ncommit++; 468 cinfo->mds->ncommit++;
442 spin_unlock(&inode->i_lock); 469 spin_unlock(cinfo->lock);
443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 470 if (!cinfo->dreq) {
444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 471 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 472 inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
473 BDI_RECLAIMABLE);
474 __mark_inode_dirty(req->wb_context->dentry->d_inode,
475 I_DIRTY_DATASYNC);
476 }
446} 477}
447EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); 478EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
448 479
449/** 480/**
450 * nfs_request_remove_commit_list - Remove request from a commit list 481 * nfs_request_remove_commit_list - Remove request from a commit list
451 * @req: pointer to a nfs_page 482 * @req: pointer to a nfs_page
483 * @cinfo: holds list lock and accounting info
452 * 484 *
453 * This clears the PG_CLEAN bit, and updates the inode global count of 485 * This clears the PG_CLEAN bit, and updates the cinfo's count of
454 * number of outstanding requests requiring a commit 486 * number of outstanding requests requiring a commit
455 * It does not update the MM page stats. 487 * It does not update the MM page stats.
456 * 488 *
457 * The caller _must_ hold the inode->i_lock and the nfs_page lock. 489 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
458 */ 490 */
459void 491void
460nfs_request_remove_commit_list(struct nfs_page *req) 492nfs_request_remove_commit_list(struct nfs_page *req,
493 struct nfs_commit_info *cinfo)
461{ 494{
462 struct inode *inode = req->wb_context->dentry->d_inode;
463
464 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) 495 if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
465 return; 496 return;
466 nfs_list_remove_request(req); 497 nfs_list_remove_request(req);
467 NFS_I(inode)->ncommit--; 498 cinfo->mds->ncommit--;
468} 499}
469EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); 500EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
470 501
502static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
503 struct inode *inode)
504{
505 cinfo->lock = &inode->i_lock;
506 cinfo->mds = &NFS_I(inode)->commit_info;
507 cinfo->ds = pnfs_get_ds_info(inode);
508 cinfo->dreq = NULL;
509 cinfo->completion_ops = &nfs_commit_completion_ops;
510}
511
512void nfs_init_cinfo(struct nfs_commit_info *cinfo,
513 struct inode *inode,
514 struct nfs_direct_req *dreq)
515{
516 if (dreq)
517 nfs_init_cinfo_from_dreq(cinfo, dreq);
518 else
519 nfs_init_cinfo_from_inode(cinfo, inode);
520}
521EXPORT_SYMBOL_GPL(nfs_init_cinfo);
471 522
472/* 523/*
473 * Add a request to the inode's commit list. 524 * Add a request to the inode's commit list.
474 */ 525 */
475static void 526void
476nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 527nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
528 struct nfs_commit_info *cinfo)
477{ 529{
478 struct inode *inode = req->wb_context->dentry->d_inode; 530 if (pnfs_mark_request_commit(req, lseg, cinfo))
479
480 if (pnfs_mark_request_commit(req, lseg))
481 return; 531 return;
482 nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); 532 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
483} 533}
484 534
485static void 535static void
@@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)
494{ 544{
495 if (test_bit(PG_CLEAN, &req->wb_flags)) { 545 if (test_bit(PG_CLEAN, &req->wb_flags)) {
496 struct inode *inode = req->wb_context->dentry->d_inode; 546 struct inode *inode = req->wb_context->dentry->d_inode;
547 struct nfs_commit_info cinfo;
497 548
498 if (!pnfs_clear_request_commit(req)) { 549 nfs_init_cinfo_from_inode(&cinfo, inode);
499 spin_lock(&inode->i_lock); 550 if (!pnfs_clear_request_commit(req, &cinfo)) {
500 nfs_request_remove_commit_list(req); 551 spin_lock(cinfo.lock);
501 spin_unlock(&inode->i_lock); 552 nfs_request_remove_commit_list(req, &cinfo);
553 spin_unlock(cinfo.lock);
502 } 554 }
503 nfs_clear_page_commit(req->wb_page); 555 nfs_clear_page_commit(req->wb_page);
504 } 556 }
@@ -508,28 +560,25 @@ static inline
508int nfs_write_need_commit(struct nfs_write_data *data) 560int nfs_write_need_commit(struct nfs_write_data *data)
509{ 561{
510 if (data->verf.committed == NFS_DATA_SYNC) 562 if (data->verf.committed == NFS_DATA_SYNC)
511 return data->lseg == NULL; 563 return data->header->lseg == NULL;
512 else 564 return data->verf.committed != NFS_FILE_SYNC;
513 return data->verf.committed != NFS_FILE_SYNC;
514} 565}
515 566
516static inline 567#else
517int nfs_reschedule_unstable_write(struct nfs_page *req, 568static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
518 struct nfs_write_data *data) 569 struct inode *inode)
519{ 570{
520 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
521 nfs_mark_request_commit(req, data->lseg);
522 return 1;
523 }
524 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
525 nfs_mark_request_dirty(req);
526 return 1;
527 }
528 return 0;
529} 571}
530#else 572
531static void 573void nfs_init_cinfo(struct nfs_commit_info *cinfo,
532nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 574 struct inode *inode,
575 struct nfs_direct_req *dreq)
576{
577}
578
579void
580nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
581 struct nfs_commit_info *cinfo)
533{ 582{
534} 583}
535 584
@@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)
544 return 0; 593 return 0;
545} 594}
546 595
547static inline 596#endif
548int nfs_reschedule_unstable_write(struct nfs_page *req, 597
549 struct nfs_write_data *data) 598static void nfs_write_completion(struct nfs_pgio_header *hdr)
550{ 599{
551 return 0; 600 struct nfs_commit_info cinfo;
601 unsigned long bytes = 0;
602
603 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
604 goto out;
605 nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
606 while (!list_empty(&hdr->pages)) {
607 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
608
609 bytes += req->wb_bytes;
610 nfs_list_remove_request(req);
611 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
612 (hdr->good_bytes < bytes)) {
613 nfs_set_pageerror(req->wb_page);
614 nfs_context_set_write_error(req->wb_context, hdr->error);
615 goto remove_req;
616 }
617 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
618 nfs_mark_request_dirty(req);
619 goto next;
620 }
621 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
622 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
623 goto next;
624 }
625remove_req:
626 nfs_inode_remove_request(req);
627next:
628 nfs_unlock_request(req);
629 nfs_end_page_writeback(req->wb_page);
630 nfs_release_request(req);
631 }
632out:
633 hdr->release(hdr);
552} 634}
553#endif
554 635
555#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 636#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
556static int 637static unsigned long
557nfs_need_commit(struct nfs_inode *nfsi) 638nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
558{ 639{
559 return nfsi->ncommit > 0; 640 return cinfo->mds->ncommit;
560} 641}
561 642
562/* i_lock held by caller */ 643/* cinfo->lock held by caller */
563static int 644int
564nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, 645nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
565 spinlock_t *lock) 646 struct nfs_commit_info *cinfo, int max)
566{ 647{
567 struct nfs_page *req, *tmp; 648 struct nfs_page *req, *tmp;
568 int ret = 0; 649 int ret = 0;
@@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
570 list_for_each_entry_safe(req, tmp, src, wb_list) { 651 list_for_each_entry_safe(req, tmp, src, wb_list) {
571 if (!nfs_lock_request(req)) 652 if (!nfs_lock_request(req))
572 continue; 653 continue;
573 if (cond_resched_lock(lock)) 654 kref_get(&req->wb_kref);
655 if (cond_resched_lock(cinfo->lock))
574 list_safe_reset_next(req, tmp, wb_list); 656 list_safe_reset_next(req, tmp, wb_list);
575 nfs_request_remove_commit_list(req); 657 nfs_request_remove_commit_list(req, cinfo);
576 nfs_list_add_request(req, dst); 658 nfs_list_add_request(req, dst);
577 ret++; 659 ret++;
578 if (ret == max) 660 if ((ret == max) && !cinfo->dreq)
579 break; 661 break;
580 } 662 }
581 return ret; 663 return ret;
@@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
584/* 666/*
585 * nfs_scan_commit - Scan an inode for commit requests 667 * nfs_scan_commit - Scan an inode for commit requests
586 * @inode: NFS inode to scan 668 * @inode: NFS inode to scan
587 * @dst: destination list 669 * @dst: mds destination list
670 * @cinfo: mds and ds lists of reqs ready to commit
588 * 671 *
589 * Moves requests from the inode's 'commit' request list. 672 * Moves requests from the inode's 'commit' request list.
590 * The requests are *not* checked to ensure that they form a contiguous set. 673 * The requests are *not* checked to ensure that they form a contiguous set.
591 */ 674 */
592static int 675int
593nfs_scan_commit(struct inode *inode, struct list_head *dst) 676nfs_scan_commit(struct inode *inode, struct list_head *dst,
677 struct nfs_commit_info *cinfo)
594{ 678{
595 struct nfs_inode *nfsi = NFS_I(inode);
596 int ret = 0; 679 int ret = 0;
597 680
598 spin_lock(&inode->i_lock); 681 spin_lock(cinfo->lock);
599 if (nfsi->ncommit > 0) { 682 if (cinfo->mds->ncommit > 0) {
600 const int max = INT_MAX; 683 const int max = INT_MAX;
601 684
602 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, 685 ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
603 &inode->i_lock); 686 cinfo, max);
604 ret += pnfs_scan_commit_lists(inode, max - ret, 687 ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
605 &inode->i_lock);
606 } 688 }
607 spin_unlock(&inode->i_lock); 689 spin_unlock(cinfo->lock);
608 return ret; 690 return ret;
609} 691}
610 692
611#else 693#else
612static inline int nfs_need_commit(struct nfs_inode *nfsi) 694static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
613{ 695{
614 return 0; 696 return 0;
615} 697}
616 698
617static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) 699int nfs_scan_commit(struct inode *inode, struct list_head *dst,
700 struct nfs_commit_info *cinfo)
618{ 701{
619 return 0; 702 return 0;
620} 703}
@@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
659 || end < req->wb_offset) 742 || end < req->wb_offset)
660 goto out_flushme; 743 goto out_flushme;
661 744
662 if (nfs_lock_request_dontget(req)) 745 if (nfs_lock_request(req))
663 break; 746 break;
664 747
665 /* The request is locked, so wait and then retry */ 748 /* The request is locked, so wait and then retry */
@@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
729 nfs_grow_file(page, offset, count); 812 nfs_grow_file(page, offset, count);
730 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 813 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
731 nfs_mark_request_dirty(req); 814 nfs_mark_request_dirty(req);
732 nfs_unlock_request(req); 815 nfs_unlock_and_release_request(req);
733 return 0; 816 return 0;
734} 817}
735 818
@@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
766 * the PageUptodate() flag. In this case, we will need to turn off 849 * the PageUptodate() flag. In this case, we will need to turn off
767 * write optimisations that depend on the page contents being correct. 850 * write optimisations that depend on the page contents being correct.
768 */ 851 */
769static int nfs_write_pageuptodate(struct page *page, struct inode *inode) 852static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
770{ 853{
771 return PageUptodate(page) && 854 if (nfs_have_delegated_attributes(inode))
772 !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); 855 goto out;
856 if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
857 return false;
858out:
859 return PageUptodate(page) != 0;
773} 860}
774 861
775/* 862/*
@@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,
815 return status; 902 return status;
816} 903}
817 904
818static void nfs_writepage_release(struct nfs_page *req,
819 struct nfs_write_data *data)
820{
821 struct page *page = req->wb_page;
822
823 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
824 nfs_inode_remove_request(req);
825 nfs_unlock_request(req);
826 nfs_end_page_writeback(page);
827}
828
829static int flush_task_priority(int how) 905static int flush_task_priority(int how)
830{ 906{
831 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { 907 switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -837,18 +913,18 @@ static int flush_task_priority(int how)
837 return RPC_PRIORITY_NORMAL; 913 return RPC_PRIORITY_NORMAL;
838} 914}
839 915
840int nfs_initiate_write(struct nfs_write_data *data, 916int nfs_initiate_write(struct rpc_clnt *clnt,
841 struct rpc_clnt *clnt, 917 struct nfs_write_data *data,
842 const struct rpc_call_ops *call_ops, 918 const struct rpc_call_ops *call_ops,
843 int how) 919 int how, int flags)
844{ 920{
845 struct inode *inode = data->inode; 921 struct inode *inode = data->header->inode;
846 int priority = flush_task_priority(how); 922 int priority = flush_task_priority(how);
847 struct rpc_task *task; 923 struct rpc_task *task;
848 struct rpc_message msg = { 924 struct rpc_message msg = {
849 .rpc_argp = &data->args, 925 .rpc_argp = &data->args,
850 .rpc_resp = &data->res, 926 .rpc_resp = &data->res,
851 .rpc_cred = data->cred, 927 .rpc_cred = data->header->cred,
852 }; 928 };
853 struct rpc_task_setup task_setup_data = { 929 struct rpc_task_setup task_setup_data = {
854 .rpc_client = clnt, 930 .rpc_client = clnt,
@@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,
857 .callback_ops = call_ops, 933 .callback_ops = call_ops,
858 .callback_data = data, 934 .callback_data = data,
859 .workqueue = nfsiod_workqueue, 935 .workqueue = nfsiod_workqueue,
860 .flags = RPC_TASK_ASYNC, 936 .flags = RPC_TASK_ASYNC | flags,
861 .priority = priority, 937 .priority = priority,
862 }; 938 };
863 int ret = 0; 939 int ret = 0;
@@ -892,26 +968,21 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
892/* 968/*
893 * Set up the argument/result storage required for the RPC call. 969 * Set up the argument/result storage required for the RPC call.
894 */ 970 */
895static void nfs_write_rpcsetup(struct nfs_page *req, 971static void nfs_write_rpcsetup(struct nfs_write_data *data,
896 struct nfs_write_data *data,
897 unsigned int count, unsigned int offset, 972 unsigned int count, unsigned int offset,
898 int how) 973 int how, struct nfs_commit_info *cinfo)
899{ 974{
900 struct inode *inode = req->wb_context->dentry->d_inode; 975 struct nfs_page *req = data->header->req;
901 976
902 /* Set up the RPC argument and reply structs 977 /* Set up the RPC argument and reply structs
903 * NB: take care not to mess about with data->commit et al. */ 978 * NB: take care not to mess about with data->commit et al. */
904 979
905 data->req = req; 980 data->args.fh = NFS_FH(data->header->inode);
906 data->inode = inode = req->wb_context->dentry->d_inode;
907 data->cred = req->wb_context->cred;
908
909 data->args.fh = NFS_FH(inode);
910 data->args.offset = req_offset(req) + offset; 981 data->args.offset = req_offset(req) + offset;
911 /* pnfs_set_layoutcommit needs this */ 982 /* pnfs_set_layoutcommit needs this */
912 data->mds_offset = data->args.offset; 983 data->mds_offset = data->args.offset;
913 data->args.pgbase = req->wb_pgbase + offset; 984 data->args.pgbase = req->wb_pgbase + offset;
914 data->args.pages = data->pagevec; 985 data->args.pages = data->pages.pagevec;
915 data->args.count = count; 986 data->args.count = count;
916 data->args.context = get_nfs_open_context(req->wb_context); 987 data->args.context = get_nfs_open_context(req->wb_context);
917 data->args.lock_context = req->wb_lock_context; 988 data->args.lock_context = req->wb_lock_context;
@@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
920 case 0: 991 case 0:
921 break; 992 break;
922 case FLUSH_COND_STABLE: 993 case FLUSH_COND_STABLE:
923 if (nfs_need_commit(NFS_I(inode))) 994 if (nfs_reqs_to_commit(cinfo))
924 break; 995 break;
925 default: 996 default:
926 data->args.stable = NFS_FILE_SYNC; 997 data->args.stable = NFS_FILE_SYNC;
@@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,
936 const struct rpc_call_ops *call_ops, 1007 const struct rpc_call_ops *call_ops,
937 int how) 1008 int how)
938{ 1009{
939 struct inode *inode = data->args.context->dentry->d_inode; 1010 struct inode *inode = data->header->inode;
940 1011
941 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 1012 return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
942} 1013}
943 1014
944static int nfs_do_multiple_writes(struct list_head *head, 1015static int nfs_do_multiple_writes(struct list_head *head,
@@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
951 while (!list_empty(head)) { 1022 while (!list_empty(head)) {
952 int ret2; 1023 int ret2;
953 1024
954 data = list_entry(head->next, struct nfs_write_data, list); 1025 data = list_first_entry(head, struct nfs_write_data, list);
955 list_del_init(&data->list); 1026 list_del_init(&data->list);
956 1027
957 ret2 = nfs_do_write(data, call_ops, how); 1028 ret2 = nfs_do_write(data, call_ops, how);
@@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,
967 */ 1038 */
968static void nfs_redirty_request(struct nfs_page *req) 1039static void nfs_redirty_request(struct nfs_page *req)
969{ 1040{
970 struct page *page = req->wb_page;
971
972 nfs_mark_request_dirty(req); 1041 nfs_mark_request_dirty(req);
973 nfs_unlock_request(req); 1042 nfs_unlock_request(req);
974 nfs_end_page_writeback(page); 1043 nfs_end_page_writeback(req->wb_page);
1044 nfs_release_request(req);
1045}
1046
1047static void nfs_async_write_error(struct list_head *head)
1048{
1049 struct nfs_page *req;
1050
1051 while (!list_empty(head)) {
1052 req = nfs_list_entry(head->next);
1053 nfs_list_remove_request(req);
1054 nfs_redirty_request(req);
1055 }
1056}
1057
1058static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1059 .error_cleanup = nfs_async_write_error,
1060 .completion = nfs_write_completion,
1061};
1062
1063static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1064 struct nfs_pgio_header *hdr)
1065{
1066 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1067 while (!list_empty(&hdr->rpc_list)) {
1068 struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1069 struct nfs_write_data, list);
1070 list_del(&data->list);
1071 nfs_writedata_release(data);
1072 }
1073 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
975} 1074}
976 1075
977/* 1076/*
978 * Generate multiple small requests to write out a single 1077 * Generate multiple small requests to write out a single
979 * contiguous dirty area on one page. 1078 * contiguous dirty area on one page.
980 */ 1079 */
981static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 1080static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1081 struct nfs_pgio_header *hdr)
982{ 1082{
983 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 1083 struct nfs_page *req = hdr->req;
984 struct page *page = req->wb_page; 1084 struct page *page = req->wb_page;
985 struct nfs_write_data *data; 1085 struct nfs_write_data *data;
986 size_t wsize = desc->pg_bsize, nbytes; 1086 size_t wsize = desc->pg_bsize, nbytes;
987 unsigned int offset; 1087 unsigned int offset;
988 int requests = 0; 1088 int requests = 0;
989 int ret = 0; 1089 struct nfs_commit_info cinfo;
990 1090
991 nfs_list_remove_request(req); 1091 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
992 1092
993 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1093 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
994 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || 1094 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
995 desc->pg_count > wsize)) 1095 desc->pg_count > wsize))
996 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1096 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
997 1097
@@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
1001 do { 1101 do {
1002 size_t len = min(nbytes, wsize); 1102 size_t len = min(nbytes, wsize);
1003 1103
1004 data = nfs_writedata_alloc(1); 1104 data = nfs_writedata_alloc(hdr, 1);
1005 if (!data) 1105 if (!data) {
1006 goto out_bad; 1106 nfs_flush_error(desc, hdr);
1007 data->pagevec[0] = page; 1107 return -ENOMEM;
1008 nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); 1108 }
1009 list_add(&data->list, res); 1109 data->pages.pagevec[0] = page;
1110 nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1111 list_add(&data->list, &hdr->rpc_list);
1010 requests++; 1112 requests++;
1011 nbytes -= len; 1113 nbytes -= len;
1012 offset += len; 1114 offset += len;
1013 } while (nbytes != 0); 1115 } while (nbytes != 0);
1014 atomic_set(&req->wb_complete, requests); 1116 nfs_list_remove_request(req);
1015 desc->pg_rpc_callops = &nfs_write_partial_ops; 1117 nfs_list_add_request(req, &hdr->pages);
1016 return ret; 1118 desc->pg_rpc_callops = &nfs_write_common_ops;
1017 1119 return 0;
1018out_bad:
1019 while (!list_empty(res)) {
1020 data = list_entry(res->next, struct nfs_write_data, list);
1021 list_del(&data->list);
1022 nfs_writedata_release(data);
1023 }
1024 nfs_redirty_request(req);
1025 return -ENOMEM;
1026} 1120}
1027 1121
1028/* 1122/*
@@ -1033,62 +1127,71 @@ out_bad:
1033 * This is the case if nfs_updatepage detects a conflicting request 1127 * This is the case if nfs_updatepage detects a conflicting request
1034 * that has been written but not committed. 1128 * that has been written but not committed.
1035 */ 1129 */
1036static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 1130static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1131 struct nfs_pgio_header *hdr)
1037{ 1132{
1038 struct nfs_page *req; 1133 struct nfs_page *req;
1039 struct page **pages; 1134 struct page **pages;
1040 struct nfs_write_data *data; 1135 struct nfs_write_data *data;
1041 struct list_head *head = &desc->pg_list; 1136 struct list_head *head = &desc->pg_list;
1042 int ret = 0; 1137 struct nfs_commit_info cinfo;
1043 1138
1044 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 1139 data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1045 desc->pg_count)); 1140 desc->pg_count));
1046 if (!data) { 1141 if (!data) {
1047 while (!list_empty(head)) { 1142 nfs_flush_error(desc, hdr);
1048 req = nfs_list_entry(head->next); 1143 return -ENOMEM;
1049 nfs_list_remove_request(req);
1050 nfs_redirty_request(req);
1051 }
1052 ret = -ENOMEM;
1053 goto out;
1054 } 1144 }
1055 pages = data->pagevec; 1145
1146 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1147 pages = data->pages.pagevec;
1056 while (!list_empty(head)) { 1148 while (!list_empty(head)) {
1057 req = nfs_list_entry(head->next); 1149 req = nfs_list_entry(head->next);
1058 nfs_list_remove_request(req); 1150 nfs_list_remove_request(req);
1059 nfs_list_add_request(req, &data->pages); 1151 nfs_list_add_request(req, &hdr->pages);
1060 *pages++ = req->wb_page; 1152 *pages++ = req->wb_page;
1061 } 1153 }
1062 req = nfs_list_entry(data->pages.next);
1063 1154
1064 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1155 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1065 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1156 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1066 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1157 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1067 1158
1068 /* Set up the argument struct */ 1159 /* Set up the argument struct */
1069 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); 1160 nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1070 list_add(&data->list, res); 1161 list_add(&data->list, &hdr->rpc_list);
1071 desc->pg_rpc_callops = &nfs_write_full_ops; 1162 desc->pg_rpc_callops = &nfs_write_common_ops;
1072out: 1163 return 0;
1073 return ret;
1074} 1164}
1075 1165
1076int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) 1166int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1167 struct nfs_pgio_header *hdr)
1077{ 1168{
1078 if (desc->pg_bsize < PAGE_CACHE_SIZE) 1169 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1079 return nfs_flush_multi(desc, head); 1170 return nfs_flush_multi(desc, hdr);
1080 return nfs_flush_one(desc, head); 1171 return nfs_flush_one(desc, hdr);
1081} 1172}
1082 1173
1083static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1174static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1084{ 1175{
1085 LIST_HEAD(head); 1176 struct nfs_write_header *whdr;
1177 struct nfs_pgio_header *hdr;
1086 int ret; 1178 int ret;
1087 1179
1088 ret = nfs_generic_flush(desc, &head); 1180 whdr = nfs_writehdr_alloc();
1181 if (!whdr) {
1182 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1183 return -ENOMEM;
1184 }
1185 hdr = &whdr->header;
1186 nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1187 atomic_inc(&hdr->refcnt);
1188 ret = nfs_generic_flush(desc, hdr);
1089 if (ret == 0) 1189 if (ret == 0)
1090 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, 1190 ret = nfs_do_multiple_writes(&hdr->rpc_list,
1091 desc->pg_ioflags); 1191 desc->pg_rpc_callops,
1192 desc->pg_ioflags);
1193 if (atomic_dec_and_test(&hdr->refcnt))
1194 hdr->completion_ops->completion(hdr);
1092 return ret; 1195 return ret;
1093} 1196}
1094 1197
@@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1098}; 1201};
1099 1202
1100void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1203void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1101 struct inode *inode, int ioflags) 1204 struct inode *inode, int ioflags,
1205 const struct nfs_pgio_completion_ops *compl_ops)
1102{ 1206{
1103 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1207 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
1104 NFS_SERVER(inode)->wsize, ioflags); 1208 NFS_SERVER(inode)->wsize, ioflags);
1105} 1209}
1106 1210
@@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1111} 1215}
1112EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1216EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1113 1217
1114static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1218void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1115 struct inode *inode, int ioflags) 1219 struct inode *inode, int ioflags,
1220 const struct nfs_pgio_completion_ops *compl_ops)
1116{ 1221{
1117 if (!pnfs_pageio_init_write(pgio, inode, ioflags)) 1222 if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
1118 nfs_pageio_init_write_mds(pgio, inode, ioflags); 1223 nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
1119} 1224}
1120 1225
1121/* 1226void nfs_write_prepare(struct rpc_task *task, void *calldata)
1122 * Handle a write reply that flushed part of a page.
1123 */
1124static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1125{ 1227{
1126 struct nfs_write_data *data = calldata; 1228 struct nfs_write_data *data = calldata;
1127 1229 NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1128 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1129 task->tk_pid,
1130 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1131 (long long)
1132 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1133 data->req->wb_bytes, (long long)req_offset(data->req));
1134
1135 nfs_writeback_done(task, data);
1136} 1230}
1137 1231
1138static void nfs_writeback_release_partial(void *calldata) 1232void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1139{ 1233{
1140 struct nfs_write_data *data = calldata; 1234 struct nfs_commit_data *data = calldata;
1141 struct nfs_page *req = data->req;
1142 struct page *page = req->wb_page;
1143 int status = data->task.tk_status;
1144 1235
1145 if (status < 0) { 1236 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1146 nfs_set_pageerror(page);
1147 nfs_context_set_write_error(req->wb_context, status);
1148 dprintk(", error = %d\n", status);
1149 goto out;
1150 }
1151
1152 if (nfs_write_need_commit(data)) {
1153 struct inode *inode = page->mapping->host;
1154
1155 spin_lock(&inode->i_lock);
1156 if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
1157 /* Do nothing we need to resend the writes */
1158 } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
1159 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1160 dprintk(" defer commit\n");
1161 } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
1162 set_bit(PG_NEED_RESCHED, &req->wb_flags);
1163 clear_bit(PG_NEED_COMMIT, &req->wb_flags);
1164 dprintk(" server reboot detected\n");
1165 }
1166 spin_unlock(&inode->i_lock);
1167 } else
1168 dprintk(" OK\n");
1169
1170out:
1171 if (atomic_dec_and_test(&req->wb_complete))
1172 nfs_writepage_release(req, data);
1173 nfs_writedata_release(calldata);
1174} 1237}
1175 1238
1176void nfs_write_prepare(struct rpc_task *task, void *calldata)
1177{
1178 struct nfs_write_data *data = calldata;
1179 NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
1180}
1181
1182static const struct rpc_call_ops nfs_write_partial_ops = {
1183 .rpc_call_prepare = nfs_write_prepare,
1184 .rpc_call_done = nfs_writeback_done_partial,
1185 .rpc_release = nfs_writeback_release_partial,
1186};
1187
1188/* 1239/*
1189 * Handle a write reply that flushes a whole page. 1240 * Handle a write reply that flushes a whole page.
1190 * 1241 *
@@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
1192 * writebacks since the page->count is kept > 1 for as long 1243 * writebacks since the page->count is kept > 1 for as long
1193 * as the page has a write request pending. 1244 * as the page has a write request pending.
1194 */ 1245 */
1195static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) 1246static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1196{ 1247{
1197 struct nfs_write_data *data = calldata; 1248 struct nfs_write_data *data = calldata;
1198 1249
1199 nfs_writeback_done(task, data); 1250 nfs_writeback_done(task, data);
1200} 1251}
1201 1252
1202static void nfs_writeback_release_full(void *calldata) 1253static void nfs_writeback_release_common(void *calldata)
1203{ 1254{
1204 struct nfs_write_data *data = calldata; 1255 struct nfs_write_data *data = calldata;
1256 struct nfs_pgio_header *hdr = data->header;
1205 int status = data->task.tk_status; 1257 int status = data->task.tk_status;
1258 struct nfs_page *req = hdr->req;
1206 1259
1207 /* Update attributes as result of writeback. */ 1260 if ((status >= 0) && nfs_write_need_commit(data)) {
1208 while (!list_empty(&data->pages)) { 1261 spin_lock(&hdr->lock);
1209 struct nfs_page *req = nfs_list_entry(data->pages.next); 1262 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1210 struct page *page = req->wb_page; 1263 ; /* Do nothing */
1211 1264 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1212 nfs_list_remove_request(req);
1213
1214 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1215 data->task.tk_pid,
1216 req->wb_context->dentry->d_inode->i_sb->s_id,
1217 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1218 req->wb_bytes,
1219 (long long)req_offset(req));
1220
1221 if (status < 0) {
1222 nfs_set_pageerror(page);
1223 nfs_context_set_write_error(req->wb_context, status);
1224 dprintk(", error = %d\n", status);
1225 goto remove_request;
1226 }
1227
1228 if (nfs_write_need_commit(data)) {
1229 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1265 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1230 nfs_mark_request_commit(req, data->lseg); 1266 else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
1231 dprintk(" marked for commit\n"); 1267 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1232 goto next; 1268 spin_unlock(&hdr->lock);
1233 }
1234 dprintk(" OK\n");
1235remove_request:
1236 nfs_inode_remove_request(req);
1237 next:
1238 nfs_unlock_request(req);
1239 nfs_end_page_writeback(page);
1240 } 1269 }
1241 nfs_writedata_release(calldata); 1270 nfs_writedata_release(data);
1242} 1271}
1243 1272
1244static const struct rpc_call_ops nfs_write_full_ops = { 1273static const struct rpc_call_ops nfs_write_common_ops = {
1245 .rpc_call_prepare = nfs_write_prepare, 1274 .rpc_call_prepare = nfs_write_prepare,
1246 .rpc_call_done = nfs_writeback_done_full, 1275 .rpc_call_done = nfs_writeback_done_common,
1247 .rpc_release = nfs_writeback_release_full, 1276 .rpc_release = nfs_writeback_release_common,
1248}; 1277};
1249 1278
1250 1279
@@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1255{ 1284{
1256 struct nfs_writeargs *argp = &data->args; 1285 struct nfs_writeargs *argp = &data->args;
1257 struct nfs_writeres *resp = &data->res; 1286 struct nfs_writeres *resp = &data->res;
1287 struct inode *inode = data->header->inode;
1258 int status; 1288 int status;
1259 1289
1260 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1290 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1267 * another writer had changed the file, but some applications 1297 * another writer had changed the file, but some applications
1268 * depend on tighter cache coherency when writing. 1298 * depend on tighter cache coherency when writing.
1269 */ 1299 */
1270 status = NFS_PROTO(data->inode)->write_done(task, data); 1300 status = NFS_PROTO(inode)->write_done(task, data);
1271 if (status != 0) 1301 if (status != 0)
1272 return; 1302 return;
1273 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1303 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1274 1304
1275#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1305#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1276 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1306 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1288 if (time_before(complain, jiffies)) { 1318 if (time_before(complain, jiffies)) {
1289 dprintk("NFS: faulty NFS server %s:" 1319 dprintk("NFS: faulty NFS server %s:"
1290 " (committed = %d) != (stable = %d)\n", 1320 " (committed = %d) != (stable = %d)\n",
1291 NFS_SERVER(data->inode)->nfs_client->cl_hostname, 1321 NFS_SERVER(inode)->nfs_client->cl_hostname,
1292 resp->verf->committed, argp->stable); 1322 resp->verf->committed, argp->stable);
1293 complain = jiffies + 300 * HZ; 1323 complain = jiffies + 300 * HZ;
1294 } 1324 }
1295 } 1325 }
1296#endif 1326#endif
1297 /* Is this a short write? */ 1327 if (task->tk_status < 0)
1298 if (task->tk_status >= 0 && resp->count < argp->count) { 1328 nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
1329 else if (resp->count < argp->count) {
1299 static unsigned long complain; 1330 static unsigned long complain;
1300 1331
1301 nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); 1332 /* This a short write! */
1333 nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
1302 1334
1303 /* Has the server at least made some progress? */ 1335 /* Has the server at least made some progress? */
1304 if (resp->count != 0) { 1336 if (resp->count == 0) {
1305 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1337 if (time_before(complain, jiffies)) {
1306 if (resp->verf->committed != NFS_UNSTABLE) { 1338 printk(KERN_WARNING
1307 /* Resend from where the server left off */ 1339 "NFS: Server wrote zero bytes, expected %u.\n",
1308 data->mds_offset += resp->count; 1340 argp->count);
1309 argp->offset += resp->count; 1341 complain = jiffies + 300 * HZ;
1310 argp->pgbase += resp->count;
1311 argp->count -= resp->count;
1312 } else {
1313 /* Resend as a stable write in order to avoid
1314 * headaches in the case of a server crash.
1315 */
1316 argp->stable = NFS_FILE_SYNC;
1317 } 1342 }
1318 rpc_restart_call_prepare(task); 1343 nfs_set_pgio_error(data->header, -EIO, argp->offset);
1344 task->tk_status = -EIO;
1319 return; 1345 return;
1320 } 1346 }
1321 if (time_before(complain, jiffies)) { 1347 /* Was this an NFSv2 write or an NFSv3 stable write? */
1322 printk(KERN_WARNING 1348 if (resp->verf->committed != NFS_UNSTABLE) {
1323 "NFS: Server wrote zero bytes, expected %u.\n", 1349 /* Resend from where the server left off */
1324 argp->count); 1350 data->mds_offset += resp->count;
1325 complain = jiffies + 300 * HZ; 1351 argp->offset += resp->count;
1352 argp->pgbase += resp->count;
1353 argp->count -= resp->count;
1354 } else {
1355 /* Resend as a stable write in order to avoid
1356 * headaches in the case of a server crash.
1357 */
1358 argp->stable = NFS_FILE_SYNC;
1326 } 1359 }
1327 /* Can't do anything about it except throw an error. */ 1360 rpc_restart_call_prepare(task);
1328 task->tk_status = -EIO;
1329 } 1361 }
1330 return;
1331} 1362}
1332 1363
1333 1364
@@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1347 return (ret < 0) ? ret : 1; 1378 return (ret < 0) ? ret : 1;
1348} 1379}
1349 1380
1350void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1381static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1351{ 1382{
1352 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1383 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1353 smp_mb__after_clear_bit(); 1384 smp_mb__after_clear_bit();
1354 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1385 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1355} 1386}
1356EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1357 1387
1358void nfs_commitdata_release(void *data) 1388void nfs_commitdata_release(struct nfs_commit_data *data)
1359{ 1389{
1360 struct nfs_write_data *wdata = data; 1390 put_nfs_open_context(data->context);
1361 1391 nfs_commit_free(data);
1362 put_nfs_open_context(wdata->args.context);
1363 nfs_commit_free(wdata);
1364} 1392}
1365EXPORT_SYMBOL_GPL(nfs_commitdata_release); 1393EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1366 1394
1367int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, 1395int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1368 const struct rpc_call_ops *call_ops, 1396 const struct rpc_call_ops *call_ops,
1369 int how) 1397 int how, int flags)
1370{ 1398{
1371 struct rpc_task *task; 1399 struct rpc_task *task;
1372 int priority = flush_task_priority(how); 1400 int priority = flush_task_priority(how);
@@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1382 .callback_ops = call_ops, 1410 .callback_ops = call_ops,
1383 .callback_data = data, 1411 .callback_data = data,
1384 .workqueue = nfsiod_workqueue, 1412 .workqueue = nfsiod_workqueue,
1385 .flags = RPC_TASK_ASYNC, 1413 .flags = RPC_TASK_ASYNC | flags,
1386 .priority = priority, 1414 .priority = priority,
1387 }; 1415 };
1388 /* Set up the initial task struct. */ 1416 /* Set up the initial task struct. */
@@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1403/* 1431/*
1404 * Set up the argument/result storage required for the RPC call. 1432 * Set up the argument/result storage required for the RPC call.
1405 */ 1433 */
1406void nfs_init_commit(struct nfs_write_data *data, 1434void nfs_init_commit(struct nfs_commit_data *data,
1407 struct list_head *head, 1435 struct list_head *head,
1408 struct pnfs_layout_segment *lseg) 1436 struct pnfs_layout_segment *lseg,
1437 struct nfs_commit_info *cinfo)
1409{ 1438{
1410 struct nfs_page *first = nfs_list_entry(head->next); 1439 struct nfs_page *first = nfs_list_entry(head->next);
1411 struct inode *inode = first->wb_context->dentry->d_inode; 1440 struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,
1419 data->cred = first->wb_context->cred; 1448 data->cred = first->wb_context->cred;
1420 data->lseg = lseg; /* reference transferred */ 1449 data->lseg = lseg; /* reference transferred */
1421 data->mds_ops = &nfs_commit_ops; 1450 data->mds_ops = &nfs_commit_ops;
1451 data->completion_ops = cinfo->completion_ops;
1452 data->dreq = cinfo->dreq;
1422 1453
1423 data->args.fh = NFS_FH(data->inode); 1454 data->args.fh = NFS_FH(data->inode);
1424 /* Note: we always request a commit of the entire inode */ 1455 /* Note: we always request a commit of the entire inode */
1425 data->args.offset = 0; 1456 data->args.offset = 0;
1426 data->args.count = 0; 1457 data->args.count = 0;
1427 data->args.context = get_nfs_open_context(first->wb_context); 1458 data->context = get_nfs_open_context(first->wb_context);
1428 data->res.count = 0;
1429 data->res.fattr = &data->fattr; 1459 data->res.fattr = &data->fattr;
1430 data->res.verf = &data->verf; 1460 data->res.verf = &data->verf;
1431 nfs_fattr_init(&data->fattr); 1461 nfs_fattr_init(&data->fattr);
@@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,
1433EXPORT_SYMBOL_GPL(nfs_init_commit); 1463EXPORT_SYMBOL_GPL(nfs_init_commit);
1434 1464
1435void nfs_retry_commit(struct list_head *page_list, 1465void nfs_retry_commit(struct list_head *page_list,
1436 struct pnfs_layout_segment *lseg) 1466 struct pnfs_layout_segment *lseg,
1467 struct nfs_commit_info *cinfo)
1437{ 1468{
1438 struct nfs_page *req; 1469 struct nfs_page *req;
1439 1470
1440 while (!list_empty(page_list)) { 1471 while (!list_empty(page_list)) {
1441 req = nfs_list_entry(page_list->next); 1472 req = nfs_list_entry(page_list->next);
1442 nfs_list_remove_request(req); 1473 nfs_list_remove_request(req);
1443 nfs_mark_request_commit(req, lseg); 1474 nfs_mark_request_commit(req, lseg, cinfo);
1444 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1475 if (!cinfo->dreq) {
1445 dec_bdi_stat(req->wb_page->mapping->backing_dev_info, 1476 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1446 BDI_RECLAIMABLE); 1477 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1447 nfs_unlock_request(req); 1478 BDI_RECLAIMABLE);
1479 }
1480 nfs_unlock_and_release_request(req);
1448 } 1481 }
1449} 1482}
1450EXPORT_SYMBOL_GPL(nfs_retry_commit); 1483EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
1453 * Commit dirty pages 1486 * Commit dirty pages
1454 */ 1487 */
1455static int 1488static int
1456nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1489nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1490 struct nfs_commit_info *cinfo)
1457{ 1491{
1458 struct nfs_write_data *data; 1492 struct nfs_commit_data *data;
1459 1493
1460 data = nfs_commitdata_alloc(); 1494 data = nfs_commitdata_alloc();
1461 1495
@@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1463 goto out_bad; 1497 goto out_bad;
1464 1498
1465 /* Set up the argument struct */ 1499 /* Set up the argument struct */
1466 nfs_init_commit(data, head, NULL); 1500 nfs_init_commit(data, head, NULL, cinfo);
1467 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); 1501 atomic_inc(&cinfo->mds->rpcs_out);
1502 return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
1503 how, 0);
1468 out_bad: 1504 out_bad:
1469 nfs_retry_commit(head, NULL); 1505 nfs_retry_commit(head, NULL, cinfo);
1470 nfs_commit_clear_lock(NFS_I(inode)); 1506 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1471 return -ENOMEM; 1507 return -ENOMEM;
1472} 1508}
1473 1509
@@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1476 */ 1512 */
1477static void nfs_commit_done(struct rpc_task *task, void *calldata) 1513static void nfs_commit_done(struct rpc_task *task, void *calldata)
1478{ 1514{
1479 struct nfs_write_data *data = calldata; 1515 struct nfs_commit_data *data = calldata;
1480 1516
1481 dprintk("NFS: %5u nfs_commit_done (status %d)\n", 1517 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1482 task->tk_pid, task->tk_status); 1518 task->tk_pid, task->tk_status);
@@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1485 NFS_PROTO(data->inode)->commit_done(task, data); 1521 NFS_PROTO(data->inode)->commit_done(task, data);
1486} 1522}
1487 1523
1488void nfs_commit_release_pages(struct nfs_write_data *data) 1524static void nfs_commit_release_pages(struct nfs_commit_data *data)
1489{ 1525{
1490 struct nfs_page *req; 1526 struct nfs_page *req;
1491 int status = data->task.tk_status; 1527 int status = data->task.tk_status;
1528 struct nfs_commit_info cinfo;
1492 1529
1493 while (!list_empty(&data->pages)) { 1530 while (!list_empty(&data->pages)) {
1494 req = nfs_list_entry(data->pages.next); 1531 req = nfs_list_entry(data->pages.next);
@@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1519 dprintk(" mismatch\n"); 1556 dprintk(" mismatch\n");
1520 nfs_mark_request_dirty(req); 1557 nfs_mark_request_dirty(req);
1521 next: 1558 next:
1522 nfs_unlock_request(req); 1559 nfs_unlock_and_release_request(req);
1523 } 1560 }
1561 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1562 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
1563 nfs_commit_clear_lock(NFS_I(data->inode));
1524} 1564}
1525EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1526 1565
1527static void nfs_commit_release(void *calldata) 1566static void nfs_commit_release(void *calldata)
1528{ 1567{
1529 struct nfs_write_data *data = calldata; 1568 struct nfs_commit_data *data = calldata;
1530 1569
1531 nfs_commit_release_pages(data); 1570 data->completion_ops->completion(data);
1532 nfs_commit_clear_lock(NFS_I(data->inode));
1533 nfs_commitdata_release(calldata); 1571 nfs_commitdata_release(calldata);
1534} 1572}
1535 1573
1536static const struct rpc_call_ops nfs_commit_ops = { 1574static const struct rpc_call_ops nfs_commit_ops = {
1537 .rpc_call_prepare = nfs_write_prepare, 1575 .rpc_call_prepare = nfs_commit_prepare,
1538 .rpc_call_done = nfs_commit_done, 1576 .rpc_call_done = nfs_commit_done,
1539 .rpc_release = nfs_commit_release, 1577 .rpc_release = nfs_commit_release,
1540}; 1578};
1541 1579
1580static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1581 .completion = nfs_commit_release_pages,
1582 .error_cleanup = nfs_commit_clear_lock,
1583};
1584
1585int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1586 int how, struct nfs_commit_info *cinfo)
1587{
1588 int status;
1589
1590 status = pnfs_commit_list(inode, head, how, cinfo);
1591 if (status == PNFS_NOT_ATTEMPTED)
1592 status = nfs_commit_list(inode, head, how, cinfo);
1593 return status;
1594}
1595
1542int nfs_commit_inode(struct inode *inode, int how) 1596int nfs_commit_inode(struct inode *inode, int how)
1543{ 1597{
1544 LIST_HEAD(head); 1598 LIST_HEAD(head);
1599 struct nfs_commit_info cinfo;
1545 int may_wait = how & FLUSH_SYNC; 1600 int may_wait = how & FLUSH_SYNC;
1546 int res; 1601 int res;
1547 1602
1548 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1603 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1549 if (res <= 0) 1604 if (res <= 0)
1550 goto out_mark_dirty; 1605 goto out_mark_dirty;
1551 res = nfs_scan_commit(inode, &head); 1606 nfs_init_cinfo_from_inode(&cinfo, inode);
1607 res = nfs_scan_commit(inode, &head, &cinfo);
1552 if (res) { 1608 if (res) {
1553 int error; 1609 int error;
1554 1610
1555 error = pnfs_commit_list(inode, &head, how); 1611 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1556 if (error == PNFS_NOT_ATTEMPTED)
1557 error = nfs_commit_list(inode, &head, how);
1558 if (error < 0) 1612 if (error < 0)
1559 return error; 1613 return error;
1560 if (!may_wait) 1614 if (!may_wait)
@@ -1585,14 +1639,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1585 int ret = 0; 1639 int ret = 0;
1586 1640
1587 /* no commits means nothing needs to be done */ 1641 /* no commits means nothing needs to be done */
1588 if (!nfsi->ncommit) 1642 if (!nfsi->commit_info.ncommit)
1589 return ret; 1643 return ret;
1590 1644
1591 if (wbc->sync_mode == WB_SYNC_NONE) { 1645 if (wbc->sync_mode == WB_SYNC_NONE) {
1592 /* Don't commit yet if this is a non-blocking flush and there 1646 /* Don't commit yet if this is a non-blocking flush and there
1593 * are a lot of outstanding writes for this mapping. 1647 * are a lot of outstanding writes for this mapping.
1594 */ 1648 */
1595 if (nfsi->ncommit <= (nfsi->npages >> 1)) 1649 if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1596 goto out_mark_dirty; 1650 goto out_mark_dirty;
1597 1651
1598 /* don't wait for the COMMIT response */ 1652 /* don't wait for the COMMIT response */
@@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1665 req = nfs_page_find_request(page); 1719 req = nfs_page_find_request(page);
1666 if (req == NULL) 1720 if (req == NULL)
1667 break; 1721 break;
1668 if (nfs_lock_request_dontget(req)) { 1722 if (nfs_lock_request(req)) {
1669 nfs_clear_request_commit(req); 1723 nfs_clear_request_commit(req);
1670 nfs_inode_remove_request(req); 1724 nfs_inode_remove_request(req);
1671 /* 1725 /*
@@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1673 * page as being dirty 1727 * page as being dirty
1674 */ 1728 */
1675 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1729 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1676 nfs_unlock_request(req); 1730 nfs_unlock_and_release_request(req);
1677 break; 1731 break;
1678 } 1732 }
1679 ret = nfs_wait_on_request(req); 1733 ret = nfs_wait_on_request(req);
@@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1742int __init nfs_init_writepagecache(void) 1796int __init nfs_init_writepagecache(void)
1743{ 1797{
1744 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1798 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1745 sizeof(struct nfs_write_data), 1799 sizeof(struct nfs_write_header),
1746 0, SLAB_HWCACHE_ALIGN, 1800 0, SLAB_HWCACHE_ALIGN,
1747 NULL); 1801 NULL);
1748 if (nfs_wdata_cachep == NULL) 1802 if (nfs_wdata_cachep == NULL)
@@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)
1753 if (nfs_wdata_mempool == NULL) 1807 if (nfs_wdata_mempool == NULL)
1754 return -ENOMEM; 1808 return -ENOMEM;
1755 1809
1810 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1811 sizeof(struct nfs_commit_data),
1812 0, SLAB_HWCACHE_ALIGN,
1813 NULL);
1814 if (nfs_cdata_cachep == NULL)
1815 return -ENOMEM;
1816
1756 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1817 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1757 nfs_wdata_cachep); 1818 nfs_wdata_cachep);
1758 if (nfs_commit_mempool == NULL) 1819 if (nfs_commit_mempool == NULL)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8f7b95ac1f7e..7cc64465ec26 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -734,7 +734,7 @@ void nilfs_evict_inode(struct inode *inode)
734 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { 734 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
735 if (inode->i_data.nrpages) 735 if (inode->i_data.nrpages)
736 truncate_inode_pages(&inode->i_data, 0); 736 truncate_inode_pages(&inode->i_data, 0);
737 end_writeback(inode); 737 clear_inode(inode);
738 nilfs_clear_inode(inode); 738 nilfs_clear_inode(inode);
739 return; 739 return;
740 } 740 }
@@ -746,7 +746,7 @@ void nilfs_evict_inode(struct inode *inode)
746 /* TODO: some of the following operations may fail. */ 746 /* TODO: some of the following operations may fail. */
747 nilfs_truncate_bmap(ii, 0); 747 nilfs_truncate_bmap(ii, 0);
748 nilfs_mark_inode_dirty(inode); 748 nilfs_mark_inode_dirty(inode);
749 end_writeback(inode); 749 clear_inode(inode);
750 750
751 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); 751 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
752 if (!ret) 752 if (!ret)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2eaa66652944..c6dbd3db6ca8 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2258,7 +2258,7 @@ void ntfs_evict_big_inode(struct inode *vi)
2258 ntfs_inode *ni = NTFS_I(vi); 2258 ntfs_inode *ni = NTFS_I(vi);
2259 2259
2260 truncate_inode_pages(&vi->i_data, 0); 2260 truncate_inode_pages(&vi->i_data, 0);
2261 end_writeback(vi); 2261 clear_inode(vi);
2262 2262
2263#ifdef NTFS_RW 2263#ifdef NTFS_RW
2264 if (NInoDirty(ni)) { 2264 if (NInoDirty(ni)) {
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 3b5825ef3193..e31d6ae013ab 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -367,7 +367,7 @@ static void dlmfs_evict_inode(struct inode *inode)
367 int status; 367 int status;
368 struct dlmfs_inode_private *ip; 368 struct dlmfs_inode_private *ip;
369 369
370 end_writeback(inode); 370 clear_inode(inode);
371 371
372 mlog(0, "inode %lu\n", inode->i_ino); 372 mlog(0, "inode %lu\n", inode->i_ino);
373 373
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 17454a904d7b..735514ca400f 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1069,7 +1069,7 @@ static void ocfs2_clear_inode(struct inode *inode)
1069 int status; 1069 int status;
1070 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1070 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1071 1071
1072 end_writeback(inode); 1072 clear_inode(inode);
1073 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, 1073 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
1074 inode->i_nlink); 1074 inode->i_nlink);
1075 1075
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index dbc842222589..e6213b3725d1 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -184,7 +184,7 @@ int omfs_sync_inode(struct inode *inode)
184static void omfs_evict_inode(struct inode *inode) 184static void omfs_evict_inode(struct inode *inode)
185{ 185{
186 truncate_inode_pages(&inode->i_data, 0); 186 truncate_inode_pages(&inode->i_data, 0);
187 end_writeback(inode); 187 clear_inode(inode);
188 188
189 if (inode->i_nlink) 189 if (inode->i_nlink)
190 return; 190 return;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2d3108a611c..d7d711876b6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = {
411 411
412static int proc_oom_score(struct task_struct *task, char *buffer) 412static int proc_oom_score(struct task_struct *task, char *buffer)
413{ 413{
414 unsigned long totalpages = totalram_pages + total_swap_pages;
414 unsigned long points = 0; 415 unsigned long points = 0;
415 416
416 read_lock(&tasklist_lock); 417 read_lock(&tasklist_lock);
417 if (pid_alive(task)) 418 if (pid_alive(task))
418 points = oom_badness(task, NULL, NULL, 419 points = oom_badness(task, NULL, NULL, totalpages) *
419 totalram_pages + total_swap_pages); 420 1000 / totalpages;
420 read_unlock(&tasklist_lock); 421 read_unlock(&tasklist_lock);
421 return sprintf(buffer, "%lu\n", points); 422 return sprintf(buffer, "%lu\n", points);
422} 423}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 554ecc54799f..7ac817b64a71 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode)
33 const struct proc_ns_operations *ns_ops; 33 const struct proc_ns_operations *ns_ops;
34 34
35 truncate_inode_pages(&inode->i_data, 0); 35 truncate_inode_pages(&inode->i_data, 0);
36 end_writeback(inode); 36 clear_inode(inode);
37 37
38 /* Stop tracking associated processes */ 38 /* Stop tracking associated processes */
39 put_pid(PROC_I(inode)->pid); 39 put_pid(PROC_I(inode)->pid);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1030a716d155..7faaf2acc570 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -784,7 +784,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
784 784
785 /* find the first VMA at or above 'addr' */ 785 /* find the first VMA at or above 'addr' */
786 vma = find_vma(walk->mm, addr); 786 vma = find_vma(walk->mm, addr);
787 if (pmd_trans_huge_lock(pmd, vma) == 1) { 787 if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
788 for (; addr != end; addr += PAGE_SIZE) { 788 for (; addr != end; addr += PAGE_SIZE) {
789 unsigned long offset; 789 unsigned long offset;
790 790
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 19507889bb7f..aeb19e68e086 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -85,7 +85,7 @@ static void pstore_evict_inode(struct inode *inode)
85 struct pstore_private *p = inode->i_private; 85 struct pstore_private *p = inode->i_private;
86 unsigned long flags; 86 unsigned long flags;
87 87
88 end_writeback(inode); 88 clear_inode(inode);
89 if (p) { 89 if (p) {
90 spin_lock_irqsave(&allpstore_lock, flags); 90 spin_lock_irqsave(&allpstore_lock, flags);
91 list_del(&p->list); 91 list_del(&p->list);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d69a1d1d7e15..10cbe841cb7e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -116,15 +116,15 @@
116 * spinlock to internal buffers before writing. 116 * spinlock to internal buffers before writing.
117 * 117 *
118 * Lock ordering (including related VFS locks) is the following: 118 * Lock ordering (including related VFS locks) is the following:
119 * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > 119 * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock >
120 * dqio_mutex 120 * dqio_mutex
121 * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
121 * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > 122 * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
122 * dqptr_sem. But filesystem has to count with the fact that functions such as 123 * dqptr_sem. But filesystem has to count with the fact that functions such as
123 * dquot_alloc_space() acquire dqptr_sem and they usually have to be called 124 * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
124 * from inside a transaction to keep filesystem consistency after a crash. Also 125 * from inside a transaction to keep filesystem consistency after a crash. Also
125 * filesystems usually want to do some IO on dquot from ->mark_dirty which is 126 * filesystems usually want to do some IO on dquot from ->mark_dirty which is
126 * called with dqptr_sem held. 127 * called with dqptr_sem held.
127 * i_mutex on quota files is special (it's below dqio_mutex)
128 */ 128 */
129 129
130static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); 130static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@ -638,7 +638,7 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
638 dqstats_inc(DQST_SYNCS); 638 dqstats_inc(DQST_SYNCS);
639 mutex_unlock(&dqopt->dqonoff_mutex); 639 mutex_unlock(&dqopt->dqonoff_mutex);
640 640
641 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) 641 if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE))
642 return 0; 642 return 0;
643 643
644 /* This is not very clever (and fast) but currently I don't know about 644 /* This is not very clever (and fast) but currently I don't know about
@@ -652,18 +652,17 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
652 * Now when everything is written we can discard the pagecache so 652 * Now when everything is written we can discard the pagecache so
653 * that userspace sees the changes. 653 * that userspace sees the changes.
654 */ 654 */
655 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 655 mutex_lock(&dqopt->dqonoff_mutex);
656 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 656 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
657 if (type != -1 && cnt != type) 657 if (type != -1 && cnt != type)
658 continue; 658 continue;
659 if (!sb_has_quota_active(sb, cnt)) 659 if (!sb_has_quota_active(sb, cnt))
660 continue; 660 continue;
661 mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, 661 mutex_lock(&dqopt->files[cnt]->i_mutex);
662 I_MUTEX_QUOTA); 662 truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
663 truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); 663 mutex_unlock(&dqopt->files[cnt]->i_mutex);
664 mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
665 } 664 }
666 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 665 mutex_unlock(&dqopt->dqonoff_mutex);
667 666
668 return 0; 667 return 0;
669} 668}
@@ -907,14 +906,14 @@ static void add_dquot_ref(struct super_block *sb, int type)
907 spin_unlock(&inode->i_lock); 906 spin_unlock(&inode->i_lock);
908 continue; 907 continue;
909 } 908 }
910#ifdef CONFIG_QUOTA_DEBUG
911 if (unlikely(inode_get_rsv_space(inode) > 0))
912 reserved = 1;
913#endif
914 __iget(inode); 909 __iget(inode);
915 spin_unlock(&inode->i_lock); 910 spin_unlock(&inode->i_lock);
916 spin_unlock(&inode_sb_list_lock); 911 spin_unlock(&inode_sb_list_lock);
917 912
913#ifdef CONFIG_QUOTA_DEBUG
914 if (unlikely(inode_get_rsv_space(inode) > 0))
915 reserved = 1;
916#endif
918 iput(old_inode); 917 iput(old_inode);
919 __dquot_initialize(inode, type); 918 __dquot_initialize(inode, type);
920 919
@@ -2037,8 +2036,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
2037 /* If quota was reenabled in the meantime, we have 2036 /* If quota was reenabled in the meantime, we have
2038 * nothing to do */ 2037 * nothing to do */
2039 if (!sb_has_quota_loaded(sb, cnt)) { 2038 if (!sb_has_quota_loaded(sb, cnt)) {
2040 mutex_lock_nested(&toputinode[cnt]->i_mutex, 2039 mutex_lock(&toputinode[cnt]->i_mutex);
2041 I_MUTEX_QUOTA);
2042 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | 2040 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
2043 S_NOATIME | S_NOQUOTA); 2041 S_NOATIME | S_NOQUOTA);
2044 truncate_inode_pages(&toputinode[cnt]->i_data, 2042 truncate_inode_pages(&toputinode[cnt]->i_data,
@@ -2133,7 +2131,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2133 /* We don't want quota and atime on quota files (deadlocks 2131 /* We don't want quota and atime on quota files (deadlocks
2134 * possible) Also nobody should write to the file - we use 2132 * possible) Also nobody should write to the file - we use
2135 * special IO operations which ignore the immutable bit. */ 2133 * special IO operations which ignore the immutable bit. */
2136 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2134 mutex_lock(&inode->i_mutex);
2137 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | 2135 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
2138 S_NOQUOTA); 2136 S_NOQUOTA);
2139 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; 2137 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
@@ -2180,7 +2178,7 @@ out_file_init:
2180 iput(inode); 2178 iput(inode);
2181out_lock: 2179out_lock:
2182 if (oldflags != -1) { 2180 if (oldflags != -1) {
2183 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2181 mutex_lock(&inode->i_mutex);
2184 /* Set the flags back (in the case of accidental quotaon() 2182 /* Set the flags back (in the case of accidental quotaon()
2185 * on a wrong file we don't want to mess up the flags) */ 2183 * on a wrong file we don't want to mess up the flags) */
2186 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); 2184 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 494c315c7417..59d06871a850 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,14 +76,14 @@ void reiserfs_evict_inode(struct inode *inode)
76 ; 76 ;
77 } 77 }
78 out: 78 out:
79 end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ 79 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
80 dquot_drop(inode); 80 dquot_drop(inode);
81 inode->i_blocks = 0; 81 inode->i_blocks = 0;
82 reiserfs_write_unlock_once(inode->i_sb, depth); 82 reiserfs_write_unlock_once(inode->i_sb, depth);
83 return; 83 return;
84 84
85no_delete: 85no_delete:
86 end_writeback(inode); 86 clear_inode(inode);
87 dquot_drop(inode); 87 dquot_drop(inode);
88} 88}
89 89
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 8b7616ef06d8..c07b7d709447 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2270,7 +2270,6 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2270 (unsigned long long)off, (unsigned long long)len); 2270 (unsigned long long)off, (unsigned long long)len);
2271 return -EIO; 2271 return -EIO;
2272 } 2272 }
2273 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2274 while (towrite > 0) { 2273 while (towrite > 0) {
2275 tocopy = sb->s_blocksize - offset < towrite ? 2274 tocopy = sb->s_blocksize - offset < towrite ?
2276 sb->s_blocksize - offset : towrite; 2275 sb->s_blocksize - offset : towrite;
@@ -2302,16 +2301,13 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2302 blk++; 2301 blk++;
2303 } 2302 }
2304out: 2303out:
2305 if (len == towrite) { 2304 if (len == towrite)
2306 mutex_unlock(&inode->i_mutex);
2307 return err; 2305 return err;
2308 }
2309 if (inode->i_size < off + len - towrite) 2306 if (inode->i_size < off + len - towrite)
2310 i_size_write(inode, off + len - towrite); 2307 i_size_write(inode, off + len - towrite);
2311 inode->i_version++; 2308 inode->i_version++;
2312 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2309 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2313 mark_inode_dirty(inode); 2310 mark_inode_dirty(inode);
2314 mutex_unlock(&inode->i_mutex);
2315 return len - towrite; 2311 return len - towrite;
2316} 2312}
2317 2313
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 907c2b3af758..0ce3ccf7f401 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -310,7 +310,7 @@ void sysfs_evict_inode(struct inode *inode)
310 struct sysfs_dirent *sd = inode->i_private; 310 struct sysfs_dirent *sd = inode->i_private;
311 311
312 truncate_inode_pages(&inode->i_data, 0); 312 truncate_inode_pages(&inode->i_data, 0);
313 end_writeback(inode); 313 clear_inode(inode);
314 sysfs_put(sd); 314 sysfs_put(sd);
315} 315}
316 316
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3da5ce25faf0..08d0b2568cd3 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -316,7 +316,7 @@ static void sysv_evict_inode(struct inode *inode)
316 sysv_truncate(inode); 316 sysv_truncate(inode);
317 } 317 }
318 invalidate_inode_buffers(inode); 318 invalidate_inode_buffers(inode);
319 end_writeback(inode); 319 clear_inode(inode);
320 if (!inode->i_nlink) 320 if (!inode->i_nlink)
321 sysv_free_inode(inode); 321 sysv_free_inode(inode);
322} 322}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 001acccac0d6..5862dd9d2784 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -378,7 +378,7 @@ out:
378 smp_wmb(); 378 smp_wmb();
379 } 379 }
380done: 380done:
381 end_writeback(inode); 381 clear_inode(inode);
382} 382}
383 383
384static void ubifs_dirty_inode(struct inode *inode, int flags) 384static void ubifs_dirty_inode(struct inode *inode, int flags)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7d7528008359..873e1bab9c4c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -80,7 +80,7 @@ void udf_evict_inode(struct inode *inode)
80 } else 80 } else
81 truncate_inode_pages(&inode->i_data, 0); 81 truncate_inode_pages(&inode->i_data, 0);
82 invalidate_inode_buffers(inode); 82 invalidate_inode_buffers(inode);
83 end_writeback(inode); 83 clear_inode(inode);
84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
85 inode->i_size != iinfo->i_lenExtents) { 85 inode->i_size != iinfo->i_lenExtents) {
86 udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n", 86 udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cdd3953d67e..dd7c89d8a1c1 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -895,7 +895,7 @@ void ufs_evict_inode(struct inode * inode)
895 } 895 }
896 896
897 invalidate_inode_buffers(inode); 897 invalidate_inode_buffers(inode);
898 end_writeback(inode); 898 clear_inode(inode);
899 899
900 if (want_delete) { 900 if (want_delete) {
901 lock_ufs(inode->i_sb); 901 lock_ufs(inode->i_sb);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2fcfd5b0b046..0d9de41a7151 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -932,7 +932,7 @@ xfs_fs_evict_inode(
932 trace_xfs_evict_inode(ip); 932 trace_xfs_evict_inode(ip);
933 933
934 truncate_inode_pages(&inode->i_data, 0); 934 truncate_inode_pages(&inode->i_data, 0);
935 end_writeback(inode); 935 clear_inode(inode);
936 XFS_STATS_INC(vn_rele); 936 XFS_STATS_INC(vn_rele);
937 XFS_STATS_INC(vn_remove); 937 XFS_STATS_INC(vn_remove);
938 XFS_STATS_DEC(vn_active); 938 XFS_STATS_DEC(vn_active);