Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_super.c | 3
-rw-r--r--  fs/Kconfig | 18
-rw-r--r--  fs/adfs/adfs.h | 55
-rw-r--r--  fs/adfs/dir.c | 8
-rw-r--r--  fs/adfs/dir_f.c | 8
-rw-r--r--  fs/adfs/dir_fplus.c | 8
-rw-r--r--  fs/adfs/file.c | 4
-rw-r--r--  fs/adfs/inode.c | 10
-rw-r--r--  fs/adfs/map.c | 6
-rw-r--r--  fs/adfs/super.c | 17
-rw-r--r--  fs/afs/misc.c | 16
-rw-r--r--  fs/afs/vlocation.c | 2
-rw-r--r--  fs/anon_inodes.c | 15
-rw-r--r--  fs/befs/linuxvfs.c | 24
-rw-r--r--  fs/binfmt_elf.c | 8
-rw-r--r--  fs/binfmt_elf_fdpic.c | 8
-rw-r--r--  fs/bio.c | 5
-rw-r--r--  fs/btrfs/acl.c | 44
-rw-r--r--  fs/btrfs/btrfs_inode.h | 4
-rw-r--r--  fs/btrfs/ctree.h | 2
-rw-r--r--  fs/btrfs/disk-io.c | 26
-rw-r--r--  fs/btrfs/inode.c | 18
-rw-r--r--  fs/btrfs/transaction.c | 12
-rw-r--r--  fs/compat.c | 2
-rw-r--r--  fs/compat_ioctl.c | 108
-rw-r--r--  fs/debugfs/file.c | 65
-rw-r--r--  fs/debugfs/inode.c | 11
-rw-r--r--  fs/devpts/inode.c | 10
-rw-r--r--  fs/drop_caches.c | 2
-rw-r--r--  fs/efs/dir.c | 5
-rw-r--r--  fs/efs/namei.c | 9
-rw-r--r--  fs/efs/symlink.c | 7
-rw-r--r--  fs/eventpoll.c | 21
-rw-r--r--  fs/ext2/acl.c | 81
-rw-r--r--  fs/ext2/acl.h | 4
-rw-r--r--  fs/ext2/dir.c | 5
-rw-r--r--  fs/ext2/ext2.h | 8
-rw-r--r--  fs/ext2/inode.c | 4
-rw-r--r--  fs/ext2/namei.c | 5
-rw-r--r--  fs/ext2/super.c | 16
-rw-r--r--  fs/ext3/acl.c | 82
-rw-r--r--  fs/ext3/acl.h | 4
-rw-r--r--  fs/ext3/inode.c | 26
-rw-r--r--  fs/ext3/resize.c | 2
-rw-r--r--  fs/ext3/super.c | 18
-rw-r--r--  fs/ext4/Makefile | 2
-rw-r--r--  fs/ext4/acl.c | 64
-rw-r--r--  fs/ext4/acl.h | 4
-rw-r--r--  fs/ext4/ext4.h | 43
-rw-r--r--  fs/ext4/ext4_extents.h | 4
-rw-r--r--  fs/ext4/extents.c | 4
-rw-r--r--  fs/ext4/file.c | 36
-rw-r--r--  fs/ext4/fsync.c | 8
-rw-r--r--  fs/ext4/ialloc.c | 48
-rw-r--r--  fs/ext4/inode.c | 285
-rw-r--r--  fs/ext4/ioctl.c | 36
-rw-r--r--  fs/ext4/mballoc.c | 85
-rw-r--r--  fs/ext4/mballoc.h | 1
-rw-r--r--  fs/ext4/migrate.c | 8
-rw-r--r--  fs/ext4/move_extent.c | 1320
-rw-r--r--  fs/ext4/namei.c | 10
-rw-r--r--  fs/ext4/resize.c | 2
-rw-r--r--  fs/ext4/super.c | 46
-rw-r--r--  fs/fat/cache.c | 6
-rw-r--r--  fs/fat/dir.c | 31
-rw-r--r--  fs/fat/fat.h | 7
-rw-r--r--  fs/fat/fatent.c | 4
-rw-r--r--  fs/fat/file.c | 184
-rw-r--r--  fs/fat/inode.c | 30
-rw-r--r--  fs/fat/misc.c | 22
-rw-r--r--  fs/fat/namei_msdos.c | 2
-rw-r--r--  fs/fat/namei_vfat.c | 6
-rw-r--r--  fs/fcntl.c | 33
-rw-r--r--  fs/fs-writeback.c | 106
-rw-r--r--  fs/fuse/inode.c | 3
-rw-r--r--  fs/gfs2/Kconfig | 2
-rw-r--r--  fs/inode.c | 29
-rw-r--r--  fs/ioctl.c | 37
-rw-r--r--  fs/isofs/dir.c | 5
-rw-r--r--  fs/isofs/inode.c | 118
-rw-r--r--  fs/isofs/isofs.h | 27
-rw-r--r--  fs/isofs/joliet.c | 36
-rw-r--r--  fs/isofs/namei.c | 4
-rw-r--r--  fs/jbd/transaction.c | 48
-rw-r--r--  fs/jbd2/checkpoint.c | 5
-rw-r--r--  fs/jbd2/commit.c | 13
-rw-r--r--  fs/jbd2/journal.c | 69
-rw-r--r--  fs/jbd2/transaction.c | 49
-rw-r--r--  fs/jffs2/acl.c | 87
-rw-r--r--  fs/jffs2/acl.h | 4
-rw-r--r--  fs/jffs2/jffs2_fs_i.h | 4
-rw-r--r--  fs/jffs2/os-linux.h | 4
-rw-r--r--  fs/jffs2/readinode.c | 1
-rw-r--r--  fs/jffs2/scan.c | 4
-rw-r--r--  fs/jfs/acl.c | 47
-rw-r--r--  fs/jfs/jfs_extent.c | 1
-rw-r--r--  fs/jfs/jfs_incore.h | 6
-rw-r--r--  fs/jfs/super.c | 16
-rw-r--r--  fs/jfs/xattr.c | 10
-rw-r--r--  fs/lockd/clntproc.c | 4
-rw-r--r--  fs/lockd/mon.c | 19
-rw-r--r--  fs/lockd/svclock.c | 2
-rw-r--r--  fs/locks.c | 3
-rw-r--r--  fs/minix/bitmap.c | 25
-rw-r--r--  fs/minix/dir.c | 5
-rw-r--r--  fs/minix/inode.c | 4
-rw-r--r--  fs/minix/minix.h | 5
-rw-r--r--  fs/namei.c | 11
-rw-r--r--  fs/namespace.c | 90
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 8
-rw-r--r--  fs/nfs/Kconfig | 11
-rw-r--r--  fs/nfs/callback.c | 218
-rw-r--r--  fs/nfs/callback.h | 68
-rw-r--r--  fs/nfs/callback_proc.c | 127
-rw-r--r--  fs/nfs/callback_xdr.c | 280
-rw-r--r--  fs/nfs/client.c | 191
-rw-r--r--  fs/nfs/delegation.c | 32
-rw-r--r--  fs/nfs/direct.c | 9
-rw-r--r--  fs/nfs/file.c | 37
-rw-r--r--  fs/nfs/internal.h | 70
-rw-r--r--  fs/nfs/iostat.h | 6
-rw-r--r--  fs/nfs/mount_clnt.c | 337
-rw-r--r--  fs/nfs/namespace.c | 5
-rw-r--r--  fs/nfs/nfs3acl.c | 2
-rw-r--r--  fs/nfs/nfs4_fs.h | 37
-rw-r--r--  fs/nfs/nfs4proc.c | 1348
-rw-r--r--  fs/nfs/nfs4renewd.c | 6
-rw-r--r--  fs/nfs/nfs4state.c | 190
-rw-r--r--  fs/nfs/nfs4xdr.c | 1072
-rw-r--r--  fs/nfs/nfsroot.c | 5
-rw-r--r--  fs/nfs/read.c | 33
-rw-r--r--  fs/nfs/super.c | 497
-rw-r--r--  fs/nfs/unlink.c | 20
-rw-r--r--  fs/nfs/write.c | 31
-rw-r--r--  fs/nfsd/export.c | 13
-rw-r--r--  fs/nfsd/nfs3proc.c | 237
-rw-r--r--  fs/nfsd/nfs3xdr.c | 1
-rw-r--r--  fs/nfsd/nfs4callback.c | 247
-rw-r--r--  fs/nfsd/nfs4proc.c | 129
-rw-r--r--  fs/nfsd/nfs4state.c | 171
-rw-r--r--  fs/nfsd/nfs4xdr.c | 296
-rw-r--r--  fs/nfsd/nfscache.c | 33
-rw-r--r--  fs/nfsd/nfsctl.c | 294
-rw-r--r--  fs/nfsd/nfsfh.c | 6
-rw-r--r--  fs/nfsd/nfsproc.c | 198
-rw-r--r--  fs/nfsd/nfssvc.c | 12
-rw-r--r--  fs/nfsd/vfs.c | 93
-rw-r--r--  fs/nilfs2/bmap.c | 272
-rw-r--r--  fs/nilfs2/bmap.h | 135
-rw-r--r--  fs/nilfs2/btnode.c | 9
-rw-r--r--  fs/nilfs2/btnode.h | 2
-rw-r--r--  fs/nilfs2/btree.c | 366
-rw-r--r--  fs/nilfs2/btree.h | 31
-rw-r--r--  fs/nilfs2/cpfile.c | 47
-rw-r--r--  fs/nilfs2/cpfile.h | 4
-rw-r--r--  fs/nilfs2/dat.c | 36
-rw-r--r--  fs/nilfs2/dat.h | 2
-rw-r--r--  fs/nilfs2/direct.c | 139
-rw-r--r--  fs/nilfs2/direct.h | 20
-rw-r--r--  fs/nilfs2/gcinode.c | 5
-rw-r--r--  fs/nilfs2/inode.c | 26
-rw-r--r--  fs/nilfs2/ioctl.c | 35
-rw-r--r--  fs/nilfs2/mdt.c | 3
-rw-r--r--  fs/nilfs2/nilfs.h | 5
-rw-r--r--  fs/nilfs2/recovery.c | 37
-rw-r--r--  fs/nilfs2/segbuf.c | 3
-rw-r--r--  fs/nilfs2/seglist.h | 85
-rw-r--r--  fs/nilfs2/segment.c | 130
-rw-r--r--  fs/nilfs2/segment.h | 12
-rw-r--r--  fs/nilfs2/sufile.c | 119
-rw-r--r--  fs/nilfs2/sufile.h | 62
-rw-r--r--  fs/nilfs2/super.c | 19
-rw-r--r--  fs/nilfs2/the_nilfs.c | 1
-rw-r--r--  fs/nls/nls_base.c | 166
-rw-r--r--  fs/nls/nls_utf8.c | 13
-rw-r--r--  fs/notify/inotify/inotify.h | 3
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 2
-rw-r--r--  fs/notify/inotify/inotify_user.c | 32
-rw-r--r--  fs/ntfs/inode.c | 3
-rw-r--r--  fs/ntfs/logfile.c | 3
-rw-r--r--  fs/ocfs2/alloc.c | 80
-rw-r--r--  fs/ocfs2/blockcheck.c | 184
-rw-r--r--  fs/ocfs2/blockcheck.h | 29
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 35
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 7
-rw-r--r--  fs/ocfs2/dir.c | 21
-rw-r--r--  fs/ocfs2/dlmglue.c | 144
-rw-r--r--  fs/ocfs2/dlmglue.h | 31
-rw-r--r--  fs/ocfs2/file.c | 62
-rw-r--r--  fs/ocfs2/inode.c | 11
-rw-r--r--  fs/ocfs2/journal.c | 126
-rw-r--r--  fs/ocfs2/journal.h | 4
-rw-r--r--  fs/ocfs2/namei.c | 15
-rw-r--r--  fs/ocfs2/ocfs2.h | 26
-rw-r--r--  fs/ocfs2/ocfs2_lockid.h | 5
-rw-r--r--  fs/ocfs2/quota_global.c | 4
-rw-r--r--  fs/ocfs2/quota_local.c | 21
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 11
-rw-r--r--  fs/ocfs2/stack_user.c | 8
-rw-r--r--  fs/ocfs2/stackglue.c | 13
-rw-r--r--  fs/ocfs2/stackglue.h | 6
-rw-r--r--  fs/ocfs2/suballoc.c | 28
-rw-r--r--  fs/ocfs2/super.c | 103
-rw-r--r--  fs/ocfs2/sysfile.c | 19
-rw-r--r--  fs/ocfs2/xattr.c | 5
-rw-r--r--  fs/open.c | 58
-rw-r--r--  fs/proc/Makefile | 1
-rw-r--r--  fs/proc/base.c | 19
-rw-r--r--  fs/proc/meminfo.c | 4
-rw-r--r--  fs/proc/page.c | 162
-rw-r--r--  fs/proc/proc_devtree.c | 10
-rw-r--r--  fs/proc/softirqs.c | 44
-rw-r--r--  fs/proc/stat.c | 15
-rw-r--r--  fs/proc/vmcore.c | 7
-rw-r--r--  fs/ramfs/inode.c | 9
-rw-r--r--  fs/reiserfs/do_balan.c | 5
-rw-r--r--  fs/reiserfs/inode.c | 4
-rw-r--r--  fs/reiserfs/lbalance.c | 10
-rw-r--r--  fs/reiserfs/resize.c | 1
-rw-r--r--  fs/reiserfs/super.c | 24
-rw-r--r--  fs/reiserfs/xattr_acl.c | 58
-rw-r--r--  fs/select.c | 40
-rw-r--r--  fs/seq_file.c | 20
-rw-r--r--  fs/super.c | 21
-rw-r--r--  fs/sysfs/symlink.c | 5
-rw-r--r--  fs/sysv/dir.c | 5
-rw-r--r--  fs/sysv/inode.c | 11
-rw-r--r--  fs/ubifs/budget.c | 4
-rw-r--r--  fs/ubifs/dir.c | 19
-rw-r--r--  fs/ubifs/io.c | 34
-rw-r--r--  fs/ubifs/recovery.c | 31
-rw-r--r--  fs/ubifs/super.c | 76
-rw-r--r--  fs/ubifs/ubifs.h | 13
-rw-r--r--  fs/ubifs/xattr.c | 2
-rw-r--r--  fs/udf/balloc.c | 9
-rw-r--r--  fs/udf/lowlevel.c | 7
-rw-r--r--  fs/ufs/inode.c | 10
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 73
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 2
-rw-r--r--  fs/xfs/xfs_acl.h | 4
-rw-r--r--  fs/xfs/xfs_iget.c | 2
-rw-r--r--  fs/xfs/xfs_inode.h | 5
243 files changed, 10040 insertions, 4031 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index ab5547ff29a1..38d695d66a0b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,7 +37,6 @@
37#include <linux/mount.h> 37#include <linux/mount.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/smp_lock.h>
41#include <net/9p/9p.h> 40#include <net/9p/9p.h>
42#include <net/9p/client.h> 41#include <net/9p/client.h>
43 42
@@ -231,10 +230,8 @@ v9fs_umount_begin(struct super_block *sb)
231{ 230{
232 struct v9fs_session_info *v9ses; 231 struct v9fs_session_info *v9ses;
233 232
234 lock_kernel();
235 v9ses = sb->s_fs_info; 233 v9ses = sb->s_fs_info;
236 v9fs_session_cancel(v9ses); 234 v9fs_session_cancel(v9ses);
237 unlock_kernel();
238} 235}
239 236
240static const struct super_operations v9fs_super_ops = { 237static const struct super_operations v9fs_super_ops = {
diff --git a/fs/Kconfig b/fs/Kconfig
index 525da2e8f73b..a97263be6a91 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -39,6 +39,13 @@ config FS_POSIX_ACL
39 bool 39 bool
40 default n 40 default n
41 41
42source "fs/xfs/Kconfig"
43source "fs/gfs2/Kconfig"
44source "fs/ocfs2/Kconfig"
45source "fs/btrfs/Kconfig"
46
47endif # BLOCK
48
42config FILE_LOCKING 49config FILE_LOCKING
43 bool "Enable POSIX file locking API" if EMBEDDED 50 bool "Enable POSIX file locking API" if EMBEDDED
44 default y 51 default y
@@ -47,13 +54,6 @@ config FILE_LOCKING
47 for filesystems like NFS and for the flock() system 54 for filesystems like NFS and for the flock() system
48 call. Disabling this option saves about 11k. 55 call. Disabling this option saves about 11k.
49 56
50source "fs/xfs/Kconfig"
51source "fs/gfs2/Kconfig"
52source "fs/ocfs2/Kconfig"
53source "fs/btrfs/Kconfig"
54
55endif # BLOCK
56
57source "fs/notify/Kconfig" 57source "fs/notify/Kconfig"
58 58
59source "fs/quota/Kconfig" 59source "fs/quota/Kconfig"
@@ -134,7 +134,7 @@ config TMPFS_POSIX_ACL
134config HUGETLBFS 134config HUGETLBFS
135 bool "HugeTLB file system support" 135 bool "HugeTLB file system support"
136 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ 136 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
137 (S390 && 64BIT) || BROKEN 137 (S390 && 64BIT) || SYS_SUPPORTS_HUGETLBFS || BROKEN
138 help 138 help
139 hugetlbfs is a filesystem backing for HugeTLB pages, based on 139 hugetlbfs is a filesystem backing for HugeTLB pages, based on
140 ramfs. For architectures that support it, say Y here and read 140 ramfs. For architectures that support it, say Y here and read
@@ -236,10 +236,12 @@ source "fs/nfsd/Kconfig"
236 236
237config LOCKD 237config LOCKD
238 tristate 238 tristate
239 depends on FILE_LOCKING
239 240
240config LOCKD_V4 241config LOCKD_V4
241 bool 242 bool
242 depends on NFSD_V3 || NFS_V3 243 depends on NFSD_V3 || NFS_V3
244 depends on FILE_LOCKING
243 default y 245 default y
244 246
245config EXPORTFS 247config EXPORTFS
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index a6665f37f456..9cc18775b832 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -1,3 +1,6 @@
1#include <linux/fs.h>
2#include <linux/adfs_fs.h>
3
1/* Internal data structures for ADFS */ 4/* Internal data structures for ADFS */
2 5
3#define ADFS_FREE_FRAG 0 6#define ADFS_FREE_FRAG 0
@@ -17,6 +20,58 @@
17struct buffer_head; 20struct buffer_head;
18 21
19/* 22/*
23 * adfs file system inode data in memory
24 */
25struct adfs_inode_info {
26 loff_t mmu_private;
27 unsigned long parent_id; /* object id of parent */
28 __u32 loadaddr; /* RISC OS load address */
29 __u32 execaddr; /* RISC OS exec address */
30 unsigned int filetype; /* RISC OS file type */
31 unsigned int attr; /* RISC OS permissions */
32 unsigned int stamped:1; /* RISC OS file has date/time */
33 struct inode vfs_inode;
34};
35
36/*
37 * Forward-declare this
38 */
39struct adfs_discmap;
40struct adfs_dir_ops;
41
42/*
43 * ADFS file system superblock data in memory
44 */
45struct adfs_sb_info {
46 struct adfs_discmap *s_map; /* bh list containing map */
47 struct adfs_dir_ops *s_dir; /* directory operations */
48
49 uid_t s_uid; /* owner uid */
50 gid_t s_gid; /* owner gid */
51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
52 umode_t s_other_mask; /* ADFS other perm -> unix perm */
53
54 __u32 s_ids_per_zone; /* max. no ids in one zone */
55 __u32 s_idlen; /* length of ID in map */
56 __u32 s_map_size; /* sector size of a map */
57 unsigned long s_size; /* total size (in blocks) of this fs */
58 signed int s_map2blk; /* shift left by this for map->sector */
59 unsigned int s_log2sharesize;/* log2 share size */
60 __le32 s_version; /* disc format version */
61 unsigned int s_namelen; /* maximum number of characters in name */
62};
63
64static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb)
65{
66 return sb->s_fs_info;
67}
68
69static inline struct adfs_inode_info *ADFS_I(struct inode *inode)
70{
71 return container_of(inode, struct adfs_inode_info, vfs_inode);
72}
73
74/*
20 * Directory handling 75 * Directory handling
21 */ 76 */
22struct adfs_dir { 77struct adfs_dir {
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 4d4073447d1a..23aa52f548a0 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -9,15 +9,7 @@
9 * 9 *
10 * Common directory handling for ADFS 10 * Common directory handling for ADFS
11 */ 11 */
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/adfs_fs.h>
15#include <linux/time.h>
16#include <linux/stat.h>
17#include <linux/spinlock.h>
18#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
19#include <linux/buffer_head.h> /* for file_fsync() */
20
21#include "adfs.h" 13#include "adfs.h"
22 14
23/* 15/*
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index 31df6adf0de6..bafc71222e25 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -9,15 +9,7 @@
9 * 9 *
10 * E and F format directory handling 10 * E and F format directory handling
11 */ 11 */
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/adfs_fs.h>
15#include <linux/time.h>
16#include <linux/stat.h>
17#include <linux/spinlock.h>
18#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
19#include <linux/string.h>
20
21#include "adfs.h" 13#include "adfs.h"
22#include "dir_f.h" 14#include "dir_f.h"
23 15
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 139e0f345f18..1796bb352d05 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -7,15 +7,7 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/adfs_fs.h>
13#include <linux/time.h>
14#include <linux/stat.h>
15#include <linux/spinlock.h>
16#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
17#include <linux/string.h>
18
19#include "adfs.h" 11#include "adfs.h"
20#include "dir_fplus.h" 12#include "dir_fplus.h"
21 13
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 8224d54a2afb..005ea34d1758 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -19,10 +19,6 @@
19 * 19 *
20 * adfs regular file handling primitives 20 * adfs regular file handling primitives
21 */ 21 */
22#include <linux/fs.h>
23#include <linux/buffer_head.h> /* for file_fsync() */
24#include <linux/adfs_fs.h>
25
26#include "adfs.h" 22#include "adfs.h"
27 23
28const struct file_operations adfs_file_operations = { 24const struct file_operations adfs_file_operations = {
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 05b3a677201d..798cb071d132 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -7,17 +7,8 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/adfs_fs.h>
13#include <linux/time.h>
14#include <linux/stat.h>
15#include <linux/string.h>
16#include <linux/mm.h>
17#include <linux/smp_lock.h> 10#include <linux/smp_lock.h>
18#include <linux/module.h>
19#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
20
21#include "adfs.h" 12#include "adfs.h"
22 13
23/* 14/*
@@ -395,4 +386,3 @@ int adfs_write_inode(struct inode *inode, int wait)
395 unlock_kernel(); 386 unlock_kernel();
396 return ret; 387 return ret;
397} 388}
398MODULE_LICENSE("GPL");
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 568081b93f73..d1a5932bb0f1 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -7,14 +7,8 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/errno.h>
11#include <linux/fs.h>
12#include <linux/adfs_fs.h>
13#include <linux/spinlock.h>
14#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
15
16#include <asm/unaligned.h> 11#include <asm/unaligned.h>
17
18#include "adfs.h" 12#include "adfs.h"
19 13
20/* 14/*
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ec5aaf47aa7..aad92f0a1048 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -8,26 +8,12 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/errno.h>
12#include <linux/fs.h>
13#include <linux/adfs_fs.h>
14#include <linux/slab.h>
15#include <linux/time.h>
16#include <linux/stat.h>
17#include <linux/string.h>
18#include <linux/init.h> 11#include <linux/init.h>
19#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
20#include <linux/vfs.h>
21#include <linux/parser.h> 13#include <linux/parser.h>
22#include <linux/bitops.h>
23#include <linux/mount.h> 14#include <linux/mount.h>
24#include <linux/seq_file.h> 15#include <linux/seq_file.h>
25 16#include <linux/statfs.h>
26#include <asm/uaccess.h>
27#include <asm/system.h>
28
29#include <stdarg.h>
30
31#include "adfs.h" 17#include "adfs.h"
32#include "dir_f.h" 18#include "dir_f.h"
33#include "dir_fplus.h" 19#include "dir_fplus.h"
@@ -534,3 +520,4 @@ static void __exit exit_adfs_fs(void)
534 520
535module_init(init_adfs_fs) 521module_init(init_adfs_fs)
536module_exit(exit_adfs_fs) 522module_exit(exit_adfs_fs)
523MODULE_LICENSE("GPL");
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 2d33a5f7d218..0dd4dafee10b 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -12,6 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <rxrpc/packet.h>
15#include "internal.h" 16#include "internal.h"
16#include "afs_fs.h" 17#include "afs_fs.h"
17 18
@@ -54,6 +55,21 @@ int afs_abort_to_error(u32 abort_code)
54 case 0x2f6df24: return -ENOLCK; 55 case 0x2f6df24: return -ENOLCK;
55 case 0x2f6df26: return -ENOTEMPTY; 56 case 0x2f6df26: return -ENOTEMPTY;
56 case 0x2f6df78: return -EDQUOT; 57 case 0x2f6df78: return -EDQUOT;
58
59 case RXKADINCONSISTENCY: return -EPROTO;
60 case RXKADPACKETSHORT: return -EPROTO;
61 case RXKADLEVELFAIL: return -EKEYREJECTED;
62 case RXKADTICKETLEN: return -EKEYREJECTED;
63 case RXKADOUTOFSEQUENCE: return -EPROTO;
64 case RXKADNOAUTH: return -EKEYREJECTED;
65 case RXKADBADKEY: return -EKEYREJECTED;
66 case RXKADBADTICKET: return -EKEYREJECTED;
67 case RXKADUNKNOWNKEY: return -EKEYREJECTED;
68 case RXKADEXPIRED: return -EKEYEXPIRED;
69 case RXKADSEALEDINCON: return -EKEYREJECTED;
70 case RXKADDATALEN: return -EKEYREJECTED;
71 case RXKADILLEGALLEVEL: return -EKEYREJECTED;
72
57 default: return -EREMOTEIO; 73 default: return -EREMOTEIO;
58 } 74 }
59} 75}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index ec2a7431e458..6e689208def2 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -65,6 +65,8 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
65 goto out; 65 goto out;
66 goto rotate; 66 goto rotate;
67 case -ENOMEDIUM: 67 case -ENOMEDIUM:
68 case -EKEYREJECTED:
69 case -EKEYEXPIRED:
68 goto out; 70 goto out;
69 default: 71 default:
70 ret = -EIO; 72 ret = -EIO;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 1dd96d4406c0..47d4a01c5393 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
52 .d_delete = anon_inodefs_delete_dentry, 52 .d_delete = anon_inodefs_delete_dentry,
53}; 53};
54 54
55/*
56 * nop .set_page_dirty method so that people can use .page_mkwrite on
57 * anon inodes.
58 */
59static int anon_set_page_dirty(struct page *page)
60{
61 return 0;
62};
63
64static const struct address_space_operations anon_aops = {
65 .set_page_dirty = anon_set_page_dirty,
66};
67
55/** 68/**
56 * anon_inode_getfd - creates a new file instance by hooking it up to an 69 * anon_inode_getfd - creates a new file instance by hooking it up to an
57 * anonymous inode, and a dentry that describe the "class" 70 * anonymous inode, and a dentry that describe the "class"
@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void)
151 164
152 inode->i_fop = &anon_inode_fops; 165 inode->i_fop = &anon_inode_fops;
153 166
167 inode->i_mapping->a_ops = &anon_aops;
168
154 /* 169 /*
155 * Mark the inode dirty from the very beginning, 170 * Mark the inode dirty from the very beginning,
156 * that way it will never be moved to the dirty 171 * that way it will never be moved to the dirty
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 9367b6297d84..615d5496fe0f 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
513{ 513{
514 struct nls_table *nls = BEFS_SB(sb)->nls; 514 struct nls_table *nls = BEFS_SB(sb)->nls;
515 int i, o; 515 int i, o;
516 wchar_t uni; 516 unicode_t uni;
517 int unilen, utflen; 517 int unilen, utflen;
518 char *result; 518 char *result;
519 /* The utf8->nls conversion won't make the final nls string bigger 519 /* The utf8->nls conversion won't make the final nls string bigger
@@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in,
539 for (i = o = 0; i < in_len; i += utflen, o += unilen) { 539 for (i = o = 0; i < in_len; i += utflen, o += unilen) {
540 540
541 /* convert from UTF-8 to Unicode */ 541 /* convert from UTF-8 to Unicode */
542 utflen = utf8_mbtowc(&uni, &in[i], in_len - i); 542 utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
543 if (utflen < 0) { 543 if (utflen < 0)
544 goto conv_err; 544 goto conv_err;
545 }
546 545
547 /* convert from Unicode to nls */ 546 /* convert from Unicode to nls */
547 if (uni > MAX_WCHAR_T)
548 goto conv_err;
548 unilen = nls->uni2char(uni, &result[o], in_len - o); 549 unilen = nls->uni2char(uni, &result[o], in_len - o);
549 if (unilen < 0) { 550 if (unilen < 0)
550 goto conv_err; 551 goto conv_err;
551 }
552 } 552 }
553 result[o] = '\0'; 553 result[o] = '\0';
554 *out_len = o; 554 *out_len = o;
@@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in,
619 619
620 /* convert from nls to unicode */ 620 /* convert from nls to unicode */
621 unilen = nls->char2uni(&in[i], in_len - i, &uni); 621 unilen = nls->char2uni(&in[i], in_len - i, &uni);
622 if (unilen < 0) { 622 if (unilen < 0)
623 goto conv_err; 623 goto conv_err;
624 }
625 624
626 /* convert from unicode to UTF-8 */ 625 /* convert from unicode to UTF-8 */
627 utflen = utf8_wctomb(&result[o], uni, 3); 626 utflen = utf32_to_utf8(uni, &result[o], 3);
628 if (utflen <= 0) { 627 if (utflen <= 0)
629 goto conv_err; 628 goto conv_err;
630 }
631 } 629 }
632 630
633 result[o] = '\0'; 631 result[o] = '\0';
@@ -737,8 +735,6 @@ parse_options(char *options, befs_mount_options * opts)
737static void 735static void
738befs_put_super(struct super_block *sb) 736befs_put_super(struct super_block *sb)
739{ 737{
740 lock_kernel();
741
742 kfree(BEFS_SB(sb)->mount_opts.iocharset); 738 kfree(BEFS_SB(sb)->mount_opts.iocharset);
743 BEFS_SB(sb)->mount_opts.iocharset = NULL; 739 BEFS_SB(sb)->mount_opts.iocharset = NULL;
744 740
@@ -749,8 +745,6 @@ befs_put_super(struct super_block *sb)
749 745
750 kfree(sb->s_fs_info); 746 kfree(sb->s_fs_info);
751 sb->s_fs_info = NULL; 747 sb->s_fs_info = NULL;
752
753 unlock_kernel();
754} 748}
755 749
756/* Allocate private field of the superblock, fill it. 750/* Allocate private field of the superblock, fill it.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 40381df34869..9fa212b014a5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1340,8 +1340,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1340 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1340 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1341 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1341 prstatus->pr_sigpend = p->pending.signal.sig[0];
1342 prstatus->pr_sighold = p->blocked.sig[0]; 1342 prstatus->pr_sighold = p->blocked.sig[0];
1343 rcu_read_lock();
1344 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1345 rcu_read_unlock();
1343 prstatus->pr_pid = task_pid_vnr(p); 1346 prstatus->pr_pid = task_pid_vnr(p);
1344 prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1345 prstatus->pr_pgrp = task_pgrp_vnr(p); 1347 prstatus->pr_pgrp = task_pgrp_vnr(p);
1346 prstatus->pr_sid = task_session_vnr(p); 1348 prstatus->pr_sid = task_session_vnr(p);
1347 if (thread_group_leader(p)) { 1349 if (thread_group_leader(p)) {
@@ -1382,8 +1384,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1382 psinfo->pr_psargs[i] = ' '; 1384 psinfo->pr_psargs[i] = ' ';
1383 psinfo->pr_psargs[len] = 0; 1385 psinfo->pr_psargs[len] = 0;
1384 1386
1387 rcu_read_lock();
1388 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1389 rcu_read_unlock();
1385 psinfo->pr_pid = task_pid_vnr(p); 1390 psinfo->pr_pid = task_pid_vnr(p);
1386 psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1387 psinfo->pr_pgrp = task_pgrp_vnr(p); 1391 psinfo->pr_pgrp = task_pgrp_vnr(p);
1388 psinfo->pr_sid = task_session_vnr(p); 1392 psinfo->pr_sid = task_session_vnr(p);
1389 1393
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdb66faa24f1..20fbeced472b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1387,8 +1387,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1387 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1387 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1388 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1388 prstatus->pr_sigpend = p->pending.signal.sig[0];
1389 prstatus->pr_sighold = p->blocked.sig[0]; 1389 prstatus->pr_sighold = p->blocked.sig[0];
1390 rcu_read_lock();
1391 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1392 rcu_read_unlock();
1390 prstatus->pr_pid = task_pid_vnr(p); 1393 prstatus->pr_pid = task_pid_vnr(p);
1391 prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1392 prstatus->pr_pgrp = task_pgrp_vnr(p); 1394 prstatus->pr_pgrp = task_pgrp_vnr(p);
1393 prstatus->pr_sid = task_session_vnr(p); 1395 prstatus->pr_sid = task_session_vnr(p);
1394 if (thread_group_leader(p)) { 1396 if (thread_group_leader(p)) {
@@ -1432,8 +1434,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1432 psinfo->pr_psargs[i] = ' '; 1434 psinfo->pr_psargs[i] = ' ';
1433 psinfo->pr_psargs[len] = 0; 1435 psinfo->pr_psargs[len] = 0;
1434 1436
1437 rcu_read_lock();
1438 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1439 rcu_read_unlock();
1435 psinfo->pr_pid = task_pid_vnr(p); 1440 psinfo->pr_pid = task_pid_vnr(p);
1436 psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1437 psinfo->pr_pgrp = task_pgrp_vnr(p); 1441 psinfo->pr_pgrp = task_pgrp_vnr(p);
1438 psinfo->pr_sid = task_session_vnr(p); 1442 psinfo->pr_sid = task_session_vnr(p);
1439 1443
diff --git a/fs/bio.c b/fs/bio.c
index 59000215e59b..24c914043532 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 28#include <scsi/sg.h> /* for struct sg_iovec */
30 29
31#include <trace/events/block.h> 30#include <trace/events/block.h>
@@ -358,9 +357,9 @@ static void bio_kmalloc_destructor(struct bio *bio)
358 * 357 *
359 * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate 358 * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
360 * a bio. This is due to the mempool guarantees. To make this work, callers 359 * a bio. This is due to the mempool guarantees. To make this work, callers
361 * must never allocate more than 1 bio at the time from this pool. Callers 360 * must never allocate more than 1 bio at a time from this pool. Callers
362 * that need to allocate more than 1 bio must always submit the previously 361 * that need to allocate more than 1 bio must always submit the previously
363 * allocate bio for IO before attempting to allocate a new one. Failure to 362 * allocated bio for IO before attempting to allocate a new one. Failure to
364 * do so can cause livelocks under memory pressure. 363 * do so can cause livelocks under memory pressure.
365 * 364 *
366 **/ 365 **/
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 603972576f0f..f128427b995b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
29 29
30#ifdef CONFIG_FS_POSIX_ACL 30#ifdef CONFIG_FS_POSIX_ACL
31 31
32static void btrfs_update_cached_acl(struct inode *inode,
33 struct posix_acl **p_acl,
34 struct posix_acl *acl)
35{
36 spin_lock(&inode->i_lock);
37 if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
38 posix_acl_release(*p_acl);
39 *p_acl = posix_acl_dup(acl);
40 spin_unlock(&inode->i_lock);
41}
42
43static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 32static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
44{ 33{
45 int size; 34 int size;
46 const char *name; 35 const char *name;
47 char *value = NULL; 36 char *value = NULL;
48 struct posix_acl *acl = NULL, **p_acl; 37 struct posix_acl *acl;
38
39 acl = get_cached_acl(inode, type);
40 if (acl != ACL_NOT_CACHED)
41 return acl;
49 42
50 switch (type) { 43 switch (type) {
51 case ACL_TYPE_ACCESS: 44 case ACL_TYPE_ACCESS:
52 name = POSIX_ACL_XATTR_ACCESS; 45 name = POSIX_ACL_XATTR_ACCESS;
53 p_acl = &BTRFS_I(inode)->i_acl;
54 break; 46 break;
55 case ACL_TYPE_DEFAULT: 47 case ACL_TYPE_DEFAULT:
56 name = POSIX_ACL_XATTR_DEFAULT; 48 name = POSIX_ACL_XATTR_DEFAULT;
57 p_acl = &BTRFS_I(inode)->i_default_acl;
58 break; 49 break;
59 default: 50 default:
60 return ERR_PTR(-EINVAL); 51 BUG();
61 } 52 }
62 53
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
68 spin_lock(&inode->i_lock);
69 acl = *p_acl;
70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
72 spin_unlock(&inode->i_lock);
73
74 if (acl != BTRFS_ACL_NOT_CACHED)
75 return acl;
76
77 size = __btrfs_getxattr(inode, name, "", 0); 54 size = __btrfs_getxattr(inode, name, "", 0);
78 if (size > 0) { 55 if (size > 0) {
79 value = kzalloc(size, GFP_NOFS); 56 value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
82 size = __btrfs_getxattr(inode, name, value, size); 59 size = __btrfs_getxattr(inode, name, value, size);
83 if (size > 0) { 60 if (size > 0) {
84 acl = posix_acl_from_xattr(value, size); 61 acl = posix_acl_from_xattr(value, size);
85 btrfs_update_cached_acl(inode, p_acl, acl); 62 set_cached_acl(inode, type, acl);
86 } 63 }
87 kfree(value); 64 kfree(value);
88 } else if (size == -ENOENT || size == -ENODATA || size == 0) { 65 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */ 66 /* FIXME, who returns -ENOENT? I think nobody */
90 acl = NULL; 67 acl = NULL;
91 btrfs_update_cached_acl(inode, p_acl, acl); 68 set_cached_acl(inode, type, acl);
92 } else { 69 } else {
93 acl = ERR_PTR(-EIO); 70 acl = ERR_PTR(-EIO);
94 } 71 }
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
121{ 98{
122 int ret, size = 0; 99 int ret, size = 0;
123 const char *name; 100 const char *name;
124 struct posix_acl **p_acl;
125 char *value = NULL; 101 char *value = NULL;
126 mode_t mode; 102 mode_t mode;
127 103
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
141 ret = 0; 117 ret = 0;
142 inode->i_mode = mode; 118 inode->i_mode = mode;
143 name = POSIX_ACL_XATTR_ACCESS; 119 name = POSIX_ACL_XATTR_ACCESS;
144 p_acl = &BTRFS_I(inode)->i_acl;
145 break; 120 break;
146 case ACL_TYPE_DEFAULT: 121 case ACL_TYPE_DEFAULT:
147 if (!S_ISDIR(inode->i_mode)) 122 if (!S_ISDIR(inode->i_mode))
148 return acl ? -EINVAL : 0; 123 return acl ? -EINVAL : 0;
149 name = POSIX_ACL_XATTR_DEFAULT; 124 name = POSIX_ACL_XATTR_DEFAULT;
150 p_acl = &BTRFS_I(inode)->i_default_acl;
151 break; 125 break;
152 default: 126 default:
153 return -EINVAL; 127 return -EINVAL;
@@ -172,7 +146,7 @@ out:
172 kfree(value); 146 kfree(value);
173 147
174 if (!ret) 148 if (!ret)
175 btrfs_update_cached_acl(inode, p_acl, acl); 149 set_cached_acl(inode, type, acl);
176 150
177 return ret; 151 return ret;
178} 152}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index acb4f3517582..ea1ea0af8c0e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -53,10 +53,6 @@ struct btrfs_inode {
53 /* used to order data wrt metadata */ 53 /* used to order data wrt metadata */
54 struct btrfs_ordered_inode_tree ordered_tree; 54 struct btrfs_ordered_inode_tree ordered_tree;
55 55
56 /* standard acl pointers */
57 struct posix_acl *i_acl;
58 struct posix_acl *i_default_acl;
59
60 /* for keeping track of orphaned inodes */ 56 /* for keeping track of orphaned inodes */
61 struct list_head i_orphan; 57 struct list_head i_orphan;
62 58
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..2779c2f5360a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
41 41
42#define BTRFS_MAGIC "_BHRfS_M" 42#define BTRFS_MAGIC "_BHRfS_M"
43 43
44#define BTRFS_ACL_NOT_CACHED ((void *)-1)
45
46#define BTRFS_MAX_LEVEL 8 44#define BTRFS_MAX_LEVEL 8
47 45
48#define BTRFS_COMPAT_EXTENT_TREE_V0 46#define BTRFS_COMPAT_EXTENT_TREE_V0
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d50d49d990a..d28d29c95f7c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -42,6 +42,8 @@
42static struct extent_io_ops btree_extent_io_ops; 42static struct extent_io_ops btree_extent_io_ops;
43static void end_workqueue_fn(struct btrfs_work *work); 43static void end_workqueue_fn(struct btrfs_work *work);
44 44
45static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
46
45/* 47/*
46 * end_io_wq structs are used to do processing in task context when an IO is 48 * end_io_wq structs are used to do processing in task context when an IO is
47 * complete. This is used during reads to verify checksums, and it is used 49 * complete. This is used during reads to verify checksums, and it is used
@@ -1342,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1342 free_extent_map(em); 1344 free_extent_map(em);
1343} 1345}
1344 1346
1347/*
1348 * If this fails, caller must call bdi_destroy() to get rid of the
1349 * bdi again.
1350 */
1345static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) 1351static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1346{ 1352{
1347 bdi_init(bdi); 1353 int err;
1354
1355 bdi->capabilities = BDI_CAP_MAP_COPY;
1356 err = bdi_init(bdi);
1357 if (err)
1358 return err;
1359
1360 err = bdi_register(bdi, NULL, "btrfs-%d",
1361 atomic_inc_return(&btrfs_bdi_num));
1362 if (err)
1363 return err;
1364
1348 bdi->ra_pages = default_backing_dev_info.ra_pages; 1365 bdi->ra_pages = default_backing_dev_info.ra_pages;
1349 bdi->state = 0;
1350 bdi->capabilities = default_backing_dev_info.capabilities;
1351 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1366 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1352 bdi->unplug_io_data = info; 1367 bdi->unplug_io_data = info;
1353 bdi->congested_fn = btrfs_congested_fn; 1368 bdi->congested_fn = btrfs_congested_fn;
@@ -1569,7 +1584,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1569 fs_info->sb = sb; 1584 fs_info->sb = sb;
1570 fs_info->max_extent = (u64)-1; 1585 fs_info->max_extent = (u64)-1;
1571 fs_info->max_inline = 8192 * 1024; 1586 fs_info->max_inline = 8192 * 1024;
1572 setup_bdi(fs_info, &fs_info->bdi); 1587 if (setup_bdi(fs_info, &fs_info->bdi))
1588 goto fail_bdi;
1573 fs_info->btree_inode = new_inode(sb); 1589 fs_info->btree_inode = new_inode(sb);
1574 fs_info->btree_inode->i_ino = 1; 1590 fs_info->btree_inode->i_ino = 1;
1575 fs_info->btree_inode->i_nlink = 1; 1591 fs_info->btree_inode->i_nlink = 1;
@@ -1946,8 +1962,8 @@ fail_iput:
1946 1962
1947 btrfs_close_devices(fs_info->fs_devices); 1963 btrfs_close_devices(fs_info->fs_devices);
1948 btrfs_mapping_tree_free(&fs_info->mapping_tree); 1964 btrfs_mapping_tree_free(&fs_info->mapping_tree);
1965fail_bdi:
1949 bdi_destroy(&fs_info->bdi); 1966 bdi_destroy(&fs_info->bdi);
1950
1951fail: 1967fail:
1952 kfree(extent_root); 1968 kfree(extent_root);
1953 kfree(tree_root); 1969 kfree(tree_root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8612b3a09811..dbe1aabf96cd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2122 * any xattrs or acls 2122 * any xattrs or acls
2123 */ 2123 */
2124 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2124 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2125 if (!maybe_acls) { 2125 if (!maybe_acls)
2126 BTRFS_I(inode)->i_acl = NULL; 2126 cache_no_acl(inode);
2127 BTRFS_I(inode)->i_default_acl = NULL;
2128 }
2129 2127
2130 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2128 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2131 alloc_group_block, 0); 2129 alloc_group_block, 0);
@@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3141{ 3139{
3142 struct btrfs_inode *bi = BTRFS_I(inode); 3140 struct btrfs_inode *bi = BTRFS_I(inode);
3143 3141
3144 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3145 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3146
3147 bi->generation = 0; 3142 bi->generation = 0;
3148 bi->sequence = 0; 3143 bi->sequence = 0;
3149 bi->last_trans = 0; 3144 bi->last_trans = 0;
@@ -4640,8 +4635,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4640 ei->last_trans = 0; 4635 ei->last_trans = 0;
4641 ei->logged_trans = 0; 4636 ei->logged_trans = 0;
4642 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 4637 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
4643 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4644 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4645 INIT_LIST_HEAD(&ei->i_orphan); 4638 INIT_LIST_HEAD(&ei->i_orphan);
4646 INIT_LIST_HEAD(&ei->ordered_operations); 4639 INIT_LIST_HEAD(&ei->ordered_operations);
4647 return &ei->vfs_inode; 4640 return &ei->vfs_inode;
@@ -4655,13 +4648,6 @@ void btrfs_destroy_inode(struct inode *inode)
4655 WARN_ON(!list_empty(&inode->i_dentry)); 4648 WARN_ON(!list_empty(&inode->i_dentry));
4656 WARN_ON(inode->i_data.nrpages); 4649 WARN_ON(inode->i_data.nrpages);
4657 4650
4658 if (BTRFS_I(inode)->i_acl &&
4659 BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
4660 posix_acl_release(BTRFS_I(inode)->i_acl);
4661 if (BTRFS_I(inode)->i_default_acl &&
4662 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4663 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4664
4665 /* 4651 /*
4666 * Make sure we're properly removed from the ordered operation 4652 * Make sure we're properly removed from the ordered operation
4667 * lists. 4653 * lists.
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2e177d7f4bb9..4e83457ea253 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -543,13 +543,13 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
543 btrfs_free_log(trans, root); 543 btrfs_free_log(trans, root);
544 btrfs_update_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
545 545
546 if (root->commit_root == root->node) 546 if (root->commit_root != root->node) {
547 continue; 547 free_extent_buffer(root->commit_root);
548 548 root->commit_root = btrfs_root_node(root);
549 free_extent_buffer(root->commit_root); 549 btrfs_set_root_node(&root->root_item,
550 root->commit_root = btrfs_root_node(root); 550 root->node);
551 }
551 552
552 btrfs_set_root_node(&root->root_item, root->node);
553 err = btrfs_update_root(trans, fs_info->tree_root, 553 err = btrfs_update_root(trans, fs_info->tree_root,
554 &root->root_key, 554 &root->root_key,
555 &root->root_item); 555 &root->root_item);
diff --git a/fs/compat.c b/fs/compat.c
index 6aefb776dfeb..cdd51a3a7c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
471 ret = sys_fcntl(fd, cmd, (unsigned long)&f); 471 ret = sys_fcntl(fd, cmd, (unsigned long)&f);
472 set_fs(old_fs); 472 set_fs(old_fs);
473 if (cmd == F_GETLK && ret == 0) { 473 if (cmd == F_GETLK && ret == 0) {
474 /* GETLK was successfule and we need to return the data... 474 /* GETLK was successful and we need to return the data...
475 * but it needs to fit in the compat structure. 475 * but it needs to fit in the compat structure.
476 * l_start shouldn't be too big, unless the original 476 * l_start shouldn't be too big, unless the original
477 * start + end is greater than COMPAT_OFF_T_MAX, in which 477 * start + end is greater than COMPAT_OFF_T_MAX, in which
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b83f6bcfa51a..626c7483b4de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -31,6 +31,7 @@
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/netlink.h> 32#include <linux/netlink.h>
33#include <linux/vt.h> 33#include <linux/vt.h>
34#include <linux/falloc.h>
34#include <linux/fs.h> 35#include <linux/fs.h>
35#include <linux/file.h> 36#include <linux/file.h>
36#include <linux/ppp_defs.h> 37#include <linux/ppp_defs.h>
@@ -94,7 +95,6 @@
94#include <linux/atm_tcp.h> 95#include <linux/atm_tcp.h>
95#include <linux/sonet.h> 96#include <linux/sonet.h>
96#include <linux/atm_suni.h> 97#include <linux/atm_suni.h>
97#include <linux/mtd/mtd.h>
98 98
99#include <linux/usb.h> 99#include <linux/usb.h>
100#include <linux/usbdevice_fs.h> 100#include <linux/usbdevice_fs.h>
@@ -788,12 +788,6 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
788 if (put_user(compat_ptr(data), &sgio->usr_ptr)) 788 if (put_user(compat_ptr(data), &sgio->usr_ptr))
789 return -EFAULT; 789 return -EFAULT;
790 790
791 if (copy_in_user(&sgio->status, &sgio32->status,
792 (4 * sizeof(unsigned char)) +
793 (2 * sizeof(unsigned short)) +
794 (3 * sizeof(int))))
795 return -EFAULT;
796
797 err = sys_ioctl(fd, cmd, (unsigned long) sgio); 791 err = sys_ioctl(fd, cmd, (unsigned long) sgio);
798 792
799 if (err >= 0) { 793 if (err >= 0) {
@@ -1411,46 +1405,6 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
1411#define HIDPGETCONNLIST _IOR('H', 210, int) 1405#define HIDPGETCONNLIST _IOR('H', 210, int)
1412#define HIDPGETCONNINFO _IOR('H', 211, int) 1406#define HIDPGETCONNINFO _IOR('H', 211, int)
1413 1407
1414struct mtd_oob_buf32 {
1415 u_int32_t start;
1416 u_int32_t length;
1417 compat_caddr_t ptr; /* unsigned char* */
1418};
1419
1420#define MEMWRITEOOB32 _IOWR('M',3,struct mtd_oob_buf32)
1421#define MEMREADOOB32 _IOWR('M',4,struct mtd_oob_buf32)
1422
1423static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
1424{
1425 struct mtd_oob_buf __user *buf = compat_alloc_user_space(sizeof(*buf));
1426 struct mtd_oob_buf32 __user *buf32 = compat_ptr(arg);
1427 u32 data;
1428 char __user *datap;
1429 unsigned int real_cmd;
1430 int err;
1431
1432 real_cmd = (cmd == MEMREADOOB32) ?
1433 MEMREADOOB : MEMWRITEOOB;
1434
1435 if (copy_in_user(&buf->start, &buf32->start,
1436 2 * sizeof(u32)) ||
1437 get_user(data, &buf32->ptr))
1438 return -EFAULT;
1439 datap = compat_ptr(data);
1440 if (put_user(datap, &buf->ptr))
1441 return -EFAULT;
1442
1443 err = sys_ioctl(fd, real_cmd, (unsigned long) buf);
1444
1445 if (!err) {
1446 if (copy_in_user(&buf32->start, &buf->start,
1447 2 * sizeof(u32)))
1448 err = -EFAULT;
1449 }
1450
1451 return err;
1452}
1453
1454#ifdef CONFIG_BLOCK 1408#ifdef CONFIG_BLOCK
1455struct raw32_config_request 1409struct raw32_config_request
1456{ 1410{
@@ -1765,7 +1719,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1765 1719
1766/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 1720/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
1767 * for some operations; this forces use of the newer bridge-utils that 1721 * for some operations; this forces use of the newer bridge-utils that
1768 * use compatiable ioctls 1722 * use compatible ioctls
1769 */ 1723 */
1770static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 1724static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
1771{ 1725{
@@ -1826,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1826 return sys_ioctl(fd, cmd, (unsigned long)tn); 1780 return sys_ioctl(fd, cmd, (unsigned long)tn);
1827} 1781}
1828 1782
1783/* on ia32 l_start is on a 32-bit boundary */
1784#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1785struct space_resv_32 {
1786 __s16 l_type;
1787 __s16 l_whence;
1788 __s64 l_start __attribute__((packed));
1789 /* len == 0 means until end of file */
1790 __s64 l_len __attribute__((packed));
1791 __s32 l_sysid;
1792 __u32 l_pid;
1793 __s32 l_pad[4]; /* reserve area */
1794};
1795
1796#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32)
1797#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1798
1799/* just account for different alignment */
1800static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1801{
1802 struct space_resv_32 __user *p32 = (void __user *)arg;
1803 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1804
1805 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
1806 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
1807 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
1808 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
1809 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
1810 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
1811 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
1812 return -EFAULT;
1813
1814 return ioctl_preallocate(file, p);
1815}
1816#endif
1817
1829 1818
1830typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 1819typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
1831 unsigned long, struct file *); 1820 unsigned long, struct file *);
@@ -2432,15 +2421,6 @@ COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
2432COMPATIBLE_IOCTL(USBDEVFS_REAPURB32) 2421COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
2433COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32) 2422COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
2434COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT) 2423COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
2435/* MTD */
2436COMPATIBLE_IOCTL(MEMGETINFO)
2437COMPATIBLE_IOCTL(MEMERASE)
2438COMPATIBLE_IOCTL(MEMLOCK)
2439COMPATIBLE_IOCTL(MEMUNLOCK)
2440COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
2441COMPATIBLE_IOCTL(MEMGETREGIONINFO)
2442COMPATIBLE_IOCTL(MEMGETBADBLOCK)
2443COMPATIBLE_IOCTL(MEMSETBADBLOCK)
2444/* NBD */ 2424/* NBD */
2445ULONG_IOCTL(NBD_SET_SOCK) 2425ULONG_IOCTL(NBD_SET_SOCK)
2446ULONG_IOCTL(NBD_SET_BLKSIZE) 2426ULONG_IOCTL(NBD_SET_BLKSIZE)
@@ -2550,8 +2530,6 @@ COMPATIBLE_IOCTL(JSIOCGBUTTONS)
2550COMPATIBLE_IOCTL(JSIOCGNAME(0)) 2530COMPATIBLE_IOCTL(JSIOCGNAME(0))
2551 2531
2552/* now things that need handlers */ 2532/* now things that need handlers */
2553HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
2554HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
2555#ifdef CONFIG_NET 2533#ifdef CONFIG_NET
2556HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) 2534HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
2557HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) 2535HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
@@ -2814,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2814 case FIOQSIZE: 2792 case FIOQSIZE:
2815 break; 2793 break;
2816 2794
2795#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2796 case FS_IOC_RESVSP_32:
2797 case FS_IOC_RESVSP64_32:
2798 error = compat_ioctl_preallocate(filp, arg);
2799 goto out_fput;
2800#else
2801 case FS_IOC_RESVSP:
2802 case FS_IOC_RESVSP64:
2803 error = ioctl_preallocate(filp, (void __user *)arg);
2804 goto out_fput;
2805#endif
2806
2817 case FIBMAP: 2807 case FIBMAP:
2818 case FIGETBSZ: 2808 case FIGETBSZ:
2819 case FIONREAD: 2809 case FIONREAD:
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 33a90120f6ad..4d74fc72c195 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -67,6 +67,8 @@ static int debugfs_u8_get(void *data, u64 *val)
67 return 0; 67 return 0;
68} 68}
69DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); 69DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
70DEFINE_SIMPLE_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
71DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
70 72
71/** 73/**
72 * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value 74 * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
@@ -95,6 +97,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
95struct dentry *debugfs_create_u8(const char *name, mode_t mode, 97struct dentry *debugfs_create_u8(const char *name, mode_t mode,
96 struct dentry *parent, u8 *value) 98 struct dentry *parent, u8 *value)
97{ 99{
100 /* if there are no write bits set, make read only */
101 if (!(mode & S_IWUGO))
102 return debugfs_create_file(name, mode, parent, value, &fops_u8_ro);
103 /* if there are no read bits set, make write only */
104 if (!(mode & S_IRUGO))
105 return debugfs_create_file(name, mode, parent, value, &fops_u8_wo);
106
98 return debugfs_create_file(name, mode, parent, value, &fops_u8); 107 return debugfs_create_file(name, mode, parent, value, &fops_u8);
99} 108}
100EXPORT_SYMBOL_GPL(debugfs_create_u8); 109EXPORT_SYMBOL_GPL(debugfs_create_u8);
@@ -110,6 +119,8 @@ static int debugfs_u16_get(void *data, u64 *val)
110 return 0; 119 return 0;
111} 120}
112DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); 121DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
122DEFINE_SIMPLE_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n");
123DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
113 124
114/** 125/**
115 * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value 126 * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value
@@ -138,6 +149,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
138struct dentry *debugfs_create_u16(const char *name, mode_t mode, 149struct dentry *debugfs_create_u16(const char *name, mode_t mode,
139 struct dentry *parent, u16 *value) 150 struct dentry *parent, u16 *value)
140{ 151{
152 /* if there are no write bits set, make read only */
153 if (!(mode & S_IWUGO))
154 return debugfs_create_file(name, mode, parent, value, &fops_u16_ro);
155 /* if there are no read bits set, make write only */
156 if (!(mode & S_IRUGO))
157 return debugfs_create_file(name, mode, parent, value, &fops_u16_wo);
158
141 return debugfs_create_file(name, mode, parent, value, &fops_u16); 159 return debugfs_create_file(name, mode, parent, value, &fops_u16);
142} 160}
143EXPORT_SYMBOL_GPL(debugfs_create_u16); 161EXPORT_SYMBOL_GPL(debugfs_create_u16);
@@ -153,6 +171,8 @@ static int debugfs_u32_get(void *data, u64 *val)
153 return 0; 171 return 0;
154} 172}
155DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); 173DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
174DEFINE_SIMPLE_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n");
175DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
156 176
157/** 177/**
158 * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value 178 * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value
@@ -181,6 +201,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
181struct dentry *debugfs_create_u32(const char *name, mode_t mode, 201struct dentry *debugfs_create_u32(const char *name, mode_t mode,
182 struct dentry *parent, u32 *value) 202 struct dentry *parent, u32 *value)
183{ 203{
204 /* if there are no write bits set, make read only */
205 if (!(mode & S_IWUGO))
206 return debugfs_create_file(name, mode, parent, value, &fops_u32_ro);
207 /* if there are no read bits set, make write only */
208 if (!(mode & S_IRUGO))
209 return debugfs_create_file(name, mode, parent, value, &fops_u32_wo);
210
184 return debugfs_create_file(name, mode, parent, value, &fops_u32); 211 return debugfs_create_file(name, mode, parent, value, &fops_u32);
185} 212}
186EXPORT_SYMBOL_GPL(debugfs_create_u32); 213EXPORT_SYMBOL_GPL(debugfs_create_u32);
@@ -197,6 +224,8 @@ static int debugfs_u64_get(void *data, u64 *val)
197 return 0; 224 return 0;
198} 225}
199DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); 226DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
227DEFINE_SIMPLE_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
228DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
200 229
201/** 230/**
202 * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value 231 * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value
@@ -225,15 +254,28 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
225struct dentry *debugfs_create_u64(const char *name, mode_t mode, 254struct dentry *debugfs_create_u64(const char *name, mode_t mode,
226 struct dentry *parent, u64 *value) 255 struct dentry *parent, u64 *value)
227{ 256{
257 /* if there are no write bits set, make read only */
258 if (!(mode & S_IWUGO))
259 return debugfs_create_file(name, mode, parent, value, &fops_u64_ro);
260 /* if there are no read bits set, make write only */
261 if (!(mode & S_IRUGO))
262 return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
263
228 return debugfs_create_file(name, mode, parent, value, &fops_u64); 264 return debugfs_create_file(name, mode, parent, value, &fops_u64);
229} 265}
230EXPORT_SYMBOL_GPL(debugfs_create_u64); 266EXPORT_SYMBOL_GPL(debugfs_create_u64);
231 267
232DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); 268DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
269DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
270DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");
233 271
234DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"); 272DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n");
273DEFINE_SIMPLE_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
274DEFINE_SIMPLE_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");
235 275
236DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); 276DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
277DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
278DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
237 279
238/* 280/*
239 * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value 281 * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
@@ -256,6 +298,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"
256struct dentry *debugfs_create_x8(const char *name, mode_t mode, 298struct dentry *debugfs_create_x8(const char *name, mode_t mode,
257 struct dentry *parent, u8 *value) 299 struct dentry *parent, u8 *value)
258{ 300{
301 /* if there are no write bits set, make read only */
302 if (!(mode & S_IWUGO))
303 return debugfs_create_file(name, mode, parent, value, &fops_x8_ro);
304 /* if there are no read bits set, make write only */
305 if (!(mode & S_IRUGO))
306 return debugfs_create_file(name, mode, parent, value, &fops_x8_wo);
307
259 return debugfs_create_file(name, mode, parent, value, &fops_x8); 308 return debugfs_create_file(name, mode, parent, value, &fops_x8);
260} 309}
261EXPORT_SYMBOL_GPL(debugfs_create_x8); 310EXPORT_SYMBOL_GPL(debugfs_create_x8);
@@ -273,6 +322,13 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8);
273struct dentry *debugfs_create_x16(const char *name, mode_t mode, 322struct dentry *debugfs_create_x16(const char *name, mode_t mode,
274 struct dentry *parent, u16 *value) 323 struct dentry *parent, u16 *value)
275{ 324{
325 /* if there are no write bits set, make read only */
326 if (!(mode & S_IWUGO))
327 return debugfs_create_file(name, mode, parent, value, &fops_x16_ro);
328 /* if there are no read bits set, make write only */
329 if (!(mode & S_IRUGO))
330 return debugfs_create_file(name, mode, parent, value, &fops_x16_wo);
331
276 return debugfs_create_file(name, mode, parent, value, &fops_x16); 332 return debugfs_create_file(name, mode, parent, value, &fops_x16);
277} 333}
278EXPORT_SYMBOL_GPL(debugfs_create_x16); 334EXPORT_SYMBOL_GPL(debugfs_create_x16);
@@ -290,6 +346,13 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16);
290struct dentry *debugfs_create_x32(const char *name, mode_t mode, 346struct dentry *debugfs_create_x32(const char *name, mode_t mode,
291 struct dentry *parent, u32 *value) 347 struct dentry *parent, u32 *value)
292{ 348{
349 /* if there are no write bits set, make read only */
350 if (!(mode & S_IWUGO))
351 return debugfs_create_file(name, mode, parent, value, &fops_x32_ro);
352 /* if there are no read bits set, make write only */
353 if (!(mode & S_IRUGO))
354 return debugfs_create_file(name, mode, parent, value, &fops_x32_wo);
355
293 return debugfs_create_file(name, mode, parent, value, &fops_x32); 356 return debugfs_create_file(name, mode, parent, value, &fops_x32);
294} 357}
295EXPORT_SYMBOL_GPL(debugfs_create_x32); 358EXPORT_SYMBOL_GPL(debugfs_create_x32);
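
The debugfs_create_u64() and debugfs_create_x{8,16,32}() hunks above make the helpers honour their mode argument: DEFINE_SIMPLE_ATTRIBUTE with a NULL setter (or getter) yields file operations with no write (or read) path, and the create functions pick those *_ro/*_wo variants whenever the mode carries no S_IWUGO (or S_IRUGO) bits. A minimal caller-side sketch, assuming a hypothetical driver statistic and debugfs directory (names are illustrative only):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/debugfs.h>

static u64 foo_error_count;		/* hypothetical statistic */
static struct dentry *foo_dir;		/* hypothetical debugfs directory */

static int __init foo_debugfs_init(void)
{
	foo_dir = debugfs_create_dir("foo", NULL);
	if (!foo_dir)
		return -ENOMEM;
	/*
	 * No write bits in the mode: with the change above this file is
	 * backed by fops_u64_ro, so it has no ->write() method at all
	 * instead of being a writable attribute hidden behind 0444.
	 */
	debugfs_create_u64("error_count", 0444, foo_dir, &foo_error_count);
	return 0;
}
module_init(foo_debugfs_init);
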
@@ -419,7 +482,7 @@ static const struct file_operations fops_blob = {
419}; 482};
420 483
421/** 484/**
422 * debugfs_create_blob - create a debugfs file that is used to read and write a binary blob 485 * debugfs_create_blob - create a debugfs file that is used to read a binary blob
423 * @name: a pointer to a string containing the name of the file to create. 486 * @name: a pointer to a string containing the name of the file to create.
424 * @mode: the permission that the file should have 487 * @mode: the permission that the file should have
425 * @parent: a pointer to the parent dentry for this file. This should be a 488 * @parent: a pointer to the parent dentry for this file. This should be a
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 0662ba6de85a..d22438ef7674 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -403,6 +403,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
403 } 403 }
404 child = list_entry(parent->d_subdirs.next, struct dentry, 404 child = list_entry(parent->d_subdirs.next, struct dentry,
405 d_u.d_child); 405 d_u.d_child);
406 next_sibling:
406 407
407 /* 408 /*
408 * If "child" isn't empty, walk down the tree and 409 * If "child" isn't empty, walk down the tree and
@@ -417,6 +418,16 @@ void debugfs_remove_recursive(struct dentry *dentry)
417 __debugfs_remove(child, parent); 418 __debugfs_remove(child, parent);
418 if (parent->d_subdirs.next == &child->d_u.d_child) { 419 if (parent->d_subdirs.next == &child->d_u.d_child) {
419 /* 420 /*
421 * Try the next sibling.
422 */
423 if (child->d_u.d_child.next != &parent->d_subdirs) {
424 child = list_entry(child->d_u.d_child.next,
425 struct dentry,
426 d_u.d_child);
427 goto next_sibling;
428 }
429
430 /*
420 * Avoid infinite loop if we fail to remove 431 * Avoid infinite loop if we fail to remove
421 * one dentry. 432 * one dentry.
422 */ 433 */
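
The next_sibling label added above changes what happens when the first entry on parent->d_subdirs cannot be removed: rather than giving up on the whole parent, the walk moves on to the following sibling and keeps going. A small sketch of the underlying list idiom, with hypothetical names:

#include <linux/list.h>
#include <linux/stddef.h>

struct node {
	struct list_head sibling;	/* linked on the parent's child list */
};

/* Return the next sibling of @n on @children, or NULL at the end of the list. */
static struct node *next_sibling(struct list_head *children, struct node *n)
{
	if (n->sibling.next == children)	/* wrapped back to the list head */
		return NULL;
	return list_entry(n->sibling.next, struct node, sibling);
}
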
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9b1d285f9fe6..75efb028974b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
423} 423}
424 424
425static struct file_system_type devpts_fs_type = { 425static struct file_system_type devpts_fs_type = {
426 .owner = THIS_MODULE,
427 .name = "devpts", 426 .name = "devpts",
428 .get_sb = devpts_get_sb, 427 .get_sb = devpts_get_sb,
429 .kill_sb = devpts_kill_sb, 428 .kill_sb = devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
564 } 563 }
565 return err; 564 return err;
566} 565}
567
568static void __exit exit_devpts_fs(void)
569{
570 unregister_filesystem(&devpts_fs_type);
571 mntput(devpts_mnt);
572}
573
574module_init(init_devpts_fs) 566module_init(init_devpts_fs)
575module_exit(exit_devpts_fs)
576MODULE_LICENSE("GPL");
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index b6a719a909f8..a2edb7913447 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -24,7 +24,7 @@ static void drop_pagecache_sb(struct super_block *sb)
24 continue; 24 continue;
25 __iget(inode); 25 __iget(inode);
26 spin_unlock(&inode_lock); 26 spin_unlock(&inode_lock);
27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 invalidate_mapping_pages(inode->i_mapping, 0, -1);
28 iput(toput_inode); 28 iput(toput_inode);
29 toput_inode = inode; 29 toput_inode = inode;
30 spin_lock(&inode_lock); 30 spin_lock(&inode_lock);
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 49308a29798a..7ee6f7e3a608 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -5,12 +5,12 @@
5 */ 5 */
6 6
7#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
8#include <linux/smp_lock.h>
9#include "efs.h" 8#include "efs.h"
10 9
11static int efs_readdir(struct file *, void *, filldir_t); 10static int efs_readdir(struct file *, void *, filldir_t);
12 11
13const struct file_operations efs_dir_operations = { 12const struct file_operations efs_dir_operations = {
13 .llseek = generic_file_llseek,
14 .read = generic_read_dir, 14 .read = generic_read_dir,
15 .readdir = efs_readdir, 15 .readdir = efs_readdir,
16}; 16};
@@ -33,8 +33,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
33 if (inode->i_size & (EFS_DIRBSIZE-1)) 33 if (inode->i_size & (EFS_DIRBSIZE-1))
34 printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); 34 printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n");
35 35
36 lock_kernel();
37
38 /* work out where this entry can be found */ 36 /* work out where this entry can be found */
39 block = filp->f_pos >> EFS_DIRBSIZE_BITS; 37 block = filp->f_pos >> EFS_DIRBSIZE_BITS;
40 38
@@ -107,7 +105,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
107 105
108 filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; 106 filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot;
109out: 107out:
110 unlock_kernel();
111 return 0; 108 return 0;
112} 109}
113 110
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index c3fb5f9c4a44..1511bf9e5f80 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/smp_lock.h>
12#include <linux/exportfs.h> 11#include <linux/exportfs.h>
13#include "efs.h" 12#include "efs.h"
14 13
@@ -63,16 +62,12 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
63 efs_ino_t inodenum; 62 efs_ino_t inodenum;
64 struct inode * inode = NULL; 63 struct inode * inode = NULL;
65 64
66 lock_kernel();
67 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); 65 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
68 if (inodenum) { 66 if (inodenum) {
69 inode = efs_iget(dir->i_sb, inodenum); 67 inode = efs_iget(dir->i_sb, inodenum);
70 if (IS_ERR(inode)) { 68 if (IS_ERR(inode))
71 unlock_kernel();
72 return ERR_CAST(inode); 69 return ERR_CAST(inode);
73 }
74 } 70 }
75 unlock_kernel();
76 71
77 return d_splice_alias(inode, dentry); 72 return d_splice_alias(inode, dentry);
78} 73}
@@ -115,11 +110,9 @@ struct dentry *efs_get_parent(struct dentry *child)
115 struct dentry *parent = ERR_PTR(-ENOENT); 110 struct dentry *parent = ERR_PTR(-ENOENT);
116 efs_ino_t ino; 111 efs_ino_t ino;
117 112
118 lock_kernel();
119 ino = efs_find_entry(child->d_inode, "..", 2); 113 ino = efs_find_entry(child->d_inode, "..", 2);
120 if (ino) 114 if (ino)
121 parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); 115 parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino));
122 unlock_kernel();
123 116
124 return parent; 117 return parent;
125} 118}
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 41911ec83aaf..75117d0dac2b 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -9,7 +9,6 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/smp_lock.h>
13#include "efs.h" 12#include "efs.h"
14 13
15static int efs_symlink_readpage(struct file *file, struct page *page) 14static int efs_symlink_readpage(struct file *file, struct page *page)
@@ -22,9 +21,8 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
22 21
23 err = -ENAMETOOLONG; 22 err = -ENAMETOOLONG;
24 if (size > 2 * EFS_BLOCKSIZE) 23 if (size > 2 * EFS_BLOCKSIZE)
25 goto fail_notlocked; 24 goto fail;
26 25
27 lock_kernel();
28 /* read first 512 bytes of link target */ 26 /* read first 512 bytes of link target */
29 err = -EIO; 27 err = -EIO;
30 bh = sb_bread(inode->i_sb, efs_bmap(inode, 0)); 28 bh = sb_bread(inode->i_sb, efs_bmap(inode, 0));
@@ -40,14 +38,11 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
40 brelse(bh); 38 brelse(bh);
41 } 39 }
42 link[size] = '\0'; 40 link[size] = '\0';
43 unlock_kernel();
44 SetPageUptodate(page); 41 SetPageUptodate(page);
45 kunmap(page); 42 kunmap(page);
46 unlock_page(page); 43 unlock_page(page);
47 return 0; 44 return 0;
48fail: 45fail:
49 unlock_kernel();
50fail_notlocked:
51 SetPageError(page); 46 SetPageError(page);
52 kunmap(page); 47 kunmap(page);
53 unlock_page(page); 48 unlock_page(page);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5458e80fc558..085c5c063420 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -98,7 +98,7 @@ struct epoll_filefd {
98struct nested_call_node { 98struct nested_call_node {
99 struct list_head llink; 99 struct list_head llink;
100 void *cookie; 100 void *cookie;
101 int cpu; 101 void *ctx;
102}; 102};
103 103
104/* 104/*
@@ -317,17 +317,17 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
317 * @nproc: Nested call core function pointer. 317 * @nproc: Nested call core function pointer.
318 * @priv: Opaque data to be passed to the @nproc callback. 318 * @priv: Opaque data to be passed to the @nproc callback.
319 * @cookie: Cookie to be used to identify this nested call. 319 * @cookie: Cookie to be used to identify this nested call.
320 * @ctx: This instance context.
320 * 321 *
321 * Returns: Returns the code returned by the @nproc callback, or -1 if 322 * Returns: Returns the code returned by the @nproc callback, or -1 if
322 * the maximum recursion limit has been exceeded. 323 * the maximum recursion limit has been exceeded.
323 */ 324 */
324static int ep_call_nested(struct nested_calls *ncalls, int max_nests, 325static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
325 int (*nproc)(void *, void *, int), void *priv, 326 int (*nproc)(void *, void *, int), void *priv,
326 void *cookie) 327 void *cookie, void *ctx)
327{ 328{
328 int error, call_nests = 0; 329 int error, call_nests = 0;
329 unsigned long flags; 330 unsigned long flags;
330 int this_cpu = get_cpu();
331 struct list_head *lsthead = &ncalls->tasks_call_list; 331 struct list_head *lsthead = &ncalls->tasks_call_list;
332 struct nested_call_node *tncur; 332 struct nested_call_node *tncur;
333 struct nested_call_node tnode; 333 struct nested_call_node tnode;
@@ -340,7 +340,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
340 * very much limited. 340 * very much limited.
341 */ 341 */
342 list_for_each_entry(tncur, lsthead, llink) { 342 list_for_each_entry(tncur, lsthead, llink) {
343 if (tncur->cpu == this_cpu && 343 if (tncur->ctx == ctx &&
344 (tncur->cookie == cookie || ++call_nests > max_nests)) { 344 (tncur->cookie == cookie || ++call_nests > max_nests)) {
345 /* 345 /*
346 * Ops ... loop detected or maximum nest level reached. 346 * Ops ... loop detected or maximum nest level reached.
@@ -352,7 +352,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
352 } 352 }
353 353
354 /* Add the current task and cookie to the list */ 354 /* Add the current task and cookie to the list */
355 tnode.cpu = this_cpu; 355 tnode.ctx = ctx;
356 tnode.cookie = cookie; 356 tnode.cookie = cookie;
357 list_add(&tnode.llink, lsthead); 357 list_add(&tnode.llink, lsthead);
358 358
@@ -364,10 +364,9 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
364 /* Remove the current task from the list */ 364 /* Remove the current task from the list */
365 spin_lock_irqsave(&ncalls->lock, flags); 365 spin_lock_irqsave(&ncalls->lock, flags);
366 list_del(&tnode.llink); 366 list_del(&tnode.llink);
367 out_unlock: 367out_unlock:
368 spin_unlock_irqrestore(&ncalls->lock, flags); 368 spin_unlock_irqrestore(&ncalls->lock, flags);
369 369
370 put_cpu();
371 return error; 370 return error;
372} 371}
373 372
@@ -408,8 +407,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
408 */ 407 */
409static void ep_poll_safewake(wait_queue_head_t *wq) 408static void ep_poll_safewake(wait_queue_head_t *wq)
410{ 409{
410 int this_cpu = get_cpu();
411
411 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, 412 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
412 ep_poll_wakeup_proc, NULL, wq); 413 ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
414
415 put_cpu();
413} 416}
414 417
415/* 418/*
@@ -663,7 +666,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
663 * could re-enter here. 666 * could re-enter here.
664 */ 667 */
665 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, 668 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
666 ep_poll_readyevents_proc, ep, ep); 669 ep_poll_readyevents_proc, ep, ep, current);
667 670
668 return pollflags != -1 ? pollflags : 0; 671 return pollflags != -1 ? pollflags : 0;
669} 672}
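
The eventpoll hunks above key the nested-call bookkeeping on an opaque ctx pointer chosen by the caller instead of on a CPU id obtained inside ep_call_nested(): the safe-wakeup path still passes the current CPU (cast through long), while ep_eventpoll_poll() passes current, so the poll path no longer has to pin the CPU with get_cpu() across the callback. A simplified sketch of the loop/nesting check, using hypothetical names:

#include <linux/list.h>
#include <linux/types.h>

struct nest_node {
	struct list_head llink;
	void *cookie;
	void *ctx;
};

/*
 * Allow the call only if this context is not already on the list with the
 * same cookie (a loop) and has not nested deeper than @max_nests.
 */
static bool nesting_allowed(struct list_head *calls, void *cookie,
			    void *ctx, int max_nests)
{
	struct nest_node *cur;
	int nests = 0;

	list_for_each_entry(cur, calls, llink) {
		if (cur->ctx == ctx &&
		    (cur->cookie == cookie || ++nests > max_nests))
			return false;	/* loop detected or too deep for this ctx */
	}
	return true;
}
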
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d46e38cb85c5..d636e1297cad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,37 +125,12 @@ fail:
125 return ERR_PTR(-EINVAL); 125 return ERR_PTR(-EINVAL);
126} 126}
127 127
128static inline struct posix_acl *
129ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
130{
131 struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
132
133 spin_lock(&inode->i_lock);
134 if (*i_acl != EXT2_ACL_NOT_CACHED)
135 acl = posix_acl_dup(*i_acl);
136 spin_unlock(&inode->i_lock);
137
138 return acl;
139}
140
141static inline void
142ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
143 struct posix_acl *acl)
144{
145 spin_lock(&inode->i_lock);
146 if (*i_acl != EXT2_ACL_NOT_CACHED)
147 posix_acl_release(*i_acl);
148 *i_acl = posix_acl_dup(acl);
149 spin_unlock(&inode->i_lock);
150}
151
152/* 128/*
153 * inode->i_mutex: don't care 129 * inode->i_mutex: don't care
154 */ 130 */
155static struct posix_acl * 131static struct posix_acl *
156ext2_get_acl(struct inode *inode, int type) 132ext2_get_acl(struct inode *inode, int type)
157{ 133{
158 struct ext2_inode_info *ei = EXT2_I(inode);
159 int name_index; 134 int name_index;
160 char *value = NULL; 135 char *value = NULL;
161 struct posix_acl *acl; 136 struct posix_acl *acl;
@@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
164 if (!test_opt(inode->i_sb, POSIX_ACL)) 139 if (!test_opt(inode->i_sb, POSIX_ACL))
165 return NULL; 140 return NULL;
166 141
167 switch(type) { 142 acl = get_cached_acl(inode, type);
168 case ACL_TYPE_ACCESS: 143 if (acl != ACL_NOT_CACHED)
169 acl = ext2_iget_acl(inode, &ei->i_acl); 144 return acl;
170 if (acl != EXT2_ACL_NOT_CACHED) 145
171 return acl; 146 switch (type) {
172 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; 147 case ACL_TYPE_ACCESS:
173 break; 148 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
174 149 break;
175 case ACL_TYPE_DEFAULT: 150 case ACL_TYPE_DEFAULT:
176 acl = ext2_iget_acl(inode, &ei->i_default_acl); 151 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
177 if (acl != EXT2_ACL_NOT_CACHED) 152 break;
178 return acl; 153 default:
179 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 BUG();
180 break;
181
182 default:
183 return ERR_PTR(-EINVAL);
184 } 155 }
185 retval = ext2_xattr_get(inode, name_index, "", NULL, 0); 156 retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
186 if (retval > 0) { 157 if (retval > 0) {
@@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
197 acl = ERR_PTR(retval); 168 acl = ERR_PTR(retval);
198 kfree(value); 169 kfree(value);
199 170
200 if (!IS_ERR(acl)) { 171 if (!IS_ERR(acl))
201 switch(type) { 172 set_cached_acl(inode, type, acl);
202 case ACL_TYPE_ACCESS:
203 ext2_iset_acl(inode, &ei->i_acl, acl);
204 break;
205 173
206 case ACL_TYPE_DEFAULT:
207 ext2_iset_acl(inode, &ei->i_default_acl, acl);
208 break;
209 }
210 }
211 return acl; 174 return acl;
212} 175}
213 176
@@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type)
217static int 180static int
218ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 181ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219{ 182{
220 struct ext2_inode_info *ei = EXT2_I(inode);
221 int name_index; 183 int name_index;
222 void *value = NULL; 184 void *value = NULL;
223 size_t size = 0; 185 size_t size = 0;
@@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
263 error = ext2_xattr_set(inode, name_index, "", value, size, 0); 225 error = ext2_xattr_set(inode, name_index, "", value, size, 0);
264 226
265 kfree(value); 227 kfree(value);
266 if (!error) { 228 if (!error)
267 switch(type) { 229 set_cached_acl(inode, type, acl);
268 case ACL_TYPE_ACCESS:
269 ext2_iset_acl(inode, &ei->i_acl, acl);
270 break;
271
272 case ACL_TYPE_DEFAULT:
273 ext2_iset_acl(inode, &ei->i_default_acl, acl);
274 break;
275 }
276 }
277 return error; 230 return error;
278} 231}
279 232
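
The ext2 ACL hunks above (and the matching ext3/ext4 ones further down) replace the per-filesystem i_acl/i_default_acl fields and the EXT*_ACL_NOT_CACHED sentinel with the generic per-inode ACL cache exposed through get_cached_acl() and set_cached_acl(). A sketch of the resulting shape of a filesystem get_acl(), with a hypothetical xattr helper standing in for ext2_xattr_get():

#include <linux/fs.h>
#include <linux/err.h>
#include <linux/posix_acl.h>

/* Hypothetical low-level reader; a real filesystem would parse its ACL xattr here. */
struct posix_acl *myfs_read_acl_xattr(struct inode *inode, int type);

static struct posix_acl *myfs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;

	/* Consult the generic per-inode cache first. */
	acl = get_cached_acl(inode, type);
	if (acl != ACL_NOT_CACHED)
		return acl;

	/* Miss: fetch from the xattr layer and populate the cache. */
	acl = myfs_read_acl_xattr(inode, type);
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);
	return acl;
}
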
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index b42cf578554b..ecefe478898f 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext2_permission (struct inode *, int); 57extern int ext2_permission (struct inode *, int);
62extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 003500498c22..6cde970b0a1a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -450,7 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
450 450
451/* Releases the page */ 451/* Releases the page */
452void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, 452void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
453 struct page *page, struct inode *inode) 453 struct page *page, struct inode *inode, int update_times)
454{ 454{
455 loff_t pos = page_offset(page) + 455 loff_t pos = page_offset(page) +
456 (char *) de - (char *) page_address(page); 456 (char *) de - (char *) page_address(page);
@@ -465,7 +465,8 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
465 ext2_set_de_type(de, inode); 465 ext2_set_de_type(de, inode);
466 err = ext2_commit_chunk(page, pos, len); 466 err = ext2_commit_chunk(page, pos, len);
467 ext2_put_page(page); 467 ext2_put_page(page);
468 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 468 if (update_times)
469 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
469 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; 470 EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
470 mark_inode_dirty(dir); 471 mark_inode_dirty(dir);
471} 472}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index b2bbf45039e0..9a8a8e27a063 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -27,7 +27,7 @@ struct ext2_inode_info {
27 /* 27 /*
28 * i_block_group is the number of the block group which contains 28 * i_block_group is the number of the block group which contains
29 * this file's inode. Constant across the lifetime of the inode, 29 * this file's inode. Constant across the lifetime of the inode,
30 * it is ued for making block allocation decisions - we try to 30 * it is used for making block allocation decisions - we try to
31 * place a file's data blocks near its inode block, and new inodes 31 * place a file's data blocks near its inode block, and new inodes
32 * near to their parent directory's inode. 32 * near to their parent directory's inode.
33 */ 33 */
@@ -47,10 +47,6 @@ struct ext2_inode_info {
47 */ 47 */
48 struct rw_semaphore xattr_sem; 48 struct rw_semaphore xattr_sem;
49#endif 49#endif
50#ifdef CONFIG_EXT2_FS_POSIX_ACL
51 struct posix_acl *i_acl;
52 struct posix_acl *i_default_acl;
53#endif
54 rwlock_t i_meta_lock; 50 rwlock_t i_meta_lock;
55 51
56 /* 52 /*
@@ -111,7 +107,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
111extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); 107extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
112extern int ext2_empty_dir (struct inode *); 108extern int ext2_empty_dir (struct inode *);
113extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); 109extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
114extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
115 111
116/* ialloc.c */ 112/* ialloc.c */
117extern struct inode * ext2_new_inode (struct inode *, int); 113extern struct inode * ext2_new_inode (struct inode *, int);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 29ed682061f6..e27130341d4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1224 return inode; 1224 return inode;
1225 1225
1226 ei = EXT2_I(inode); 1226 ei = EXT2_I(inode);
1227#ifdef CONFIG_EXT2_FS_POSIX_ACL
1228 ei->i_acl = EXT2_ACL_NOT_CACHED;
1229 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
1230#endif
1231 ei->i_block_alloc_info = NULL; 1227 ei->i_block_alloc_info = NULL;
1232 1228
1233 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); 1229 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 90ea17998a73..6524ecaebb7a 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -320,7 +320,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
320 if (!new_de) 320 if (!new_de)
321 goto out_dir; 321 goto out_dir;
322 inode_inc_link_count(old_inode); 322 inode_inc_link_count(old_inode);
323 ext2_set_link(new_dir, new_de, new_page, old_inode); 323 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
324 new_inode->i_ctime = CURRENT_TIME_SEC; 324 new_inode->i_ctime = CURRENT_TIME_SEC;
325 if (dir_de) 325 if (dir_de)
326 drop_nlink(new_inode); 326 drop_nlink(new_inode);
@@ -352,7 +352,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
352 inode_dec_link_count(old_inode); 352 inode_dec_link_count(old_inode);
353 353
354 if (dir_de) { 354 if (dir_de) {
355 ext2_set_link(old_inode, dir_de, dir_page, new_dir); 355 if (old_dir != new_dir)
356 ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
356 inode_dec_link_count(old_dir); 357 inode_dec_link_count(old_dir);
357 } 358 }
358 return 0; 359 return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 458999638c3d..1a9ffee47d56 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); 152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
153 if (!ei) 153 if (!ei)
154 return NULL; 154 return NULL;
155#ifdef CONFIG_EXT2_FS_POSIX_ACL
156 ei->i_acl = EXT2_ACL_NOT_CACHED;
157 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
158#endif
159 ei->i_block_alloc_info = NULL; 155 ei->i_block_alloc_info = NULL;
160 ei->vfs_inode.i_version = 1; 156 ei->vfs_inode.i_version = 1;
161 return &ei->vfs_inode; 157 return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
198static void ext2_clear_inode(struct inode *inode) 194static void ext2_clear_inode(struct inode *inode)
199{ 195{
200 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; 196 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
201#ifdef CONFIG_EXT2_FS_POSIX_ACL
202 struct ext2_inode_info *ei = EXT2_I(inode);
203
204 if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
205 posix_acl_release(ei->i_acl);
206 ei->i_acl = EXT2_ACL_NOT_CACHED;
207 }
208 if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
209 posix_acl_release(ei->i_default_acl);
210 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
211 }
212#endif
213 ext2_discard_reservation(inode); 197 ext2_discard_reservation(inode);
214 EXT2_I(inode)->i_block_alloc_info = NULL; 198 EXT2_I(inode)->i_block_alloc_info = NULL;
215 if (unlikely(rsv)) 199 if (unlikely(rsv))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index d81ef2fdb08e..e167bae37ef0 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,30 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = EXT3_ACL_NOT_CACHED;
133
134 spin_lock(&inode->i_lock);
135 if (*i_acl != EXT3_ACL_NOT_CACHED)
136 acl = posix_acl_dup(*i_acl);
137 spin_unlock(&inode->i_lock);
138
139 return acl;
140}
141
142static inline void
143ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
144 struct posix_acl *acl)
145{
146 spin_lock(&inode->i_lock);
147 if (*i_acl != EXT3_ACL_NOT_CACHED)
148 posix_acl_release(*i_acl);
149 *i_acl = posix_acl_dup(acl);
150 spin_unlock(&inode->i_lock);
151}
152
153/* 129/*
154 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
155 * 131 *
@@ -158,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
158static struct posix_acl * 134static struct posix_acl *
159ext3_get_acl(struct inode *inode, int type) 135ext3_get_acl(struct inode *inode, int type)
160{ 136{
161 struct ext3_inode_info *ei = EXT3_I(inode);
162 int name_index; 137 int name_index;
163 char *value = NULL; 138 char *value = NULL;
164 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -167,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 143 return NULL;
169 144
170 switch(type) { 145 acl = get_cached_acl(inode, type);
171 case ACL_TYPE_ACCESS: 146 if (acl != ACL_NOT_CACHED)
172 acl = ext3_iget_acl(inode, &ei->i_acl); 147 return acl;
173 if (acl != EXT3_ACL_NOT_CACHED) 148
174 return acl; 149 switch (type) {
175 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; 150 case ACL_TYPE_ACCESS:
176 break; 151 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
177 152 break;
178 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
179 acl = ext3_iget_acl(inode, &ei->i_default_acl); 154 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
180 if (acl != EXT3_ACL_NOT_CACHED) 155 break;
181 return acl; 156 default:
182 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; 157 BUG();
183 break;
184
185 default:
186 return ERR_PTR(-EINVAL);
187 } 158 }
159
188 retval = ext3_xattr_get(inode, name_index, "", NULL, 0); 160 retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 161 if (retval > 0) {
190 value = kmalloc(retval, GFP_NOFS); 162 value = kmalloc(retval, GFP_NOFS);
@@ -200,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
200 acl = ERR_PTR(retval); 172 acl = ERR_PTR(retval);
201 kfree(value); 173 kfree(value);
202 174
203 if (!IS_ERR(acl)) { 175 if (!IS_ERR(acl))
204 switch(type) { 176 set_cached_acl(inode, type, acl);
205 case ACL_TYPE_ACCESS:
206 ext3_iset_acl(inode, &ei->i_acl, acl);
207 break;
208 177
209 case ACL_TYPE_DEFAULT:
210 ext3_iset_acl(inode, &ei->i_default_acl, acl);
211 break;
212 }
213 }
214 return acl; 178 return acl;
215} 179}
216 180
@@ -223,7 +187,6 @@ static int
223ext3_set_acl(handle_t *handle, struct inode *inode, int type, 187ext3_set_acl(handle_t *handle, struct inode *inode, int type,
224 struct posix_acl *acl) 188 struct posix_acl *acl)
225{ 189{
226 struct ext3_inode_info *ei = EXT3_I(inode);
227 int name_index; 190 int name_index;
228 void *value = NULL; 191 void *value = NULL;
229 size_t size = 0; 192 size_t size = 0;
@@ -268,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
268 value, size, 0); 231 value, size, 0);
269 232
270 kfree(value); 233 kfree(value);
271 if (!error) {
272 switch(type) {
273 case ACL_TYPE_ACCESS:
274 ext3_iset_acl(inode, &ei->i_acl, acl);
275 break;
276 234
277 case ACL_TYPE_DEFAULT: 235 if (!error)
278 ext3_iset_acl(inode, &ei->i_default_acl, acl); 236 set_cached_acl(inode, type, acl);
279 break; 237
280 }
281 }
282 return error; 238 return error;
283} 239}
284 240
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac0..07d15a3a5969 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT3_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext3_permission (struct inode *, int); 57extern int ext3_permission (struct inode *, int);
62extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b0248c6d5d4c..5f51fed5c750 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -820,7 +820,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
820 while (count < maxblocks && count <= blocks_to_boundary) { 820 while (count < maxblocks && count <= blocks_to_boundary) {
821 ext3_fsblk_t blk; 821 ext3_fsblk_t blk;
822 822
823 if (!verify_chain(chain, partial)) { 823 if (!verify_chain(chain, chain + depth - 1)) {
824 /* 824 /*
825 * Indirect block might be removed by 825 * Indirect block might be removed by
826 * truncate while we were reading it. 826 * truncate while we were reading it.
@@ -2374,7 +2374,7 @@ void ext3_truncate(struct inode *inode)
2374 struct page *page; 2374 struct page *page;
2375 2375
2376 if (!ext3_can_truncate(inode)) 2376 if (!ext3_can_truncate(inode))
2377 return; 2377 goto out_notrans;
2378 2378
2379 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2379 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2380 ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; 2380 ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
@@ -2390,7 +2390,7 @@ void ext3_truncate(struct inode *inode)
2390 page = grab_cache_page(mapping, 2390 page = grab_cache_page(mapping,
2391 inode->i_size >> PAGE_CACHE_SHIFT); 2391 inode->i_size >> PAGE_CACHE_SHIFT);
2392 if (!page) 2392 if (!page)
2393 return; 2393 goto out_notrans;
2394 } 2394 }
2395 2395
2396 handle = start_transaction(inode); 2396 handle = start_transaction(inode);
@@ -2401,7 +2401,7 @@ void ext3_truncate(struct inode *inode)
2401 unlock_page(page); 2401 unlock_page(page);
2402 page_cache_release(page); 2402 page_cache_release(page);
2403 } 2403 }
2404 return; /* AKPM: return what? */ 2404 goto out_notrans;
2405 } 2405 }
2406 2406
2407 last_block = (inode->i_size + blocksize-1) 2407 last_block = (inode->i_size + blocksize-1)
@@ -2525,6 +2525,14 @@ out_stop:
2525 ext3_orphan_del(handle, inode); 2525 ext3_orphan_del(handle, inode);
2526 2526
2527 ext3_journal_stop(handle); 2527 ext3_journal_stop(handle);
2528 return;
2529out_notrans:
2530 /*
2531 * Delete the inode from orphan list so that it doesn't stay there
2532 * forever and trigger assertion on umount.
2533 */
2534 if (inode->i_nlink)
2535 ext3_orphan_del(NULL, inode);
2528} 2536}
2529 2537
2530static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2538static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2744,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2744 return inode; 2752 return inode;
2745 2753
2746 ei = EXT3_I(inode); 2754 ei = EXT3_I(inode);
2747#ifdef CONFIG_EXT3_FS_POSIX_ACL
2748 ei->i_acl = EXT3_ACL_NOT_CACHED;
2749 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
2750#endif
2751 ei->i_block_alloc_info = NULL; 2755 ei->i_block_alloc_info = NULL;
2752 2756
2753 ret = __ext3_get_inode_loc(inode, &iloc, 0); 2757 ret = __ext3_get_inode_loc(inode, &iloc, 0);
@@ -3122,12 +3126,6 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3122 3126
3123 rc = inode_setattr(inode, attr); 3127 rc = inode_setattr(inode, attr);
3124 3128
3125 /* If inode_setattr's call to ext3_truncate failed to get a
3126 * transaction handle at all, we need to clean up the in-core
3127 * orphan list manually. */
3128 if (inode->i_nlink)
3129 ext3_orphan_del(NULL, inode);
3130
3131 if (!rc && (ia_valid & ATTR_MODE)) 3129 if (!rc && (ia_valid & ATTR_MODE))
3132 rc = ext3_acl_chmod(inode); 3130 rc = ext3_acl_chmod(inode);
3133 3131
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8a0b26340b54..8359e7b3dc89 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -990,7 +990,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
990 sb->s_id, n_blocks_count); 990 sb->s_id, n_blocks_count);
991 if (sizeof(sector_t) < 8) 991 if (sizeof(sector_t) < 8)
992 ext3_warning(sb, __func__, 992 ext3_warning(sb, __func__,
993 "CONFIG_LBD not enabled\n"); 993 "CONFIG_LBDAF not enabled\n");
994 return -EINVAL; 994 return -EINVAL;
995 } 995 }
996 996
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 26aa64dee6aa..524b349c6299 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
465 if (!ei) 465 if (!ei)
466 return NULL; 466 return NULL;
467#ifdef CONFIG_EXT3_FS_POSIX_ACL
468 ei->i_acl = EXT3_ACL_NOT_CACHED;
469 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
470#endif
471 ei->i_block_alloc_info = NULL; 467 ei->i_block_alloc_info = NULL;
472 ei->vfs_inode.i_version = 1; 468 ei->vfs_inode.i_version = 1;
473 return &ei->vfs_inode; 469 return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
518static void ext3_clear_inode(struct inode *inode) 514static void ext3_clear_inode(struct inode *inode)
519{ 515{
520 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 516 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
521#ifdef CONFIG_EXT3_FS_POSIX_ACL
522 if (EXT3_I(inode)->i_acl &&
523 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
524 posix_acl_release(EXT3_I(inode)->i_acl);
525 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
526 }
527 if (EXT3_I(inode)->i_default_acl &&
528 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
529 posix_acl_release(EXT3_I(inode)->i_default_acl);
530 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
531 }
532#endif
533 ext3_discard_reservation(inode); 517 ext3_discard_reservation(inode);
534 EXT3_I(inode)->i_block_alloc_info = NULL; 518 EXT3_I(inode)->i_block_alloc_info = NULL;
535 if (unlikely(rsv)) 519 if (unlikely(rsv))
@@ -1812,7 +1796,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1812 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1796 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1813 " too large to mount safely\n", sb->s_id); 1797 " too large to mount safely\n", sb->s_id);
1814 if (sizeof(sector_t) < 8) 1798 if (sizeof(sector_t) < 8)
1815 printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " 1799 printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
1816 "enabled\n"); 1800 "enabled\n");
1817 goto failed_mount; 1801 goto failed_mount;
1818 } 1802 }
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8a34710ecf40..8867b2a1e5fe 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
10 10
11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 647e0d65a284..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,30 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = EXT4_ACL_NOT_CACHED;
133
134 spin_lock(&inode->i_lock);
135 if (*i_acl != EXT4_ACL_NOT_CACHED)
136 acl = posix_acl_dup(*i_acl);
137 spin_unlock(&inode->i_lock);
138
139 return acl;
140}
141
142static inline void
143ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
144 struct posix_acl *acl)
145{
146 spin_lock(&inode->i_lock);
147 if (*i_acl != EXT4_ACL_NOT_CACHED)
148 posix_acl_release(*i_acl);
149 *i_acl = posix_acl_dup(acl);
150 spin_unlock(&inode->i_lock);
151}
152
153/* 129/*
154 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
155 * 131 *
@@ -158,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
158static struct posix_acl * 134static struct posix_acl *
159ext4_get_acl(struct inode *inode, int type) 135ext4_get_acl(struct inode *inode, int type)
160{ 136{
161 struct ext4_inode_info *ei = EXT4_I(inode);
162 int name_index; 137 int name_index;
163 char *value = NULL; 138 char *value = NULL;
164 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -167,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 143 return NULL;
169 144
145 acl = get_cached_acl(inode, type);
146 if (acl != ACL_NOT_CACHED)
147 return acl;
148
170 switch (type) { 149 switch (type) {
171 case ACL_TYPE_ACCESS: 150 case ACL_TYPE_ACCESS:
172 acl = ext4_iget_acl(inode, &ei->i_acl);
173 if (acl != EXT4_ACL_NOT_CACHED)
174 return acl;
175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 151 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
176 break; 152 break;
177
178 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
179 acl = ext4_iget_acl(inode, &ei->i_default_acl);
180 if (acl != EXT4_ACL_NOT_CACHED)
181 return acl;
182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 break; 155 break;
184
185 default: 156 default:
186 return ERR_PTR(-EINVAL); 157 BUG();
187 } 158 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 159 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 160 if (retval > 0) {
@@ -200,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
200 acl = ERR_PTR(retval); 171 acl = ERR_PTR(retval);
201 kfree(value); 172 kfree(value);
202 173
203 if (!IS_ERR(acl)) { 174 if (!IS_ERR(acl))
204 switch (type) { 175 set_cached_acl(inode, type, acl);
205 case ACL_TYPE_ACCESS:
206 ext4_iset_acl(inode, &ei->i_acl, acl);
207 break;
208 176
209 case ACL_TYPE_DEFAULT:
210 ext4_iset_acl(inode, &ei->i_default_acl, acl);
211 break;
212 }
213 }
214 return acl; 177 return acl;
215} 178}
216 179
@@ -223,7 +186,6 @@ static int
223ext4_set_acl(handle_t *handle, struct inode *inode, int type, 186ext4_set_acl(handle_t *handle, struct inode *inode, int type,
224 struct posix_acl *acl) 187 struct posix_acl *acl)
225{ 188{
226 struct ext4_inode_info *ei = EXT4_I(inode);
227 int name_index; 189 int name_index;
228 void *value = NULL; 190 void *value = NULL;
229 size_t size = 0; 191 size_t size = 0;
@@ -268,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
268 value, size, 0); 230 value, size, 0);
269 231
270 kfree(value); 232 kfree(value);
271 if (!error) { 233 if (!error)
272 switch (type) { 234 set_cached_acl(inode, type, acl);
273 case ACL_TYPE_ACCESS:
274 ext4_iset_acl(inode, &ei->i_acl, acl);
275 break;
276 235
277 case ACL_TYPE_DEFAULT:
278 ext4_iset_acl(inode, &ei->i_default_acl, acl);
279 break;
280 }
281 }
282 return error; 236 return error;
283} 237}
284 238
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext4_permission(struct inode *, int); 57extern int ext4_permission(struct inode *, int);
62extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7d5edc38c9..0ddf7e55abe1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -352,6 +352,7 @@ struct ext4_new_group_data {
352 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ 352 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
353 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 353 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
354#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 354#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
355#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
355 356
356/* 357/*
357 * ioctl commands in 32 bit emulation 358 * ioctl commands in 32 bit emulation
@@ -447,6 +448,15 @@ struct ext4_inode {
447 __le32 i_version_hi; /* high 32 bits for 64-bit version */ 448 __le32 i_version_hi; /* high 32 bits for 64-bit version */
448}; 449};
449 450
451struct move_extent {
452 __u32 reserved; /* should be zero */
453 __u32 donor_fd; /* donor file descriptor */
454 __u64 orig_start; /* logical start offset in block for orig */
455 __u64 donor_start; /* logical start offset in block for donor */
456 __u64 len; /* block length to be moved */
457 __u64 moved_len; /* moved block length */
458};
459#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
450 460
451#define EXT4_EPOCH_BITS 2 461#define EXT4_EPOCH_BITS 2
452#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) 462#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
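
The struct move_extent and EXT4_IOC_MOVE_EXT definitions added above are the user-visible interface of the new online-defragmentation code (implemented by move_extent.c and declared below as ext4_move_extents()). A rough userspace sketch of driving the ioctl; the struct layout and ioctl number simply mirror the kernel definitions above, while the function name, error handling, and the choice of donor_start are illustrative only:

#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>

struct move_extent {
	uint32_t reserved;	/* should be zero */
	uint32_t donor_fd;	/* donor file descriptor */
	uint64_t orig_start;	/* logical start block in the original file */
	uint64_t donor_start;	/* logical start block in the donor file */
	uint64_t len;		/* number of blocks to move */
	uint64_t moved_len;	/* filled in by the kernel: blocks actually moved */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

/* Swap the blocks backing @len blocks of orig_fd with those of donor_fd. */
static int defrag_range(int orig_fd, int donor_fd, uint64_t start, uint64_t len)
{
	struct move_extent me = {
		.donor_fd    = donor_fd,
		.orig_start  = start,
		.donor_start = start,
		.len         = len,
	};

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return -1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
	return 0;
}
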
@@ -585,10 +595,6 @@ struct ext4_inode_info {
585 */ 595 */
586 struct rw_semaphore xattr_sem; 596 struct rw_semaphore xattr_sem;
587#endif 597#endif
588#ifdef CONFIG_EXT4_FS_POSIX_ACL
589 struct posix_acl *i_acl;
590 struct posix_acl *i_default_acl;
591#endif
592 598
593 struct list_head i_orphan; /* unlinked but open inodes */ 599 struct list_head i_orphan; /* unlinked but open inodes */
594 600
@@ -674,7 +680,6 @@ struct ext4_inode_info {
674#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ 680#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
675#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 681#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
676#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ 682#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
677#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
678#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ 683#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
679#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ 684#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
680#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ 685#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
@@ -696,17 +701,10 @@ struct ext4_inode_info {
696#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 701#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
697#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 702#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
698 703
699/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
700#ifndef _LINUX_EXT2_FS_H
701#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 704#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
702#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 705#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
703#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ 706#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
704 EXT4_MOUNT_##opt) 707 EXT4_MOUNT_##opt)
705#else
706#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
707#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
708#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
709#endif
710 708
711#define ext4_set_bit ext2_set_bit 709#define ext4_set_bit ext2_set_bit
712#define ext4_set_bit_atomic ext2_set_bit_atomic 710#define ext4_set_bit_atomic ext2_set_bit_atomic
@@ -824,6 +822,13 @@ struct ext4_super_block {
824}; 822};
825 823
826#ifdef __KERNEL__ 824#ifdef __KERNEL__
825
826/*
827 * run-time mount flags
828 */
829#define EXT4_MF_MNTDIR_SAMPLED 0x0001
830#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
831
827/* 832/*
828 * fourth extended-fs super-block data in memory 833 * fourth extended-fs super-block data in memory
829 */ 834 */
@@ -842,7 +847,8 @@ struct ext4_sb_info {
842 struct buffer_head * s_sbh; /* Buffer containing the super block */ 847 struct buffer_head * s_sbh; /* Buffer containing the super block */
843 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 848 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
844 struct buffer_head **s_group_desc; 849 struct buffer_head **s_group_desc;
845 unsigned long s_mount_opt; 850 unsigned int s_mount_opt;
851 unsigned int s_mount_flags;
846 ext4_fsblk_t s_sb_block; 852 ext4_fsblk_t s_sb_block;
847 uid_t s_resuid; 853 uid_t s_resuid;
848 gid_t s_resgid; 854 gid_t s_resgid;
@@ -853,6 +859,7 @@ struct ext4_sb_info {
853 int s_inode_size; 859 int s_inode_size;
854 int s_first_ino; 860 int s_first_ino;
855 unsigned int s_inode_readahead_blks; 861 unsigned int s_inode_readahead_blks;
862 unsigned int s_inode_goal;
856 spinlock_t s_next_gen_lock; 863 spinlock_t s_next_gen_lock;
857 u32 s_next_generation; 864 u32 s_next_generation;
858 u32 s_hash_seed[4]; 865 u32 s_hash_seed[4];
@@ -1305,7 +1312,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1305 dx_hash_info *hinfo); 1312 dx_hash_info *hinfo);
1306 1313
1307/* ialloc.c */ 1314/* ialloc.c */
1308extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); 1315extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
1316 const struct qstr *qstr, __u32 goal);
1309extern void ext4_free_inode(handle_t *, struct inode *); 1317extern void ext4_free_inode(handle_t *, struct inode *);
1310extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 1318extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1311extern unsigned long ext4_count_free_inodes(struct super_block *); 1319extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1329,7 +1337,7 @@ extern void ext4_discard_preallocations(struct inode *);
1329extern int __init init_ext4_mballoc(void); 1337extern int __init init_ext4_mballoc(void);
1330extern void exit_ext4_mballoc(void); 1338extern void exit_ext4_mballoc(void);
1331extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1339extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1332 unsigned long, unsigned long, int, unsigned long *); 1340 ext4_fsblk_t, unsigned long, int, unsigned long *);
1333extern int ext4_mb_add_groupinfo(struct super_block *sb, 1341extern int ext4_mb_add_groupinfo(struct super_block *sb,
1334 ext4_group_t i, struct ext4_group_desc *desc); 1342 ext4_group_t i, struct ext4_group_desc *desc);
1335extern void ext4_mb_update_group_info(struct ext4_group_info *grp, 1343extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
@@ -1647,6 +1655,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
1647 struct buffer_head *bh, int flags); 1655 struct buffer_head *bh, int flags);
1648extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1656extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1649 __u64 start, __u64 len); 1657 __u64 start, __u64 len);
1658/* move_extent.c */
1659extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
1660 __u64 start_orig, __u64 start_donor,
1661 __u64 len, __u64 *moved_len);
1662
1650 1663
1651/* 1664/*
1652 * Add new method to test whether block and inode bitmaps are properly 1665
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index f0c3ec85bd48..20a84105a10b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
221} 221}
222 222
223extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 223extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
224extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
224extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 225extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
225extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 226extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
226extern int ext4_extent_tree_init(handle_t *, struct inode *); 227extern int ext4_extent_tree_init(handle_t *, struct inode *);
227extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 228extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
228 int num, 229 int num,
229 struct ext4_ext_path *path); 230 struct ext4_ext_path *path);
231extern int ext4_can_extents_be_merged(struct inode *inode,
232 struct ext4_extent *ex1,
233 struct ext4_extent *ex2);
230extern int ext4_ext_try_to_merge(struct inode *inode, 234extern int ext4_ext_try_to_merge(struct inode *inode,
231 struct ext4_ext_path *path, 235 struct ext4_ext_path *path,
232 struct ext4_extent *); 236 struct ext4_extent *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2593f748c3a4..50322a09bd01 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -49,7 +49,7 @@
49 * ext_pblock: 49 * ext_pblock:
50 * combine low and high parts of physical block number into ext4_fsblk_t 50 * combine low and high parts of physical block number into ext4_fsblk_t
51 */ 51 */
52static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) 52ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
53{ 53{
54 ext4_fsblk_t block; 54 ext4_fsblk_t block;
55 55
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
1417 return err; 1417 return err;
1418} 1418}
1419 1419
1420static int 1420int
1421ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1421ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1422 struct ext4_extent *ex2) 1422 struct ext4_extent *ex2)
1423{ 1423{
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 588af8c77246..3f1873fef1c6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/mount.h>
25#include <linux/path.h>
24#include "ext4.h" 26#include "ext4.h"
25#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
26#include "xattr.h" 28#include "xattr.h"
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
145 return 0; 147 return 0;
146} 148}
147 149
150static int ext4_file_open(struct inode * inode, struct file * filp)
151{
152 struct super_block *sb = inode->i_sb;
153 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
154 struct vfsmount *mnt = filp->f_path.mnt;
155 struct path path;
156 char buf[64], *cp;
157
158 if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
159 !(sb->s_flags & MS_RDONLY))) {
160 sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
161 /*
162 * Sample where the filesystem has been mounted and
163 * store it in the superblock for sysadmin convenience
164 * when trying to sort through large numbers of block
165 * devices or filesystem images.
166 */
167 memset(buf, 0, sizeof(buf));
168 path.mnt = mnt->mnt_parent;
169 path.dentry = mnt->mnt_mountpoint;
170 path_get(&path);
171 cp = d_path(&path, buf, sizeof(buf));
172 path_put(&path);
173 if (!IS_ERR(cp)) {
174 memcpy(sbi->s_es->s_last_mounted, cp,
175 sizeof(sbi->s_es->s_last_mounted));
176 sb->s_dirt = 1;
177 }
178 }
179 return generic_file_open(inode, filp);
180}
181
148const struct file_operations ext4_file_operations = { 182const struct file_operations ext4_file_operations = {
149 .llseek = generic_file_llseek, 183 .llseek = generic_file_llseek,
150 .read = do_sync_read, 184 .read = do_sync_read,
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
156 .compat_ioctl = ext4_compat_ioctl, 190 .compat_ioctl = ext4_compat_ioctl,
157#endif 191#endif
158 .mmap = ext4_file_mmap, 192 .mmap = ext4_file_mmap,
159 .open = generic_file_open, 193 .open = ext4_file_open,
160 .release = ext4_release_file, 194 .release = ext4_release_file,
161 .fsync = ext4_sync_file, 195 .fsync = ext4_sync_file,
162 .splice_read = generic_file_splice_read, 196 .splice_read = generic_file_splice_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840b..83cf6415f599 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/marker.h> 31
32#include "ext4.h" 32#include "ext4.h"
33#include "ext4_jbd2.h" 33#include "ext4_jbd2.h"
34 34
35#include <trace/events/ext4.h>
36
35/* 37/*
36 * akpm: A new design for ext4_sync_file(). 38 * akpm: A new design for ext4_sync_file().
37 * 39 *
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52 54
53 J_ASSERT(ext4_journal_current_handle() == NULL); 55 J_ASSERT(ext4_journal_current_handle() == NULL);
54 56
55 trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", 57 trace_ext4_sync_file(file, dentry, datasync);
56 inode->i_sb->s_id, datasync, inode->i_ino,
57 dentry->d_parent->d_inode->i_ino);
58 58
59 /* 59 /*
60 * data=writeback: 60 * data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bce..2f645732e3b7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
23#include <linux/bitops.h> 23#include <linux/bitops.h>
24#include <linux/blkdev.h> 24#include <linux/blkdev.h>
25#include <asm/byteorder.h> 25#include <asm/byteorder.h>
26
26#include "ext4.h" 27#include "ext4.h"
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
29#include "acl.h" 30#include "acl.h"
30 31
32#include <trace/events/ext4.h>
33
31/* 34/*
32 * ialloc.c contains the inodes allocation and deallocation routines 35 * ialloc.c contains the inodes allocation and deallocation routines
33 */ 36 */
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
208 211
209 ino = inode->i_ino; 212 ino = inode->i_ino;
210 ext4_debug("freeing inode %lu\n", ino); 213 ext4_debug("freeing inode %lu\n", ino);
211 trace_mark(ext4_free_inode, 214 trace_ext4_free_inode(inode);
212 "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
213 sb->s_id, inode->i_ino, inode->i_mode,
214 (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
215 (unsigned long long) inode->i_blocks);
216 215
217 /* 216 /*
218 * Note: we must free any quota before locking the superblock, 217 * Note: we must free any quota before locking the superblock,
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
471 */ 470 */
472 471
473static int find_group_orlov(struct super_block *sb, struct inode *parent, 472static int find_group_orlov(struct super_block *sb, struct inode *parent,
474 ext4_group_t *group, int mode) 473 ext4_group_t *group, int mode,
474 const struct qstr *qstr)
475{ 475{
476 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 476 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
477 struct ext4_sb_info *sbi = EXT4_SB(sb); 477 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
486 struct ext4_group_desc *desc; 486 struct ext4_group_desc *desc;
487 struct orlov_stats stats; 487 struct orlov_stats stats;
488 int flex_size = ext4_flex_bg_size(sbi); 488 int flex_size = ext4_flex_bg_size(sbi);
489 struct dx_hash_info hinfo;
489 490
490 ngroups = real_ngroups; 491 ngroups = real_ngroups;
491 if (flex_size > 1) { 492 if (flex_size > 1) {
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
507 int best_ndir = inodes_per_group; 508 int best_ndir = inodes_per_group;
508 int ret = -1; 509 int ret = -1;
509 510
510 get_random_bytes(&grp, sizeof(grp)); 511 if (qstr) {
512 hinfo.hash_version = DX_HASH_HALF_MD4;
513 hinfo.seed = sbi->s_hash_seed;
514 ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
515 grp = hinfo.hash;
516 } else
517 get_random_bytes(&grp, sizeof(grp));
511 parent_group = (unsigned)grp % ngroups; 518 parent_group = (unsigned)grp % ngroups;
512 for (i = 0; i < ngroups; i++) { 519 for (i = 0; i < ngroups; i++) {
513 g = (parent_group + i) % ngroups; 520 g = (parent_group + i) % ngroups;
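
With the name of the entry being created (the qstr) now passed down, the Orlov allocator seeds its starting group from the half-MD4 directory hash instead of get_random_bytes(), so creating the same top-level directory name on the same filesystem repeatedly starts the search in the same place; the intent appears to be more reproducible on-disk layouts, for example after a backup and restore. The selection step in isolation, as a sketch (ext4fs_dirhash, dx_hash_info and EXT4_SB are the in-tree helpers used in the hunk; the wrapper itself is hypothetical and assumes "ext4.h" plus <linux/random.h>):

    static ext4_group_t pick_starting_group(struct super_block *sb,
                                            const struct qstr *qstr,
                                            ext4_group_t ngroups)
    {
            struct dx_hash_info hinfo;
            u32 grp;

            if (qstr) {
                    /* deterministic: hash the new name with the per-fs seed */
                    hinfo.hash_version = DX_HASH_HALF_MD4;
                    hinfo.seed = EXT4_SB(sb)->s_hash_seed;
                    ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
                    grp = hinfo.hash;
            } else {
                    /* no name available: keep the old randomised start */
                    get_random_bytes(&grp, sizeof(grp));
            }
            return grp % ngroups;
    }
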
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
650 *group = parent_group + flex_size; 657 *group = parent_group + flex_size;
651 if (*group > ngroups) 658 if (*group > ngroups)
652 *group = 0; 659 *group = 0;
653 return find_group_orlov(sb, parent, group, mode); 660 return find_group_orlov(sb, parent, group, mode, 0);
654 } 661 }
655 662
656 /* 663 /*
@@ -791,7 +798,8 @@ err_ret:
791 * For other inodes, search forward from the parent directory's block 798 * For other inodes, search forward from the parent directory's block
792 * group to find a free inode. 799 * group to find a free inode.
793 */ 800 */
794struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) 801struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
802 const struct qstr *qstr, __u32 goal)
795{ 803{
796 struct super_block *sb; 804 struct super_block *sb;
797 struct buffer_head *inode_bitmap_bh = NULL; 805 struct buffer_head *inode_bitmap_bh = NULL;
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
815 823
816 sb = dir->i_sb; 824 sb = dir->i_sb;
817 ngroups = ext4_get_groups_count(sb); 825 ngroups = ext4_get_groups_count(sb);
818 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, 826 trace_ext4_request_inode(dir, mode);
819 dir->i_ino, mode);
820 inode = new_inode(sb); 827 inode = new_inode(sb);
821 if (!inode) 828 if (!inode)
822 return ERR_PTR(-ENOMEM); 829 return ERR_PTR(-ENOMEM);
823 ei = EXT4_I(inode); 830 ei = EXT4_I(inode);
824 sbi = EXT4_SB(sb); 831 sbi = EXT4_SB(sb);
825 832
833 if (!goal)
834 goal = sbi->s_inode_goal;
835
836 if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
839 ret2 = 0;
840 goto got_group;
841 }
842
826 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 843 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
827 ret2 = find_group_flex(sb, dir, &group); 844 ret2 = find_group_flex(sb, dir, &group);
828 if (ret2 == -1) { 845 if (ret2 == -1) {
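
ext4_new_inode() also gains a goal argument (with sbi->s_inode_goal as a filesystem-wide fallback): when a specific inode number is requested, the block group and the bit inside that group's inode bitmap follow directly from the inode numbering, and the allocator jumps straight to got_group instead of running a heuristic search. The arithmetic, with a worked example:

    /* Inode numbers are 1-based and laid out one group after another:
     *
     *     group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
     *     ino   = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
     *
     * e.g. with 8192 inodes per group, goal = 16390 gives group 2 and
     * bitmap bit 5, and indeed 2 * 8192 + 5 + 1 = 16390.
     */
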
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
841 if (test_opt(sb, OLDALLOC)) 858 if (test_opt(sb, OLDALLOC))
842 ret2 = find_group_dir(sb, dir, &group); 859 ret2 = find_group_dir(sb, dir, &group);
843 else 860 else
844 ret2 = find_group_orlov(sb, dir, &group, mode); 861 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
845 } else 862 } else
846 ret2 = find_group_other(sb, dir, &group, mode); 863 ret2 = find_group_other(sb, dir, &group, mode);
847 864
@@ -851,7 +868,7 @@ got_group:
851 if (ret2 == -1) 868 if (ret2 == -1)
852 goto out; 869 goto out;
853 870
854 for (i = 0; i < ngroups; i++) { 871 for (i = 0; i < ngroups; i++, ino = 0) {
855 err = -EIO; 872 err = -EIO;
856 873
857 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 874 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -863,8 +880,6 @@ got_group:
863 if (!inode_bitmap_bh) 880 if (!inode_bitmap_bh)
864 goto fail; 881 goto fail;
865 882
866 ino = 0;
867
868repeat_in_this_group: 883repeat_in_this_group:
869 ino = ext4_find_next_zero_bit((unsigned long *) 884 ino = ext4_find_next_zero_bit((unsigned long *)
870 inode_bitmap_bh->b_data, 885 inode_bitmap_bh->b_data,
@@ -1047,8 +1062,7 @@ got:
1047 } 1062 }
1048 1063
1049 ext4_debug("allocating inode %lu\n", inode->i_ino); 1064 ext4_debug("allocating inode %lu\n", inode->i_ino);
1050 trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", 1065 trace_ext4_allocate_inode(inode, dir, mode);
1051 sb->s_id, inode->i_ino, dir->i_ino, mode);
1052 goto really_out; 1066 goto really_out;
1053fail: 1067fail:
1054 ext4_std_error(sb, err); 1068 ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22f..60a26f3a6f8b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
37#include <linux/namei.h> 37#include <linux/namei.h>
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h> 39#include <linux/bio.h>
40
40#include "ext4_jbd2.h" 41#include "ext4_jbd2.h"
41#include "xattr.h" 42#include "xattr.h"
42#include "acl.h" 43#include "acl.h"
43#include "ext4_extents.h" 44#include "ext4_extents.h"
44 45
46#include <trace/events/ext4.h>
47
45#define MPAGE_DA_EXTENT_TAIL 0x01 48#define MPAGE_DA_EXTENT_TAIL 0x01
46 49
47static inline int ext4_begin_ordered_truncate(struct inode *inode, 50static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
78 * If the handle isn't valid we're not journaling so there's nothing to do. 81 * If the handle isn't valid we're not journaling so there's nothing to do.
79 */ 82 */
80int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 83int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
81 struct buffer_head *bh, ext4_fsblk_t blocknr) 84 struct buffer_head *bh, ext4_fsblk_t blocknr)
82{ 85{
83 int err; 86 int err;
84 87
@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
90 BUFFER_TRACE(bh, "enter"); 93 BUFFER_TRACE(bh, "enter");
91 94
92 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " 95 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
93 "data mode %lx\n", 96 "data mode %x\n",
94 bh, is_metadata, inode->i_mode, 97 bh, is_metadata, inode->i_mode,
95 test_opt(inode->i_sb, DATA_FLAGS)); 98 test_opt(inode->i_sb, DATA_FLAGS));
96 99
@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
329 */ 332 */
330 333
331static int ext4_block_to_path(struct inode *inode, 334static int ext4_block_to_path(struct inode *inode,
332 ext4_lblk_t i_block, 335 ext4_lblk_t i_block,
333 ext4_lblk_t offsets[4], int *boundary) 336 ext4_lblk_t offsets[4], int *boundary)
334{ 337{
335 int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); 338 int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
336 int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); 339 int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode,
362 final = ptrs; 365 final = ptrs;
363 } else { 366 } else {
364 ext4_warning(inode->i_sb, "ext4_block_to_path", 367 ext4_warning(inode->i_sb, "ext4_block_to_path",
365 "block %lu > max in inode %lu", 368 "block %lu > max in inode %lu",
366 i_block + direct_blocks + 369 i_block + direct_blocks +
367 indirect_blocks + double_blocks, inode->i_ino); 370 indirect_blocks + double_blocks, inode->i_ino);
368 } 371 }
369 if (boundary) 372 if (boundary)
370 *boundary = final - 1 - (i_block & (ptrs - 1)); 373 *boundary = final - 1 - (i_block & (ptrs - 1));
@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
379 382
380 while (bref < p+max) { 383 while (bref < p+max) {
381 blk = le32_to_cpu(*bref++); 384 blk = le32_to_cpu(*bref++);
382 if (blk && 385 if (blk &&
383 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 386 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
384 blk, 1))) { 387 blk, 1))) {
385 ext4_error(inode->i_sb, function, 388 ext4_error(inode->i_sb, function,
386 "invalid block reference %u " 389 "invalid block reference %u "
387 "in inode #%lu", blk, inode->i_ino); 390 "in inode #%lu", blk, inode->i_ino);
388 return -EIO; 391 return -EIO;
389 } 392 }
390 } 393 }
391 return 0; 394 return 0;
392} 395}
393 396
394 397
395#define ext4_check_indirect_blockref(inode, bh) \ 398#define ext4_check_indirect_blockref(inode, bh) \
396 __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ 399 __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
397 EXT4_ADDR_PER_BLOCK((inode)->i_sb)) 400 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
398 401
399#define ext4_check_inode_blockref(inode) \ 402#define ext4_check_inode_blockref(inode) \
400 __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ 403 __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
401 EXT4_NDIR_BLOCKS) 404 EXT4_NDIR_BLOCKS)
402 405
403/** 406/**
@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
447 bh = sb_getblk(sb, le32_to_cpu(p->key)); 450 bh = sb_getblk(sb, le32_to_cpu(p->key));
448 if (unlikely(!bh)) 451 if (unlikely(!bh))
449 goto failure; 452 goto failure;
450 453
451 if (!bh_uptodate_or_lock(bh)) { 454 if (!bh_uptodate_or_lock(bh)) {
452 if (bh_submit_read(bh) < 0) { 455 if (bh_submit_read(bh) < 0) {
453 put_bh(bh); 456 put_bh(bh);
@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
459 goto failure; 462 goto failure;
460 } 463 }
461 } 464 }
462 465
463 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); 466 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
464 /* Reader: end */ 467 /* Reader: end */
465 if (!p->key) 468 if (!p->key)
@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
552 * returns it. 555 * returns it.
553 */ 556 */
554static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 557static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
555 Indirect *partial) 558 Indirect *partial)
556{ 559{
557 /* 560 /*
558 * XXX need to get goal block from mballoc's data structures 561 * XXX need to get goal block from mballoc's data structures
@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
574 * direct and indirect blocks. 577 * direct and indirect blocks.
575 */ 578 */
576static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, 579static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
577 int blocks_to_boundary) 580 int blocks_to_boundary)
578{ 581{
579 unsigned int count = 0; 582 unsigned int count = 0;
580 583
@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
610 * direct blocks 613 * direct blocks
611 */ 614 */
612static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, 615static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
613 ext4_lblk_t iblock, ext4_fsblk_t goal, 616 ext4_lblk_t iblock, ext4_fsblk_t goal,
614 int indirect_blks, int blks, 617 int indirect_blks, int blks,
615 ext4_fsblk_t new_blocks[4], int *err) 618 ext4_fsblk_t new_blocks[4], int *err)
616{ 619{
617 struct ext4_allocation_request ar; 620 struct ext4_allocation_request ar;
618 int target, i; 621 int target, i;
@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
683 } 686 }
684 if (!*err) { 687 if (!*err) {
685 if (target == blks) { 688 if (target == blks) {
686 /* 689 /*
687 * save the new block number 690 * save the new block number
688 * for the first direct block 691 * for the first direct block
689 */ 692 */
690 new_blocks[index] = current_block; 693 new_blocks[index] = current_block;
691 } 694 }
692 blk_allocated += ar.len; 695 blk_allocated += ar.len;
@@ -728,9 +731,9 @@ failed_out:
728 * as described above and return 0. 731 * as described above and return 0.
729 */ 732 */
730static int ext4_alloc_branch(handle_t *handle, struct inode *inode, 733static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
731 ext4_lblk_t iblock, int indirect_blks, 734 ext4_lblk_t iblock, int indirect_blks,
732 int *blks, ext4_fsblk_t goal, 735 int *blks, ext4_fsblk_t goal,
733 ext4_lblk_t *offsets, Indirect *branch) 736 ext4_lblk_t *offsets, Indirect *branch)
734{ 737{
735 int blocksize = inode->i_sb->s_blocksize; 738 int blocksize = inode->i_sb->s_blocksize;
736 int i, n = 0; 739 int i, n = 0;
@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
777 * the chain to point to the new allocated 780 * the chain to point to the new allocated
778 * data blocks numbers 781 * data blocks numbers
779 */ 782 */
780 for (i=1; i < num; i++) 783 for (i = 1; i < num; i++)
781 *(branch[n].p + i) = cpu_to_le32(++current_block); 784 *(branch[n].p + i) = cpu_to_le32(++current_block);
782 } 785 }
783 BUFFER_TRACE(bh, "marking uptodate"); 786 BUFFER_TRACE(bh, "marking uptodate");
@@ -820,7 +823,8 @@ failed:
820 * chain to new block and return 0. 823 * chain to new block and return 0.
821 */ 824 */
822static int ext4_splice_branch(handle_t *handle, struct inode *inode, 825static int ext4_splice_branch(handle_t *handle, struct inode *inode,
823 ext4_lblk_t block, Indirect *where, int num, int blks) 826 ext4_lblk_t block, Indirect *where, int num,
827 int blks)
824{ 828{
825 int i; 829 int i;
826 int err = 0; 830 int err = 0;
@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
852 } 856 }
853 857
854 /* We are done with atomic stuff, now do the rest of housekeeping */ 858 /* We are done with atomic stuff, now do the rest of housekeeping */
855
856 inode->i_ctime = ext4_current_time(inode);
857 ext4_mark_inode_dirty(handle, inode);
858
859 /* had we spliced it onto indirect block? */ 859 /* had we spliced it onto indirect block? */
860 if (where->bh) { 860 if (where->bh) {
861 /* 861 /*
@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
874 } else { 874 } else {
875 /* 875 /*
876 * OK, we spliced it into the inode itself on a direct block. 876 * OK, we spliced it into the inode itself on a direct block.
877 * Inode was dirtied above.
878 */ 877 */
878 ext4_mark_inode_dirty(handle, inode);
879 jbd_debug(5, "splicing direct\n"); 879 jbd_debug(5, "splicing direct\n");
880 } 880 }
881 return err; 881 return err;
@@ -921,9 +921,9 @@ err_out:
921 * blocks. 921 * blocks.
922 */ 922 */
923static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, 923static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
924 ext4_lblk_t iblock, unsigned int maxblocks, 924 ext4_lblk_t iblock, unsigned int maxblocks,
925 struct buffer_head *bh_result, 925 struct buffer_head *bh_result,
926 int flags) 926 int flags)
927{ 927{
928 int err = -EIO; 928 int err = -EIO;
929 ext4_lblk_t offsets[4]; 929 ext4_lblk_t offsets[4];
@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
939 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 939 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
940 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); 940 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
941 depth = ext4_block_to_path(inode, iblock, offsets, 941 depth = ext4_block_to_path(inode, iblock, offsets,
942 &blocks_to_boundary); 942 &blocks_to_boundary);
943 943
944 if (depth == 0) 944 if (depth == 0)
945 goto out; 945 goto out;
@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
987 * Block out ext4_truncate while we alter the tree 987 * Block out ext4_truncate while we alter the tree
988 */ 988 */
989 err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, 989 err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
990 &count, goal, 990 &count, goal,
991 offsets + (partial - chain), partial); 991 offsets + (partial - chain), partial);
992 992
993 /* 993 /*
994 * The ext4_splice_branch call will free and forget any buffers 994 * The ext4_splice_branch call will free and forget any buffers
@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
999 */ 999 */
1000 if (!err) 1000 if (!err)
1001 err = ext4_splice_branch(handle, inode, iblock, 1001 err = ext4_splice_branch(handle, inode, iblock,
1002 partial, indirect_blks, count); 1002 partial, indirect_blks, count);
1003 else 1003 else
1004 goto cleanup; 1004 goto cleanup;
1005 1005
1006 set_buffer_new(bh_result); 1006 set_buffer_new(bh_result);
@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1172 up_read((&EXT4_I(inode)->i_data_sem)); 1172 up_read((&EXT4_I(inode)->i_data_sem));
1173 1173
1174 if (retval > 0 && buffer_mapped(bh)) { 1174 if (retval > 0 && buffer_mapped(bh)) {
1175 int ret = check_block_validity(inode, block, 1175 int ret = check_block_validity(inode, block,
1176 bh->b_blocknr, retval); 1176 bh->b_blocknr, retval);
1177 if (ret != 0) 1177 if (ret != 0)
1178 return ret; 1178 return ret;
@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1254 1254
1255 up_write((&EXT4_I(inode)->i_data_sem)); 1255 up_write((&EXT4_I(inode)->i_data_sem));
1256 if (retval > 0 && buffer_mapped(bh)) { 1256 if (retval > 0 && buffer_mapped(bh)) {
1257 int ret = check_block_validity(inode, block, 1257 int ret = check_block_validity(inode, block,
1258 bh->b_blocknr, retval); 1258 bh->b_blocknr, retval);
1259 if (ret != 0) 1259 if (ret != 0)
1260 return ret; 1260 return ret;
@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
1405 1405
1406 for (bh = head, block_start = 0; 1406 for (bh = head, block_start = 0;
1407 ret == 0 && (bh != head || !block_start); 1407 ret == 0 && (bh != head || !block_start);
1408 block_start = block_end, bh = next) 1408 block_start = block_end, bh = next) {
1409 {
1410 next = bh->b_this_page; 1409 next = bh->b_this_page;
1411 block_end = block_start + blocksize; 1410 block_end = block_start + blocksize;
1412 if (block_end <= from || block_start >= to) { 1411 if (block_end <= from || block_start >= to) {
@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle,
1447 * write. 1446 * write.
1448 */ 1447 */
1449static int do_journal_get_write_access(handle_t *handle, 1448static int do_journal_get_write_access(handle_t *handle,
1450 struct buffer_head *bh) 1449 struct buffer_head *bh)
1451{ 1450{
1452 if (!buffer_mapped(bh) || buffer_freed(bh)) 1451 if (!buffer_mapped(bh) || buffer_freed(bh))
1453 return 0; 1452 return 0;
@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle,
1455} 1454}
1456 1455
1457static int ext4_write_begin(struct file *file, struct address_space *mapping, 1456static int ext4_write_begin(struct file *file, struct address_space *mapping,
1458 loff_t pos, unsigned len, unsigned flags, 1457 loff_t pos, unsigned len, unsigned flags,
1459 struct page **pagep, void **fsdata) 1458 struct page **pagep, void **fsdata)
1460{ 1459{
1461 struct inode *inode = mapping->host; 1460 struct inode *inode = mapping->host;
1462 int ret, needed_blocks; 1461 int ret, needed_blocks;
1463 handle_t *handle; 1462 handle_t *handle;
1464 int retries = 0; 1463 int retries = 0;
1465 struct page *page; 1464 struct page *page;
1466 pgoff_t index; 1465 pgoff_t index;
1467 unsigned from, to; 1466 unsigned from, to;
1468 1467
1469 trace_mark(ext4_write_begin, 1468 trace_ext4_write_begin(inode, pos, len, flags);
1470 "dev %s ino %lu pos %llu len %u flags %u",
1471 inode->i_sb->s_id, inode->i_ino,
1472 (unsigned long long) pos, len, flags);
1473 /* 1469 /*
1474 * Reserve one block more for addition to orphan list in case 1470 * Reserve one block more for addition to orphan list in case
1475 * we allocate blocks but write fails for some reason 1471 * we allocate blocks but write fails for some reason
1476 */ 1472 */
1477 needed_blocks = ext4_writepage_trans_blocks(inode) + 1; 1473 needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
1478 index = pos >> PAGE_CACHE_SHIFT; 1474 index = pos >> PAGE_CACHE_SHIFT;
1479 from = pos & (PAGE_CACHE_SIZE - 1); 1475 from = pos & (PAGE_CACHE_SIZE - 1);
1480 to = from + len; 1476 to = from + len;
1481 1477
@@ -1523,7 +1519,7 @@ retry:
1523 ext4_journal_stop(handle); 1519 ext4_journal_stop(handle);
1524 if (pos + len > inode->i_size) { 1520 if (pos + len > inode->i_size) {
1525 vmtruncate(inode, inode->i_size); 1521 vmtruncate(inode, inode->i_size);
1526 /* 1522 /*
1527 * If vmtruncate failed early the inode might 1523 * If vmtruncate failed early the inode might
1528 * still be on the orphan list; we need to 1524 * still be on the orphan list; we need to
1529 * make sure the inode is removed from the 1525 * make sure the inode is removed from the
@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1550} 1546}
1551 1547
1552static int ext4_generic_write_end(struct file *file, 1548static int ext4_generic_write_end(struct file *file,
1553 struct address_space *mapping, 1549 struct address_space *mapping,
1554 loff_t pos, unsigned len, unsigned copied, 1550 loff_t pos, unsigned len, unsigned copied,
1555 struct page *page, void *fsdata) 1551 struct page *page, void *fsdata)
1556{ 1552{
1557 int i_size_changed = 0; 1553 int i_size_changed = 0;
1558 struct inode *inode = mapping->host; 1554 struct inode *inode = mapping->host;
@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file,
1603 * buffers are managed internally. 1599 * buffers are managed internally.
1604 */ 1600 */
1605static int ext4_ordered_write_end(struct file *file, 1601static int ext4_ordered_write_end(struct file *file,
1606 struct address_space *mapping, 1602 struct address_space *mapping,
1607 loff_t pos, unsigned len, unsigned copied, 1603 loff_t pos, unsigned len, unsigned copied,
1608 struct page *page, void *fsdata) 1604 struct page *page, void *fsdata)
1609{ 1605{
1610 handle_t *handle = ext4_journal_current_handle(); 1606 handle_t *handle = ext4_journal_current_handle();
1611 struct inode *inode = mapping->host; 1607 struct inode *inode = mapping->host;
1612 int ret = 0, ret2; 1608 int ret = 0, ret2;
1613 1609
1614 trace_mark(ext4_ordered_write_end, 1610 trace_ext4_ordered_write_end(inode, pos, len, copied);
1615 "dev %s ino %lu pos %llu len %u copied %u",
1616 inode->i_sb->s_id, inode->i_ino,
1617 (unsigned long long) pos, len, copied);
1618 ret = ext4_jbd2_file_inode(handle, inode); 1611 ret = ext4_jbd2_file_inode(handle, inode);
1619 1612
1620 if (ret == 0) { 1613 if (ret == 0) {
@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file,
1636 1629
1637 if (pos + len > inode->i_size) { 1630 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size); 1631 vmtruncate(inode, inode->i_size);
1639 /* 1632 /*
1640 * If vmtruncate failed early the inode might still be 1633 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode 1634 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case. 1635 * is removed from the orphan list in that case.
@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file,
1650} 1643}
1651 1644
1652static int ext4_writeback_write_end(struct file *file, 1645static int ext4_writeback_write_end(struct file *file,
1653 struct address_space *mapping, 1646 struct address_space *mapping,
1654 loff_t pos, unsigned len, unsigned copied, 1647 loff_t pos, unsigned len, unsigned copied,
1655 struct page *page, void *fsdata) 1648 struct page *page, void *fsdata)
1656{ 1649{
1657 handle_t *handle = ext4_journal_current_handle(); 1650 handle_t *handle = ext4_journal_current_handle();
1658 struct inode *inode = mapping->host; 1651 struct inode *inode = mapping->host;
1659 int ret = 0, ret2; 1652 int ret = 0, ret2;
1660 1653
1661 trace_mark(ext4_writeback_write_end, 1654 trace_ext4_writeback_write_end(inode, pos, len, copied);
1662 "dev %s ino %lu pos %llu len %u copied %u",
1663 inode->i_sb->s_id, inode->i_ino,
1664 (unsigned long long) pos, len, copied);
1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1655 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1666 page, fsdata); 1656 page, fsdata);
1667 copied = ret2; 1657 copied = ret2;
@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file,
1681 1671
1682 if (pos + len > inode->i_size) { 1672 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size); 1673 vmtruncate(inode, inode->i_size);
1684 /* 1674 /*
1685 * If vmtruncate failed early the inode might still be 1675 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode 1676 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case. 1677 * is removed from the orphan list in that case.
@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file,
1694} 1684}
1695 1685
1696static int ext4_journalled_write_end(struct file *file, 1686static int ext4_journalled_write_end(struct file *file,
1697 struct address_space *mapping, 1687 struct address_space *mapping,
1698 loff_t pos, unsigned len, unsigned copied, 1688 loff_t pos, unsigned len, unsigned copied,
1699 struct page *page, void *fsdata) 1689 struct page *page, void *fsdata)
1700{ 1690{
1701 handle_t *handle = ext4_journal_current_handle(); 1691 handle_t *handle = ext4_journal_current_handle();
1702 struct inode *inode = mapping->host; 1692 struct inode *inode = mapping->host;
@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
1705 unsigned from, to; 1695 unsigned from, to;
1706 loff_t new_i_size; 1696 loff_t new_i_size;
1707 1697
1708 trace_mark(ext4_journalled_write_end, 1698 trace_ext4_journalled_write_end(inode, pos, len, copied);
1709 "dev %s ino %lu pos %llu len %u copied %u",
1710 inode->i_sb->s_id, inode->i_ino,
1711 (unsigned long long) pos, len, copied);
1712 from = pos & (PAGE_CACHE_SIZE - 1); 1699 from = pos & (PAGE_CACHE_SIZE - 1);
1713 to = from + len; 1700 to = from + len;
1714 1701
@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file,
1747 ret = ret2; 1734 ret = ret2;
1748 if (pos + len > inode->i_size) { 1735 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size); 1736 vmtruncate(inode, inode->i_size);
1750 /* 1737 /*
1751 * If vmtruncate failed early the inode might still be 1738 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode 1739 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case. 1740 * is removed from the orphan list in that case.
@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1854} 1841}
1855 1842
1856static void ext4_da_page_release_reservation(struct page *page, 1843static void ext4_da_page_release_reservation(struct page *page,
1857 unsigned long offset) 1844 unsigned long offset)
1858{ 1845{
1859 int to_release = 0; 1846 int to_release = 0;
1860 struct buffer_head *head, *bh; 1847 struct buffer_head *head, *bh;
@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
2554 struct buffer_head *page_bufs; 2541 struct buffer_head *page_bufs;
2555 struct inode *inode = page->mapping->host; 2542 struct inode *inode = page->mapping->host;
2556 2543
2557 trace_mark(ext4_da_writepage, 2544 trace_ext4_da_writepage(inode, page);
2558 "dev %s ino %lu page_index %lu",
2559 inode->i_sb->s_id, inode->i_ino, page->index);
2560 size = i_size_read(inode); 2545 size = i_size_read(inode);
2561 if (page->index == size >> PAGE_CACHE_SHIFT) 2546 if (page->index == size >> PAGE_CACHE_SHIFT)
2562 len = size & ~PAGE_CACHE_MASK; 2547 len = size & ~PAGE_CACHE_MASK;
@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2667 int needed_blocks, ret = 0, nr_to_writebump = 0; 2652 int needed_blocks, ret = 0, nr_to_writebump = 0;
2668 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2653 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2669 2654
2670 trace_mark(ext4_da_writepages, 2655 trace_ext4_da_writepages(inode, wbc);
2671 "dev %s ino %lu nr_t_write %ld "
2672 "pages_skipped %ld range_start %llu "
2673 "range_end %llu nonblocking %d "
2674 "for_kupdate %d for_reclaim %d "
2675 "for_writepages %d range_cyclic %d",
2676 inode->i_sb->s_id, inode->i_ino,
2677 wbc->nr_to_write, wbc->pages_skipped,
2678 (unsigned long long) wbc->range_start,
2679 (unsigned long long) wbc->range_end,
2680 wbc->nonblocking, wbc->for_kupdate,
2681 wbc->for_reclaim, wbc->for_writepages,
2682 wbc->range_cyclic);
2683 2656
2684 /* 2657 /*
2685 * No pages to write? This is mainly a kludge to avoid starting 2658 * No pages to write? This is mainly a kludge to avoid starting
@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2693 * If the filesystem has aborted, it is read-only, so return 2666 * If the filesystem has aborted, it is read-only, so return
2694 * right away instead of dumping stack traces later on that 2667 * right away instead of dumping stack traces later on that
2695 * will obscure the real source of the problem. We test 2668 * will obscure the real source of the problem. We test
2696 * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because 2669 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
2697 * the latter could be true if the filesystem is mounted 2670 * the latter could be true if the filesystem is mounted
2698 * read-only, and in that case, ext4_da_writepages should 2671 * read-only, and in that case, ext4_da_writepages should
2699 * *never* be called, so if that ever happens, we would want 2672 * *never* be called, so if that ever happens, we would want
2700 * the stack trace. 2673 * the stack trace.
2701 */ 2674 */
2702 if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) 2675 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2703 return -EROFS; 2676 return -EROFS;
2704 2677
2705 /* 2678 /*
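
The abort indication moves from the mount-option word (EXT4_MOUNT_ABORT in s_mount_opt) to a separate run-time state word, s_mount_flags, so "the filesystem has aborted" is no longer entangled with what the user asked for at mount time; the comment above is updated to match. The flag is presumably raised by the error/abort path elsewhere in the series, roughly along these lines (the exact call site is not part of this hunk and may differ):

    /* Sketch of the producer side, in super.c's abort/error handling: */
    sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
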
@@ -2845,14 +2818,7 @@ out_writepages:
2845 if (!no_nrwrite_index_update) 2818 if (!no_nrwrite_index_update)
2846 wbc->no_nrwrite_index_update = 0; 2819 wbc->no_nrwrite_index_update = 0;
2847 wbc->nr_to_write -= nr_to_writebump; 2820 wbc->nr_to_write -= nr_to_writebump;
2848 trace_mark(ext4_da_writepage_result, 2821 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2849 "dev %s ino %lu ret %d pages_written %d "
2850 "pages_skipped %ld congestion %d "
2851 "more_io %d no_nrwrite_index_update %d",
2852 inode->i_sb->s_id, inode->i_ino, ret,
2853 pages_written, wbc->pages_skipped,
2854 wbc->encountered_congestion, wbc->more_io,
2855 wbc->no_nrwrite_index_update);
2856 return ret; 2822 return ret;
2857} 2823}
2858 2824
@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2884} 2850}
2885 2851
2886static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2852static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2887 loff_t pos, unsigned len, unsigned flags, 2853 loff_t pos, unsigned len, unsigned flags,
2888 struct page **pagep, void **fsdata) 2854 struct page **pagep, void **fsdata)
2889{ 2855{
2890 int ret, retries = 0; 2856 int ret, retries = 0;
2891 struct page *page; 2857 struct page *page;
@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2904 len, flags, pagep, fsdata); 2870 len, flags, pagep, fsdata);
2905 } 2871 }
2906 *fsdata = (void *)0; 2872 *fsdata = (void *)0;
2907 2873 trace_ext4_da_write_begin(inode, pos, len, flags);
2908 trace_mark(ext4_da_write_begin,
2909 "dev %s ino %lu pos %llu len %u flags %u",
2910 inode->i_sb->s_id, inode->i_ino,
2911 (unsigned long long) pos, len, flags);
2912retry: 2874retry:
2913 /* 2875 /*
2914 * With delayed allocation, we don't log the i_disksize update 2876 * With delayed allocation, we don't log the i_disksize update
@@ -2959,7 +2921,7 @@ out:
2959 * when write to the end of file but not require block allocation 2921 * when write to the end of file but not require block allocation
2960 */ 2922 */
2961static int ext4_da_should_update_i_disksize(struct page *page, 2923static int ext4_da_should_update_i_disksize(struct page *page,
2962 unsigned long offset) 2924 unsigned long offset)
2963{ 2925{
2964 struct buffer_head *bh; 2926 struct buffer_head *bh;
2965 struct inode *inode = page->mapping->host; 2927 struct inode *inode = page->mapping->host;
@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2978} 2940}
2979 2941
2980static int ext4_da_write_end(struct file *file, 2942static int ext4_da_write_end(struct file *file,
2981 struct address_space *mapping, 2943 struct address_space *mapping,
2982 loff_t pos, unsigned len, unsigned copied, 2944 loff_t pos, unsigned len, unsigned copied,
2983 struct page *page, void *fsdata) 2945 struct page *page, void *fsdata)
2984{ 2946{
2985 struct inode *inode = mapping->host; 2947 struct inode *inode = mapping->host;
2986 int ret = 0, ret2; 2948 int ret = 0, ret2;
@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
3001 } 2963 }
3002 } 2964 }
3003 2965
3004 trace_mark(ext4_da_write_end, 2966 trace_ext4_da_write_end(inode, pos, len, copied);
3005 "dev %s ino %lu pos %llu len %u copied %u",
3006 inode->i_sb->s_id, inode->i_ino,
3007 (unsigned long long) pos, len, copied);
3008 start = pos & (PAGE_CACHE_SIZE - 1); 2967 start = pos & (PAGE_CACHE_SIZE - 1);
3009 end = start + copied - 1; 2968 end = start + copied - 1;
3010 2969
@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3081 * not strictly speaking necessary (and for users of 3040 * not strictly speaking necessary (and for users of
3082 * laptop_mode, not even desirable). However, to do otherwise 3041 * laptop_mode, not even desirable). However, to do otherwise
3083 * would require replicating code paths in: 3042 * would require replicating code paths in:
3084 * 3043 *
3085 * ext4_da_writepages() -> 3044 * ext4_da_writepages() ->
3086 * write_cache_pages() ---> (via passed in callback function) 3045 * write_cache_pages() ---> (via passed in callback function)
3087 * __mpage_da_writepage() --> 3046 * __mpage_da_writepage() -->
@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3101 * write out the pages, but rather only collect contiguous 3060 * write out the pages, but rather only collect contiguous
3102 * logical block extents, call the multi-block allocator, and 3061 * logical block extents, call the multi-block allocator, and
3103 * then update the buffer heads with the block allocations. 3062 * then update the buffer heads with the block allocations.
3104 * 3063 *
3105 * For now, though, we'll cheat by calling filemap_flush(), 3064 * For now, though, we'll cheat by calling filemap_flush(),
3106 * which will map the blocks, and start the I/O, but not 3065 * which will map the blocks, and start the I/O, but not
3107 * actually wait for the I/O to complete. 3066 * actually wait for the I/O to complete.
@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
3237 * 3196 *
3238 */ 3197 */
3239static int __ext4_normal_writepage(struct page *page, 3198static int __ext4_normal_writepage(struct page *page,
3240 struct writeback_control *wbc) 3199 struct writeback_control *wbc)
3241{ 3200{
3242 struct inode *inode = page->mapping->host; 3201 struct inode *inode = page->mapping->host;
3243 3202
@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page,
3249} 3208}
3250 3209
3251static int ext4_normal_writepage(struct page *page, 3210static int ext4_normal_writepage(struct page *page,
3252 struct writeback_control *wbc) 3211 struct writeback_control *wbc)
3253{ 3212{
3254 struct inode *inode = page->mapping->host; 3213 struct inode *inode = page->mapping->host;
3255 loff_t size = i_size_read(inode); 3214 loff_t size = i_size_read(inode);
3256 loff_t len; 3215 loff_t len;
3257 3216
3258 trace_mark(ext4_normal_writepage, 3217 trace_ext4_normal_writepage(inode, page);
3259 "dev %s ino %lu page_index %lu",
3260 inode->i_sb->s_id, inode->i_ino, page->index);
3261 J_ASSERT(PageLocked(page)); 3218 J_ASSERT(PageLocked(page));
3262 if (page->index == size >> PAGE_CACHE_SHIFT) 3219 if (page->index == size >> PAGE_CACHE_SHIFT)
3263 len = size & ~PAGE_CACHE_MASK; 3220 len = size & ~PAGE_CACHE_MASK;
@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page,
3287} 3244}
3288 3245
3289static int __ext4_journalled_writepage(struct page *page, 3246static int __ext4_journalled_writepage(struct page *page,
3290 struct writeback_control *wbc) 3247 struct writeback_control *wbc)
3291{ 3248{
3292 struct address_space *mapping = page->mapping; 3249 struct address_space *mapping = page->mapping;
3293 struct inode *inode = mapping->host; 3250 struct inode *inode = mapping->host;
@@ -3337,15 +3294,13 @@ out:
3337} 3294}
3338 3295
3339static int ext4_journalled_writepage(struct page *page, 3296static int ext4_journalled_writepage(struct page *page,
3340 struct writeback_control *wbc) 3297 struct writeback_control *wbc)
3341{ 3298{
3342 struct inode *inode = page->mapping->host; 3299 struct inode *inode = page->mapping->host;
3343 loff_t size = i_size_read(inode); 3300 loff_t size = i_size_read(inode);
3344 loff_t len; 3301 loff_t len;
3345 3302
3346 trace_mark(ext4_journalled_writepage, 3303 trace_ext4_journalled_writepage(inode, page);
3347 "dev %s ino %lu page_index %lu",
3348 inode->i_sb->s_id, inode->i_ino, page->index);
3349 J_ASSERT(PageLocked(page)); 3304 J_ASSERT(PageLocked(page));
3350 if (page->index == size >> PAGE_CACHE_SHIFT) 3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3351 len = size & ~PAGE_CACHE_MASK; 3306 len = size & ~PAGE_CACHE_MASK;
@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3442 * VFS code falls back into buffered path in that case so we are safe. 3397 * VFS code falls back into buffered path in that case so we are safe.
3443 */ 3398 */
3444static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3399static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3445 const struct iovec *iov, loff_t offset, 3400 const struct iovec *iov, loff_t offset,
3446 unsigned long nr_segs) 3401 unsigned long nr_segs)
3447{ 3402{
3448 struct file *file = iocb->ki_filp; 3403 struct file *file = iocb->ki_filp;
3449 struct inode *inode = file->f_mapping->host; 3404 struct inode *inode = file->f_mapping->host;
@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
3763 * (no partially truncated stuff there). */ 3718 * (no partially truncated stuff there). */
3764 3719
3765static Indirect *ext4_find_shared(struct inode *inode, int depth, 3720static Indirect *ext4_find_shared(struct inode *inode, int depth,
3766 ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) 3721 ext4_lblk_t offsets[4], Indirect chain[4],
3722 __le32 *top)
3767{ 3723{
3768 Indirect *partial, *p; 3724 Indirect *partial, *p;
3769 int k, err; 3725 int k, err;
@@ -3819,8 +3775,10 @@ no_top:
3819 * than `count' because there can be holes in there. 3775 * than `count' because there can be holes in there.
3820 */ 3776 */
3821static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 3777static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3822 struct buffer_head *bh, ext4_fsblk_t block_to_free, 3778 struct buffer_head *bh,
3823 unsigned long count, __le32 *first, __le32 *last) 3779 ext4_fsblk_t block_to_free,
3780 unsigned long count, __le32 *first,
3781 __le32 *last)
3824{ 3782{
3825 __le32 *p; 3783 __le32 *p;
3826 if (try_to_extend_transaction(handle, inode)) { 3784 if (try_to_extend_transaction(handle, inode)) {
@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3837 } 3795 }
3838 3796
3839 /* 3797 /*
3840 * Any buffers which are on the journal will be in memory. We find 3798 * Any buffers which are on the journal will be in memory. We
3841 * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() 3799 * find them on the hash table so jbd2_journal_revoke() will
3842 * on them. We've already detached each block from the file, so 3800 * run jbd2_journal_forget() on them. We've already detached
3843 * bforget() in jbd2_journal_forget() should be safe. 3801 * each block from the file, so bforget() in
3802 * jbd2_journal_forget() should be safe.
3844 * 3803 *
3845 * AKPM: turn on bforget in jbd2_journal_forget()!!! 3804 * AKPM: turn on bforget in jbd2_journal_forget()!!!
3846 */ 3805 */
@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
4212 (__le32*)partial->bh->b_data+addr_per_block, 4171 (__le32*)partial->bh->b_data+addr_per_block,
4213 (chain+n-1) - partial); 4172 (chain+n-1) - partial);
4214 BUFFER_TRACE(partial->bh, "call brelse"); 4173 BUFFER_TRACE(partial->bh, "call brelse");
4215 brelse (partial->bh); 4174 brelse(partial->bh);
4216 partial--; 4175 partial--;
4217 } 4176 }
4218do_indirects: 4177do_indirects:
@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
4453 if (flags & S_DIRSYNC) 4412 if (flags & S_DIRSYNC)
4454 ei->i_flags |= EXT4_DIRSYNC_FL; 4413 ei->i_flags |= EXT4_DIRSYNC_FL;
4455} 4414}
4415
4456static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4416static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
4457 struct ext4_inode_info *ei) 4417 struct ext4_inode_info *ei)
4458{ 4418{
4459 blkcnt_t i_blocks ; 4419 blkcnt_t i_blocks ;
4460 struct inode *inode = &(ei->vfs_inode); 4420 struct inode *inode = &(ei->vfs_inode);
@@ -4493,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4493 return inode; 4453 return inode;
4494 4454
4495 ei = EXT4_I(inode); 4455 ei = EXT4_I(inode);
4496#ifdef CONFIG_EXT4_FS_POSIX_ACL
4497 ei->i_acl = EXT4_ACL_NOT_CACHED;
4498 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
4499#endif
4500 4456
4501 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4457 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4502 if (ret < 0) 4458 if (ret < 0)
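
The EXT4_ACL_NOT_CACHED initialisation disappears from ext4_iget() because ACL caching is being moved from per-filesystem inode_info fields to the generic struct inode in this same series (hence the matching acl.c/acl.h churn across btrfs, ext2, ext3, jffs2 and jfs in the diffstat). Roughly, the VFS now does the equivalent of the following when it sets up an inode, so individual filesystems no longer have to; the field and constant names below are from the common POSIX ACL caching support, not from ext4, and the real code in fs/inode.c may differ in detail.

    #ifdef CONFIG_FS_POSIX_ACL
            inode->i_acl = ACL_NOT_CACHED;
            inode->i_default_acl = ACL_NOT_CACHED;
    #endif
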
@@ -4569,7 +4525,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4569 EXT4_GOOD_OLD_INODE_SIZE + 4525 EXT4_GOOD_OLD_INODE_SIZE +
4570 ei->i_extra_isize; 4526 ei->i_extra_isize;
4571 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 4527 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4572 ei->i_state |= EXT4_STATE_XATTR; 4528 ei->i_state |= EXT4_STATE_XATTR;
4573 } 4529 }
4574 } else 4530 } else
4575 ei->i_extra_isize = 0; 4531 ei->i_extra_isize = 0;
@@ -4588,7 +4544,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4588 4544
4589 ret = 0; 4545 ret = 0;
4590 if (ei->i_file_acl && 4546 if (ei->i_file_acl &&
4591 ((ei->i_file_acl < 4547 ((ei->i_file_acl <
4592 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 4548 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4593 EXT4_SB(sb)->s_gdb_count)) || 4549 EXT4_SB(sb)->s_gdb_count)) ||
4594 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { 4550 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
@@ -4603,15 +4559,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4603 !ext4_inode_is_fast_symlink(inode))) 4559 !ext4_inode_is_fast_symlink(inode)))
4604 /* Validate extent which is part of inode */ 4560 /* Validate extent which is part of inode */
4605 ret = ext4_ext_check_inode(inode); 4561 ret = ext4_ext_check_inode(inode);
4606 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4562 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4607 (S_ISLNK(inode->i_mode) && 4563 (S_ISLNK(inode->i_mode) &&
4608 !ext4_inode_is_fast_symlink(inode))) { 4564 !ext4_inode_is_fast_symlink(inode))) {
4609 /* Validate block references which are part of inode */ 4565 /* Validate block references which are part of inode */
4610 ret = ext4_check_inode_blockref(inode); 4566 ret = ext4_check_inode_blockref(inode);
4611 } 4567 }
4612 if (ret) { 4568 if (ret) {
4613 brelse(bh); 4569 brelse(bh);
4614 goto bad_inode; 4570 goto bad_inode;
4615 } 4571 }
4616 4572
4617 if (S_ISREG(inode->i_mode)) { 4573 if (S_ISREG(inode->i_mode)) {
@@ -4642,7 +4598,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4642 } else { 4598 } else {
4643 brelse(bh); 4599 brelse(bh);
4644 ret = -EIO; 4600 ret = -EIO;
4645 ext4_error(inode->i_sb, __func__, 4601 ext4_error(inode->i_sb, __func__,
4646 "bogus i_mode (%o) for inode=%lu", 4602 "bogus i_mode (%o) for inode=%lu",
4647 inode->i_mode, inode->i_ino); 4603 inode->i_mode, inode->i_ino);
4648 goto bad_inode; 4604 goto bad_inode;
@@ -4795,8 +4751,9 @@ static int ext4_do_update_inode(handle_t *handle,
4795 cpu_to_le32(new_encode_dev(inode->i_rdev)); 4751 cpu_to_le32(new_encode_dev(inode->i_rdev));
4796 raw_inode->i_block[2] = 0; 4752 raw_inode->i_block[2] = 0;
4797 } 4753 }
4798 } else for (block = 0; block < EXT4_N_BLOCKS; block++) 4754 } else
4799 raw_inode->i_block[block] = ei->i_data[block]; 4755 for (block = 0; block < EXT4_N_BLOCKS; block++)
4756 raw_inode->i_block[block] = ei->i_data[block];
4800 4757
4801 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4758 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4802 if (ei->i_extra_isize) { 4759 if (ei->i_extra_isize) {
@@ -5150,7 +5107,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
5150 * Give this, we know that the caller already has write access to iloc->bh. 5107 * Give this, we know that the caller already has write access to iloc->bh.
5151 */ 5108 */
5152int ext4_mark_iloc_dirty(handle_t *handle, 5109int ext4_mark_iloc_dirty(handle_t *handle,
5153 struct inode *inode, struct ext4_iloc *iloc) 5110 struct inode *inode, struct ext4_iloc *iloc)
5154{ 5111{
5155 int err = 0; 5112 int err = 0;
5156 5113
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 91e75f7a9e73..bb415408fdb6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/file.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4.h" 20#include "ext4.h"
@@ -213,6 +214,41 @@ setversion_out:
213 214
214 return err; 215 return err;
215 } 216 }
217
218 case EXT4_IOC_MOVE_EXT: {
219 struct move_extent me;
220 struct file *donor_filp;
221 int err;
222
223 if (copy_from_user(&me,
224 (struct move_extent __user *)arg, sizeof(me)))
225 return -EFAULT;
226
227 donor_filp = fget(me.donor_fd);
228 if (!donor_filp)
229 return -EBADF;
230
231 if (!capable(CAP_DAC_OVERRIDE)) {
232 if ((current->real_cred->fsuid != inode->i_uid) ||
233 !(inode->i_mode & S_IRUSR) ||
234 !(donor_filp->f_dentry->d_inode->i_mode &
235 S_IRUSR)) {
236 fput(donor_filp);
237 return -EACCES;
238 }
239 }
240
241 err = ext4_move_extents(filp, donor_filp, me.orig_start,
242 me.donor_start, me.len, &me.moved_len);
243 fput(donor_filp);
244
245 if (!err)
246 if (copy_to_user((struct move_extent *)arg,
247 &me, sizeof(me)))
248 return -EFAULT;
249 return err;
250 }
251
216 case EXT4_IOC_GROUP_ADD: { 252 case EXT4_IOC_GROUP_ADD: {
217 struct ext4_new_group_data input; 253 struct ext4_new_group_data input;
218 struct super_block *sb = inode->i_sb; 254 struct super_block *sb = inode->i_sb;
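
EXT4_IOC_MOVE_EXT is the kernel half of online defragmentation: it exchanges up to me.len blocks of the target file, starting at me.orig_start, with blocks of a donor file and reports the amount actually moved back through me.moved_len. Unless the caller has CAP_DAC_OVERRIDE, the caller's fsuid must own the target file and both files must have their owner-read bit set, as the permission check above shows. A hedged sketch of how a userspace defragmenter might drive it; struct move_extent and the ioctl number are assumed to come from an exported ext4 header, and only the fields visible in the hunk are used.

    #include <stdio.h>
    #include <sys/ioctl.h>
    /* assumes struct move_extent and EXT4_IOC_MOVE_EXT from an ext4 header */

    static int defrag_range(int orig_fd, int donor_fd,
                            unsigned long long start, unsigned long long len)
    {
            struct move_extent me = {
                    .donor_fd    = donor_fd,
                    .orig_start  = start,
                    .donor_start = start,
                    .len         = len,
            };

            if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
                    perror("EXT4_IOC_MOVE_EXT");
                    return -1;
            }
            printf("moved %llu of %llu blocks\n",
                   (unsigned long long) me.moved_len,
                   (unsigned long long) len);
            return 0;
    }
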
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0e..519a0a686d94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
22 */ 22 */
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <trace/events/ext4.h>
26
25/* 27/*
26 * MUSTDO: 28 * MUSTDO:
27 * - test ext4_ext_search_left() and ext4_ext_search_right() 29 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
340 ext4_group_t group); 342 ext4_group_t group);
341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 343static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
342 344
343
344
345static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 345static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
346{ 346{
347#if BITS_PER_LONG == 64 347#if BITS_PER_LONG == 64
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2860 + entry->start_blk 2860 + entry->start_blk
2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2862 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", 2862 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
2863 sb->s_id, (unsigned long long) discard_block, 2863 entry->count);
2864 entry->count);
2865 sb_issue_discard(sb, discard_block, entry->count); 2864 sb_issue_discard(sb, discard_block, entry->count);
2866 2865
2867 kmem_cache_free(ext4_free_ext_cachep, entry); 2866 kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3629 3628
3630 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3629 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3631 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3630 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3632 trace_mark(ext4_mb_new_inode_pa, 3631 trace_ext4_mb_new_inode_pa(ac, pa);
3633 "dev %s ino %lu pstart %llu len %u lstart %u",
3634 sb->s_id, ac->ac_inode->i_ino,
3635 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3636 3632
3637 ext4_mb_use_inode_pa(ac, pa); 3633 ext4_mb_use_inode_pa(ac, pa);
3638 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3634 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3691 pa->pa_type = MB_GROUP_PA; 3687 pa->pa_type = MB_GROUP_PA;
3692 3688
3693 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3689 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3694 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3690 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3695 trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", 3691 trace_ext4_mb_new_group_pa(ac, pa);
3696 sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3697 3692
3698 ext4_mb_use_group_pa(ac, pa); 3693 ext4_mb_use_group_pa(ac, pa);
3699 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3694 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3783 ext4_mb_store_history(ac); 3778 ext4_mb_store_history(ac);
3784 } 3779 }
3785 3780
3786 trace_mark(ext4_mb_release_inode_pa, 3781 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
3787 "dev %s ino %lu block %llu count %u", 3782 next - bit);
3788 sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
3789 next - bit);
3790 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3783 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3791 bit = next + 1; 3784 bit = next + 1;
3792 } 3785 }
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3820 if (ac) 3813 if (ac)
3821 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3814 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3822 3815
3823 trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", 3816 trace_ext4_mb_release_group_pa(ac, pa);
3824 sb->s_id, pa->pa_pstart, pa->pa_len);
3825 BUG_ON(pa->pa_deleted == 0); 3817 BUG_ON(pa->pa_deleted == 0);
3826 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3818 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3827 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3819 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3889 3881
3890 INIT_LIST_HEAD(&list); 3882 INIT_LIST_HEAD(&list);
3891 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3883 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3884 if (ac)
3885 ac->ac_sb = sb;
3892repeat: 3886repeat:
3893 ext4_lock_group(sb, group); 3887 ext4_lock_group(sb, group);
3894 list_for_each_entry_safe(pa, tmp, 3888 list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
3987 } 3981 }
3988 3982
3989 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 3983 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
3990 trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, 3984 trace_ext4_discard_preallocations(inode);
3991 inode->i_ino);
3992 3985
3993 INIT_LIST_HEAD(&list); 3986 INIT_LIST_HEAD(&list);
3994 3987
3995 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3988 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3989 if (ac) {
3990 ac->ac_sb = sb;
3991 ac->ac_inode = inode;
3992 }
3996repeat: 3993repeat:
3997 /* first, collect all pa's in the inode */ 3994 /* first, collect all pa's in the inode */
3998 spin_lock(&ei->i_prealloc_lock); 3995 spin_lock(&ei->i_prealloc_lock);
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4276 4273
4277 INIT_LIST_HEAD(&discard_list); 4274 INIT_LIST_HEAD(&discard_list);
4278 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4275 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4276 if (ac)
4277 ac->ac_sb = sb;
4279 4278
4280 spin_lock(&lg->lg_prealloc_lock); 4279 spin_lock(&lg->lg_prealloc_lock);
4281 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4280 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4445 int ret; 4444 int ret;
4446 int freed = 0; 4445 int freed = 0;
4447 4446
4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4447 trace_ext4_mb_discard_preallocations(sb, needed);
4449 sb->s_id, needed);
4450 for (i = 0; i < ngroups && needed > 0; i++) { 4448 for (i = 0; i < ngroups && needed > 0; i++) {
4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4449 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4452 freed += ret; 4450 freed += ret;
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4475 sb = ar->inode->i_sb; 4473 sb = ar->inode->i_sb;
4476 sbi = EXT4_SB(sb); 4474 sbi = EXT4_SB(sb);
4477 4475
4478 trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " 4476 trace_ext4_request_blocks(ar);
4479 "lblk %llu goal %llu lleft %llu lright %llu "
4480 "pleft %llu pright %llu ",
4481 sb->s_id, ar->flags, ar->len,
4482 ar->inode ? ar->inode->i_ino : 0,
4483 (unsigned long long) ar->logical,
4484 (unsigned long long) ar->goal,
4485 (unsigned long long) ar->lleft,
4486 (unsigned long long) ar->lright,
4487 (unsigned long long) ar->pleft,
4488 (unsigned long long) ar->pright);
4489 4477
4490 /* 4478 /*
4491 * For delayed allocation, we could skip the ENOSPC and 4479 * For delayed allocation, we could skip the ENOSPC and
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4521 } 4509 }
4522 4510
4523 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4524 if (!ac) { 4512 if (ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4525 ar->len = 0; 4516 ar->len = 0;
4526 *errp = -ENOMEM; 4517 *errp = -ENOMEM;
4527 goto out1; 4518 goto out1;
@@ -4594,18 +4585,7 @@ out3:
4594 reserv_blks); 4585 reserv_blks);
4595 } 4586 }
4596 4587
4597 trace_mark(ext4_allocate_blocks, 4588 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4598 "dev %s block %llu flags %u len %u ino %lu "
4599 "logical %llu goal %llu lleft %llu lright %llu "
4600 "pleft %llu pright %llu ",
4601 sb->s_id, (unsigned long long) block,
4602 ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
4603 (unsigned long long) ar->logical,
4604 (unsigned long long) ar->goal,
4605 (unsigned long long) ar->lleft,
4606 (unsigned long long) ar->lright,
4607 (unsigned long long) ar->pleft,
4608 (unsigned long long) ar->pright);
4609 4589
4610 return block; 4590 return block;
4611} 4591}
@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4709 * Main entry point into mballoc to free blocks 4689 * Main entry point into mballoc to free blocks
4710 */ 4690 */
4711void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4691void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4712 unsigned long block, unsigned long count, 4692 ext4_fsblk_t block, unsigned long count,
4713 int metadata, unsigned long *freed) 4693 int metadata, unsigned long *freed)
4714{ 4694{
4715 struct buffer_head *bitmap_bh = NULL; 4695 struct buffer_head *bitmap_bh = NULL;
@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4735 block + count > ext4_blocks_count(es)) { 4715 block + count > ext4_blocks_count(es)) {
4736 ext4_error(sb, __func__, 4716 ext4_error(sb, __func__,
4737 "Freeing blocks not in datazone - " 4717 "Freeing blocks not in datazone - "
4738 "block = %lu, count = %lu", block, count); 4718 "block = %llu, count = %lu", block, count);
4739 goto error_return; 4719 goto error_return;
4740 } 4720 }
4741 4721
4742 ext4_debug("freeing block %lu\n", block); 4722 ext4_debug("freeing block %llu\n", block);
4743 trace_mark(ext4_free_blocks, 4723 trace_ext4_free_blocks(inode, block, count, metadata);
4744 "dev %s block %llu count %lu metadata %d ino %lu",
4745 sb->s_id, (unsigned long long) block, count, metadata,
4746 inode ? inode->i_ino : 0);
4747 4724
4748 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4725 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4749 if (ac) { 4726 if (ac) {
@@ -4784,7 +4761,7 @@ do_more:
4784 4761
4785 ext4_error(sb, __func__, 4762 ext4_error(sb, __func__,
4786 "Freeing blocks in system zone - " 4763 "Freeing blocks in system zone - "
4787 "Block = %lu, count = %lu", block, count); 4764 "Block = %llu, count = %lu", block, count);
4788 /* err = 0. ext4_std_error should be a no op */ 4765 /* err = 0. ext4_std_error should be a no op */
4789 goto error_return; 4766 goto error_return;
4790 } 4767 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215b..c96bb19f58f9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/marker.h>
23#include <linux/mutex.h> 22#include <linux/mutex.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "ext4.h" 24#include "ext4.h"
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index fe64d9f79852..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
458 struct inode *tmp_inode = NULL; 458 struct inode *tmp_inode = NULL;
459 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
460 unsigned long max_entries; 460 unsigned long max_entries;
461 __u32 goal;
461 462
462 /* 463 /*
463 * If the filesystem does not support extents, or the inode 464 * If the filesystem does not support extents, or the inode
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
483 retval = PTR_ERR(handle); 484 retval = PTR_ERR(handle);
484 return retval; 485 return retval;
485 } 486 }
486 tmp_inode = ext4_new_inode(handle, 487 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
487 inode->i_sb->s_root->d_inode, 488 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
488 S_IFREG); 489 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
490 S_IFREG, 0, goal);
489 if (IS_ERR(tmp_inode)) { 491 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 492 retval = -ENOMEM;
491 ext4_journal_stop(handle); 493 ext4_journal_stop(handle);
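The migrate.c change above passes an allocation goal to ext4_new_inode() so the temporary inode lands in the same block group as the inode being migrated: the goal is the first inode number of that group. A minimal standalone sketch of the same arithmetic, using hypothetical numbers (8192 inodes per group, inode 10000) and not part of the patch itself:

#include <stdio.h>

int main(void)
{
	unsigned long ino = 10000;             /* hypothetical inode number */
	unsigned long inodes_per_group = 8192; /* hypothetical fs geometry */
	unsigned long goal;

	/* first inode of the block group that contains 'ino' */
	goal = (((ino - 1) / inodes_per_group) * inodes_per_group) + 1;
	printf("goal inode = %lu\n", goal);    /* prints 8193 */
	return 0;
}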
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
new file mode 100644
index 000000000000..bbf2dd9404dc
--- /dev/null
+++ b/fs/ext4/move_extent.c
@@ -0,0 +1,1320 @@
1/*
2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
3 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
4 * Akira Fujita <a-fujita@rs.jp.nec.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/fs.h>
17#include <linux/quotaops.h>
18#include "ext4_jbd2.h"
19#include "ext4_extents.h"
20#include "ext4.h"
21
22#define get_ext_path(path, inode, block, ret) \
23 do { \
24 path = ext4_ext_find_extent(inode, block, path); \
25 if (IS_ERR(path)) { \
26 ret = PTR_ERR(path); \
27 path = NULL; \
28 } \
29 } while (0)
30
31/**
32 * copy_extent_status - Copy the extent's initialization status
33 *
34 * @src: an extent whose initialization status is copied
35 * @dest: an extent whose status is set
36 */
37static void
38copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
39{
40 if (ext4_ext_is_uninitialized(src))
41 ext4_ext_mark_uninitialized(dest);
42 else
43 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
44}
45
46/**
47 * mext_next_extent - Search for the next extent and set it to "extent"
48 *
49 * @inode: inode which is searched
50 * @path: this will obtain data for the next extent
51 * @extent: pointer to the next extent we have just gotten
52 *
53 * Search the next extent in the array of ext4_ext_path structure (@path)
54 * and set it to ext4_extent structure (@extent). In addition, the member of
55 * @path (->p_ext) also points to the next extent. Return 0 on success, 1 if
56 * ext4_ext_path structure refers to the last extent, or a negative error
57 * value on failure.
58 */
59static int
60mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
61 struct ext4_extent **extent)
62{
63 int ppos, leaf_ppos = path->p_depth;
64
65 ppos = leaf_ppos;
66 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
67 /* leaf block */
68 *extent = ++path[ppos].p_ext;
69 return 0;
70 }
71
72 while (--ppos >= 0) {
73 if (EXT_LAST_INDEX(path[ppos].p_hdr) >
74 path[ppos].p_idx) {
75 int cur_ppos = ppos;
76
77 /* index block */
78 path[ppos].p_idx++;
79 path[ppos].p_block = idx_pblock(path[ppos].p_idx);
80 if (path[ppos+1].p_bh)
81 brelse(path[ppos+1].p_bh);
82 path[ppos+1].p_bh =
83 sb_bread(inode->i_sb, path[ppos].p_block);
84 if (!path[ppos+1].p_bh)
85 return -EIO;
86 path[ppos+1].p_hdr =
87 ext_block_hdr(path[ppos+1].p_bh);
88
89 /* Halfway index block */
90 while (++cur_ppos < leaf_ppos) {
91 path[cur_ppos].p_idx =
92 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
93 path[cur_ppos].p_block =
94 idx_pblock(path[cur_ppos].p_idx);
95 if (path[cur_ppos+1].p_bh)
96 brelse(path[cur_ppos+1].p_bh);
97 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
98 path[cur_ppos].p_block);
99 if (!path[cur_ppos+1].p_bh)
100 return -EIO;
101 path[cur_ppos+1].p_hdr =
102 ext_block_hdr(path[cur_ppos+1].p_bh);
103 }
104
105 /* leaf block */
106 path[leaf_ppos].p_ext = *extent =
107 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
108 return 0;
109 }
110 }
111 /* We found the last extent */
112 return 1;
113}
114
115/**
116 * mext_double_down_read - Acquire two inodes' read semaphore
117 *
118 * @orig_inode: original inode structure
119 * @donor_inode: donor inode structure
120 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
121 */
122static void
123mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
124{
125 struct inode *first = orig_inode, *second = donor_inode;
126
127 BUG_ON(orig_inode == NULL || donor_inode == NULL);
128
129 /*
130 * Use the inode number to provide the stable locking order instead
131 * of its address, because the C language doesn't guarantee you can
132 * compare pointers that don't come from the same array.
133 */
134 if (donor_inode->i_ino < orig_inode->i_ino) {
135 first = donor_inode;
136 second = orig_inode;
137 }
138
139 down_read(&EXT4_I(first)->i_data_sem);
140 down_read(&EXT4_I(second)->i_data_sem);
141}
142
143/**
144 * mext_double_down_write - Acquire two inodes' write semaphore
145 *
146 * @orig_inode: original inode structure
147 * @donor_inode: donor inode structure
148 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
149 */
150static void
151mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
152{
153 struct inode *first = orig_inode, *second = donor_inode;
154
155 BUG_ON(orig_inode == NULL || donor_inode == NULL);
156
157 /*
158 * Use the inode number to provide the stable locking order instead
159 * of its address, because the C language doesn't guarantee you can
160 * compare pointers that don't come from the same array.
161 */
162 if (donor_inode->i_ino < orig_inode->i_ino) {
163 first = donor_inode;
164 second = orig_inode;
165 }
166
167 down_write(&EXT4_I(first)->i_data_sem);
168 down_write(&EXT4_I(second)->i_data_sem);
169}
170
171/**
172 * mext_double_up_read - Release two inodes' read semaphore
173 *
174 * @orig_inode: original inode structure to be released its lock first
175 * @donor_inode: donor inode structure to be released its lock second
176 * Release read semaphore of two inodes (orig and donor).
177 */
178static void
179mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
180{
181 BUG_ON(orig_inode == NULL || donor_inode == NULL);
182
183 up_read(&EXT4_I(orig_inode)->i_data_sem);
184 up_read(&EXT4_I(donor_inode)->i_data_sem);
185}
186
187/**
188 * mext_double_up_write - Release two inodes' write semaphore
189 *
190 * @orig_inode: original inode structure to be released its lock first
191 * @donor_inode: donor inode structure to be released its lock second
192 * Release write semaphore of two inodes (orig and donor).
193 */
194static void
195mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
196{
197 BUG_ON(orig_inode == NULL || donor_inode == NULL);
198
199 up_write(&EXT4_I(orig_inode)->i_data_sem);
200 up_write(&EXT4_I(donor_inode)->i_data_sem);
201}
202
203/**
204 * mext_insert_across_blocks - Insert extents across leaf block
205 *
206 * @handle: journal handle
207 * @orig_inode: original inode
208 * @o_start: first original extent to be changed
209 * @o_end: last original extent to be changed
210 * @start_ext: first new extent to be inserted
211 * @new_ext: middle of new extent to be inserted
212 * @end_ext: last new extent to be inserted
213 *
214 * Allocate a new leaf block and insert extents into it. Return 0 on success,
215 * or a negative error value on failure.
216 */
217static int
218mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
219 struct ext4_extent *o_start, struct ext4_extent *o_end,
220 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
221 struct ext4_extent *end_ext)
222{
223 struct ext4_ext_path *orig_path = NULL;
224 ext4_lblk_t eblock = 0;
225 int new_flag = 0;
226 int end_flag = 0;
227 int err = 0;
228
229 if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
230 if (o_start == o_end) {
231
232 /* start_ext new_ext end_ext
233 * donor |---------|-----------|--------|
234 * orig |------------------------------|
235 */
236 end_flag = 1;
237 } else {
238
239 /* start_ext new_ext end_ext
240 * donor |---------|----------|---------|
241 * orig |---------------|--------------|
242 */
243 o_end->ee_block = end_ext->ee_block;
244 o_end->ee_len = end_ext->ee_len;
245 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
246 }
247
248 o_start->ee_len = start_ext->ee_len;
249 new_flag = 1;
250
251 } else if (start_ext->ee_len && new_ext->ee_len &&
252 !end_ext->ee_len && o_start == o_end) {
253
254 /* start_ext new_ext
255 * donor |--------------|---------------|
256 * orig |------------------------------|
257 */
258 o_start->ee_len = start_ext->ee_len;
259 new_flag = 1;
260
261 } else if (!start_ext->ee_len && new_ext->ee_len &&
262 end_ext->ee_len && o_start == o_end) {
263
264 /* new_ext end_ext
265 * donor |--------------|---------------|
266 * orig |------------------------------|
267 */
268 o_end->ee_block = end_ext->ee_block;
269 o_end->ee_len = end_ext->ee_len;
270 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
271
272 /*
273 * Set 0 to the extent block if new_ext was
274 * the first block.
275 */
276 if (new_ext->ee_block)
277 eblock = le32_to_cpu(new_ext->ee_block);
278
279 new_flag = 1;
280 } else {
281 ext4_debug("ext4 move extent: Unexpected insert case\n");
282 return -EIO;
283 }
284
285 if (new_flag) {
286 get_ext_path(orig_path, orig_inode, eblock, err);
287 if (orig_path == NULL)
288 goto out;
289
290 if (ext4_ext_insert_extent(handle, orig_inode,
291 orig_path, new_ext))
292 goto out;
293 }
294
295 if (end_flag) {
296 get_ext_path(orig_path, orig_inode,
297 le32_to_cpu(end_ext->ee_block) - 1, err);
298 if (orig_path == NULL)
299 goto out;
300
301 if (ext4_ext_insert_extent(handle, orig_inode,
302 orig_path, end_ext))
303 goto out;
304 }
305out:
306 if (orig_path) {
307 ext4_ext_drop_refs(orig_path);
308 kfree(orig_path);
309 }
310
311 return err;
312
313}
314
315/**
316 * mext_insert_inside_block - Insert new extent to the extent block
317 *
318 * @o_start: first original extent to be moved
319 * @o_end: last original extent to be moved
320 * @start_ext: first new extent to be inserted
321 * @new_ext: middle of new extent to be inserted
322 * @end_ext: last new extent to be inserted
323 * @eh: extent header of target leaf block
324 * @range_to_move: used to decide how to insert extent
325 *
326 * Insert extents into the leaf block. The extent (@o_start) is overwritten
327 * by inserted extents.
328 */
329static void
330mext_insert_inside_block(struct ext4_extent *o_start,
331 struct ext4_extent *o_end,
332 struct ext4_extent *start_ext,
333 struct ext4_extent *new_ext,
334 struct ext4_extent *end_ext,
335 struct ext4_extent_header *eh,
336 int range_to_move)
337{
338 int i = 0;
339 unsigned long len;
340
341 /* Move the existing extents */
342 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
343 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
344 (unsigned long)(o_end + 1);
345 memmove(o_end + 1 + range_to_move, o_end + 1, len);
346 }
347
348 /* Insert start entry */
349 if (start_ext->ee_len)
350 o_start[i++].ee_len = start_ext->ee_len;
351
352 /* Insert new entry */
353 if (new_ext->ee_len) {
354 o_start[i] = *new_ext;
355 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
356 }
357
358 /* Insert end entry */
359 if (end_ext->ee_len)
360 o_start[i] = *end_ext;
361
362 /* Increment the total entries counter on the extent block */
363 le16_add_cpu(&eh->eh_entries, range_to_move);
364}
365
366/**
367 * mext_insert_extents - Insert new extent
368 *
369 * @handle: journal handle
370 * @orig_inode: original inode
371 * @orig_path: path indicates first extent to be changed
372 * @o_start: first original extent to be changed
373 * @o_end: last original extent to be changed
374 * @start_ext: first new extent to be inserted
375 * @new_ext: middle of new extent to be inserted
376 * @end_ext: last new extent to be inserted
377 *
378 * Call the function to insert extents. If we cannot add more extents into
379 * the leaf block, we call mext_insert_across_blocks() to create a
380 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
381 * on success, or a negative error value on failure.
382 */
383static int
384mext_insert_extents(handle_t *handle, struct inode *orig_inode,
385 struct ext4_ext_path *orig_path,
386 struct ext4_extent *o_start,
387 struct ext4_extent *o_end,
388 struct ext4_extent *start_ext,
389 struct ext4_extent *new_ext,
390 struct ext4_extent *end_ext)
391{
392 struct ext4_extent_header *eh;
393 unsigned long need_slots, slots_range;
394 int range_to_move, depth, ret;
395
396 /*
397 * The extents need to be inserted
398 * start_extent + new_extent + end_extent.
399 */
400 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
401 (new_ext->ee_len ? 1 : 0);
402
403 /* The number of slots between start and end */
404 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
405 / sizeof(struct ext4_extent);
406
407 /* Range to move the end of extent */
408 range_to_move = need_slots - slots_range;
409 depth = orig_path->p_depth;
410 orig_path += depth;
411 eh = orig_path->p_hdr;
412
413 if (depth) {
414 /* Register to journal */
415 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
416 if (ret)
417 return ret;
418 }
419
420 /* Expansion */
421 if (range_to_move > 0 &&
422 (range_to_move > le16_to_cpu(eh->eh_max)
423 - le16_to_cpu(eh->eh_entries))) {
424
425 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
426 o_end, start_ext, new_ext, end_ext);
427 if (ret < 0)
428 return ret;
429 } else
430 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
431 end_ext, eh, range_to_move);
432
433 if (depth) {
434 ret = ext4_handle_dirty_metadata(handle, orig_inode,
435 orig_path->p_bh);
436 if (ret)
437 return ret;
438 } else {
439 ret = ext4_mark_inode_dirty(handle, orig_inode);
440 if (ret < 0)
441 return ret;
442 }
443
444 return 0;
445}
446
447/**
448 * mext_leaf_block - Move one leaf extent block into the inode.
449 *
450 * @handle: journal handle
451 * @orig_inode: original inode
452 * @orig_path: path indicates first extent to be changed
453 * @dext: donor extent
454 * @from: start offset on the target file
455 *
456 * In order to insert extents into the leaf block, we must divide the extent
457 * in the leaf block into three extents: one covers the range where the new
458 * extents are inserted, and the others lie around it.
459 *
460 * Therefore, this function creates structures to save extents of the leaf
461 * block, and inserts extents by calling mext_insert_extents() with
462 * created extents. Return 0 on success, or a negative error value on failure.
463 */
464static int
465mext_leaf_block(handle_t *handle, struct inode *orig_inode,
466 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
467 ext4_lblk_t *from)
468{
469 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
470 struct ext4_extent new_ext, start_ext, end_ext;
471 ext4_lblk_t new_ext_end;
472 ext4_fsblk_t new_phys_end;
473 int oext_alen, new_ext_alen, end_ext_alen;
474 int depth = ext_depth(orig_inode);
475 int ret;
476
477 o_start = o_end = oext = orig_path[depth].p_ext;
478 oext_alen = ext4_ext_get_actual_len(oext);
479 start_ext.ee_len = end_ext.ee_len = 0;
480
481 new_ext.ee_block = cpu_to_le32(*from);
482 ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
483 new_ext.ee_len = dext->ee_len;
484 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
485 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
486 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
487
488 /*
489 * Case: original extent is first
490 * oext |--------|
491 * new_ext |--|
492 * start_ext |--|
493 */
494 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
495 le32_to_cpu(new_ext.ee_block) <
496 le32_to_cpu(oext->ee_block) + oext_alen) {
497 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
498 le32_to_cpu(oext->ee_block));
499 copy_extent_status(oext, &start_ext);
500 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
501 prev_ext = oext - 1;
502 /*
503 * We can merge new_ext into previous extent,
504 * if these are contiguous and same extent type.
505 */
506 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
507 &new_ext)) {
508 o_start = prev_ext;
509 start_ext.ee_len = cpu_to_le16(
510 ext4_ext_get_actual_len(prev_ext) +
511 new_ext_alen);
512 copy_extent_status(prev_ext, &start_ext);
513 new_ext.ee_len = 0;
514 }
515 }
516
517 /*
518 * Case: new_ext_end must be less than oext
519 * oext |-----------|
520 * new_ext |-------|
521 */
522 BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
523
524 /*
525 * Case: new_ext is smaller than original extent
526 * oext |---------------|
527 * new_ext |-----------|
528 * end_ext |---|
529 */
530 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
531 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
532 end_ext.ee_len =
533 cpu_to_le16(le32_to_cpu(oext->ee_block) +
534 oext_alen - 1 - new_ext_end);
535 copy_extent_status(oext, &end_ext);
536 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
537 ext4_ext_store_pblock(&end_ext,
538 (ext_pblock(o_end) + oext_alen - end_ext_alen));
539 end_ext.ee_block =
540 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
541 oext_alen - end_ext_alen);
542 }
543
544 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
545 o_end, &start_ext, &new_ext, &end_ext);
546 return ret;
547}
548
549/**
550 * mext_calc_swap_extents - Calculate extents for extent swapping.
551 *
552 * @tmp_dext: the extent that will belong to the original inode
553 * @tmp_oext: the extent that will belong to the donor inode
554 * @orig_off: block offset of original inode
555 * @donor_off: block offset of donor inode
556 * @max_count: the maximum length of extents
557 */
558static void
559mext_calc_swap_extents(struct ext4_extent *tmp_dext,
560 struct ext4_extent *tmp_oext,
561 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
562 ext4_lblk_t max_count)
563{
564 ext4_lblk_t diff, orig_diff;
565 struct ext4_extent dext_old, oext_old;
566
567 dext_old = *tmp_dext;
568 oext_old = *tmp_oext;
569
570 /* When tmp_dext is too large, pick up the target range. */
571 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
572
573 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
574 tmp_dext->ee_block =
575 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
576 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
577
578 if (max_count < ext4_ext_get_actual_len(tmp_dext))
579 tmp_dext->ee_len = cpu_to_le16(max_count);
580
581 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
582 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
583
584 /* Adjust extent length if donor extent is larger than orig */
585 if (ext4_ext_get_actual_len(tmp_dext) >
586 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
587 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
588 orig_diff);
589
590 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
591
592 copy_extent_status(&oext_old, tmp_dext);
593 copy_extent_status(&dext_old, tmp_oext);
594}
595
596/**
597 * mext_replace_branches - Replace original extents with new extents
598 *
599 * @handle: journal handle
600 * @orig_inode: original inode
601 * @donor_inode: donor inode
602 * @from: block offset of orig_inode
603 * @count: block count to be replaced
604 *
605 * Replace original inode extents and donor inode extents page by page.
606 * We implement this replacement in the following three steps:
607 * 1. Save the block information of original and donor inodes into
608 * dummy extents.
609 * 2. Change the block information of original inode to point at the
610 * donor inode blocks.
611 * 3. Change the block information of donor inode to point at the saved
612 * original inode blocks in the dummy extents.
613 *
614 * Return 0 on success, or a negative error value on failure.
615 */
616static int
617mext_replace_branches(handle_t *handle, struct inode *orig_inode,
618 struct inode *donor_inode, ext4_lblk_t from,
619 ext4_lblk_t count)
620{
621 struct ext4_ext_path *orig_path = NULL;
622 struct ext4_ext_path *donor_path = NULL;
623 struct ext4_extent *oext, *dext;
624 struct ext4_extent tmp_dext, tmp_oext;
625 ext4_lblk_t orig_off = from, donor_off = from;
626 int err = 0;
627 int depth;
628 int replaced_count = 0;
629 int dext_alen;
630
631 mext_double_down_write(orig_inode, donor_inode);
632
633 /* Get the original extent for the block "orig_off" */
634 get_ext_path(orig_path, orig_inode, orig_off, err);
635 if (orig_path == NULL)
636 goto out;
637
638 /* Get the donor extent for the head */
639 get_ext_path(donor_path, donor_inode, donor_off, err);
640 if (donor_path == NULL)
641 goto out;
642 depth = ext_depth(orig_inode);
643 oext = orig_path[depth].p_ext;
644 tmp_oext = *oext;
645
646 depth = ext_depth(donor_inode);
647 dext = donor_path[depth].p_ext;
648 tmp_dext = *dext;
649
650 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
651 donor_off, count);
652
653 /* Loop for the donor extents */
654 while (1) {
655 /* The extent for donor must be found. */
656 BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
657
658 /* Set donor extent to orig extent */
659 err = mext_leaf_block(handle, orig_inode,
660 orig_path, &tmp_dext, &orig_off);
661 if (err < 0)
662 goto out;
663
664 /* Set orig extent to donor extent */
665 err = mext_leaf_block(handle, donor_inode,
666 donor_path, &tmp_oext, &donor_off);
667 if (err < 0)
668 goto out;
669
670 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
671 replaced_count += dext_alen;
672 donor_off += dext_alen;
673 orig_off += dext_alen;
674
675 /* Already moved the expected blocks */
676 if (replaced_count >= count)
677 break;
678
679 if (orig_path)
680 ext4_ext_drop_refs(orig_path);
681 get_ext_path(orig_path, orig_inode, orig_off, err);
682 if (orig_path == NULL)
683 goto out;
684 depth = ext_depth(orig_inode);
685 oext = orig_path[depth].p_ext;
686 if (le32_to_cpu(oext->ee_block) +
687 ext4_ext_get_actual_len(oext) <= orig_off) {
688 err = 0;
689 goto out;
690 }
691 tmp_oext = *oext;
692
693 if (donor_path)
694 ext4_ext_drop_refs(donor_path);
695 get_ext_path(donor_path, donor_inode,
696 donor_off, err);
697 if (donor_path == NULL)
698 goto out;
699 depth = ext_depth(donor_inode);
700 dext = donor_path[depth].p_ext;
701 if (le32_to_cpu(dext->ee_block) +
702 ext4_ext_get_actual_len(dext) <= donor_off) {
703 err = 0;
704 goto out;
705 }
706 tmp_dext = *dext;
707
708 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
709 donor_off,
710 count - replaced_count);
711 }
712
713out:
714 if (orig_path) {
715 ext4_ext_drop_refs(orig_path);
716 kfree(orig_path);
717 }
718 if (donor_path) {
719 ext4_ext_drop_refs(donor_path);
720 kfree(donor_path);
721 }
722
723 mext_double_up_write(orig_inode, donor_inode);
724 return err;
725}
726
727/**
728 * move_extent_per_page - Move extent data per page
729 *
730 * @o_filp: file structure of original file
731 * @donor_inode: donor inode
732 * @orig_page_offset: page index on original file
733 * @data_offset_in_page: block index where data swapping starts
734 * @block_len_in_page: the number of blocks to be swapped
735 * @uninit: orig extent is uninitialized or not
736 *
737 * Save the data in original inode blocks and replace original inode extents
738 * with donor inode extents by calling mext_replace_branches().
739 * Finally, write out the saved data in new original inode blocks. Return 0
740 * on success, or a negative error value on failure.
741 */
742static int
743move_extent_par_page(struct file *o_filp, struct inode *donor_inode,
744 pgoff_t orig_page_offset, int data_offset_in_page,
745 int block_len_in_page, int uninit)
746{
747 struct inode *orig_inode = o_filp->f_dentry->d_inode;
748 struct address_space *mapping = orig_inode->i_mapping;
749 struct buffer_head *bh;
750 struct page *page = NULL;
751 const struct address_space_operations *a_ops = mapping->a_ops;
752 handle_t *handle;
753 ext4_lblk_t orig_blk_offset;
754 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
755 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
756 unsigned int w_flags = 0;
757 unsigned int tmp_data_len, data_len;
758 void *fsdata;
759 int ret, i, jblocks;
760 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
761
762 /*
763 * It needs twice the amount of ordinary journal buffers because
764 * inode and donor_inode may each modify different metadata blocks.
765 */
766 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
767 handle = ext4_journal_start(orig_inode, jblocks);
768 if (IS_ERR(handle)) {
769 ret = PTR_ERR(handle);
770 return ret;
771 }
772
773 if (segment_eq(get_fs(), KERNEL_DS))
774 w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
775
776 orig_blk_offset = orig_page_offset * blocks_per_page +
777 data_offset_in_page;
778
779 /*
780 * If orig extent is uninitialized one,
781 * it's not necessary to force the page into memory
782 * and then force it to be written out again.
783 * Just swap data blocks between orig and donor.
784 */
785 if (uninit) {
786 ret = mext_replace_branches(handle, orig_inode,
787 donor_inode, orig_blk_offset,
788 block_len_in_page);
789
790 /* Clear the inode cache not to refer to the old data */
791 ext4_ext_invalidate_cache(orig_inode);
792 ext4_ext_invalidate_cache(donor_inode);
793 goto out2;
794 }
795
796 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
797
798 /* Calculate data_len */
799 if ((orig_blk_offset + block_len_in_page - 1) ==
800 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
801 /* Replace the last block */
802 tmp_data_len = orig_inode->i_size & (blocksize - 1);
803 /*
804 * If tmp_data_len is zero, the file size is a multiple of the
805 * blocksize, so use a full block for the last block.
806 */
807 if (tmp_data_len == 0)
808 tmp_data_len = blocksize;
809
810 data_len = tmp_data_len +
811 ((block_len_in_page - 1) << orig_inode->i_blkbits);
812 } else {
813 data_len = block_len_in_page << orig_inode->i_blkbits;
814 }
815
816 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
817 &page, &fsdata);
818 if (unlikely(ret < 0))
819 goto out;
820
821 if (!PageUptodate(page)) {
822 mapping->a_ops->readpage(o_filp, page);
823 lock_page(page);
824 }
825
826 /*
827 * try_to_release_page() doesn't call releasepage in writeback mode.
828 * We should care about the order of writing to the same file
829 * by multiple move extent processes.
830 * It needs to call wait_on_page_writeback() to wait for the
831 * writeback of the page.
832 */
833 if (PageWriteback(page))
834 wait_on_page_writeback(page);
835
836 /* Release old bh and drop refs */
837 try_to_release_page(page, 0);
838
839 ret = mext_replace_branches(handle, orig_inode, donor_inode,
840 orig_blk_offset, block_len_in_page);
841 if (ret < 0)
842 goto out;
843
844 /* Clear the inode cache not to refer to the old data */
845 ext4_ext_invalidate_cache(orig_inode);
846 ext4_ext_invalidate_cache(donor_inode);
847
848 if (!page_has_buffers(page))
849 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
850
851 bh = page_buffers(page);
852 for (i = 0; i < data_offset_in_page; i++)
853 bh = bh->b_this_page;
854
855 for (i = 0; i < block_len_in_page; i++) {
856 ret = ext4_get_block(orig_inode,
857 (sector_t)(orig_blk_offset + i), bh, 0);
858 if (ret < 0)
859 goto out;
860
861 if (bh->b_this_page != NULL)
862 bh = bh->b_this_page;
863 }
864
865 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
866 page, fsdata);
867 page = NULL;
868
869out:
870 if (unlikely(page)) {
871 if (PageLocked(page))
872 unlock_page(page);
873 page_cache_release(page);
874 }
875out2:
876 ext4_journal_stop(handle);
877
878 return ret < 0 ? ret : 0;
879}
880
881/**
882 * mext_check_arguments - Check whether move extent can be done
883 *
884 * @orig_inode: original inode
885 * @donor_inode: donor inode
886 * @orig_start: logical start offset in block for orig
887 * @donor_start: logical start offset in block for donor
888 * @len: the number of blocks to be moved
889 * @moved_len: moved block length
890 *
891 * Check the arguments of ext4_move_extents() to determine whether the
892 * files can be exchanged with each other.
893 * Return 0 on success, or a negative error value on failure.
894 */
895static int
896mext_check_arguments(struct inode *orig_inode,
897 struct inode *donor_inode, __u64 orig_start,
898 __u64 donor_start, __u64 *len, __u64 moved_len)
899{
900 /* Regular file check */
901 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
902 ext4_debug("ext4 move extent: The argument files should be "
903 "regular file [ino:orig %lu, donor %lu]\n",
904 orig_inode->i_ino, donor_inode->i_ino);
905 return -EINVAL;
906 }
907
908 /* Ext4 move extent does not support swapfile */
909 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
910 ext4_debug("ext4 move extent: The argument files should "
911 "not be swapfile [ino:orig %lu, donor %lu]\n",
912 orig_inode->i_ino, donor_inode->i_ino);
913 return -EINVAL;
914 }
915
916 /* Files should be in the same ext4 FS */
917 if (orig_inode->i_sb != donor_inode->i_sb) {
918 ext4_debug("ext4 move extent: The argument files "
919 "should be in same FS [ino:orig %lu, donor %lu]\n",
920 orig_inode->i_ino, donor_inode->i_ino);
921 return -EINVAL;
922 }
923
924 /* orig and donor should be different file */
925 if (orig_inode->i_ino == donor_inode->i_ino) {
926 ext4_debug("ext4 move extent: The argument files should not "
927 "be same file [ino:orig %lu, donor %lu]\n",
928 orig_inode->i_ino, donor_inode->i_ino);
929 return -EINVAL;
930 }
931
932 /* Ext4 move extent supports only extent based file */
933 if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
934 ext4_debug("ext4 move extent: orig file is not extents "
935 "based file [ino:orig %lu]\n", orig_inode->i_ino);
936 return -EOPNOTSUPP;
937 } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
938 ext4_debug("ext4 move extent: donor file is not extents "
939 "based file [ino:donor %lu]\n", donor_inode->i_ino);
940 return -EOPNOTSUPP;
941 }
942
943 if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
944 ext4_debug("ext4 move extent: File size is 0 byte\n");
945 return -EINVAL;
946 }
947
948 /* Start offset should be same */
949 if (orig_start != donor_start) {
950 ext4_debug("ext4 move extent: orig and donor's start "
951 "offset are not same [ino:orig %lu, donor %lu]\n",
952 orig_inode->i_ino, donor_inode->i_ino);
953 return -EINVAL;
954 }
955
956 if (moved_len) {
957 ext4_debug("ext4 move extent: moved_len should be 0 "
958 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
959 donor_inode->i_ino);
960 return -EINVAL;
961 }
962
963 if ((orig_start > MAX_DEFRAG_SIZE) ||
964 (donor_start > MAX_DEFRAG_SIZE) ||
965 (*len > MAX_DEFRAG_SIZE) ||
966 (orig_start + *len > MAX_DEFRAG_SIZE)) {
967 ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
968 "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
969 orig_inode->i_ino, donor_inode->i_ino);
970 return -EINVAL;
971 }
972
973 if (orig_inode->i_size > donor_inode->i_size) {
974 if (orig_start >= donor_inode->i_size) {
975 ext4_debug("ext4 move extent: orig start offset "
976 "[%llu] should be less than donor file size "
977 "[%lld] [ino:orig %lu, donor_inode %lu]\n",
978 orig_start, donor_inode->i_size,
979 orig_inode->i_ino, donor_inode->i_ino);
980 return -EINVAL;
981 }
982
983 if (orig_start + *len > donor_inode->i_size) {
984 ext4_debug("ext4 move extent: End offset [%llu] should "
985 "be less than donor file size [%lld]."
986 "So adjust length from %llu to %lld "
987 "[ino:orig %lu, donor %lu]\n",
988 orig_start + *len, donor_inode->i_size,
989 *len, donor_inode->i_size - orig_start,
990 orig_inode->i_ino, donor_inode->i_ino);
991 *len = donor_inode->i_size - orig_start;
992 }
993 } else {
994 if (orig_start >= orig_inode->i_size) {
995 ext4_debug("ext4 move extent: start offset [%llu] "
996 "should be less than original file size "
997 "[%lld] [inode:orig %lu, donor %lu]\n",
998 orig_start, orig_inode->i_size,
999 orig_inode->i_ino, donor_inode->i_ino);
1000 return -EINVAL;
1001 }
1002
1003 if (orig_start + *len > orig_inode->i_size) {
1004 ext4_debug("ext4 move extent: Adjust length "
1005 "from %llu to %lld. Because it should be "
1006 "less than original file size "
1007 "[ino:orig %lu, donor %lu]\n",
1008 *len, orig_inode->i_size - orig_start,
1009 orig_inode->i_ino, donor_inode->i_ino);
1010 *len = orig_inode->i_size - orig_start;
1011 }
1012 }
1013
1014 if (!*len) {
1015 ext4_debug("ext4 move extent: len should not be 0 "
1016 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1017 donor_inode->i_ino);
1018 return -EINVAL;
1019 }
1020
1021 return 0;
1022}
1023
1024/**
1025 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1026 *
1027 * @inode1: the inode structure
1028 * @inode2: the inode structure
1029 *
1030 * Lock two inodes' i_mutex by i_ino order. This function is moved from
1031 * fs/inode.c.
1032 */
1033static void
1034mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1035{
1036 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1037 if (inode1)
1038 mutex_lock(&inode1->i_mutex);
1039 else if (inode2)
1040 mutex_lock(&inode2->i_mutex);
1041 return;
1042 }
1043
1044 if (inode1->i_ino < inode2->i_ino) {
1045 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1046 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1047 } else {
1048 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1049 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1050 }
1051}
1052
1053/**
1054 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1055 *
1056 * @inode1: the inode that is released first
1057 * @inode2: the inode that is released second
1058 *
1059 * This function is moved from fs/inode.c.
1060 */
1061
1062static void
1063mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1064{
1065 if (inode1)
1066 mutex_unlock(&inode1->i_mutex);
1067
1068 if (inode2 && inode2 != inode1)
1069 mutex_unlock(&inode2->i_mutex);
1070}
1071
1072/**
1073 * ext4_move_extents - Exchange the specified range of a file
1074 *
1075 * @o_filp: file structure of the original file
1076 * @d_filp: file structure of the donor file
1077 * @orig_start: start offset in block for orig
1078 * @donor_start: start offset in block for donor
1079 * @len: the number of blocks to be moved
1080 * @moved_len: moved block length
1081 *
1082 * This function returns 0 and moved block length is set in moved_len
1083 * if succeed, otherwise returns error value.
1084 *
1085 * Note: ext4_move_extents() proceeds in the following order.
1086 * 1:ext4_move_extents() calculates the last block number of the extents to
1087 * be moved from the start block number (orig_start) and the number of
1088 * blocks to be moved (len) given as arguments.
1089 * If {orig, donor}_start points into a hole, the extent start offsets
1090 * referenced by ext_cur (current extent), holecheck_path and orig_path
1091 * are set to the first extent behind the hole.
1092 * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
1093 * or the ext_cur exceeds the block_end, which is the last logical block number.
1094 * 3:To get the length of the contiguous area, call mext_next_extent()
1095 * repeatedly on ext_cur (whose initial value comes from holecheck_path)
1096 * until a non-contiguous extent is found, the start logical block number
1097 * exceeds the block_end, or the extent points to the last extent.
1098 * 4:Exchange the original inode data with donor inode data
1099 * from orig_page_offset to seq_end_page.
1100 * The start indexes of data are specified as arguments.
1101 * That of the original inode is orig_page_offset,
1102 * and the donor inode is also orig_page_offset
1103 * (To easily handle blocksize != pagesize case, the offset for the
1104 * donor inode is expressed in block units).
1105 * 5:Update holecheck_path and orig_path to point to the next extent,
1106 * then return to step 2.
1107 * 6:Release holecheck_path, orig_path and set the len to moved_len
1108 * which shows the number of moved blocks.
1109 * The moved_len is useful for the command to calculate the file offset
1110 * for starting next move extent ioctl.
1111 * 7:Return 0 on success, or a negative error value on failure.
1112 */
1113int
1114ext4_move_extents(struct file *o_filp, struct file *d_filp,
1115 __u64 orig_start, __u64 donor_start, __u64 len,
1116 __u64 *moved_len)
1117{
1118 struct inode *orig_inode = o_filp->f_dentry->d_inode;
1119 struct inode *donor_inode = d_filp->f_dentry->d_inode;
1120 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1121 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1122 ext4_lblk_t block_start = orig_start;
1123 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1124 ext4_lblk_t rest_blocks;
1125 pgoff_t orig_page_offset = 0, seq_end_page;
1126 int ret, depth, last_extent = 0;
1127 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1128 int data_offset_in_page;
1129 int block_len_in_page;
1130 int uninit;
1131
1132 /* protect orig and donor against a truncate */
1133 mext_inode_double_lock(orig_inode, donor_inode);
1134
1135 mext_double_down_read(orig_inode, donor_inode);
1136 /* Check the filesystem environment whether move_extent can be done */
1137 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1138 donor_start, &len, *moved_len);
1139 mext_double_up_read(orig_inode, donor_inode);
1140 if (ret)
1141 goto out2;
1142
1143 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
1144 block_end = block_start + len - 1;
1145 if (file_end < block_end)
1146 len -= block_end - file_end;
1147
1148 get_ext_path(orig_path, orig_inode, block_start, ret);
1149 if (orig_path == NULL)
1150 goto out2;
1151
1152 /* Get path structure to check the hole */
1153 get_ext_path(holecheck_path, orig_inode, block_start, ret);
1154 if (holecheck_path == NULL)
1155 goto out;
1156
1157 depth = ext_depth(orig_inode);
1158 ext_cur = holecheck_path[depth].p_ext;
1159 if (ext_cur == NULL) {
1160 ret = -EINVAL;
1161 goto out;
1162 }
1163
1164 /*
1165 * Get proper extent whose ee_block is beyond block_start
1166 * if block_start was within the hole.
1167 */
1168 if (le32_to_cpu(ext_cur->ee_block) +
1169 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1170 last_extent = mext_next_extent(orig_inode,
1171 holecheck_path, &ext_cur);
1172 if (last_extent < 0) {
1173 ret = last_extent;
1174 goto out;
1175 }
1176 last_extent = mext_next_extent(orig_inode, orig_path,
1177 &ext_dummy);
1178 if (last_extent < 0) {
1179 ret = last_extent;
1180 goto out;
1181 }
1182 }
1183 seq_start = block_start;
1184
1185 /* No blocks within the specified range. */
1186 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1187 ext4_debug("ext4 move extent: The specified range of file "
1188 "may be a hole\n");
1189 ret = -EINVAL;
1190 goto out;
1191 }
1192
1193 /* Adjust start blocks */
1194 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1195 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1196 max(le32_to_cpu(ext_cur->ee_block), block_start);
1197
1198 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1199 seq_blocks += add_blocks;
1200
1201 /* Adjust tail blocks */
1202 if (seq_start + seq_blocks - 1 > block_end)
1203 seq_blocks = block_end - seq_start + 1;
1204
1205 ext_prev = ext_cur;
1206 last_extent = mext_next_extent(orig_inode, holecheck_path,
1207 &ext_cur);
1208 if (last_extent < 0) {
1209 ret = last_extent;
1210 break;
1211 }
1212 add_blocks = ext4_ext_get_actual_len(ext_cur);
1213
1214 /*
1215 * Extend the length of contiguous block (seq_blocks)
1216 * if extents are contiguous.
1217 */
1218 if (ext4_can_extents_be_merged(orig_inode,
1219 ext_prev, ext_cur) &&
1220 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1221 !last_extent)
1222 continue;
1223
1224 /* Check whether the original extent is uninitialized */
1225 uninit = ext4_ext_is_uninitialized(ext_prev);
1226
1227 data_offset_in_page = seq_start % blocks_per_page;
1228
1229 /*
1230 * Calculate data blocks count that should be swapped
1231 * at the first page.
1232 */
1233 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1234 /* Swapped blocks are across pages */
1235 block_len_in_page =
1236 blocks_per_page - data_offset_in_page;
1237 } else {
1238 /* Swapped blocks are in a page */
1239 block_len_in_page = seq_blocks;
1240 }
1241
1242 orig_page_offset = seq_start >>
1243 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1244 seq_end_page = (seq_start + seq_blocks - 1) >>
1245 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1246 seq_start = le32_to_cpu(ext_cur->ee_block);
1247 rest_blocks = seq_blocks;
1248
1249 /* Discard preallocations of two inodes */
1250 down_write(&EXT4_I(orig_inode)->i_data_sem);
1251 ext4_discard_preallocations(orig_inode);
1252 up_write(&EXT4_I(orig_inode)->i_data_sem);
1253
1254 down_write(&EXT4_I(donor_inode)->i_data_sem);
1255 ext4_discard_preallocations(donor_inode);
1256 up_write(&EXT4_I(donor_inode)->i_data_sem);
1257
1258 while (orig_page_offset <= seq_end_page) {
1259
1260 /* Swap original branches with new branches */
1261 ret = move_extent_par_page(o_filp, donor_inode,
1262 orig_page_offset,
1263 data_offset_in_page,
1264 block_len_in_page, uninit);
1265 if (ret < 0)
1266 goto out;
1267 orig_page_offset++;
1268 /* Count how many blocks we have exchanged */
1269 *moved_len += block_len_in_page;
1270 BUG_ON(*moved_len > len);
1271
1272 data_offset_in_page = 0;
1273 rest_blocks -= block_len_in_page;
1274 if (rest_blocks > blocks_per_page)
1275 block_len_in_page = blocks_per_page;
1276 else
1277 block_len_in_page = rest_blocks;
1278 }
1279
1280 /* Decrease buffer counter */
1281 if (holecheck_path)
1282 ext4_ext_drop_refs(holecheck_path);
1283 get_ext_path(holecheck_path, orig_inode,
1284 seq_start, ret);
1285 if (holecheck_path == NULL)
1286 break;
1287 depth = holecheck_path->p_depth;
1288
1289 /* Decrease buffer counter */
1290 if (orig_path)
1291 ext4_ext_drop_refs(orig_path);
1292 get_ext_path(orig_path, orig_inode, seq_start, ret);
1293 if (orig_path == NULL)
1294 break;
1295
1296 ext_cur = holecheck_path[depth].p_ext;
1297 add_blocks = ext4_ext_get_actual_len(ext_cur);
1298 seq_blocks = 0;
1299
1300 }
1301out:
1302 if (orig_path) {
1303 ext4_ext_drop_refs(orig_path);
1304 kfree(orig_path);
1305 }
1306 if (holecheck_path) {
1307 ext4_ext_drop_refs(holecheck_path);
1308 kfree(holecheck_path);
1309 }
1310out2:
1311 mext_inode_double_unlock(orig_inode, donor_inode);
1312
1313 if (ret)
1314 return ret;
1315
1316 /* On success, all of the specified blocks must have been exchanged */
1317 BUG_ON(*moved_len != len);
1318
1319 return 0;
1320}
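mext_double_down_read()/mext_double_down_write() and mext_inode_double_lock() above all take the two inodes' locks in i_ino order, so that two concurrent move-extent calls on the same pair of files cannot deadlock by acquiring them in opposite orders. A minimal userspace sketch of the same ordering rule, using a hypothetical fake_inode type rather than the kernel structures:

#include <pthread.h>

struct fake_inode {
	unsigned long i_ino;
	pthread_mutex_t lock;
};

/* Lock the inode with the smaller i_ino first, mirroring the ordering
 * used by mext_double_down_write() and mext_inode_double_lock(). */
static void double_lock(struct fake_inode *a, struct fake_inode *b)
{
	struct fake_inode *first = a, *second = b;

	if (b->i_ino < a->i_ino) {
		first = b;
		second = a;
	}
	pthread_mutex_lock(&first->lock);
	pthread_mutex_lock(&second->lock);
}

/* Unlock order does not matter for correctness; release both. */
static void double_unlock(struct fake_inode *a, struct fake_inode *b)
{
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}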
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 07eb6649e4fa..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
1782 if (IS_DIRSYNC(dir)) 1782 if (IS_DIRSYNC(dir))
1783 ext4_handle_sync(handle); 1783 ext4_handle_sync(handle);
1784 1784
1785 inode = ext4_new_inode (handle, dir, mode); 1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786 err = PTR_ERR(inode); 1786 err = PTR_ERR(inode);
1787 if (!IS_ERR(inode)) { 1787 if (!IS_ERR(inode)) {
1788 inode->i_op = &ext4_file_inode_operations; 1788 inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
1816 if (IS_DIRSYNC(dir)) 1816 if (IS_DIRSYNC(dir))
1817 ext4_handle_sync(handle); 1817 ext4_handle_sync(handle);
1818 1818
1819 inode = ext4_new_inode(handle, dir, mode); 1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820 err = PTR_ERR(inode); 1820 err = PTR_ERR(inode);
1821 if (!IS_ERR(inode)) { 1821 if (!IS_ERR(inode)) {
1822 init_special_inode(inode, inode->i_mode, rdev); 1822 init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
1853 if (IS_DIRSYNC(dir)) 1853 if (IS_DIRSYNC(dir))
1854 ext4_handle_sync(handle); 1854 ext4_handle_sync(handle);
1855 1855
1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode); 1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857 &dentry->d_name, 0);
1857 err = PTR_ERR(inode); 1858 err = PTR_ERR(inode);
1858 if (IS_ERR(inode)) 1859 if (IS_ERR(inode))
1859 goto out_stop; 1860 goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
2264 if (IS_DIRSYNC(dir)) 2265 if (IS_DIRSYNC(dir))
2265 ext4_handle_sync(handle); 2266 ext4_handle_sync(handle);
2266 2267
2267 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); 2268 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2269 &dentry->d_name, 0);
2268 err = PTR_ERR(inode); 2270 err = PTR_ERR(inode);
2269 if (IS_ERR(inode)) 2271 if (IS_ERR(inode))
2270 goto out_stop; 2272 goto out_stop;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 27eb289eea37..68b0351fc647 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1002 " too large to resize to %llu blocks safely\n", 1002 " too large to resize to %llu blocks safely\n",
1003 sb->s_id, n_blocks_count); 1003 sb->s_id, n_blocks_count);
1004 if (sizeof(sector_t) < 8) 1004 if (sizeof(sector_t) < 8)
1005 ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); 1005 ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
1006 return -EINVAL; 1006 return -EINVAL;
1007 } 1007 }
1008 1008
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397e..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
39#include <linux/ctype.h> 39#include <linux/ctype.h>
40#include <linux/marker.h>
41#include <linux/log2.h> 40#include <linux/log2.h>
42#include <linux/crc16.h> 41#include <linux/crc16.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -47,6 +46,9 @@
47#include "xattr.h" 46#include "xattr.h"
48#include "acl.h" 47#include "acl.h"
49 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h>
51
50static int default_mb_history_length = 1000; 52static int default_mb_history_length = 1000;
51 53
52module_param_named(default_mb_history_length, default_mb_history_length, 54module_param_named(default_mb_history_length, default_mb_history_length,
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
301 if (!test_opt(sb, ERRORS_CONT)) { 303 if (!test_opt(sb, ERRORS_CONT)) {
302 journal_t *journal = EXT4_SB(sb)->s_journal; 304 journal_t *journal = EXT4_SB(sb)->s_journal;
303 305
304 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 306 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
305 if (journal) 307 if (journal)
306 jbd2_journal_abort(journal, -EIO); 308 jbd2_journal_abort(journal, -EIO);
307 } 309 }
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 416 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 417 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
416 sb->s_flags |= MS_RDONLY; 418 sb->s_flags |= MS_RDONLY;
417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 419 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
418 if (EXT4_SB(sb)->s_journal) 420 if (EXT4_SB(sb)->s_journal)
419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 421 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
420} 422}
@@ -664,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
664 if (!ei) 666 if (!ei)
665 return NULL; 667 return NULL;
666 668
667#ifdef CONFIG_EXT4_FS_POSIX_ACL
668 ei->i_acl = EXT4_ACL_NOT_CACHED;
669 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
670#endif
671 ei->vfs_inode.i_version = 1; 669 ei->vfs_inode.i_version = 1;
672 ei->vfs_inode.i_data.writeback_index = 0; 670 ei->vfs_inode.i_data.writeback_index = 0;
673 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 671 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -733,18 +731,6 @@ static void destroy_inodecache(void)
733 731
734static void ext4_clear_inode(struct inode *inode) 732static void ext4_clear_inode(struct inode *inode)
735{ 733{
736#ifdef CONFIG_EXT4_FS_POSIX_ACL
737 if (EXT4_I(inode)->i_acl &&
738 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
739 posix_acl_release(EXT4_I(inode)->i_acl);
740 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
741 }
742 if (EXT4_I(inode)->i_default_acl &&
743 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
744 posix_acl_release(EXT4_I(inode)->i_default_acl);
745 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
746 }
747#endif
748 ext4_discard_preallocations(inode); 734 ext4_discard_preallocations(inode);
749 if (EXT4_JOURNAL(inode)) 735 if (EXT4_JOURNAL(inode))
750 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 736 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -1474,7 +1460,7 @@ set_qf_format:
1474 break; 1460 break;
1475#endif 1461#endif
1476 case Opt_abort: 1462 case Opt_abort:
1477 set_opt(sbi->s_mount_opt, ABORT); 1463 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1478 break; 1464 break;
1479 case Opt_nobarrier: 1465 case Opt_nobarrier:
1480 clear_opt(sbi->s_mount_opt, BARRIER); 1466 clear_opt(sbi->s_mount_opt, BARRIER);
@@ -1653,7 +1639,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1653 ext4_commit_super(sb, 1); 1639 ext4_commit_super(sb, 1);
1654 if (test_opt(sb, DEBUG)) 1640 if (test_opt(sb, DEBUG))
1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1641 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1642 "bpg=%lu, ipg=%lu, mo=%04x]\n",
1657 sb->s_blocksize, 1643 sb->s_blocksize,
1658 sbi->s_groups_count, 1644 sbi->s_groups_count,
1659 EXT4_BLOCKS_PER_GROUP(sb), 1645 EXT4_BLOCKS_PER_GROUP(sb),
@@ -1957,7 +1943,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1957 /* small i_blocks in vfs inode? */ 1943 /* small i_blocks in vfs inode? */
1958 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1944 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1959 /* 1945 /*
1960 * CONFIG_LBD is not enabled implies the inode 1946 * CONFIG_LBDAF is not enabled implies the inode
1961 * i_block represent total blocks in 512 bytes 1947 * i_block represent total blocks in 512 bytes
1962 * 32 == size of vfs inode i_blocks * 8 1948 * 32 == size of vfs inode i_blocks * 8
1963 */ 1949 */
@@ -2000,7 +1986,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2000 1986
2001 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1987 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2002 /* 1988 /*
2003 * !has_huge_files or CONFIG_LBD not enabled implies that 1989 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2004 * the inode i_block field represents total file blocks in 1990 * the inode i_block field represents total file blocks in
2005 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 1991 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2006 */ 1992 */
@@ -2204,6 +2190,7 @@ EXT4_RO_ATTR(session_write_kbytes);
2204EXT4_RO_ATTR(lifetime_write_kbytes); 2190EXT4_RO_ATTR(lifetime_write_kbytes);
2205EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2191EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2206 inode_readahead_blks_store, s_inode_readahead_blks); 2192 inode_readahead_blks_store, s_inode_readahead_blks);
2193EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2207EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2194EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2208EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2195EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2209EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2196EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2216,6 +2203,7 @@ static struct attribute *ext4_attrs[] = {
2216 ATTR_LIST(session_write_kbytes), 2203 ATTR_LIST(session_write_kbytes),
2217 ATTR_LIST(lifetime_write_kbytes), 2204 ATTR_LIST(lifetime_write_kbytes),
2218 ATTR_LIST(inode_readahead_blks), 2205 ATTR_LIST(inode_readahead_blks),
2206 ATTR_LIST(inode_goal),
2219 ATTR_LIST(mb_stats), 2207 ATTR_LIST(mb_stats),
2220 ATTR_LIST(mb_max_to_scan), 2208 ATTR_LIST(mb_max_to_scan),
2221 ATTR_LIST(mb_min_to_scan), 2209 ATTR_LIST(mb_min_to_scan),
@@ -2436,13 +2424,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2436 if (has_huge_files) { 2424 if (has_huge_files) {
2437 /* 2425 /*
2438 * Large file size enabled file system can only be 2426 * Large file size enabled file system can only be
2439 * mount if kernel is build with CONFIG_LBD 2427 * mount if kernel is build with CONFIG_LBDAF
2440 */ 2428 */
2441 if (sizeof(root->i_blocks) < sizeof(u64) && 2429 if (sizeof(root->i_blocks) < sizeof(u64) &&
2442 !(sb->s_flags & MS_RDONLY)) { 2430 !(sb->s_flags & MS_RDONLY)) {
2443 ext4_msg(sb, KERN_ERR, "Filesystem with huge " 2431 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2444 "files cannot be mounted read-write " 2432 "files cannot be mounted read-write "
2445 "without CONFIG_LBD"); 2433 "without CONFIG_LBDAF");
2446 goto failed_mount; 2434 goto failed_mount;
2447 } 2435 }
2448 } 2436 }
@@ -2566,7 +2554,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2566 ext4_msg(sb, KERN_ERR, "filesystem" 2554 ext4_msg(sb, KERN_ERR, "filesystem"
2567 " too large to mount safely"); 2555 " too large to mount safely");
2568 if (sizeof(sector_t) < 8) 2556 if (sizeof(sector_t) < 8)
2569 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); 2557 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
2570 goto failed_mount; 2558 goto failed_mount;
2571 } 2559 }
2572 2560
@@ -3346,7 +3334,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3346 int ret = 0; 3334 int ret = 0;
3347 tid_t target; 3335 tid_t target;
3348 3336
3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3337 trace_ext4_sync_fs(sb, wait);
3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 3338 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3351 if (wait) 3339 if (wait)
3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 3340 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
@@ -3450,7 +3438,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3450 goto restore_opts; 3438 goto restore_opts;
3451 } 3439 }
3452 3440
3453 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3441 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3454 ext4_abort(sb, __func__, "Abort forced by user"); 3442 ext4_abort(sb, __func__, "Abort forced by user");
3455 3443
3456 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3444 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -3465,7 +3453,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3465 3453
3466 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3454 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3467 n_blocks_count > ext4_blocks_count(es)) { 3455 n_blocks_count > ext4_blocks_count(es)) {
3468 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3456 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
3469 err = -EROFS; 3457 err = -EROFS;
3470 goto restore_opts; 3458 goto restore_opts;
3471 } 3459 }
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index b42602298087..923990e4f16e 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -241,7 +241,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
241 while (*fclus < cluster) { 241 while (*fclus < cluster) {
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_error(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __func__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
@@ -252,7 +252,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
252 if (nr < 0) 252 if (nr < 0)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_error(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __func__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
@@ -285,7 +285,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
285 if (ret < 0) 285 if (ret < 0)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_error(sb, "%s: request beyond EOF (i_pos %lld)",
289 __func__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index f3500294eec5..38ff75a0fe22 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -22,6 +22,19 @@
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include "fat.h" 23#include "fat.h"
24 24
25/*
26 * Maximum buffer size of short name.
27 * [(MSDOS_NAME + '.') * max one char + nul]
28 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
29 */
30#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
31/*
32 * Maximum buffer size of unicode chars from slots.
33 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
34 */
35#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
36#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
37
25static inline loff_t fat_make_i_pos(struct super_block *sb, 38static inline loff_t fat_make_i_pos(struct super_block *sb,
26 struct buffer_head *bh, 39 struct buffer_head *bh,
27 struct msdos_dir_entry *de) 40 struct msdos_dir_entry *de)
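
For orientation, expanding the two buffer-size macros that the hunk above moves to the top of the file, under the assumed values MSDOS_NAME == 11, NLS_MAX_CHARSET_SIZE == 6 and MSDOS_SLOTS == 21 (the constants are assumptions for illustration, not part of the patch):

	FAT_MAX_SHORT_SIZE = (11 + 1) * 6 + 1   =  73 bytes
	FAT_MAX_UNI_CHARS  = (21 - 1) * 13 + 1  = 261 characters
	FAT_MAX_UNI_SIZE   = 261 * sizeof(wchar_t), i.e. 522 bytes assuming a 16-bit in-kernel wchar_t
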
@@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
171 unsigned char *buf, int size) 184 unsigned char *buf, int size)
172{ 185{
173 if (sbi->options.utf8) 186 if (sbi->options.utf8)
174 return utf8_wcstombs(buf, uni, size); 187 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
188 UTF16_HOST_ENDIAN, buf, size);
175 else 189 else
176 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, 190 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
177 sbi->nls_io); 191 sbi->nls_io);
@@ -325,19 +339,6 @@ parse_long:
325} 339}
326 340
327/* 341/*
328 * Maximum buffer size of short name.
329 * [(MSDOS_NAME + '.') * max one char + nul]
330 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
331 */
332#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
333/*
334 * Maximum buffer size of unicode chars from slots.
335 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
336 */
337#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
338#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
339
340/*
341 * Return values: negative -> error, 0 -> not found, positive -> found, 342 * Return values: negative -> error, 0 -> not found, positive -> found,
342 * value is the total amount of slots, including the shortname entry. 343 * value is the total amount of slots, including the shortname entry.
343 */ 344 */
@@ -1334,7 +1335,7 @@ found:
1334 goto error_remove; 1335 goto error_remove;
1335 } 1336 }
1336 if (dir->i_size & (sbi->cluster_size - 1)) { 1337 if (dir->i_size & (sbi->cluster_size - 1)) {
1337 fat_fs_panic(sb, "Odd directory size"); 1338 fat_fs_error(sb, "Odd directory size");
1338 dir->i_size = (dir->i_size + sbi->cluster_size - 1) 1339 dir->i_size = (dir->i_size + sbi->cluster_size - 1)
1339 & ~((loff_t)sbi->cluster_size - 1); 1340 & ~((loff_t)sbi->cluster_size - 1);
1340 } 1341 }
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e4d88527b5dd..adb0e72a176d 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -17,6 +17,10 @@
17#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ 17#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
18#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ 18#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
19 19
20#define FAT_ERRORS_CONT 1 /* ignore error and continue */
21#define FAT_ERRORS_PANIC 2 /* panic on error */
22#define FAT_ERRORS_RO 3 /* remount r/o on error */
23
20struct fat_mount_options { 24struct fat_mount_options {
21 uid_t fs_uid; 25 uid_t fs_uid;
22 gid_t fs_gid; 26 gid_t fs_gid;
@@ -26,6 +30,7 @@ struct fat_mount_options {
26 char *iocharset; /* Charset used for filename input/display */ 30 char *iocharset; /* Charset used for filename input/display */
27 unsigned short shortname; /* flags for shortname display/create rule */ 31 unsigned short shortname; /* flags for shortname display/create rule */
28 unsigned char name_check; /* r = relaxed, n = normal, s = strict */ 32 unsigned char name_check; /* r = relaxed, n = normal, s = strict */
33 unsigned char errors; /* On error: continue, panic, remount-ro */
29 unsigned short allow_utime;/* permission for setting the [am]time */ 34 unsigned short allow_utime;/* permission for setting the [am]time */
30 unsigned quiet:1, /* set = fake successful chmods and chowns */ 35 unsigned quiet:1, /* set = fake successful chmods and chowns */
31 showexec:1, /* set = only set x bit for com/exe/bat */ 36 showexec:1, /* set = only set x bit for com/exe/bat */
@@ -316,7 +321,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,
316extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 321extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
317 struct inode *i2); 322 struct inode *i2);
318/* fat/misc.c */ 323/* fat/misc.c */
319extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) 324extern void fat_fs_error(struct super_block *s, const char *fmt, ...)
320 __attribute__ ((format (printf, 2, 3))) __cold; 325 __attribute__ ((format (printf, 2, 3))) __cold;
321extern void fat_clusters_flush(struct super_block *sb); 326extern void fat_clusters_flush(struct super_block *sb);
322extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 327extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 618f5305c2e4..a81037721a6f 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -348,7 +348,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
348 348
349 if (entry < FAT_START_ENT || sbi->max_cluster <= entry) { 349 if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
350 fatent_brelse(fatent); 350 fatent_brelse(fatent);
351 fat_fs_panic(sb, "invalid access to FAT (entry 0x%08x)", entry); 351 fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
352 return -EIO; 352 return -EIO;
353 } 353 }
354 354
@@ -560,7 +560,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
560 err = cluster; 560 err = cluster;
561 goto error; 561 goto error;
562 } else if (cluster == FAT_ENT_FREE) { 562 } else if (cluster == FAT_ENT_FREE) {
563 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 563 fat_fs_error(sb, "%s: deleting FAT entry beyond EOF",
564 __func__); 564 __func__);
565 err = -EIO; 565 err = -EIO;
566 goto error; 566 goto error;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index e955a56b4e5e..b28ea646ff60 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -18,106 +18,112 @@
18#include <linux/security.h> 18#include <linux/security.h>
19#include "fat.h" 19#include "fat.h"
20 20
21int fat_generic_ioctl(struct inode *inode, struct file *filp, 21static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
22 unsigned int cmd, unsigned long arg)
23{ 22{
23 u32 attr;
24
25 mutex_lock(&inode->i_mutex);
26 attr = fat_make_attrs(inode);
27 mutex_unlock(&inode->i_mutex);
28
29 return put_user(attr, user_attr);
30}
31
32static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
33{
34 struct inode *inode = file->f_path.dentry->d_inode;
24 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 35 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
25 u32 __user *user_attr = (u32 __user *)arg; 36 int is_dir = S_ISDIR(inode->i_mode);
37 u32 attr, oldattr;
38 struct iattr ia;
39 int err;
26 40
27 switch (cmd) { 41 err = get_user(attr, user_attr);
28 case FAT_IOCTL_GET_ATTRIBUTES: 42 if (err)
29 { 43 goto out;
30 u32 attr;
31 44
32 mutex_lock(&inode->i_mutex); 45 mutex_lock(&inode->i_mutex);
33 attr = fat_make_attrs(inode); 46 err = mnt_want_write(file->f_path.mnt);
34 mutex_unlock(&inode->i_mutex); 47 if (err)
48 goto out_unlock_inode;
35 49
36 return put_user(attr, user_attr); 50 /*
51 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
52 * prevents the user from turning us into a VFAT
53 * longname entry. Also, we obviously can't set
54 * any of the NTFS attributes in the high 24 bits.
55 */
56 attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR);
57 /* Merge in ATTR_VOLUME and ATTR_DIR */
58 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
59 (is_dir ? ATTR_DIR : 0);
60 oldattr = fat_make_attrs(inode);
61
62 /* Equivalent to a chmod() */
63 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
64 ia.ia_ctime = current_fs_time(inode->i_sb);
65 if (is_dir)
66 ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
67 else {
68 ia.ia_mode = fat_make_mode(sbi, attr,
69 S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
37 } 70 }
38 case FAT_IOCTL_SET_ATTRIBUTES:
39 {
40 u32 attr, oldattr;
41 int err, is_dir = S_ISDIR(inode->i_mode);
42 struct iattr ia;
43 71
44 err = get_user(attr, user_attr); 72 /* The root directory has no attributes */
45 if (err) 73 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
46 return err; 74 err = -EINVAL;
75 goto out_drop_write;
76 }
47 77
48 mutex_lock(&inode->i_mutex); 78 if (sbi->options.sys_immutable &&
49 79 ((attr | oldattr) & ATTR_SYS) &&
50 err = mnt_want_write(filp->f_path.mnt); 80 !capable(CAP_LINUX_IMMUTABLE)) {
51 if (err) 81 err = -EPERM;
52 goto up_no_drop_write; 82 goto out_drop_write;
53 83 }
54 /*
55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
56 * prevents the user from turning us into a VFAT
57 * longname entry. Also, we obviously can't set
58 * any of the NTFS attributes in the high 24 bits.
59 */
60 attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR);
61 /* Merge in ATTR_VOLUME and ATTR_DIR */
62 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
63 (is_dir ? ATTR_DIR : 0);
64 oldattr = fat_make_attrs(inode);
65
66 /* Equivalent to a chmod() */
67 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
68 ia.ia_ctime = current_fs_time(inode->i_sb);
69 if (is_dir)
70 ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
71 else {
72 ia.ia_mode = fat_make_mode(sbi, attr,
73 S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
74 }
75 84
76 /* The root directory has no attributes */ 85 /*
77 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { 86 * The security check is questionable... We single
78 err = -EINVAL; 87 * out the RO attribute for checking by the security
79 goto up; 88 * module, just because it maps to a file mode.
80 } 89 */
90 err = security_inode_setattr(file->f_path.dentry, &ia);
91 if (err)
92 goto out_drop_write;
81 93
82 if (sbi->options.sys_immutable) { 94 /* This MUST be done before doing anything irreversible... */
83 if ((attr | oldattr) & ATTR_SYS) { 95 err = fat_setattr(file->f_path.dentry, &ia);
84 if (!capable(CAP_LINUX_IMMUTABLE)) { 96 if (err)
85 err = -EPERM; 97 goto out_drop_write;
86 goto up; 98
87 } 99 fsnotify_change(file->f_path.dentry, ia.ia_valid);
88 } 100 if (sbi->options.sys_immutable) {
89 } 101 if (attr & ATTR_SYS)
102 inode->i_flags |= S_IMMUTABLE;
103 else
104 inode->i_flags &= ~S_IMMUTABLE;
105 }
90 106
91 /* 107 fat_save_attrs(inode, attr);
92 * The security check is questionable... We single 108 mark_inode_dirty(inode);
93 * out the RO attribute for checking by the security 109out_drop_write:
94 * module, just because it maps to a file mode. 110 mnt_drop_write(file->f_path.mnt);
95 */ 111out_unlock_inode:
96 err = security_inode_setattr(filp->f_path.dentry, &ia); 112 mutex_unlock(&inode->i_mutex);
97 if (err) 113out:
98 goto up; 114 return err;
99 115}
100 /* This MUST be done before doing anything irreversible... */
101 err = fat_setattr(filp->f_path.dentry, &ia);
102 if (err)
103 goto up;
104
105 fsnotify_change(filp->f_path.dentry, ia.ia_valid);
106 if (sbi->options.sys_immutable) {
107 if (attr & ATTR_SYS)
108 inode->i_flags |= S_IMMUTABLE;
109 else
110 inode->i_flags &= S_IMMUTABLE;
111 }
112 116
113 fat_save_attrs(inode, attr); 117int fat_generic_ioctl(struct inode *inode, struct file *filp,
114 mark_inode_dirty(inode); 118 unsigned int cmd, unsigned long arg)
115up: 119{
116 mnt_drop_write(filp->f_path.mnt); 120 u32 __user *user_attr = (u32 __user *)arg;
117up_no_drop_write: 121
118 mutex_unlock(&inode->i_mutex); 122 switch (cmd) {
119 return err; 123 case FAT_IOCTL_GET_ATTRIBUTES:
120 } 124 return fat_ioctl_get_attributes(inode, user_attr);
125 case FAT_IOCTL_SET_ATTRIBUTES:
126 return fat_ioctl_set_attributes(filp, user_attr);
121 default: 127 default:
122 return -ENOTTY; /* Inappropriate ioctl for device */ 128 return -ENOTTY; /* Inappropriate ioctl for device */
123 } 129 }
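
The rewrite above is purely structural; the ioctl ABI is unchanged. For context, a user-space sketch of driving both ioctls (the path is an example on a FAT mount, error handling is minimal):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/msdos_fs.h>	/* FAT_IOCTL_{GET,SET}_ATTRIBUTES, ATTR_* */

	int main(void)
	{
		__u32 attr;
		int fd = open("/mnt/usb/report.doc", O_RDWR);	/* example path */

		if (fd < 0 || ioctl(fd, FAT_IOCTL_GET_ATTRIBUTES, &attr) < 0)
			return 1;
		printf("attributes: %#x\n", (unsigned int)attr);

		attr |= ATTR_HIDDEN;	/* request the DOS "hidden" bit */
		if (ioctl(fd, FAT_IOCTL_SET_ATTRIBUTES, &attr) < 0)
			perror("FAT_IOCTL_SET_ATTRIBUTES");
		close(fd);
		return 0;
	}
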
@@ -225,7 +231,7 @@ static int fat_free(struct inode *inode, int skip)
225 fatent_brelse(&fatent); 231 fatent_brelse(&fatent);
226 return 0; 232 return 0;
227 } else if (ret == FAT_ENT_FREE) { 233 } else if (ret == FAT_ENT_FREE) {
228 fat_fs_panic(sb, 234 fat_fs_error(sb,
229 "%s: invalid cluster chain (i_pos %lld)", 235 "%s: invalid cluster chain (i_pos %lld)",
230 __func__, MSDOS_I(inode)->i_pos); 236 __func__, MSDOS_I(inode)->i_pos);
231 ret = -EIO; 237 ret = -EIO;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 51a5ecf9000a..8970d8c49bb0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -76,7 +76,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
76 return 0; 76 return 0;
77 77
78 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { 78 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
79 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", 79 fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",
80 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); 80 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
81 return -EIO; 81 return -EIO;
82 } 82 }
@@ -856,6 +856,12 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
856 seq_puts(m, ",flush"); 856 seq_puts(m, ",flush");
857 if (opts->tz_utc) 857 if (opts->tz_utc)
858 seq_puts(m, ",tz=UTC"); 858 seq_puts(m, ",tz=UTC");
859 if (opts->errors == FAT_ERRORS_CONT)
860 seq_puts(m, ",errors=continue");
861 else if (opts->errors == FAT_ERRORS_PANIC)
862 seq_puts(m, ",errors=panic");
863 else
864 seq_puts(m, ",errors=remount-ro");
859 865
860 return 0; 866 return 0;
861} 867}
@@ -868,7 +874,8 @@ enum {
868 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 874 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
869 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 875 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
870 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 876 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
871 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, 877 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
878 Opt_err_panic, Opt_err_ro, Opt_err,
872}; 879};
873 880
874static const match_table_t fat_tokens = { 881static const match_table_t fat_tokens = {
@@ -891,6 +898,11 @@ static const match_table_t fat_tokens = {
891 {Opt_showexec, "showexec"}, 898 {Opt_showexec, "showexec"},
892 {Opt_debug, "debug"}, 899 {Opt_debug, "debug"},
893 {Opt_immutable, "sys_immutable"}, 900 {Opt_immutable, "sys_immutable"},
901 {Opt_flush, "flush"},
902 {Opt_tz_utc, "tz=UTC"},
903 {Opt_err_cont, "errors=continue"},
904 {Opt_err_panic, "errors=panic"},
905 {Opt_err_ro, "errors=remount-ro"},
894 {Opt_obsolate, "conv=binary"}, 906 {Opt_obsolate, "conv=binary"},
895 {Opt_obsolate, "conv=text"}, 907 {Opt_obsolate, "conv=text"},
896 {Opt_obsolate, "conv=auto"}, 908 {Opt_obsolate, "conv=auto"},
@@ -902,8 +914,6 @@ static const match_table_t fat_tokens = {
902 {Opt_obsolate, "cvf_format=%20s"}, 914 {Opt_obsolate, "cvf_format=%20s"},
903 {Opt_obsolate, "cvf_options=%100s"}, 915 {Opt_obsolate, "cvf_options=%100s"},
904 {Opt_obsolate, "posix"}, 916 {Opt_obsolate, "posix"},
905 {Opt_flush, "flush"},
906 {Opt_tz_utc, "tz=UTC"},
907 {Opt_err, NULL}, 917 {Opt_err, NULL},
908}; 918};
909static const match_table_t msdos_tokens = { 919static const match_table_t msdos_tokens = {
@@ -956,7 +966,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
956 966
957 opts->fs_uid = current_uid(); 967 opts->fs_uid = current_uid();
958 opts->fs_gid = current_gid(); 968 opts->fs_gid = current_gid();
959 opts->fs_fmask = current_umask(); 969 opts->fs_fmask = opts->fs_dmask = current_umask();
960 opts->allow_utime = -1; 970 opts->allow_utime = -1;
961 opts->codepage = fat_default_codepage; 971 opts->codepage = fat_default_codepage;
962 opts->iocharset = fat_default_iocharset; 972 opts->iocharset = fat_default_iocharset;
@@ -973,6 +983,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
973 opts->numtail = 1; 983 opts->numtail = 1;
974 opts->usefree = opts->nocase = 0; 984 opts->usefree = opts->nocase = 0;
975 opts->tz_utc = 0; 985 opts->tz_utc = 0;
986 opts->errors = FAT_ERRORS_RO;
976 *debug = 0; 987 *debug = 0;
977 988
978 if (!options) 989 if (!options)
@@ -1065,6 +1076,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1065 case Opt_tz_utc: 1076 case Opt_tz_utc:
1066 opts->tz_utc = 1; 1077 opts->tz_utc = 1;
1067 break; 1078 break;
1079 case Opt_err_cont:
1080 opts->errors = FAT_ERRORS_CONT;
1081 break;
1082 case Opt_err_panic:
1083 opts->errors = FAT_ERRORS_PANIC;
1084 break;
1085 case Opt_err_ro:
1086 opts->errors = FAT_ERRORS_RO;
1087 break;
1068 1088
1069 /* msdos specific */ 1089 /* msdos specific */
1070 case Opt_dots: 1090 case Opt_dots:
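
The new errors= option defaults to remount-ro, which matches what fat_fs_panic() used to do unconditionally. A minimal sketch of selecting the panic policy from user space instead (device node and mount point are placeholders):

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* placeholders: adjust the device and mount point for your setup */
		if (mount("/dev/sdb1", "/mnt/usb", "vfat", 0, "errors=panic") < 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}
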
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index ac39ebcc1496..a6c20473dfd7 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -12,14 +12,19 @@
12#include "fat.h" 12#include "fat.h"
13 13
14/* 14/*
15 * fat_fs_panic reports a severe file system problem and sets the file system 15 * fat_fs_error reports a file system problem that might indicate a data
16 * read-only. The file system can be made writable again by remounting it. 16 * corruption/inconsistency. Depending on the 'errors' mount option,
17 * panic() is called, or an error message is printed and nothing is done,
18 * or the filesystem is remounted read-only (default behavior).
19 * In case the file system is remounted read-only, it can be made writable
20 * again by remounting it.
17 */ 21 */
18void fat_fs_panic(struct super_block *s, const char *fmt, ...) 22void fat_fs_error(struct super_block *s, const char *fmt, ...)
19{ 23{
24 struct fat_mount_options *opts = &MSDOS_SB(s)->options;
20 va_list args; 25 va_list args;
21 26
22 printk(KERN_ERR "FAT: Filesystem panic (dev %s)\n", s->s_id); 27 printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
23 28
24 printk(KERN_ERR " "); 29 printk(KERN_ERR " ");
25 va_start(args, fmt); 30 va_start(args, fmt);
@@ -27,13 +32,14 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
27 va_end(args); 32 va_end(args);
28 printk("\n"); 33 printk("\n");
29 34
30 if (!(s->s_flags & MS_RDONLY)) { 35 if (opts->errors == FAT_ERRORS_PANIC)
36 panic("FAT: fs panic from previous error\n");
37 else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) {
31 s->s_flags |= MS_RDONLY; 38 s->s_flags |= MS_RDONLY;
32 printk(KERN_ERR " File system has been set read-only\n"); 39 printk(KERN_ERR " File system has been set read-only\n");
33 } 40 }
34} 41}
35 42EXPORT_SYMBOL_GPL(fat_fs_error);
36EXPORT_SYMBOL_GPL(fat_fs_panic);
37 43
38/* Flushes the number of free clusters on FAT32 */ 44/* Flushes the number of free clusters on FAT32 */
39/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 45/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
@@ -124,7 +130,7 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
124 mark_inode_dirty(inode); 130 mark_inode_dirty(inode);
125 } 131 }
126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { 132 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
127 fat_fs_panic(sb, "clusters badly computed (%d != %llu)", 133 fat_fs_error(sb, "clusters badly computed (%d != %llu)",
128 new_fclus, 134 new_fclus,
129 (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); 135 (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
130 fat_cache_inval_inode(inode); 136 fat_cache_inval_inode(inode);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 20f522861355..82f88733b681 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -608,7 +608,7 @@ error_inode:
608 sinfo.bh = NULL; 608 sinfo.bh = NULL;
609 } 609 }
610 if (corrupt < 0) { 610 if (corrupt < 0) {
611 fat_fs_panic(new_dir->i_sb, 611 fat_fs_error(new_dir->i_sb,
612 "%s: Filesystem corrupted (i_pos %lld)", 612 "%s: Filesystem corrupted (i_pos %lld)",
613 __func__, sinfo.i_pos); 613 __func__, sinfo.i_pos);
614 } 614 }
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b50ecbe97f83..73471b7ecc8c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
502 if (utf8) { 502 if (utf8) {
503 int name_len = strlen(name); 503 int name_len = strlen(name);
504 504
505 *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); 505 *outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname);
506 506
507 /* 507 /*
508 * We stripped '.'s before and set len appropriately, 508 * We stripped '.'s before and set len appropriately,
509 * but utf8_mbstowcs doesn't care about len 509 * but utf8s_to_utf16s doesn't care about len
510 */ 510 */
511 *outlen -= (name_len - len); 511 *outlen -= (name_len - len);
512 512
@@ -1030,7 +1030,7 @@ error_inode:
1030 sinfo.bh = NULL; 1030 sinfo.bh = NULL;
1031 } 1031 }
1032 if (corrupt < 0) { 1032 if (corrupt < 0) {
1033 fat_fs_panic(new_dir->i_sb, 1033 fat_fs_error(new_dir->i_sb,
1034 "%s: Filesystem corrupted (i_pos %lld)", 1034 "%s: Filesystem corrupted (i_pos %lld)",
1035 __func__, sinfo.i_pos); 1035 __func__, sinfo.i_pos);
1036 } 1036 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 1ad703150dee..a040b764f8e3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -198,15 +198,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
198} 198}
199 199
200static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 200static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
201 uid_t uid, uid_t euid, int force) 201 int force)
202{ 202{
203 write_lock_irq(&filp->f_owner.lock); 203 write_lock_irq(&filp->f_owner.lock);
204 if (force || !filp->f_owner.pid) { 204 if (force || !filp->f_owner.pid) {
205 put_pid(filp->f_owner.pid); 205 put_pid(filp->f_owner.pid);
206 filp->f_owner.pid = get_pid(pid); 206 filp->f_owner.pid = get_pid(pid);
207 filp->f_owner.pid_type = type; 207 filp->f_owner.pid_type = type;
208 filp->f_owner.uid = uid; 208
209 filp->f_owner.euid = euid; 209 if (pid) {
210 const struct cred *cred = current_cred();
211 filp->f_owner.uid = cred->uid;
212 filp->f_owner.euid = cred->euid;
213 }
210 } 214 }
211 write_unlock_irq(&filp->f_owner.lock); 215 write_unlock_irq(&filp->f_owner.lock);
212} 216}
@@ -214,14 +218,13 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
214int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 218int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
215 int force) 219 int force)
216{ 220{
217 const struct cred *cred = current_cred();
218 int err; 221 int err;
219 222
220 err = security_file_set_fowner(filp); 223 err = security_file_set_fowner(filp);
221 if (err) 224 if (err)
222 return err; 225 return err;
223 226
224 f_modown(filp, pid, type, cred->uid, cred->euid, force); 227 f_modown(filp, pid, type, force);
225 return 0; 228 return 0;
226} 229}
227EXPORT_SYMBOL(__f_setown); 230EXPORT_SYMBOL(__f_setown);
@@ -247,7 +250,7 @@ EXPORT_SYMBOL(f_setown);
247 250
248void f_delown(struct file *filp) 251void f_delown(struct file *filp)
249{ 252{
250 f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1); 253 f_modown(filp, NULL, PIDTYPE_PID, 1);
251} 254}
252 255
253pid_t f_getown(struct file *filp) 256pid_t f_getown(struct file *filp)
@@ -425,14 +428,20 @@ static inline int sigio_perm(struct task_struct *p,
425} 428}
426 429
427static void send_sigio_to_task(struct task_struct *p, 430static void send_sigio_to_task(struct task_struct *p,
428 struct fown_struct *fown, 431 struct fown_struct *fown,
429 int fd, 432 int fd,
430 int reason) 433 int reason)
431{ 434{
432 if (!sigio_perm(p, fown, fown->signum)) 435 /*
436 * F_SETSIG can change ->signum lockless in parallel, make
437 * sure we read it once and use the same value throughout.
438 */
439 int signum = ACCESS_ONCE(fown->signum);
440
441 if (!sigio_perm(p, fown, signum))
433 return; 442 return;
434 443
435 switch (fown->signum) { 444 switch (signum) {
436 siginfo_t si; 445 siginfo_t si;
437 default: 446 default:
438 /* Queue a rt signal with the appropriate fd as its 447 /* Queue a rt signal with the appropriate fd as its
@@ -441,7 +450,7 @@ static void send_sigio_to_task(struct task_struct *p,
441 delivered even if we can't queue. Failure to 450 delivered even if we can't queue. Failure to
442 queue in this case _should_ be reported; we fall 451 queue in this case _should_ be reported; we fall
443 back to SIGIO in that case. --sct */ 452 back to SIGIO in that case. --sct */
444 si.si_signo = fown->signum; 453 si.si_signo = signum;
445 si.si_errno = 0; 454 si.si_errno = 0;
446 si.si_code = reason; 455 si.si_code = reason;
447 /* Make sure we are called with one of the POLL_* 456 /* Make sure we are called with one of the POLL_*
@@ -453,7 +462,7 @@ static void send_sigio_to_task(struct task_struct *p,
453 else 462 else
454 si.si_band = band_table[reason - POLL_IN]; 463 si.si_band = band_table[reason - POLL_IN];
455 si.si_fd = fd; 464 si.si_fd = fd;
456 if (!group_send_sig_info(fown->signum, &si, p)) 465 if (!group_send_sig_info(signum, &si, p))
457 break; 466 break;
458 /* fall-through: fall back on the old plain SIGIO signal */ 467 /* fall-through: fall back on the old plain SIGIO signal */
459 case 0: 468 case 0:
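
For context on the fown/signum changes above, this is roughly how user space arms the path that send_sigio_to_task() services: O_ASYNC plus F_SETOWN selects the recipient, and F_SETSIG picks the (possibly realtime) signal whose si_fd/si_band get filled in. A hedged sketch, with a pipe standing in for any pollable descriptor:

	#define _GNU_SOURCE		/* F_SETSIG */
	#include <fcntl.h>
	#include <signal.h>
	#include <string.h>
	#include <unistd.h>

	static void on_io(int sig, siginfo_t *si, void *ctx)
	{
		(void)sig; (void)si; (void)ctx;
		/* si->si_fd and si->si_band are filled in by the kernel */
		write(STDOUT_FILENO, "SIGIO-style notification\n", 25);
	}

	int main(void)
	{
		int pfd[2];
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = on_io;
		sa.sa_flags = SA_SIGINFO;

		if (pipe(pfd) < 0 || sigaction(SIGRTMIN, &sa, NULL) < 0)
			return 1;

		fcntl(pfd[0], F_SETOWN, getpid());	/* deliver to this process */
		fcntl(pfd[0], F_SETSIG, SIGRTMIN);	/* queue a realtime signal */
		fcntl(pfd[0], F_SETFL, fcntl(pfd[0], F_GETFL) | O_ASYNC);

		write(pfd[1], "x", 1);	/* readable data triggers the notification */
		sleep(1);
		return 0;
	}
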
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 40308e98c6a4..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
278EXPORT_SYMBOL(sb_has_dirty_inodes); 278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 279
280/* 280/*
281 * Write a single inode's dirty pages and inode data out to disk. 281 * Wait for writeback on an inode to complete.
282 */
283static void inode_wait_for_writeback(struct inode *inode)
284{
285 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
286 wait_queue_head_t *wqh;
287
288 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
289 do {
290 spin_unlock(&inode_lock);
291 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
292 spin_lock(&inode_lock);
293 } while (inode->i_state & I_SYNC);
294}
295
296/*
297 * Write out an inode's dirty pages. Called under inode_lock. Either the
298 * caller has ref on the inode (either via __iget or via syscall against an fd)
299 * or the inode has I_WILL_FREE set (via generic_forget_inode)
300 *
282 * If `wait' is set, wait on the writeout. 301 * If `wait' is set, wait on the writeout.
283 * 302 *
284 * The whole writeout design is quite complex and fragile. We want to avoid 303 * The whole writeout design is quite complex and fragile. We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
288 * Called under inode_lock. 307 * Called under inode_lock.
289 */ 308 */
290static int 309static int
291__sync_single_inode(struct inode *inode, struct writeback_control *wbc) 310writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
292{ 311{
293 unsigned dirty;
294 struct address_space *mapping = inode->i_mapping; 312 struct address_space *mapping = inode->i_mapping;
295 int wait = wbc->sync_mode == WB_SYNC_ALL; 313 int wait = wbc->sync_mode == WB_SYNC_ALL;
314 unsigned dirty;
296 int ret; 315 int ret;
297 316
317 if (!atomic_read(&inode->i_count))
318 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
319 else
320 WARN_ON(inode->i_state & I_WILL_FREE);
321
322 if (inode->i_state & I_SYNC) {
323 /*
324 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that
326 * writeback can proceed with the other inodes on s_io.
327 *
328 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io.
330 */
331 if (!wait) {
332 requeue_io(inode);
333 return 0;
334 }
335
336 /*
337 * It's a data-integrity sync. We must wait.
338 */
339 inode_wait_for_writeback(inode);
340 }
341
298 BUG_ON(inode->i_state & I_SYNC); 342 BUG_ON(inode->i_state & I_SYNC);
299 343
300 /* Set I_SYNC, reset I_DIRTY */ 344 /* Set I_SYNC, reset I_DIRTY */
@@ -321,7 +365,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
321 365
322 spin_lock(&inode_lock); 366 spin_lock(&inode_lock);
323 inode->i_state &= ~I_SYNC; 367 inode->i_state &= ~I_SYNC;
324 if (!(inode->i_state & I_FREEING)) { 368 if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
325 if (!(inode->i_state & I_DIRTY) && 369 if (!(inode->i_state & I_DIRTY) &&
326 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 370 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
327 /* 371 /*
@@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
390} 434}
391 435
392/* 436/*
393 * Write out an inode's dirty pages. Called under inode_lock. Either the
394 * caller has ref on the inode (either via __iget or via syscall against an fd)
395 * or the inode has I_WILL_FREE set (via generic_forget_inode)
396 */
397static int
398__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
399{
400 wait_queue_head_t *wqh;
401
402 if (!atomic_read(&inode->i_count))
403 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
404 else
405 WARN_ON(inode->i_state & I_WILL_FREE);
406
407 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
408 /*
409 * We're skipping this inode because it's locked, and we're not
410 * doing writeback-for-data-integrity. Move it to s_more_io so
411 * that writeback can proceed with the other inodes on s_io.
412 * We'll have another go at writing back this inode when we
413 * completed a full scan of s_io.
414 */
415 requeue_io(inode);
416 return 0;
417 }
418
419 /*
420 * It's a data-integrity sync. We must wait.
421 */
422 if (inode->i_state & I_SYNC) {
423 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
424
425 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
426 do {
427 spin_unlock(&inode_lock);
428 __wait_on_bit(wqh, &wq, inode_wait,
429 TASK_UNINTERRUPTIBLE);
430 spin_lock(&inode_lock);
431 } while (inode->i_state & I_SYNC);
432 }
433 return __sync_single_inode(inode, wbc);
434}
435
436/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 437 * Write out a superblock's list of dirty inodes. A wait will be performed
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 438 * upon no inodes, all inodes or the final one, depending upon sync_mode.
439 * 439 *
@@ -492,7 +492,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
492 break; 492 break;
493 } 493 }
494 494
495 if (inode->i_state & I_NEW) { 495 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
496 requeue_io(inode); 496 requeue_io(inode);
497 continue; 497 continue;
498 } 498 }
@@ -523,10 +523,10 @@ void generic_sync_sb_inodes(struct super_block *sb,
523 if (current_is_pdflush() && !writeback_acquire(bdi)) 523 if (current_is_pdflush() && !writeback_acquire(bdi))
524 break; 524 break;
525 525
526 BUG_ON(inode->i_state & I_FREEING); 526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 527 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 528 pages_skipped = wbc->pages_skipped;
529 __writeback_single_inode(inode, wbc); 529 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 530 if (current_is_pdflush())
531 writeback_release(bdi); 531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 532 if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
708 708
709 might_sleep(); 709 might_sleep();
710 spin_lock(&inode_lock); 710 spin_lock(&inode_lock);
711 ret = __writeback_single_inode(inode, &wbc); 711 ret = writeback_single_inode(inode, &wbc);
712 spin_unlock(&inode_lock); 712 spin_unlock(&inode_lock);
713 if (sync) 713 if (sync)
714 inode_sync_wait(inode); 714 inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
732 int ret; 732 int ret;
733 733
734 spin_lock(&inode_lock); 734 spin_lock(&inode_lock);
735 ret = __writeback_single_inode(inode, wbc); 735 ret = writeback_single_inode(inode, wbc);
736 spin_unlock(&inode_lock); 736 spin_unlock(&inode_lock);
737 return ret; 737 return ret;
738} 738}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0df55a52929..d8673ccf90b7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -19,7 +19,6 @@
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h> 21#include <linux/exportfs.h>
22#include <linux/smp_lock.h>
23 22
24MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
25MODULE_DESCRIPTION("Filesystem in Userspace"); 24MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -260,9 +259,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
260 259
261static void fuse_umount_begin(struct super_block *sb) 260static void fuse_umount_begin(struct super_block *sb)
262{ 261{
263 lock_kernel();
264 fuse_abort_conn(get_fuse_conn_super(sb)); 262 fuse_abort_conn(get_fuse_conn_super(sb));
265 unlock_kernel();
266} 263}
267 264
268static void fuse_send_destroy(struct fuse_conn *fc) 265static void fuse_send_destroy(struct fuse_conn *fc)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index cad957cdb1e5..5971359d2090 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL && (64BIT || LBD) 3 depends on EXPERIMENTAL && (64BIT || LBDAF)
4 select DLM if GFS2_FS_LOCKING_DLM 4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM 5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM 6 select SYSFS if GFS2_FS_LOCKING_DLM
diff --git a/fs/inode.c b/fs/inode.c
index a88baebf77cf..901bad1e5f12 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
25#include <linux/fsnotify.h> 25#include <linux/fsnotify.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/async.h> 27#include <linux/async.h>
28#include <linux/posix_acl.h>
28 29
29/* 30/*
30 * This is needed for the following functions: 31 * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
189 } 190 }
190 inode->i_private = NULL; 191 inode->i_private = NULL;
191 inode->i_mapping = mapping; 192 inode->i_mapping = mapping;
193#ifdef CONFIG_FS_POSIX_ACL
194 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
195#endif
192 196
193#ifdef CONFIG_FSNOTIFY 197#ifdef CONFIG_FSNOTIFY
194 inode->i_fsnotify_mask = 0; 198 inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
227 ima_inode_free(inode); 231 ima_inode_free(inode);
228 security_inode_free(inode); 232 security_inode_free(inode);
229 fsnotify_inode_delete(inode); 233 fsnotify_inode_delete(inode);
234#ifdef CONFIG_FS_POSIX_ACL
235 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
236 posix_acl_release(inode->i_acl);
237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
238 posix_acl_release(inode->i_default_acl);
239#endif
230 if (inode->i_sb->s_op->destroy_inode) 240 if (inode->i_sb->s_op->destroy_inode)
231 inode->i_sb->s_op->destroy_inode(inode); 241 inode->i_sb->s_op->destroy_inode(inode);
232 else 242 else
@@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode)
665 if (inode->i_mode & S_IFDIR) { 675 if (inode->i_mode & S_IFDIR) {
666 struct file_system_type *type = inode->i_sb->s_type; 676 struct file_system_type *type = inode->i_sb->s_type;
667 677
668 /* 678 /* Set new key only if filesystem hasn't already changed it */
669 * ensure nobody is actually holding i_mutex 679 if (!lockdep_match_class(&inode->i_mutex,
670 */ 680 &type->i_mutex_key)) {
671 mutex_destroy(&inode->i_mutex); 681 /*
672 mutex_init(&inode->i_mutex); 682 * ensure nobody is actually holding i_mutex
673 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); 683 */
684 mutex_destroy(&inode->i_mutex);
685 mutex_init(&inode->i_mutex);
686 lockdep_set_class(&inode->i_mutex,
687 &type->i_mutex_dir_key);
688 }
674 } 689 }
675#endif 690#endif
676 /* 691 /*
@@ -1408,7 +1423,7 @@ EXPORT_SYMBOL(touch_atime);
1408 * for writeback. Note that this function is meant exclusively for 1423 * for writeback. Note that this function is meant exclusively for
1409 * usage in the file write path of filesystems, and filesystems may 1424 * usage in the file write path of filesystems, and filesystems may
1410 * choose to explicitly ignore update via this function with the 1425 * choose to explicitly ignore update via this function with the
1411 * S_NOCTIME inode flag, e.g. for network filesystem where these 1426 * S_NOCMTIME inode flag, e.g. for network filesystem where these
1412 * timestamps are handled by the server. 1427 * timestamps are handled by the server.
1413 */ 1428 */
1414 1429
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 286f38dfc6c0..5612880fcbe7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/writeback.h> 16#include <linux/writeback.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/falloc.h>
18 19
19#include <asm/ioctls.h> 20#include <asm/ioctls.h>
20 21
@@ -70,9 +71,7 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
70 res = get_user(block, p); 71 res = get_user(block, p);
71 if (res) 72 if (res)
72 return res; 73 return res;
73 lock_kernel();
74 res = mapping->a_ops->bmap(mapping, block); 74 res = mapping->a_ops->bmap(mapping, block);
75 unlock_kernel();
76 return put_user(res, p); 75 return put_user(res, p);
77} 76}
78 77
@@ -405,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
405 404
406#endif /* CONFIG_BLOCK */ 405#endif /* CONFIG_BLOCK */
407 406
407/*
408 * This provides compatibility with legacy XFS pre-allocation ioctls
409 * which predate the fallocate syscall.
410 *
411 * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
412 * are used here; the rest are ignored.
413 */
414int ioctl_preallocate(struct file *filp, void __user *argp)
415{
416 struct inode *inode = filp->f_path.dentry->d_inode;
417 struct space_resv sr;
418
419 if (copy_from_user(&sr, argp, sizeof(sr)))
420 return -EFAULT;
421
422 switch (sr.l_whence) {
423 case SEEK_SET:
424 break;
425 case SEEK_CUR:
426 sr.l_start += filp->f_pos;
427 break;
428 case SEEK_END:
429 sr.l_start += i_size_read(inode);
430 break;
431 default:
432 return -EINVAL;
433 }
434
435 return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
436}
437
408static int file_ioctl(struct file *filp, unsigned int cmd, 438static int file_ioctl(struct file *filp, unsigned int cmd,
409 unsigned long arg) 439 unsigned long arg)
410{ 440{
@@ -416,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
416 return ioctl_fibmap(filp, p); 446 return ioctl_fibmap(filp, p);
417 case FIONREAD: 447 case FIONREAD:
418 return put_user(i_size_read(inode) - filp->f_pos, p); 448 return put_user(i_size_read(inode) - filp->f_pos, p);
449 case FS_IOC_RESVSP:
450 case FS_IOC_RESVSP64:
451 return ioctl_preallocate(filp, p);
419 } 452 }
420 453
421 return vfs_ioctl(filp, cmd, arg); 454 return vfs_ioctl(filp, cmd, arg);
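
The new ioctl_preallocate() simply maps the legacy reservation request onto fallocate() with FALLOC_FL_KEEP_SIZE, so blocks are reserved but i_size is untouched. The modern user-space equivalent therefore looks roughly like this (path and size are examples):

	#define _GNU_SOURCE		/* fallocate() */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/falloc.h>	/* FALLOC_FL_KEEP_SIZE */

	int main(void)
	{
		int fd = open("/mnt/data/prealloc.bin", O_RDWR | O_CREAT, 0644);

		if (fd < 0)
			return 1;
		/* reserve 16 MiB of blocks without changing the file size */
		if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20) < 0)
			perror("fallocate");
		close(fd);
		return 0;
	}
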
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2f0dc5a14633..8ba5441063be 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -195,9 +195,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
195 * Do not report hidden files if so instructed, or associated 195 * Do not report hidden files if so instructed, or associated
196 * files unless instructed to do so 196 * files unless instructed to do so
197 */ 197 */
198 if ((sbi->s_hide == 'y' && 198 if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) ||
199 (de->flags[-sbi->s_high_sierra] & 1)) || 199 (!sbi->s_showassoc &&
200 (sbi->s_showassoc =='n' &&
201 (de->flags[-sbi->s_high_sierra] & 4))) { 200 (de->flags[-sbi->s_high_sierra] & 4))) {
202 filp->f_pos += de_len; 201 filp->f_pos += de_len;
203 continue; 202 continue;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 068b34b5a107..58a7963e168a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -141,13 +141,17 @@ static const struct dentry_operations isofs_dentry_ops[] = {
141}; 141};
142 142
143struct iso9660_options{ 143struct iso9660_options{
144 char map; 144 unsigned int rock:1;
145 char rock; 145 unsigned int cruft:1;
146 unsigned int hide:1;
147 unsigned int showassoc:1;
148 unsigned int nocompress:1;
149 unsigned int overriderockperm:1;
150 unsigned int uid_set:1;
151 unsigned int gid_set:1;
152 unsigned int utf8:1;
153 unsigned char map;
146 char joliet; 154 char joliet;
147 char cruft;
148 char hide;
149 char showassoc;
150 char nocompress;
151 unsigned char check; 155 unsigned char check;
152 unsigned int blocksize; 156 unsigned int blocksize;
153 mode_t fmode; 157 mode_t fmode;
@@ -155,7 +159,6 @@ struct iso9660_options{
155 gid_t gid; 159 gid_t gid;
156 uid_t uid; 160 uid_t uid;
157 char *iocharset; 161 char *iocharset;
158 unsigned char utf8;
159 /* LVE */ 162 /* LVE */
160 s32 session; 163 s32 session;
161 s32 sbsector; 164 s32 sbsector;
@@ -312,7 +315,7 @@ enum {
312 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, 315 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore,
313 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, 316 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet,
314 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, 317 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err,
315 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, 318 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, Opt_overriderockperm,
316}; 319};
317 320
318static const match_table_t tokens = { 321static const match_table_t tokens = {
@@ -340,6 +343,7 @@ static const match_table_t tokens = {
340 {Opt_gid, "gid=%u"}, 343 {Opt_gid, "gid=%u"},
341 {Opt_mode, "mode=%u"}, 344 {Opt_mode, "mode=%u"},
342 {Opt_dmode, "dmode=%u"}, 345 {Opt_dmode, "dmode=%u"},
346 {Opt_overriderockperm, "overriderockperm"},
343 {Opt_block, "block=%u"}, 347 {Opt_block, "block=%u"},
344 {Opt_ignore, "conv=binary"}, 348 {Opt_ignore, "conv=binary"},
345 {Opt_ignore, "conv=b"}, 349 {Opt_ignore, "conv=b"},
@@ -359,24 +363,22 @@ static int parse_options(char *options, struct iso9660_options *popt)
359 int option; 363 int option;
360 364
361 popt->map = 'n'; 365 popt->map = 'n';
362 popt->rock = 'y'; 366 popt->rock = 1;
363 popt->joliet = 'y'; 367 popt->joliet = 1;
364 popt->cruft = 'n'; 368 popt->cruft = 0;
365 popt->hide = 'n'; 369 popt->hide = 0;
366 popt->showassoc = 'n'; 370 popt->showassoc = 0;
367 popt->check = 'u'; /* unset */ 371 popt->check = 'u'; /* unset */
368 popt->nocompress = 0; 372 popt->nocompress = 0;
369 popt->blocksize = 1024; 373 popt->blocksize = 1024;
370 popt->fmode = popt->dmode = S_IRUGO | S_IXUGO; /* 374 popt->fmode = popt->dmode = ISOFS_INVALID_MODE;
371 * r-x for all. The disc could 375 popt->uid_set = 0;
372 * be shared with DOS machines so 376 popt->gid_set = 0;
373 * virtually anything could be
374 * a valid executable.
375 */
376 popt->gid = 0; 377 popt->gid = 0;
377 popt->uid = 0; 378 popt->uid = 0;
378 popt->iocharset = NULL; 379 popt->iocharset = NULL;
379 popt->utf8 = 0; 380 popt->utf8 = 0;
381 popt->overriderockperm = 0;
380 popt->session=-1; 382 popt->session=-1;
381 popt->sbsector=-1; 383 popt->sbsector=-1;
382 if (!options) 384 if (!options)
@@ -393,20 +395,20 @@ static int parse_options(char *options, struct iso9660_options *popt)
393 token = match_token(p, tokens, args); 395 token = match_token(p, tokens, args);
394 switch (token) { 396 switch (token) {
395 case Opt_norock: 397 case Opt_norock:
396 popt->rock = 'n'; 398 popt->rock = 0;
397 break; 399 break;
398 case Opt_nojoliet: 400 case Opt_nojoliet:
399 popt->joliet = 'n'; 401 popt->joliet = 0;
400 break; 402 break;
401 case Opt_hide: 403 case Opt_hide:
402 popt->hide = 'y'; 404 popt->hide = 1;
403 break; 405 break;
404 case Opt_unhide: 406 case Opt_unhide:
405 case Opt_showassoc: 407 case Opt_showassoc:
406 popt->showassoc = 'y'; 408 popt->showassoc = 1;
407 break; 409 break;
408 case Opt_cruft: 410 case Opt_cruft:
409 popt->cruft = 'y'; 411 popt->cruft = 1;
410 break; 412 break;
411 case Opt_utf8: 413 case Opt_utf8:
412 popt->utf8 = 1; 414 popt->utf8 = 1;
@@ -450,11 +452,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
450 if (match_int(&args[0], &option)) 452 if (match_int(&args[0], &option))
451 return 0; 453 return 0;
452 popt->uid = option; 454 popt->uid = option;
455 popt->uid_set = 1;
453 break; 456 break;
454 case Opt_gid: 457 case Opt_gid:
455 if (match_int(&args[0], &option)) 458 if (match_int(&args[0], &option))
456 return 0; 459 return 0;
457 popt->gid = option; 460 popt->gid = option;
461 popt->gid_set = 1;
458 break; 462 break;
459 case Opt_mode: 463 case Opt_mode:
460 if (match_int(&args[0], &option)) 464 if (match_int(&args[0], &option))
@@ -466,6 +470,9 @@ static int parse_options(char *options, struct iso9660_options *popt)
466 return 0; 470 return 0;
467 popt->dmode = option; 471 popt->dmode = option;
468 break; 472 break;
473 case Opt_overriderockperm:
474 popt->overriderockperm = 1;
475 break;
469 case Opt_block: 476 case Opt_block:
470 if (match_int(&args[0], &option)) 477 if (match_int(&args[0], &option))
471 return 0; 478 return 0;
@@ -650,7 +657,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
650 goto out_freebh; 657 goto out_freebh;
651 658
652 sbi->s_high_sierra = 1; 659 sbi->s_high_sierra = 1;
653 opt.rock = 'n'; 660 opt.rock = 0;
654 h_pri = (struct hs_primary_descriptor *)vdp; 661 h_pri = (struct hs_primary_descriptor *)vdp;
655 goto root_found; 662 goto root_found;
656 } 663 }
@@ -673,7 +680,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
673 680
674root_found: 681root_found:
675 682
676 if (joliet_level && (pri == NULL || opt.rock == 'n')) { 683 if (joliet_level && (pri == NULL || !opt.rock)) {
677 /* This is the case of Joliet with the norock mount flag. 684 /* This is the case of Joliet with the norock mount flag.
678 * A disc with both Joliet and Rock Ridge is handled later 685 * A disc with both Joliet and Rock Ridge is handled later
679 */ 686 */
@@ -802,22 +809,31 @@ root_found:
802 s->s_op = &isofs_sops; 809 s->s_op = &isofs_sops;
803 s->s_export_op = &isofs_export_ops; 810 s->s_export_op = &isofs_export_ops;
804 sbi->s_mapping = opt.map; 811 sbi->s_mapping = opt.map;
805 sbi->s_rock = (opt.rock == 'y' ? 2 : 0); 812 sbi->s_rock = (opt.rock ? 2 : 0);
806 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ 813 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/
807 sbi->s_cruft = opt.cruft; 814 sbi->s_cruft = opt.cruft;
808 sbi->s_hide = opt.hide; 815 sbi->s_hide = opt.hide;
809 sbi->s_showassoc = opt.showassoc; 816 sbi->s_showassoc = opt.showassoc;
810 sbi->s_uid = opt.uid; 817 sbi->s_uid = opt.uid;
811 sbi->s_gid = opt.gid; 818 sbi->s_gid = opt.gid;
819 sbi->s_uid_set = opt.uid_set;
820 sbi->s_gid_set = opt.gid_set;
812 sbi->s_utf8 = opt.utf8; 821 sbi->s_utf8 = opt.utf8;
813 sbi->s_nocompress = opt.nocompress; 822 sbi->s_nocompress = opt.nocompress;
823 sbi->s_overriderockperm = opt.overriderockperm;
814 /* 824 /*
815 * It would be incredibly stupid to allow people to mark every file 825 * It would be incredibly stupid to allow people to mark every file
816 * on the disk as suid, so we merely allow them to set the default 826 * on the disk as suid, so we merely allow them to set the default
817 * permissions. 827 * permissions.
818 */ 828 */
819 sbi->s_fmode = opt.fmode & 0777; 829 if (opt.fmode != ISOFS_INVALID_MODE)
820 sbi->s_dmode = opt.dmode & 0777; 830 sbi->s_fmode = opt.fmode & 0777;
831 else
832 sbi->s_fmode = ISOFS_INVALID_MODE;
833 if (opt.dmode != ISOFS_INVALID_MODE)
834 sbi->s_dmode = opt.dmode & 0777;
835 else
836 sbi->s_dmode = ISOFS_INVALID_MODE;
821 837
822 /* 838 /*
823 * Read the root inode, which _may_ result in changing 839 * Read the root inode, which _may_ result in changing
@@ -1095,18 +1111,6 @@ static const struct address_space_operations isofs_aops = {
1095 .bmap = _isofs_bmap 1111 .bmap = _isofs_bmap
1096}; 1112};
1097 1113
1098static inline void test_and_set_uid(uid_t *p, uid_t value)
1099{
1100 if (value)
1101 *p = value;
1102}
1103
1104static inline void test_and_set_gid(gid_t *p, gid_t value)
1105{
1106 if (value)
1107 *p = value;
1108}
1109
1110static int isofs_read_level3_size(struct inode *inode) 1114static int isofs_read_level3_size(struct inode *inode)
1111{ 1115{
1112 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 1116 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
@@ -1261,7 +1265,10 @@ static int isofs_read_inode(struct inode *inode)
1261 ei->i_file_format = isofs_file_normal; 1265 ei->i_file_format = isofs_file_normal;
1262 1266
1263 if (de->flags[-high_sierra] & 2) { 1267 if (de->flags[-high_sierra] & 2) {
1264 inode->i_mode = sbi->s_dmode | S_IFDIR; 1268 if (sbi->s_dmode != ISOFS_INVALID_MODE)
1269 inode->i_mode = S_IFDIR | sbi->s_dmode;
1270 else
1271 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1265 inode->i_nlink = 1; /* 1272 inode->i_nlink = 1; /*
1266 * Set to 1. We know there are 2, but 1273 * Set to 1. We know there are 2, but
1267 * the find utility tries to optimize 1274 * the find utility tries to optimize
@@ -1270,8 +1277,16 @@ static int isofs_read_inode(struct inode *inode)
1270 * do it the hard way. 1277 * do it the hard way.
1271 */ 1278 */
1272 } else { 1279 } else {
1273 /* Everybody gets to read the file. */ 1280 if (sbi->s_fmode != ISOFS_INVALID_MODE) {
1274 inode->i_mode = sbi->s_fmode | S_IFREG; 1281 inode->i_mode = S_IFREG | sbi->s_fmode;
1282 } else {
1283 /*
1284 * Set default permissions: r-x for all. The disc
1285 * could be shared with DOS machines so virtually
1286 * anything could be a valid executable.
1287 */
1288 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1289 }
1275 inode->i_nlink = 1; 1290 inode->i_nlink = 1;
1276 } 1291 }
1277 inode->i_uid = sbi->s_uid; 1292 inode->i_uid = sbi->s_uid;
@@ -1300,7 +1315,7 @@ static int isofs_read_inode(struct inode *inode)
1300 * this CDROM was mounted with the cruft option. 1315 * this CDROM was mounted with the cruft option.
1301 */ 1316 */
1302 1317
1303 if (sbi->s_cruft == 'y') 1318 if (sbi->s_cruft)
1304 inode->i_size &= 0x00ffffff; 1319 inode->i_size &= 0x00ffffff;
1305 1320
1306 if (de->interleave[0]) { 1321 if (de->interleave[0]) {
@@ -1346,9 +1361,18 @@ static int isofs_read_inode(struct inode *inode)
1346 if (!high_sierra) { 1361 if (!high_sierra) {
1347 parse_rock_ridge_inode(de, inode); 1362 parse_rock_ridge_inode(de, inode);
1348 /* if we want uid/gid set, override the rock ridge setting */ 1363 /* if we want uid/gid set, override the rock ridge setting */
1349 test_and_set_uid(&inode->i_uid, sbi->s_uid); 1364 if (sbi->s_uid_set)
1350 test_and_set_gid(&inode->i_gid, sbi->s_gid); 1365 inode->i_uid = sbi->s_uid;
1366 if (sbi->s_gid_set)
1367 inode->i_gid = sbi->s_gid;
1351 } 1368 }
1369 /* Now set final access rights if overriding rock ridge setting */
1370 if (S_ISDIR(inode->i_mode) && sbi->s_overriderockperm &&
1371 sbi->s_dmode != ISOFS_INVALID_MODE)
1372 inode->i_mode = S_IFDIR | sbi->s_dmode;
1373 if (S_ISREG(inode->i_mode) && sbi->s_overriderockperm &&
1374 sbi->s_fmode != ISOFS_INVALID_MODE)
1375 inode->i_mode = S_IFREG | sbi->s_fmode;
1352 1376
1353 /* Install the inode operations vector */ 1377 /* Install the inode operations vector */
1354 if (S_ISREG(inode->i_mode)) { 1378 if (S_ISREG(inode->i_mode)) {
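
Tying the option plumbing above together (a sketch, not part of the patch): overriderockperm makes an explicit mode=/dmode= win even when Rock Ridge permissions are present, and uid=/gid= now only override the on-disc owner when they were actually supplied. Device, mount point and values below are examples:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* example device and mount point */
		if (mount("/dev/sr0", "/mnt/cdrom", "iso9660", MS_RDONLY,
			  "overriderockperm,mode=0444,dmode=0555,uid=1000") < 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}
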
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index ccbf72faf27a..7d33de84f52a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -35,21 +35,20 @@ struct isofs_sb_info {
35 unsigned long s_log_zone_size; 35 unsigned long s_log_zone_size;
36 unsigned long s_max_size; 36 unsigned long s_max_size;
37 37
38 unsigned char s_high_sierra; /* A simple flag */
39 unsigned char s_mapping;
40 int s_rock_offset; /* offset of SUSP fields within SU area */ 38 int s_rock_offset; /* offset of SUSP fields within SU area */
41 unsigned char s_rock;
42 unsigned char s_joliet_level; 39 unsigned char s_joliet_level;
43 unsigned char s_utf8; 40 unsigned char s_mapping;
44 unsigned char s_cruft; /* Broken disks with high 41 unsigned int s_high_sierra:1;
45 byte of length containing 42 unsigned int s_rock:2;
46 junk */ 43 unsigned int s_utf8:1;
47 unsigned char s_unhide; 44 unsigned int s_cruft:1; /* Broken disks with high byte of length
48 unsigned char s_nosuid; 45 * containing junk */
49 unsigned char s_nodev; 46 unsigned int s_nocompress:1;
50 unsigned char s_nocompress; 47 unsigned int s_hide:1;
51 unsigned char s_hide; 48 unsigned int s_showassoc:1;
52 unsigned char s_showassoc; 49 unsigned int s_overriderockperm:1;
50 unsigned int s_uid_set:1;
51 unsigned int s_gid_set:1;
53 52
54 mode_t s_fmode; 53 mode_t s_fmode;
55 mode_t s_dmode; 54 mode_t s_dmode;
@@ -58,6 +57,8 @@ struct isofs_sb_info {
58 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
59}; 58};
60 59
60#define ISOFS_INVALID_MODE ((mode_t) -1)
61
61static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) 62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
62{ 63{
63 return sb->s_fs_info; 64 return sb->s_fs_info;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b850e9c..a048de81c093 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
37 return (op - ascii); 37 return (op - ascii);
38} 38}
39 39
40/* Convert big endian wide character string to utf8 */
41static int
42wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
43{
44 const __u8 *ip;
45 __u8 *op;
46 int size;
47 __u16 c;
48
49 op = s;
50 ip = pwcs;
51 while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
52 c = (*ip << 8) | ip[1];
53 if (c > 0x7f) {
54 size = utf8_wctomb(op, c, maxlen);
55 if (size == -1) {
56 /* Ignore character and move on */
57 maxlen--;
58 } else {
59 op += size;
60 maxlen -= size;
61 }
62 } else {
63 *op++ = (__u8) c;
64 }
65 ip += 2;
66 inlen--;
67 }
68 return (op - s);
69}
70
71int 40int
72get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) 41get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
73{ 42{
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
79 nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; 48 nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
80 49
81 if (utf8) { 50 if (utf8) {
82 len = wcsntombs_be(outname, de->name, 51 len = utf16s_to_utf8s((const wchar_t *) de->name,
83 de->name_len[0] >> 1, PAGE_SIZE); 52 de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
53 outname, PAGE_SIZE);
84 } else { 54 } else {
85 len = uni16_to_x8(outname, (__be16 *) de->name, 55 len = uni16_to_x8(outname, (__be16 *) de->name,
86 de->name_len[0] >> 1, nls); 56 de->name_len[0] >> 1, nls);
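
Editor's note: the joliet.c hunk drops the filesystem's private big-endian UTF-16 to UTF-8 converter in favour of the generic NLS helper. A small sketch of the replacement call, assuming the declarations in <linux/nls.h>; the wrapper function and variable names are illustrative only:

	#include <linux/nls.h>

	/*
	 * 'name' points at the on-disc big-endian UTF-16 name and
	 * 'name_bytes' is its length in bytes; utf16s_to_utf8s() takes the
	 * length in 16-bit units (hence the >> 1) and writes at most
	 * 'outlen' bytes of UTF-8 into 'out', returning the bytes written.
	 */
	static int joliet_name_to_utf8(const __u8 *name, int name_bytes,
				       unsigned char *out, int outlen)
	{
		return utf16s_to_utf8s((const wchar_t *)name, name_bytes >> 1,
				       UTF16_BIG_ENDIAN, out, outlen);
	}
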
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 8299889a835e..eaa831311c9c 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -142,9 +142,9 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
142 */ 142 */
143 match = 0; 143 match = 0;
144 if (dlen > 0 && 144 if (dlen > 0 &&
145 (sbi->s_hide =='n' || 145 (!sbi->s_hide ||
146 (!(de->flags[-sbi->s_high_sierra] & 1))) && 146 (!(de->flags[-sbi->s_high_sierra] & 1))) &&
147 (sbi->s_showassoc =='y' || 147 (sbi->s_showassoc ||
148 (!(de->flags[-sbi->s_high_sierra] & 4)))) { 148 (!(de->flags[-sbi->s_high_sierra] & 4)))) {
149 match = (isofs_cmp(dentry, dpnt, dlen) == 0); 149 match = (isofs_cmp(dentry, dpnt, dlen) == 0);
150 } 150 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index ed886e6db399..73242ba7c7b1 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1686,35 +1686,6 @@ out:
1686 return; 1686 return;
1687} 1687}
1688 1688
1689/*
1690 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1691 * The latter might still hold the a count on buffers when inspecting
1692 * them on t_syncdata_list or t_locked_list.
1693 *
1694 * journal_try_to_free_buffers() will call this function to
1695 * wait for the current transaction to finish syncing data buffers, before
1696 * tryinf to free that buffer.
1697 *
1698 * Called with journal->j_state_lock held.
1699 */
1700static void journal_wait_for_transaction_sync_data(journal_t *journal)
1701{
1702 transaction_t *transaction = NULL;
1703 tid_t tid;
1704
1705 spin_lock(&journal->j_state_lock);
1706 transaction = journal->j_committing_transaction;
1707
1708 if (!transaction) {
1709 spin_unlock(&journal->j_state_lock);
1710 return;
1711 }
1712
1713 tid = transaction->t_tid;
1714 spin_unlock(&journal->j_state_lock);
1715 log_wait_commit(journal, tid);
1716}
1717
1718/** 1689/**
1719 * int journal_try_to_free_buffers() - try to free page buffers. 1690 * int journal_try_to_free_buffers() - try to free page buffers.
1720 * @journal: journal for operation 1691 * @journal: journal for operation
@@ -1786,25 +1757,6 @@ int journal_try_to_free_buffers(journal_t *journal,
1786 1757
1787 ret = try_to_free_buffers(page); 1758 ret = try_to_free_buffers(page);
1788 1759
1789 /*
1790 * There are a number of places where journal_try_to_free_buffers()
1791 * could race with journal_commit_transaction(), the later still
1792 * holds the reference to the buffers to free while processing them.
1793 * try_to_free_buffers() failed to free those buffers. Some of the
1794 * caller of releasepage() request page buffers to be dropped, otherwise
1795 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1796 *
1797 * So, if the caller of try_to_release_page() wants the synchronous
1798 * behaviour(i.e make sure buffers are dropped upon return),
1799 * let's wait for the current transaction to finish flush of
1800 * dirty data buffers, then try to free those buffers again,
1801 * with the journal locked.
1802 */
1803 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1804 journal_wait_for_transaction_sync_data(journal);
1805 ret = try_to_free_buffers(page);
1806 }
1807
1808busy: 1760busy:
1809 return ret; 1761 return ret;
1810} 1762}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9e..5d70b3e6d49b 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/marker.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
25#include <trace/events/jbd2.h>
26 26
27/* 27/*
28 * Unlink a buffer from a transaction checkpoint list. 28 * Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
358 * journal straight away. 358 * journal straight away.
359 */ 359 */
360 result = jbd2_cleanup_journal_tail(journal); 360 result = jbd2_cleanup_journal_tail(journal);
361 trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", 361 trace_jbd2_checkpoint(journal, result);
362 journal->j_devname, result);
363 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 362 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
364 if (result <= 0) 363 if (result <= 0)
365 return result; 364 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226fd..7b4088b2364d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd2.h> 18#include <linux/jbd2.h>
19#include <linux/marker.h>
20#include <linux/errno.h> 19#include <linux/errno.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/mm.h> 21#include <linux/mm.h>
@@ -26,6 +25,7 @@
26#include <linux/writeback.h> 25#include <linux/writeback.h>
27#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
28#include <linux/bio.h> 27#include <linux/bio.h>
28#include <trace/events/jbd2.h>
29 29
30/* 30/*
31 * Default IO end handler for temporary BJ_IO buffer_heads. 31 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
253 * block allocation with delalloc. We need to write 253 * block allocation with delalloc. We need to write
254 * only allocated blocks here. 254 * only allocated blocks here.
255 */ 255 */
256 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
256 err = journal_submit_inode_data_buffers(mapping); 257 err = journal_submit_inode_data_buffers(mapping);
257 if (!ret) 258 if (!ret)
258 ret = err; 259 ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
394 commit_transaction = journal->j_running_transaction; 395 commit_transaction = journal->j_running_transaction;
395 J_ASSERT(commit_transaction->t_state == T_RUNNING); 396 J_ASSERT(commit_transaction->t_state == T_RUNNING);
396 397
397 trace_mark(jbd2_start_commit, "dev %s transaction %d", 398 trace_jbd2_start_commit(journal, commit_transaction);
398 journal->j_devname, commit_transaction->t_tid);
399 jbd_debug(1, "JBD: starting commit of transaction %d\n", 399 jbd_debug(1, "JBD: starting commit of transaction %d\n",
400 commit_transaction->t_tid); 400 commit_transaction->t_tid);
401 401
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
409 */ 409 */
410 if (commit_transaction->t_synchronous_commit) 410 if (commit_transaction->t_synchronous_commit)
411 write_op = WRITE_SYNC_PLUG; 411 write_op = WRITE_SYNC_PLUG;
412 trace_jbd2_commit_locking(journal, commit_transaction);
412 stats.u.run.rs_wait = commit_transaction->t_max_wait; 413 stats.u.run.rs_wait = commit_transaction->t_max_wait;
413 stats.u.run.rs_locked = jiffies; 414 stats.u.run.rs_locked = jiffies;
414 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 415 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
484 */ 485 */
485 jbd2_journal_switch_revoke_table(journal); 486 jbd2_journal_switch_revoke_table(journal);
486 487
488 trace_jbd2_commit_flushing(journal, commit_transaction);
487 stats.u.run.rs_flushing = jiffies; 489 stats.u.run.rs_flushing = jiffies;
488 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, 490 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
489 stats.u.run.rs_flushing); 491 stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 commit_transaction->t_state = T_COMMIT; 522 commit_transaction->t_state = T_COMMIT;
521 spin_unlock(&journal->j_state_lock); 523 spin_unlock(&journal->j_state_lock);
522 524
525 trace_jbd2_commit_logging(journal, commit_transaction);
523 stats.u.run.rs_logging = jiffies; 526 stats.u.run.rs_logging = jiffies;
524 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 527 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
525 stats.u.run.rs_logging); 528 stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
1054 if (journal->j_commit_callback) 1057 if (journal->j_commit_callback)
1055 journal->j_commit_callback(journal, commit_transaction); 1058 journal->j_commit_callback(journal, commit_transaction);
1056 1059
1057 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1060 trace_jbd2_end_commit(journal, commit_transaction);
1058 journal->j_devname, commit_transaction->t_tid,
1059 journal->j_tail_sequence);
1060 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1061 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1061 journal->j_commit_sequence, journal->j_tail_sequence); 1062 journal->j_commit_sequence, journal->j_tail_sequence);
1062 if (to_free) 1063 if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec2..18bfd5dab642 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h> 40#include <linux/math64.h>
41#include <linux/hash.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/jbd2.h>
41 45
42#include <asm/uaccess.h> 46#include <asm/uaccess.h>
43#include <asm/page.h> 47#include <asm/page.h>
@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
2377 jbd2_journal_destroy_caches(); 2381 jbd2_journal_destroy_caches();
2378} 2382}
2379 2383
2384/*
2385 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
2386 * tracing infrastructure to map a dev_t to a device name.
2387 *
2388 * The caller should use rcu_read_lock() in order to make sure the
 2389 * device name stays valid until it's done with it. We use
2390 * rcu_read_lock() as well to make sure we're safe in case the caller
2391 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
2392 * nested.
2393 */
2394struct devname_cache {
2395 struct rcu_head rcu;
2396 dev_t device;
2397 char devname[BDEVNAME_SIZE];
2398};
2399#define CACHE_SIZE_BITS 6
2400static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
2401static DEFINE_SPINLOCK(devname_cache_lock);
2402
2403static void free_devcache(struct rcu_head *rcu)
2404{
2405 kfree(rcu);
2406}
2407
2408const char *jbd2_dev_to_name(dev_t device)
2409{
2410 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret;
2412 struct block_device *bd;
2413
2414 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) {
2416 ret = devcache[i]->devname;
2417 rcu_read_unlock();
2418 return ret;
2419 }
2420 rcu_read_unlock();
2421
2422 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) {
2424 if (devcache[i]->device == device) {
2425 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock);
2427 return ret;
2428 }
2429 call_rcu(&devcache[i]->rcu, free_devcache);
2430 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device;
2437 bd = bdget(device);
2438 if (bd) {
2439 bdevname(bd, devcache[i]->devname);
2440 bdput(bd);
2441 } else
2442 __bdevname(device, devcache[i]->devname);
2443 ret = devcache[i]->devname;
2444 spin_unlock(&devname_cache_lock);
2445 return ret;
2446}
2447EXPORT_SYMBOL(jbd2_dev_to_name);
2448
2380MODULE_LICENSE("GPL"); 2449MODULE_LICENSE("GPL");
2381module_init(journal_init); 2450module_init(journal_init);
2382module_exit(journal_exit); 2451module_exit(journal_exit);
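
Editor's note: jbd2_dev_to_name() above caches dev_t-to-name translations so the new jbd2/ext4 tracepoints can print a device name cheaply, and its comment asks callers to hold rcu_read_lock() while the returned string is in use. A hedged usage sketch; the reporting function and message are made up for illustration, and it assumes the declaration is exported via <linux/jbd2.h> and that journal->j_fs_dev->bd_dev is the device of interest:

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/jbd2.h>

	static void example_report(journal_t *journal)
	{
		rcu_read_lock();	/* keep the cached name valid while we use it */
		printk(KERN_DEBUG "jbd2: commit on %s\n",
		       jbd2_dev_to_name(journal->j_fs_dev->bd_dev));
		rcu_read_unlock();
	}
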
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 996ffda06bf3..494501edba6b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1547,36 +1547,6 @@ out:
1547 return; 1547 return;
1548} 1548}
1549 1549
1550/*
1551 * jbd2_journal_try_to_free_buffers() could race with
1552 * jbd2_journal_commit_transaction(). The later might still hold the
1553 * reference count to the buffers when inspecting them on
1554 * t_syncdata_list or t_locked_list.
1555 *
1556 * jbd2_journal_try_to_free_buffers() will call this function to
1557 * wait for the current transaction to finish syncing data buffers, before
1558 * try to free that buffer.
1559 *
1560 * Called with journal->j_state_lock hold.
1561 */
1562static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1563{
1564 transaction_t *transaction;
1565 tid_t tid;
1566
1567 spin_lock(&journal->j_state_lock);
1568 transaction = journal->j_committing_transaction;
1569
1570 if (!transaction) {
1571 spin_unlock(&journal->j_state_lock);
1572 return;
1573 }
1574
1575 tid = transaction->t_tid;
1576 spin_unlock(&journal->j_state_lock);
1577 jbd2_log_wait_commit(journal, tid);
1578}
1579
1580/** 1550/**
1581 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1551 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1582 * @journal: journal for operation 1552 * @journal: journal for operation
@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1649 1619
1650 ret = try_to_free_buffers(page); 1620 ret = try_to_free_buffers(page);
1651 1621
1652 /*
1653 * There are a number of places where jbd2_journal_try_to_free_buffers()
1654 * could race with jbd2_journal_commit_transaction(), the later still
1655 * holds the reference to the buffers to free while processing them.
1656 * try_to_free_buffers() failed to free those buffers. Some of the
1657 * caller of releasepage() request page buffers to be dropped, otherwise
1658 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1659 *
1660 * So, if the caller of try_to_release_page() wants the synchronous
1661 * behaviour(i.e make sure buffers are dropped upon return),
1662 * let's wait for the current transaction to finish flush of
1663 * dirty data buffers, then try to free those buffers again,
1664 * with the journal locked.
1665 */
1666 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1667 jbd2_journal_wait_for_transaction_sync_data(journal);
1668 ret = try_to_free_buffers(page);
1669 }
1670
1671busy: 1622busy:
1672 return ret; 1623 return ret;
1673} 1624}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 043740dde20c..8fcb6239218e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
156 return ERR_PTR(-EINVAL); 156 return ERR_PTR(-EINVAL);
157} 157}
158 158
159static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
160{
161 struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
162
163 spin_lock(&inode->i_lock);
164 if (*i_acl != JFFS2_ACL_NOT_CACHED)
165 acl = posix_acl_dup(*i_acl);
166 spin_unlock(&inode->i_lock);
167 return acl;
168}
169
170static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
171{
172 spin_lock(&inode->i_lock);
173 if (*i_acl != JFFS2_ACL_NOT_CACHED)
174 posix_acl_release(*i_acl);
175 *i_acl = posix_acl_dup(acl);
176 spin_unlock(&inode->i_lock);
177}
178
179static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) 159static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
180{ 160{
181 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
182 struct posix_acl *acl; 161 struct posix_acl *acl;
183 char *value = NULL; 162 char *value = NULL;
184 int rc, xprefix; 163 int rc, xprefix;
185 164
165 acl = get_cached_acl(inode, type);
166 if (acl != ACL_NOT_CACHED)
167 return acl;
168
186 switch (type) { 169 switch (type) {
187 case ACL_TYPE_ACCESS: 170 case ACL_TYPE_ACCESS:
188 acl = jffs2_iget_acl(inode, &f->i_acl_access);
189 if (acl != JFFS2_ACL_NOT_CACHED)
190 return acl;
191 xprefix = JFFS2_XPREFIX_ACL_ACCESS; 171 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
192 break; 172 break;
193 case ACL_TYPE_DEFAULT: 173 case ACL_TYPE_DEFAULT:
194 acl = jffs2_iget_acl(inode, &f->i_acl_default);
195 if (acl != JFFS2_ACL_NOT_CACHED)
196 return acl;
197 xprefix = JFFS2_XPREFIX_ACL_DEFAULT; 174 xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
198 break; 175 break;
199 default: 176 default:
200 return ERR_PTR(-EINVAL); 177 BUG();
201 } 178 }
202 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); 179 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
203 if (rc > 0) { 180 if (rc > 0) {
@@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
215 } 192 }
216 if (value) 193 if (value)
217 kfree(value); 194 kfree(value);
218 if (!IS_ERR(acl)) { 195 if (!IS_ERR(acl))
219 switch (type) { 196 set_cached_acl(inode, type, acl);
220 case ACL_TYPE_ACCESS:
221 jffs2_iset_acl(inode, &f->i_acl_access, acl);
222 break;
223 case ACL_TYPE_DEFAULT:
224 jffs2_iset_acl(inode, &f->i_acl_default, acl);
225 break;
226 }
227 }
228 return acl; 197 return acl;
229} 198}
230 199
@@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
249 218
250static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 219static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
251{ 220{
252 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
253 int rc, xprefix; 221 int rc, xprefix;
254 222
255 if (S_ISLNK(inode->i_mode)) 223 if (S_ISLNK(inode->i_mode))
@@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
285 return -EINVAL; 253 return -EINVAL;
286 } 254 }
287 rc = __jffs2_set_acl(inode, xprefix, acl); 255 rc = __jffs2_set_acl(inode, xprefix, acl);
288 if (!rc) { 256 if (!rc)
289 switch(type) { 257 set_cached_acl(inode, type, acl);
290 case ACL_TYPE_ACCESS:
291 jffs2_iset_acl(inode, &f->i_acl_access, acl);
292 break;
293 case ACL_TYPE_DEFAULT:
294 jffs2_iset_acl(inode, &f->i_acl_default, acl);
295 break;
296 }
297 }
298 return rc; 258 return rc;
299} 259}
300 260
@@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask)
321 281
322int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
323{ 283{
324 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
325 struct posix_acl *acl, *clone; 284 struct posix_acl *acl, *clone;
326 int rc; 285 int rc;
327 286
328 f->i_acl_default = NULL; 287 cache_no_acl(inode);
329 f->i_acl_access = NULL;
330 288
331 if (S_ISLNK(*i_mode)) 289 if (S_ISLNK(*i_mode))
332 return 0; /* Symlink always has no-ACL */ 290 return 0; /* Symlink always has no-ACL */
@@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
339 *i_mode &= ~current_umask(); 297 *i_mode &= ~current_umask();
340 } else { 298 } else {
341 if (S_ISDIR(*i_mode)) 299 if (S_ISDIR(*i_mode))
342 jffs2_iset_acl(inode, &f->i_acl_default, acl); 300 set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
343 301
344 clone = posix_acl_clone(acl, GFP_KERNEL); 302 clone = posix_acl_clone(acl, GFP_KERNEL);
345 if (!clone) 303 if (!clone)
@@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
350 return rc; 308 return rc;
351 } 309 }
352 if (rc > 0) 310 if (rc > 0)
353 jffs2_iset_acl(inode, &f->i_acl_access, clone); 311 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
354 312
355 posix_acl_release(clone); 313 posix_acl_release(clone);
356 } 314 }
@@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
359 317
360int jffs2_init_acl_post(struct inode *inode) 318int jffs2_init_acl_post(struct inode *inode)
361{ 319{
362 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
363 int rc; 320 int rc;
364 321
365 if (f->i_acl_default) { 322 if (inode->i_default_acl) {
366 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default); 323 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
367 if (rc) 324 if (rc)
368 return rc; 325 return rc;
369 } 326 }
370 327
371 if (f->i_acl_access) { 328 if (inode->i_acl) {
372 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access); 329 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
373 if (rc) 330 if (rc)
374 return rc; 331 return rc;
375 } 332 }
@@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode)
377 return 0; 334 return 0;
378} 335}
379 336
380void jffs2_clear_acl(struct jffs2_inode_info *f)
381{
382 if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
383 posix_acl_release(f->i_acl_access);
384 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
385 }
386 if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
387 posix_acl_release(f->i_acl_default);
388 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
389 }
390}
391
392int jffs2_acl_chmod(struct inode *inode) 337int jffs2_acl_chmod(struct inode *inode)
393{ 338{
394 struct posix_acl *acl, *clone; 339 struct posix_acl *acl, *clone;
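
Editor's note: the jffs2 ACL hunks (and the jfs ones further down) drop the per-filesystem ACL pointers and their JFFS2_ACL_NOT_CACHED/JFS_ACL_NOT_CACHED sentinels in favour of the VFS-generic cache on struct inode (i_acl/i_default_acl), so destroy_inode/clear_inode paths no longer have to release cached ACLs themselves. A sketch of the lookup pattern the converted filesystems follow; the function is illustrative and the on-disk read is only stubbed:

	#include <linux/fs.h>
	#include <linux/err.h>
	#include <linux/posix_acl.h>

	static struct posix_acl *example_get_acl(struct inode *inode, int type)
	{
		struct posix_acl *acl;

		acl = get_cached_acl(inode, type);	/* hit: returns its own reference */
		if (acl != ACL_NOT_CACHED)
			return acl;

		acl = NULL;	/* ...read the ACL from the filesystem's xattr here... */

		if (!IS_ERR(acl))
			set_cached_acl(inode, type, acl);	/* caches a duplicate reference */
		return acl;
	}
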
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8ca058aed384..fc929f2a14f6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30
31extern int jffs2_permission(struct inode *, int); 29extern int jffs2_permission(struct inode *, int);
32extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
34extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
35extern void jffs2_clear_acl(struct jffs2_inode_info *);
36 33
37extern struct xattr_handler jffs2_acl_access_xattr_handler; 34extern struct xattr_handler jffs2_acl_access_xattr_handler;
38extern struct xattr_handler jffs2_acl_default_xattr_handler; 35extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
43#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
44#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
45#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
46#define jffs2_clear_acl(f)
47 43
48#endif /* CONFIG_JFFS2_FS_POSIX_ACL */ 44#endif /* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 4c41db91eaa4..c6923da98263 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
50 uint16_t flags; 50 uint16_t flags;
51 uint8_t usercompr; 51 uint8_t usercompr;
52 struct inode vfs_inode; 52 struct inode vfs_inode;
53#ifdef CONFIG_JFFS2_FS_POSIX_ACL
54 struct posix_acl *i_acl_access;
55 struct posix_acl *i_acl_default;
56#endif
57}; 53};
58 54
59#endif /* _JFFS2_FS_I */ 55#endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2228380c47b9..a7f03b7ebcb3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
56 f->target = NULL; 56 f->target = NULL;
57 f->flags = 0; 57 f->flags = 0;
58 f->usercompr = 0; 58 f->usercompr = 0;
59#ifdef CONFIG_JFFS2_FS_POSIX_ACL
60 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
61 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
62#endif
63} 59}
64 60
65 61
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1fc1e92356ee..1a80301004b8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1424 struct jffs2_full_dirent *fd, *fds; 1424 struct jffs2_full_dirent *fd, *fds;
1425 int deleted; 1425 int deleted;
1426 1426
1427 jffs2_clear_acl(f);
1428 jffs2_xattr_delete_inode(c, f->inocache); 1427 jffs2_xattr_delete_inode(c, f->inocache);
1429 mutex_lock(&f->sem); 1428 mutex_lock(&f->sem);
1430 deleted = f->inocache && !f->inocache->pino_nlink; 1429 deleted = f->inocache && !f->inocache->pino_nlink;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 1d437de1e9a8..7515e73e2bfb 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -196,7 +196,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
196 if (c->nextblock) { 196 if (c->nextblock) {
197 ret = file_dirty(c, c->nextblock); 197 ret = file_dirty(c, c->nextblock);
198 if (ret) 198 if (ret)
199 return ret; 199 goto out;
200 /* deleting summary information of the old nextblock */ 200 /* deleting summary information of the old nextblock */
201 jffs2_sum_reset_collected(c->summary); 201 jffs2_sum_reset_collected(c->summary);
202 } 202 }
@@ -207,7 +207,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
207 } else { 207 } else {
208 ret = file_dirty(c, jeb); 208 ret = file_dirty(c, jeb);
209 if (ret) 209 if (ret)
210 return ret; 210 goto out;
211 } 211 }
212 break; 212 break;
213 213
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 06ca1b8d2054..91fa3ad6e8c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
31{ 31{
32 struct posix_acl *acl; 32 struct posix_acl *acl;
33 char *ea_name; 33 char *ea_name;
34 struct jfs_inode_info *ji = JFS_IP(inode);
35 struct posix_acl **p_acl;
36 int size; 34 int size;
37 char *value = NULL; 35 char *value = NULL;
38 36
37 acl = get_cached_acl(inode, type);
38 if (acl != ACL_NOT_CACHED)
39 return acl;
40
39 switch(type) { 41 switch(type) {
40 case ACL_TYPE_ACCESS: 42 case ACL_TYPE_ACCESS:
41 ea_name = POSIX_ACL_XATTR_ACCESS; 43 ea_name = POSIX_ACL_XATTR_ACCESS;
42 p_acl = &ji->i_acl;
43 break; 44 break;
44 case ACL_TYPE_DEFAULT: 45 case ACL_TYPE_DEFAULT:
45 ea_name = POSIX_ACL_XATTR_DEFAULT; 46 ea_name = POSIX_ACL_XATTR_DEFAULT;
46 p_acl = &ji->i_default_acl;
47 break; 47 break;
48 default: 48 default:
49 return ERR_PTR(-EINVAL); 49 return ERR_PTR(-EINVAL);
50 } 50 }
51 51
52 if (*p_acl != JFS_ACL_NOT_CACHED)
53 return posix_acl_dup(*p_acl);
54
55 size = __jfs_getxattr(inode, ea_name, NULL, 0); 52 size = __jfs_getxattr(inode, ea_name, NULL, 0);
56 53
57 if (size > 0) { 54 if (size > 0) {
@@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
62 } 59 }
63 60
64 if (size < 0) { 61 if (size < 0) {
65 if (size == -ENODATA) { 62 if (size == -ENODATA)
66 *p_acl = NULL;
67 acl = NULL; 63 acl = NULL;
68 } else 64 else
69 acl = ERR_PTR(size); 65 acl = ERR_PTR(size);
70 } else { 66 } else {
71 acl = posix_acl_from_xattr(value, size); 67 acl = posix_acl_from_xattr(value, size);
72 if (!IS_ERR(acl))
73 *p_acl = posix_acl_dup(acl);
74 } 68 }
75 kfree(value); 69 kfree(value);
70 if (!IS_ERR(acl)) {
71 set_cached_acl(inode, type, acl);
72 posix_acl_release(acl);
73 }
76 return acl; 74 return acl;
77} 75}
78 76
@@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
80 struct posix_acl *acl) 78 struct posix_acl *acl)
81{ 79{
82 char *ea_name; 80 char *ea_name;
83 struct jfs_inode_info *ji = JFS_IP(inode);
84 struct posix_acl **p_acl;
85 int rc; 81 int rc;
86 int size = 0; 82 int size = 0;
87 char *value = NULL; 83 char *value = NULL;
@@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
92 switch(type) { 88 switch(type) {
93 case ACL_TYPE_ACCESS: 89 case ACL_TYPE_ACCESS:
94 ea_name = POSIX_ACL_XATTR_ACCESS; 90 ea_name = POSIX_ACL_XATTR_ACCESS;
95 p_acl = &ji->i_acl;
96 break; 91 break;
97 case ACL_TYPE_DEFAULT: 92 case ACL_TYPE_DEFAULT:
98 ea_name = POSIX_ACL_XATTR_DEFAULT; 93 ea_name = POSIX_ACL_XATTR_DEFAULT;
99 p_acl = &ji->i_default_acl;
100 if (!S_ISDIR(inode->i_mode)) 94 if (!S_ISDIR(inode->i_mode))
101 return acl ? -EACCES : 0; 95 return acl ? -EACCES : 0;
102 break; 96 break;
@@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
116out: 110out:
117 kfree(value); 111 kfree(value);
118 112
119 if (!rc) { 113 if (!rc)
120 if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) 114 set_cached_acl(inode, type, acl);
121 posix_acl_release(*p_acl); 115
122 *p_acl = posix_acl_dup(acl);
123 }
124 return rc; 116 return rc;
125} 117}
126 118
127static int jfs_check_acl(struct inode *inode, int mask) 119static int jfs_check_acl(struct inode *inode, int mask)
128{ 120{
129 struct jfs_inode_info *ji = JFS_IP(inode); 121 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
130 122
131 if (ji->i_acl == JFS_ACL_NOT_CACHED) { 123 if (IS_ERR(acl))
132 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 124 return PTR_ERR(acl);
133 if (IS_ERR(acl)) 125 if (acl) {
134 return PTR_ERR(acl); 126 int error = posix_acl_permission(inode, acl, mask);
135 posix_acl_release(acl); 127 posix_acl_release(acl);
128 return error;
136 } 129 }
137 130
138 if (ji->i_acl)
139 return posix_acl_permission(inode, ji->i_acl, mask);
140 return -EAGAIN; 131 return -EAGAIN;
141} 132}
142 133
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index bbbd5f202e37..41d6045dbeb0 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -391,6 +391,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
391 } 391 }
392 XADaddress(xp, xaddr); 392 XADaddress(xp, xaddr);
393 XADlength(xp, xlen); 393 XADlength(xp, xlen);
394 XADoffset(xp, prev);
394 /* 395 /*
395 * only preserve the abnr flag within the xad flags 396 * only preserve the abnr flag within the xad flags
396 * of the returned hint. 397 * of the returned hint.
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 439901d205fe..1439f119ec83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -74,10 +74,6 @@ struct jfs_inode_info {
74 /* xattr_sem allows us to access the xattrs without taking i_mutex */ 74 /* xattr_sem allows us to access the xattrs without taking i_mutex */
75 struct rw_semaphore xattr_sem; 75 struct rw_semaphore xattr_sem;
76 lid_t xtlid; /* lid of xtree lock on directory */ 76 lid_t xtlid; /* lid of xtree lock on directory */
77#ifdef CONFIG_JFS_POSIX_ACL
78 struct posix_acl *i_acl;
79 struct posix_acl *i_default_acl;
80#endif
81 union { 77 union {
82 struct { 78 struct {
83 xtpage_t _xtroot; /* 288: xtree root */ 79 xtpage_t _xtroot; /* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
107#define i_inline u.link._inline 103#define i_inline u.link._inline
108#define i_inline_ea u.link._inline_ea 104#define i_inline_ea u.link._inline_ea
109 105
110#define JFS_ACL_NOT_CACHED ((void *)-1)
111
112#define IREAD_LOCK(ip, subclass) \ 106#define IREAD_LOCK(ip, subclass) \
113 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) 107 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
114#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) 108#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 09b1b6ee2186..37e6dcda8fc8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 128 ji->active_ag = -1;
129 } 129 }
130 spin_unlock_irq(&ji->ag_lock); 130 spin_unlock_irq(&ji->ag_lock);
131
132#ifdef CONFIG_JFS_POSIX_ACL
133 if (ji->i_acl != JFS_ACL_NOT_CACHED) {
134 posix_acl_release(ji->i_acl);
135 ji->i_acl = JFS_ACL_NOT_CACHED;
136 }
137 if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
138 posix_acl_release(ji->i_default_acl);
139 ji->i_default_acl = JFS_ACL_NOT_CACHED;
140 }
141#endif
142
143 kmem_cache_free(jfs_inode_cachep, ji); 131 kmem_cache_free(jfs_inode_cachep, ji);
144} 132}
145 133
@@ -798,10 +786,6 @@ static void init_once(void *foo)
798 init_rwsem(&jfs_ip->xattr_sem); 786 init_rwsem(&jfs_ip->xattr_sem);
799 spin_lock_init(&jfs_ip->ag_lock); 787 spin_lock_init(&jfs_ip->ag_lock);
800 jfs_ip->active_ag = -1; 788 jfs_ip->active_ag = -1;
801#ifdef CONFIG_JFS_POSIX_ACL
802 jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
803 jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
804#endif
805 inode_init_once(&jfs_ip->vfs_inode); 789 inode_init_once(&jfs_ip->vfs_inode);
806} 790}
807 791
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 61dfa8173ebc..fad364548bc9 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
727 /* 727 /*
728 * We're changing the ACL. Get rid of the cached one 728 * We're changing the ACL. Get rid of the cached one
729 */ 729 */
730 acl =JFS_IP(inode)->i_acl; 730 forget_cached_acl(inode, ACL_TYPE_ACCESS);
731 if (acl != JFS_ACL_NOT_CACHED)
732 posix_acl_release(acl);
733 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
734 731
735 return 0; 732 return 0;
736 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { 733 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
746 /* 743 /*
747 * We're changing the default ACL. Get rid of the cached one 744 * We're changing the default ACL. Get rid of the cached one
748 */ 745 */
749 acl =JFS_IP(inode)->i_default_acl; 746 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
750 if (acl && (acl != JFS_ACL_NOT_CACHED))
751 posix_acl_release(acl);
752 JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
753 747
754 return 0; 748 return 0;
755 } 749 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index dd7957064a8c..f2fdcbce143e 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -126,7 +126,6 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
126 struct nlm_lock *lock = &argp->lock; 126 struct nlm_lock *lock = &argp->lock;
127 127
128 nlmclnt_next_cookie(&argp->cookie); 128 nlmclnt_next_cookie(&argp->cookie);
129 argp->state = nsm_local_state;
130 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); 129 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh));
131 lock->caller = utsname()->nodename; 130 lock->caller = utsname()->nodename;
132 lock->oh.data = req->a_owner; 131 lock->oh.data = req->a_owner;
@@ -165,6 +164,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
165 /* Set up the argument struct */ 164 /* Set up the argument struct */
166 nlmclnt_setlockargs(call, fl); 165 nlmclnt_setlockargs(call, fl);
167 166
167 lock_kernel();
168 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 168 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
169 if (fl->fl_type != F_UNLCK) { 169 if (fl->fl_type != F_UNLCK) {
170 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; 170 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -178,6 +178,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
178 178
179 fl->fl_ops->fl_release_private(fl); 179 fl->fl_ops->fl_release_private(fl);
180 fl->fl_ops = NULL; 180 fl->fl_ops = NULL;
181 unlock_kernel();
181 182
182 dprintk("lockd: clnt proc returns %d\n", status); 183 dprintk("lockd: clnt proc returns %d\n", status);
183 return status; 184 return status;
@@ -519,6 +520,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
519 520
520 if (nsm_monitor(host) < 0) 521 if (nsm_monitor(host) < 0)
521 goto out; 522 goto out;
523 req->a_args.state = nsm_local_state;
522 524
523 fl->fl_flags |= FL_ACCESS; 525 fl->fl_flags |= FL_ACCESS;
524 status = do_vfs_lock(fl); 526 status = do_vfs_lock(fl);
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6d5d4a4169e5..7fce1b525849 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(nsm_lock);
53/* 53/*
54 * Local NSM state 54 * Local NSM state
55 */ 55 */
56int __read_mostly nsm_local_state; 56u32 __read_mostly nsm_local_state;
57int __read_mostly nsm_use_hostnames; 57int __read_mostly nsm_use_hostnames;
58 58
59static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) 59static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
@@ -112,6 +112,7 @@ static struct rpc_clnt *nsm_create(void)
112 .program = &nsm_program, 112 .program = &nsm_program,
113 .version = NSM_VERSION, 113 .version = NSM_VERSION,
114 .authflavor = RPC_AUTH_NULL, 114 .authflavor = RPC_AUTH_NULL,
115 .flags = RPC_CLNT_CREATE_NOPING,
115 }; 116 };
116 117
117 return rpc_create(&args); 118 return rpc_create(&args);
@@ -184,13 +185,19 @@ int nsm_monitor(const struct nlm_host *host)
184 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; 185 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
185 186
186 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); 187 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res);
187 if (res.status != 0) 188 if (unlikely(res.status != 0))
188 status = -EIO; 189 status = -EIO;
189 if (status < 0) 190 if (unlikely(status < 0)) {
190 printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name); 191 printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name);
191 else 192 return status;
192 nsm->sm_monitored = 1; 193 }
193 return status; 194
195 nsm->sm_monitored = 1;
196 if (unlikely(nsm_local_state != res.state)) {
197 nsm_local_state = res.state;
198 dprintk("lockd: NSM state changed to %d\n", nsm_local_state);
199 }
200 return 0;
194} 201}
195 202
196/** 203/**
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 83ee34203bd7..e577a78d7bac 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
326{ 326{
327 if (call->a_args.lock.oh.data != call->a_owner) 327 if (call->a_args.lock.oh.data != call->a_owner)
328 kfree(call->a_args.lock.oh.data); 328 kfree(call->a_args.lock.oh.data);
329
330 locks_release_private(&call->a_args.lock.fl);
329} 331}
330 332
331/* 333/*
diff --git a/fs/locks.c b/fs/locks.c
index ec3deea29e37..b6440f52178f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void)
151 return kmem_cache_alloc(filelock_cache, GFP_KERNEL); 151 return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
152} 152}
153 153
154static void locks_release_private(struct file_lock *fl) 154void locks_release_private(struct file_lock *fl)
155{ 155{
156 if (fl->fl_ops) { 156 if (fl->fl_ops) {
157 if (fl->fl_ops->fl_release_private) 157 if (fl->fl_ops->fl_release_private)
@@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl)
165 } 165 }
166 166
167} 167}
168EXPORT_SYMBOL_GPL(locks_release_private);
168 169
169/* Free a lock which is not in use. */ 170/* Free a lock which is not in use. */
170static void locks_free_lock(struct file_lock *fl) 171static void locks_free_lock(struct file_lock *fl)
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 3aebe322271a..6ac693faae49 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -12,13 +12,14 @@
12/* bitmap.c contains the code that handles the inode and block bitmaps */ 12/* bitmap.c contains the code that handles the inode and block bitmaps */
13 13
14#include "minix.h" 14#include "minix.h"
15#include <linux/smp_lock.h>
16#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
17#include <linux/bitops.h> 16#include <linux/bitops.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19 18
20static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; 19static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
21 20
21static DEFINE_SPINLOCK(bitmap_lock);
22
22static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits) 23static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits)
23{ 24{
24 unsigned i, j, sum = 0; 25 unsigned i, j, sum = 0;
@@ -69,11 +70,11 @@ void minix_free_block(struct inode *inode, unsigned long block)
69 return; 70 return;
70 } 71 }
71 bh = sbi->s_zmap[zone]; 72 bh = sbi->s_zmap[zone];
72 lock_kernel(); 73 spin_lock(&bitmap_lock);
73 if (!minix_test_and_clear_bit(bit, bh->b_data)) 74 if (!minix_test_and_clear_bit(bit, bh->b_data))
74 printk("minix_free_block (%s:%lu): bit already cleared\n", 75 printk("minix_free_block (%s:%lu): bit already cleared\n",
75 sb->s_id, block); 76 sb->s_id, block);
76 unlock_kernel(); 77 spin_unlock(&bitmap_lock);
77 mark_buffer_dirty(bh); 78 mark_buffer_dirty(bh);
78 return; 79 return;
79} 80}
@@ -88,18 +89,18 @@ int minix_new_block(struct inode * inode)
88 struct buffer_head *bh = sbi->s_zmap[i]; 89 struct buffer_head *bh = sbi->s_zmap[i];
89 int j; 90 int j;
90 91
91 lock_kernel(); 92 spin_lock(&bitmap_lock);
92 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); 93 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
93 if (j < bits_per_zone) { 94 if (j < bits_per_zone) {
94 minix_set_bit(j, bh->b_data); 95 minix_set_bit(j, bh->b_data);
95 unlock_kernel(); 96 spin_unlock(&bitmap_lock);
96 mark_buffer_dirty(bh); 97 mark_buffer_dirty(bh);
97 j += i * bits_per_zone + sbi->s_firstdatazone-1; 98 j += i * bits_per_zone + sbi->s_firstdatazone-1;
98 if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) 99 if (j < sbi->s_firstdatazone || j >= sbi->s_nzones)
99 break; 100 break;
100 return j; 101 return j;
101 } 102 }
102 unlock_kernel(); 103 spin_unlock(&bitmap_lock);
103 } 104 }
104 return 0; 105 return 0;
105} 106}
@@ -211,10 +212,10 @@ void minix_free_inode(struct inode * inode)
211 minix_clear_inode(inode); /* clear on-disk copy */ 212 minix_clear_inode(inode); /* clear on-disk copy */
212 213
213 bh = sbi->s_imap[ino]; 214 bh = sbi->s_imap[ino];
214 lock_kernel(); 215 spin_lock(&bitmap_lock);
215 if (!minix_test_and_clear_bit(bit, bh->b_data)) 216 if (!minix_test_and_clear_bit(bit, bh->b_data))
216 printk("minix_free_inode: bit %lu already cleared\n", bit); 217 printk("minix_free_inode: bit %lu already cleared\n", bit);
217 unlock_kernel(); 218 spin_unlock(&bitmap_lock);
218 mark_buffer_dirty(bh); 219 mark_buffer_dirty(bh);
219 out: 220 out:
220 clear_inode(inode); /* clear in-memory copy */ 221 clear_inode(inode); /* clear in-memory copy */
@@ -237,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
237 j = bits_per_zone; 238 j = bits_per_zone;
238 bh = NULL; 239 bh = NULL;
239 *error = -ENOSPC; 240 *error = -ENOSPC;
240 lock_kernel(); 241 spin_lock(&bitmap_lock);
241 for (i = 0; i < sbi->s_imap_blocks; i++) { 242 for (i = 0; i < sbi->s_imap_blocks; i++) {
242 bh = sbi->s_imap[i]; 243 bh = sbi->s_imap[i];
243 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); 244 j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
@@ -245,17 +246,17 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
245 break; 246 break;
246 } 247 }
247 if (!bh || j >= bits_per_zone) { 248 if (!bh || j >= bits_per_zone) {
248 unlock_kernel(); 249 spin_unlock(&bitmap_lock);
249 iput(inode); 250 iput(inode);
250 return NULL; 251 return NULL;
251 } 252 }
252 if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */ 253 if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */
253 unlock_kernel(); 254 spin_unlock(&bitmap_lock);
254 printk("minix_new_inode: bit already set\n"); 255 printk("minix_new_inode: bit already set\n");
255 iput(inode); 256 iput(inode);
256 return NULL; 257 return NULL;
257 } 258 }
258 unlock_kernel(); 259 spin_unlock(&bitmap_lock);
259 mark_buffer_dirty(bh); 260 mark_buffer_dirty(bh);
260 j += i * bits_per_zone; 261 j += i * bits_per_zone;
261 if (!j || j > sbi->s_ninodes) { 262 if (!j || j > sbi->s_ninodes) {
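
Editor's note: the minix bitmap hunks replace lock_kernel()/unlock_kernel() with a file-local spinlock that covers only the bitmap bit operations; buffer dirtying stays outside the lock. A single coarse lock is enough here because the bitmaps are small and rarely contended, and it removes the big-kernel-lock dependency. The pattern reduced to a sketch, assuming the minix_test_and_clear_bit() helper from fs/minix/minix.h; the wrapper function is illustrative:

	#include <linux/spinlock.h>
	#include <linux/buffer_head.h>

	static DEFINE_SPINLOCK(bitmap_lock);

	static void example_free_bit(struct buffer_head *bh, unsigned long bit)
	{
		spin_lock(&bitmap_lock);
		if (!minix_test_and_clear_bit(bit, bh->b_data))
			printk("minix: bit %lu already cleared\n", bit);
		spin_unlock(&bitmap_lock);
		mark_buffer_dirty(bh);	/* no need to hold bitmap_lock for this */
	}
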
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index e5f206467e40..d407e7a0b6fe 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -11,7 +11,6 @@
11#include "minix.h" 11#include "minix.h"
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <linux/highmem.h> 13#include <linux/highmem.h>
14#include <linux/smp_lock.h>
15#include <linux/swap.h> 14#include <linux/swap.h>
16 15
17typedef struct minix_dir_entry minix_dirent; 16typedef struct minix_dir_entry minix_dirent;
@@ -20,6 +19,7 @@ typedef struct minix3_dir_entry minix3_dirent;
20static int minix_readdir(struct file *, void *, filldir_t); 19static int minix_readdir(struct file *, void *, filldir_t);
21 20
22const struct file_operations minix_dir_operations = { 21const struct file_operations minix_dir_operations = {
22 .llseek = generic_file_llseek,
23 .read = generic_read_dir, 23 .read = generic_read_dir,
24 .readdir = minix_readdir, 24 .readdir = minix_readdir,
25 .fsync = simple_fsync, 25 .fsync = simple_fsync,
@@ -102,8 +102,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
102 char *name; 102 char *name;
103 __u32 inumber; 103 __u32 inumber;
104 104
105 lock_kernel();
106
107 pos = (pos + chunk_size-1) & ~(chunk_size-1); 105 pos = (pos + chunk_size-1) & ~(chunk_size-1);
108 if (pos >= inode->i_size) 106 if (pos >= inode->i_size)
109 goto done; 107 goto done;
@@ -146,7 +144,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
146 144
147done: 145done:
148 filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; 146 filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
149 unlock_kernel();
150 return 0; 147 return 0;
151} 148}
152 149
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f91a23693597..74ea82d72164 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -35,8 +35,6 @@ static void minix_put_super(struct super_block *sb)
35 int i; 35 int i;
36 struct minix_sb_info *sbi = minix_sb(sb); 36 struct minix_sb_info *sbi = minix_sb(sb);
37 37
38 lock_kernel();
39
40 if (!(sb->s_flags & MS_RDONLY)) { 38 if (!(sb->s_flags & MS_RDONLY)) {
41 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 39 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
42 sbi->s_ms->s_state = sbi->s_mount_state; 40 sbi->s_ms->s_state = sbi->s_mount_state;
@@ -50,8 +48,6 @@ static void minix_put_super(struct super_block *sb)
50 kfree(sbi->s_imap); 48 kfree(sbi->s_imap);
51 sb->s_fs_info = NULL; 49 sb->s_fs_info = NULL;
52 kfree(sbi); 50 kfree(sbi);
53
54 unlock_kernel();
55} 51}
56 52
57static struct kmem_cache * minix_inode_cachep; 53static struct kmem_cache * minix_inode_cachep;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index cb7fdd11f9a5..9dcf95b42116 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -1,3 +1,6 @@
1#ifndef FS_MINIX_H
2#define FS_MINIX_H
3
1#include <linux/fs.h> 4#include <linux/fs.h>
2#include <linux/pagemap.h> 5#include <linux/pagemap.h>
3#include <linux/minix_fs.h> 6#include <linux/minix_fs.h>
@@ -86,3 +89,5 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
86{ 89{
87 return list_entry(inode, struct minix_inode_info, vfs_inode); 90 return list_entry(inode, struct minix_inode_info, vfs_inode);
88} 91}
92
93#endif /* FS_MINIX_H */
diff --git a/fs/namei.c b/fs/namei.c
index 527119afb6a5..5b961eb71cbf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
1698 if (error) 1698 if (error)
1699 return ERR_PTR(error); 1699 return ERR_PTR(error);
1700 error = path_walk(pathname, &nd); 1700 error = path_walk(pathname, &nd);
1701 if (error) 1701 if (error) {
1702 if (nd.root.mnt)
1703 path_put(&nd.root);
1702 return ERR_PTR(error); 1704 return ERR_PTR(error);
1705 }
1703 if (unlikely(!audit_dummy_context())) 1706 if (unlikely(!audit_dummy_context()))
1704 audit_inode(pathname, nd.path.dentry); 1707 audit_inode(pathname, nd.path.dentry);
1705 1708
@@ -1759,6 +1762,8 @@ do_last:
1759 } 1762 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1763 filp = nameidata_to_filp(&nd, open_flag);
1761 mnt_drop_write(nd.path.mnt); 1764 mnt_drop_write(nd.path.mnt);
1765 if (nd.root.mnt)
1766 path_put(&nd.root);
1762 return filp; 1767 return filp;
1763 } 1768 }
1764 1769
@@ -1819,6 +1824,8 @@ ok:
1819 */ 1824 */
1820 if (will_write) 1825 if (will_write)
1821 mnt_drop_write(nd.path.mnt); 1826 mnt_drop_write(nd.path.mnt);
1827 if (nd.root.mnt)
1828 path_put(&nd.root);
1822 return filp; 1829 return filp;
1823 1830
1824exit_mutex_unlock: 1831exit_mutex_unlock:
@@ -1859,6 +1866,8 @@ do_link:
1859 * with "intent.open". 1866 * with "intent.open".
1860 */ 1867 */
1861 release_open_intent(&nd); 1868 release_open_intent(&nd);
1869 if (nd.root.mnt)
1870 path_put(&nd.root);
1862 return ERR_PTR(error); 1871 return ERR_PTR(error);
1863 } 1872 }
1864 nd.flags &= ~LOOKUP_PARENT; 1873 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 2dd333b0fe7f..3dc283fd4716 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42static int event; 42static int event;
43static DEFINE_IDA(mnt_id_ida); 43static DEFINE_IDA(mnt_id_ida);
44static DEFINE_IDA(mnt_group_ida); 44static DEFINE_IDA(mnt_group_ida);
45static int mnt_id_start = 0;
46static int mnt_group_start = 1;
45 47
46static struct list_head *mount_hashtable __read_mostly; 48static struct list_head *mount_hashtable __read_mostly;
47static struct kmem_cache *mnt_cache __read_mostly; 49static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
69retry: 71retry:
70 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 72 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
71 spin_lock(&vfsmount_lock); 73 spin_lock(&vfsmount_lock);
72 res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); 74 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
75 if (!res)
76 mnt_id_start = mnt->mnt_id + 1;
73 spin_unlock(&vfsmount_lock); 77 spin_unlock(&vfsmount_lock);
74 if (res == -EAGAIN) 78 if (res == -EAGAIN)
75 goto retry; 79 goto retry;
@@ -79,8 +83,11 @@ retry:
79 83
80static void mnt_free_id(struct vfsmount *mnt) 84static void mnt_free_id(struct vfsmount *mnt)
81{ 85{
86 int id = mnt->mnt_id;
82 spin_lock(&vfsmount_lock); 87 spin_lock(&vfsmount_lock);
83 ida_remove(&mnt_id_ida, mnt->mnt_id); 88 ida_remove(&mnt_id_ida, id);
89 if (mnt_id_start > id)
90 mnt_id_start = id;
84 spin_unlock(&vfsmount_lock); 91 spin_unlock(&vfsmount_lock);
85} 92}
86 93
@@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt)
91 */ 98 */
92static int mnt_alloc_group_id(struct vfsmount *mnt) 99static int mnt_alloc_group_id(struct vfsmount *mnt)
93{ 100{
101 int res;
102
94 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) 103 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
95 return -ENOMEM; 104 return -ENOMEM;
96 105
97 return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); 106 res = ida_get_new_above(&mnt_group_ida,
107 mnt_group_start,
108 &mnt->mnt_group_id);
109 if (!res)
110 mnt_group_start = mnt->mnt_group_id + 1;
111
112 return res;
98} 113}
99 114
100/* 115/*
@@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
102 */ 117 */
103void mnt_release_group_id(struct vfsmount *mnt) 118void mnt_release_group_id(struct vfsmount *mnt)
104{ 119{
105 ida_remove(&mnt_group_ida, mnt->mnt_group_id); 120 int id = mnt->mnt_group_id;
121 ida_remove(&mnt_group_ida, id);
122 if (mnt_group_start > id)
123 mnt_group_start = id;
106 mnt->mnt_group_id = 0; 124 mnt->mnt_group_id = 0;
107} 125}
108 126
@@ -1937,6 +1955,21 @@ dput_out:
1937 return retval; 1955 return retval;
1938} 1956}
1939 1957
1958static struct mnt_namespace *alloc_mnt_ns(void)
1959{
1960 struct mnt_namespace *new_ns;
1961
1962 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1963 if (!new_ns)
1964 return ERR_PTR(-ENOMEM);
1965 atomic_set(&new_ns->count, 1);
1966 new_ns->root = NULL;
1967 INIT_LIST_HEAD(&new_ns->list);
1968 init_waitqueue_head(&new_ns->poll);
1969 new_ns->event = 0;
1970 return new_ns;
1971}
1972
1940/* 1973/*
1941 * Allocate a new namespace structure and populate it with contents 1974 * Allocate a new namespace structure and populate it with contents
1942 * copied from the namespace of the passed in task structure. 1975 * copied from the namespace of the passed in task structure.
@@ -1948,14 +1981,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1948 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 1981 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
1949 struct vfsmount *p, *q; 1982 struct vfsmount *p, *q;
1950 1983
1951 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 1984 new_ns = alloc_mnt_ns();
1952 if (!new_ns) 1985 if (IS_ERR(new_ns))
1953 return ERR_PTR(-ENOMEM); 1986 return new_ns;
1954
1955 atomic_set(&new_ns->count, 1);
1956 INIT_LIST_HEAD(&new_ns->list);
1957 init_waitqueue_head(&new_ns->poll);
1958 new_ns->event = 0;
1959 1987
1960 down_write(&namespace_sem); 1988 down_write(&namespace_sem);
1961 /* First pass: copy the tree topology */ 1989 /* First pass: copy the tree topology */
@@ -2019,6 +2047,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2019 return new_ns; 2047 return new_ns;
2020} 2048}
2021 2049
2050/**
2051 * create_mnt_ns - creates a private namespace and adds a root filesystem
2052 * @mnt: pointer to the new root filesystem mountpoint
2053 */
2054struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
2055{
2056 struct mnt_namespace *new_ns;
2057
2058 new_ns = alloc_mnt_ns();
2059 if (!IS_ERR(new_ns)) {
2060 mnt->mnt_ns = new_ns;
2061 new_ns->root = mnt;
2062 list_add(&new_ns->list, &new_ns->root->mnt_list);
2063 }
2064 return new_ns;
2065}
2066EXPORT_SYMBOL(create_mnt_ns);
2067
2022SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, 2068SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2023 char __user *, type, unsigned long, flags, void __user *, data) 2069 char __user *, type, unsigned long, flags, void __user *, data)
2024{ 2070{
@@ -2194,16 +2240,9 @@ static void __init init_mount_tree(void)
2194 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2240 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2195 if (IS_ERR(mnt)) 2241 if (IS_ERR(mnt))
2196 panic("Can't create rootfs"); 2242 panic("Can't create rootfs");
2197 ns = kmalloc(sizeof(*ns), GFP_KERNEL); 2243 ns = create_mnt_ns(mnt);
2198 if (!ns) 2244 if (IS_ERR(ns))
2199 panic("Can't allocate initial namespace"); 2245 panic("Can't allocate initial namespace");
2200 atomic_set(&ns->count, 1);
2201 INIT_LIST_HEAD(&ns->list);
2202 init_waitqueue_head(&ns->poll);
2203 ns->event = 0;
2204 list_add(&mnt->mnt_list, &ns->list);
2205 ns->root = mnt;
2206 mnt->mnt_ns = ns;
2207 2246
2208 init_task.nsproxy->mnt_ns = ns; 2247 init_task.nsproxy->mnt_ns = ns;
2209 get_mnt_ns(ns); 2248 get_mnt_ns(ns);
@@ -2246,10 +2285,14 @@ void __init mnt_init(void)
2246 init_mount_tree(); 2285 init_mount_tree();
2247} 2286}
2248 2287
2249void __put_mnt_ns(struct mnt_namespace *ns) 2288void put_mnt_ns(struct mnt_namespace *ns)
2250{ 2289{
2251 struct vfsmount *root = ns->root; 2290 struct vfsmount *root;
2252 LIST_HEAD(umount_list); 2291 LIST_HEAD(umount_list);
2292
2293 if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
2294 return;
2295 root = ns->root;
2253 ns->root = NULL; 2296 ns->root = NULL;
2254 spin_unlock(&vfsmount_lock); 2297 spin_unlock(&vfsmount_lock);
2255 down_write(&namespace_sem); 2298 down_write(&namespace_sem);
@@ -2260,3 +2303,4 @@ void __put_mnt_ns(struct mnt_namespace *ns)
2260 release_mounts(&umount_list); 2303 release_mounts(&umount_list);
2261 kfree(ns); 2304 kfree(ns);
2262} 2305}
2306EXPORT_SYMBOL(put_mnt_ns);
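
With this hunk, put_mnt_ns() folds the reference drop into the function itself using atomic_dec_and_lock(), so vfsmount_lock is only taken for the final put. A minimal sketch of that pattern on a hypothetical refcounted object (not part of the patch):

#include <linux/spinlock.h>
#include <linux/slab.h>

struct example_obj {
	atomic_t count;
	/* ... payload ... */
};

static DEFINE_SPINLOCK(example_lock);

static void example_put(struct example_obj *obj)
{
	/* Returns true only for the final reference, with the lock held. */
	if (!atomic_dec_and_lock(&obj->count, &example_lock))
		return;
	/* Last reference: unhook from shared structures here, under the lock. */
	spin_unlock(&example_lock);
	kfree(obj);
}
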
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 97645f112114..0ec6237a5970 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen,
1113 1113
1114 if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { 1114 if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
1115 int k; 1115 int k;
1116 unicode_t u;
1116 1117
1117 k = utf8_mbtowc(&ec, iname, iname_end - iname); 1118 k = utf8_to_utf32(iname, iname_end - iname, &u);
1118 if (k < 0) 1119 if (k < 0 || u > MAX_WCHAR_T)
1119 return -EINVAL; 1120 return -EINVAL;
1120 iname += k; 1121 iname += k;
1122 ec = u;
1121 } else { 1123 } else {
1122 if (*iname == NCP_ESC) { 1124 if (*iname == NCP_ESC) {
1123 int k; 1125 int k;
@@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen,
1214 if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { 1216 if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
1215 int k; 1217 int k;
1216 1218
1217 k = utf8_wctomb(iname, ec, iname_end - iname); 1219 k = utf32_to_utf8(ec, iname, iname_end - iname);
1218 if (k < 0) { 1220 if (k < 0) {
1219 err = -ENAMETOOLONG; 1221 err = -ENAMETOOLONG;
1220 goto quit; 1222 goto quit;
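
Both hunks switch the NCP charset conversion from the older utf8_mbtowc()/utf8_wctomb() helpers to utf8_to_utf32()/utf32_to_utf8(), which operate on full 32-bit code points. A small illustrative round-trip using those <linux/nls.h> helpers (the function name is hypothetical):

#include <linux/nls.h>
#include <linux/errno.h>

static int example_recode_one(const u8 *in, int inlen, u8 *out, int outlen)
{
	unicode_t u;
	int k;

	k = utf8_to_utf32(in, inlen, &u);	/* bytes consumed, or < 0 */
	if (k < 0)
		return -EINVAL;
	return utf32_to_utf8(u, out, outlen);	/* bytes written, or < 0 */
}
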
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index e67f3ec07736..2a77bc25d5af 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
1config NFS_FS 1config NFS_FS
2 tristate "NFS client support" 2 tristate "NFS client support"
3 depends on INET 3 depends on INET && FILE_LOCKING
4 select LOCKD 4 select LOCKD
5 select SUNRPC 5 select SUNRPC
6 select NFS_ACL_SUPPORT if NFS_V3_ACL 6 select NFS_ACL_SUPPORT if NFS_V3_ACL
@@ -74,6 +74,15 @@ config NFS_V4
74 74
75 If unsure, say N. 75 If unsure, say N.
76 76
77config NFS_V4_1
78 bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
79 depends on NFS_V4 && EXPERIMENTAL
80 help
81 This option enables support for minor version 1 of the NFSv4 protocol
82 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
83
84 Unless you're an NFS developer, say N.
85
77config ROOT_NFS 86config ROOT_NFS
78 bool "Root file system on NFS" 87 bool "Root file system on NFS"
79 depends on NFS_FS=y && IP_PNP 88 depends on NFS_FS=y && IP_PNP
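
For reference, a minimal config fragment that satisfies the new dependencies and builds the NFSv4.1 client code added by this series (illustrative, not part of the patch):

# NFS client with the experimental v4.1 support enabled
CONFIG_EXPERIMENTAL=y
CONFIG_FILE_LOCKING=y
CONFIG_INET=y
CONFIG_NFS_FS=m
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
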
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a886e692ddd0..7f604c7941fb 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,6 +17,9 @@
17#include <linux/freezer.h> 17#include <linux/freezer.h>
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/sunrpc/svcauth_gss.h> 19#include <linux/sunrpc/svcauth_gss.h>
20#if defined(CONFIG_NFS_V4_1)
21#include <linux/sunrpc/bc_xprt.h>
22#endif
20 23
21#include <net/inet_sock.h> 24#include <net/inet_sock.h>
22 25
@@ -28,11 +31,12 @@
28 31
29struct nfs_callback_data { 32struct nfs_callback_data {
30 unsigned int users; 33 unsigned int users;
34 struct svc_serv *serv;
31 struct svc_rqst *rqst; 35 struct svc_rqst *rqst;
32 struct task_struct *task; 36 struct task_struct *task;
33}; 37};
34 38
35static struct nfs_callback_data nfs_callback_info; 39static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1];
36static DEFINE_MUTEX(nfs_callback_mutex); 40static DEFINE_MUTEX(nfs_callback_mutex);
37static struct svc_program nfs4_callback_program; 41static struct svc_program nfs4_callback_program;
38 42
@@ -56,10 +60,10 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
56 &nfs_callback_set_tcpport, 0644); 60 &nfs_callback_set_tcpport, 0644);
57 61
58/* 62/*
59 * This is the callback kernel thread. 63 * This is the NFSv4 callback kernel thread.
60 */ 64 */
61static int 65static int
62nfs_callback_svc(void *vrqstp) 66nfs4_callback_svc(void *vrqstp)
63{ 67{
64 int err, preverr = 0; 68 int err, preverr = 0;
65 struct svc_rqst *rqstp = vrqstp; 69 struct svc_rqst *rqstp = vrqstp;
@@ -97,20 +101,12 @@ nfs_callback_svc(void *vrqstp)
97} 101}
98 102
99/* 103/*
100 * Bring up the callback thread if it is not already up. 104 * Prepare to bring up the NFSv4 callback service
101 */ 105 */
102int nfs_callback_up(void) 106struct svc_rqst *
107nfs4_callback_up(struct svc_serv *serv)
103{ 108{
104 struct svc_serv *serv = NULL; 109 int ret;
105 int ret = 0;
106
107 mutex_lock(&nfs_callback_mutex);
108 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
109 goto out;
110 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
111 ret = -ENOMEM;
112 if (!serv)
113 goto out_err;
114 110
115 ret = svc_create_xprt(serv, "tcp", PF_INET, 111 ret = svc_create_xprt(serv, "tcp", PF_INET,
116 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 112 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
@@ -127,27 +123,174 @@ int nfs_callback_up(void)
127 nfs_callback_tcpport6 = ret; 123 nfs_callback_tcpport6 = ret;
128 dprintk("NFS: Callback listener port = %u (af %u)\n", 124 dprintk("NFS: Callback listener port = %u (af %u)\n",
129 nfs_callback_tcpport6, PF_INET6); 125 nfs_callback_tcpport6, PF_INET6);
130 } else if (ret != -EAFNOSUPPORT) 126 } else if (ret == -EAFNOSUPPORT)
127 ret = 0;
128 else
131 goto out_err; 129 goto out_err;
132#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ 130#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
133 131
134 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 132 return svc_prepare_thread(serv, &serv->sv_pools[0]);
135 if (IS_ERR(nfs_callback_info.rqst)) { 133
136 ret = PTR_ERR(nfs_callback_info.rqst); 134out_err:
137 nfs_callback_info.rqst = NULL; 135 if (ret == 0)
136 ret = -ENOMEM;
137 return ERR_PTR(ret);
138}
139
140#if defined(CONFIG_NFS_V4_1)
141/*
142 * The callback service for NFSv4.1 callbacks
143 */
144static int
145nfs41_callback_svc(void *vrqstp)
146{
147 struct svc_rqst *rqstp = vrqstp;
148 struct svc_serv *serv = rqstp->rq_server;
149 struct rpc_rqst *req;
150 int error;
151 DEFINE_WAIT(wq);
152
153 set_freezable();
154
155 /*
156 * FIXME: do we really need to run this under the BKL? If so, please
157 * add a comment about what it's intended to protect.
158 */
159 lock_kernel();
160 while (!kthread_should_stop()) {
161 prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
162 spin_lock_bh(&serv->sv_cb_lock);
163 if (!list_empty(&serv->sv_cb_list)) {
164 req = list_first_entry(&serv->sv_cb_list,
165 struct rpc_rqst, rq_bc_list);
166 list_del(&req->rq_bc_list);
167 spin_unlock_bh(&serv->sv_cb_lock);
168 dprintk("Invoking bc_svc_process()\n");
169 error = bc_svc_process(serv, req, rqstp);
170 dprintk("bc_svc_process() returned w/ error code= %d\n",
171 error);
172 } else {
173 spin_unlock_bh(&serv->sv_cb_lock);
174 schedule();
175 }
176 finish_wait(&serv->sv_cb_waitq, &wq);
177 }
178 unlock_kernel();
179 return 0;
180}
181
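
nfs41_callback_svc() above is a standard wait-queue consumer: it registers on sv_cb_waitq before checking sv_cb_list so a wake-up between the check and schedule() is not lost, and it sleeps only when the list is empty. A generic sketch of that pattern with hypothetical types (not from the patch):

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct example_item {
	struct list_head link;
};

struct example_queue {
	struct list_head	items;
	spinlock_t		lock;
	wait_queue_head_t	waitq;
};

static int example_consumer(void *data)
{
	struct example_queue *q = data;
	DEFINE_WAIT(wq);

	while (!kthread_should_stop()) {
		/* Register on the queue before testing the list. */
		prepare_to_wait(&q->waitq, &wq, TASK_INTERRUPTIBLE);
		spin_lock_bh(&q->lock);
		if (!list_empty(&q->items)) {
			struct example_item *it = list_first_entry(&q->items,
						struct example_item, link);
			list_del(&it->link);
			spin_unlock_bh(&q->lock);
			kfree(it);		/* stand-in for processing */
		} else {
			spin_unlock_bh(&q->lock);
			schedule();
		}
		finish_wait(&q->waitq, &wq);
	}
	return 0;
}
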
182/*
183 * Bring up the NFSv4.1 callback service
184 */
185struct svc_rqst *
186nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
187{
188 struct svc_xprt *bc_xprt;
189 struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
190
191 dprintk("--> %s\n", __func__);
192 /* Create a svc_sock for the service */
193 bc_xprt = svc_sock_create(serv, xprt->prot);
194 if (!bc_xprt)
195 goto out;
196
197 /*
198 * Save the svc_serv in the transport so that it can
199 * be referenced when the session backchannel is initialized
200 */
201 serv->bc_xprt = bc_xprt;
202 xprt->bc_serv = serv;
203
204 INIT_LIST_HEAD(&serv->sv_cb_list);
205 spin_lock_init(&serv->sv_cb_lock);
206 init_waitqueue_head(&serv->sv_cb_waitq);
207 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
208 if (IS_ERR(rqstp))
209 svc_sock_destroy(bc_xprt);
210out:
211 dprintk("--> %s return %p\n", __func__, rqstp);
212 return rqstp;
213}
214
215static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
216 struct svc_serv *serv, struct rpc_xprt *xprt,
217 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
218{
219 if (minorversion) {
220 *rqstpp = nfs41_callback_up(serv, xprt);
221 *callback_svc = nfs41_callback_svc;
222 }
223 return minorversion;
224}
225
226static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
227 struct nfs_callback_data *cb_info)
228{
229 if (minorversion)
230 xprt->bc_serv = cb_info->serv;
231}
232#else
233static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
234 struct svc_serv *serv, struct rpc_xprt *xprt,
235 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
236{
237 return 0;
238}
239
240static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
241 struct nfs_callback_data *cb_info)
242{
243}
244#endif /* CONFIG_NFS_V4_1 */
245
246/*
247 * Bring up the callback thread if it is not already up.
248 */
249int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
250{
251 struct svc_serv *serv = NULL;
252 struct svc_rqst *rqstp;
253 int (*callback_svc)(void *vrqstp);
254 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
255 char svc_name[12];
256 int ret = 0;
257 int minorversion_setup;
258
259 mutex_lock(&nfs_callback_mutex);
260 if (cb_info->users++ || cb_info->task != NULL) {
261 nfs_callback_bc_serv(minorversion, xprt, cb_info);
262 goto out;
263 }
264 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
265 if (!serv) {
266 ret = -ENOMEM;
267 goto out_err;
268 }
269
270 minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion,
271 serv, xprt, &rqstp, &callback_svc);
272 if (!minorversion_setup) {
273 /* v4.0 callback setup */
274 rqstp = nfs4_callback_up(serv);
275 callback_svc = nfs4_callback_svc;
276 }
277
278 if (IS_ERR(rqstp)) {
279 ret = PTR_ERR(rqstp);
138 goto out_err; 280 goto out_err;
139 } 281 }
140 282
141 svc_sock_update_bufs(serv); 283 svc_sock_update_bufs(serv);
142 284
143 nfs_callback_info.task = kthread_run(nfs_callback_svc, 285 sprintf(svc_name, "nfsv4.%u-svc", minorversion);
144 nfs_callback_info.rqst, 286 cb_info->serv = serv;
145 "nfsv4-svc"); 287 cb_info->rqst = rqstp;
146 if (IS_ERR(nfs_callback_info.task)) { 288 cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name);
147 ret = PTR_ERR(nfs_callback_info.task); 289 if (IS_ERR(cb_info->task)) {
148 svc_exit_thread(nfs_callback_info.rqst); 290 ret = PTR_ERR(cb_info->task);
149 nfs_callback_info.rqst = NULL; 291 svc_exit_thread(cb_info->rqst);
150 nfs_callback_info.task = NULL; 292 cb_info->rqst = NULL;
293 cb_info->task = NULL;
151 goto out_err; 294 goto out_err;
152 } 295 }
153out: 296out:
@@ -164,22 +307,25 @@ out:
164out_err: 307out_err:
165 dprintk("NFS: Couldn't create callback socket or server thread; " 308 dprintk("NFS: Couldn't create callback socket or server thread; "
166 "err = %d\n", ret); 309 "err = %d\n", ret);
167 nfs_callback_info.users--; 310 cb_info->users--;
168 goto out; 311 goto out;
169} 312}
170 313
171/* 314/*
172 * Kill the callback thread if it's no longer being used. 315 * Kill the callback thread if it's no longer being used.
173 */ 316 */
174void nfs_callback_down(void) 317void nfs_callback_down(int minorversion)
175{ 318{
319 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
320
176 mutex_lock(&nfs_callback_mutex); 321 mutex_lock(&nfs_callback_mutex);
177 nfs_callback_info.users--; 322 cb_info->users--;
178 if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { 323 if (cb_info->users == 0 && cb_info->task != NULL) {
179 kthread_stop(nfs_callback_info.task); 324 kthread_stop(cb_info->task);
180 svc_exit_thread(nfs_callback_info.rqst); 325 svc_exit_thread(cb_info->rqst);
181 nfs_callback_info.rqst = NULL; 326 cb_info->serv = NULL;
182 nfs_callback_info.task = NULL; 327 cb_info->rqst = NULL;
328 cb_info->task = NULL;
183 } 329 }
184 mutex_unlock(&nfs_callback_mutex); 330 mutex_unlock(&nfs_callback_mutex);
185} 331}
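
nfs_callback_up()/nfs_callback_down() keep a per-minorversion user count under nfs_callback_mutex, so the first user starts the callback thread and the last one stops it. A stripped-down sketch of that start/stop-on-refcount pattern (all names hypothetical, not from the patch):

#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/sched.h>

static DEFINE_MUTEX(example_mutex);
static unsigned int example_users;
static struct task_struct *example_task;

static int example_thread_fn(void *unused)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int example_get(void)
{
	int ret = 0;

	mutex_lock(&example_mutex);
	if (example_users++ == 0) {
		example_task = kthread_run(example_thread_fn, NULL, "example");
		if (IS_ERR(example_task)) {
			ret = PTR_ERR(example_task);
			example_task = NULL;
			example_users--;
		}
	}
	mutex_unlock(&example_mutex);
	return ret;
}

static void example_put(void)
{
	mutex_lock(&example_mutex);
	if (--example_users == 0 && example_task != NULL) {
		kthread_stop(example_task);
		example_task = NULL;
	}
	mutex_unlock(&example_mutex);
}
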
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index e110e286a262..07baa8254ca1 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -20,13 +20,24 @@ enum nfs4_callback_procnum {
20enum nfs4_callback_opnum { 20enum nfs4_callback_opnum {
21 OP_CB_GETATTR = 3, 21 OP_CB_GETATTR = 3,
22 OP_CB_RECALL = 4, 22 OP_CB_RECALL = 4,
23/* Callback operations new to NFSv4.1 */
24 OP_CB_LAYOUTRECALL = 5,
25 OP_CB_NOTIFY = 6,
26 OP_CB_PUSH_DELEG = 7,
27 OP_CB_RECALL_ANY = 8,
28 OP_CB_RECALLABLE_OBJ_AVAIL = 9,
29 OP_CB_RECALL_SLOT = 10,
30 OP_CB_SEQUENCE = 11,
31 OP_CB_WANTS_CANCELLED = 12,
32 OP_CB_NOTIFY_LOCK = 13,
33 OP_CB_NOTIFY_DEVICEID = 14,
23 OP_CB_ILLEGAL = 10044, 34 OP_CB_ILLEGAL = 10044,
24}; 35};
25 36
26struct cb_compound_hdr_arg { 37struct cb_compound_hdr_arg {
27 unsigned int taglen; 38 unsigned int taglen;
28 const char *tag; 39 const char *tag;
29 unsigned int callback_ident; 40 unsigned int minorversion;
30 unsigned nops; 41 unsigned nops;
31}; 42};
32 43
@@ -59,16 +70,59 @@ struct cb_recallargs {
59 uint32_t truncate; 70 uint32_t truncate;
60}; 71};
61 72
73#if defined(CONFIG_NFS_V4_1)
74
75struct referring_call {
76 uint32_t rc_sequenceid;
77 uint32_t rc_slotid;
78};
79
80struct referring_call_list {
81 struct nfs4_sessionid rcl_sessionid;
82 uint32_t rcl_nrefcalls;
83 struct referring_call *rcl_refcalls;
84};
85
86struct cb_sequenceargs {
87 struct sockaddr *csa_addr;
88 struct nfs4_sessionid csa_sessionid;
89 uint32_t csa_sequenceid;
90 uint32_t csa_slotid;
91 uint32_t csa_highestslotid;
92 uint32_t csa_cachethis;
93 uint32_t csa_nrclists;
94 struct referring_call_list *csa_rclists;
95};
96
97struct cb_sequenceres {
98 __be32 csr_status;
99 struct nfs4_sessionid csr_sessionid;
100 uint32_t csr_sequenceid;
101 uint32_t csr_slotid;
102 uint32_t csr_highestslotid;
103 uint32_t csr_target_highestslotid;
104};
105
106extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
107 struct cb_sequenceres *res);
108
109#endif /* CONFIG_NFS_V4_1 */
110
62extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 111extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
63extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); 112extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
64 113
65#ifdef CONFIG_NFS_V4 114#ifdef CONFIG_NFS_V4
66extern int nfs_callback_up(void); 115extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
67extern void nfs_callback_down(void); 116extern void nfs_callback_down(int minorversion);
68#else 117#endif /* CONFIG_NFS_V4 */
69#define nfs_callback_up() (0) 118
70#define nfs_callback_down() do {} while(0) 119/*
71#endif 120 * nfs41: Callbacks are expected to not cause substantial latency,
121 * so we limit their concurrency to 1 by setting up the maximum number
122 * of slots for the backchannel.
123 */
124#define NFS41_BC_MIN_CALLBACKS 1
125#define NFS41_BC_MAX_CALLBACKS 1
72 126
73extern unsigned int nfs_callback_set_tcpport; 127extern unsigned int nfs_callback_set_tcpport;
74extern unsigned short nfs_callback_tcpport; 128extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f7e83e23cf9f..b7da1f54da68 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -101,3 +101,130 @@ out:
101 dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); 101 dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
102 return res; 102 return res;
103} 103}
104
105#if defined(CONFIG_NFS_V4_1)
106
107/*
108 * Validate the sequenceID sent by the server.
109 * Return success if the sequenceID is one more than what we last saw on
110 * this slot, accounting for wraparound. Increments the slot's sequence.
111 *
112 * We don't yet implement a duplicate request cache, so at this time
113 * we will log replays, and process them as if we had not seen them before,
114 * but we don't bump the sequence in the slot. Not too worried about it,
115 * since we only currently implement idempotent callbacks anyway.
116 *
117 * We have a single slot backchannel at this time, so we don't bother
118 * checking the used_slots bit array on the table. The lower layer guarantees
119 * a single outstanding callback request at a time.
120 */
121static int
122validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid)
123{
124 struct nfs4_slot *slot;
125
126 dprintk("%s enter. slotid %d seqid %d\n",
127 __func__, slotid, seqid);
128
129 if (slotid > NFS41_BC_MAX_CALLBACKS)
130 return htonl(NFS4ERR_BADSLOT);
131
132 slot = tbl->slots + slotid;
133 dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
134
135 /* Normal */
136 if (likely(seqid == slot->seq_nr + 1)) {
137 slot->seq_nr++;
138 return htonl(NFS4_OK);
139 }
140
141 /* Replay */
142 if (seqid == slot->seq_nr) {
143 dprintk("%s seqid %d is a replay - no DRC available\n",
144 __func__, seqid);
145 return htonl(NFS4_OK);
146 }
147
148 /* Wraparound */
149 if (seqid == 1 && (slot->seq_nr + 1) == 0) {
150 slot->seq_nr = 1;
151 return htonl(NFS4_OK);
152 }
153
154 /* Misordered request */
155 return htonl(NFS4ERR_SEQ_MISORDERED);
156}
157
158/*
159 * Returns a pointer to a held 'struct nfs_client' that matches the server's
160 * address, major version number, and session ID. It is the caller's
161 * responsibility to release the returned reference.
162 *
163 * Returns NULL if there are no connections with sessions, or if no session
164 * matches the one of interest.
165 */
 166static struct nfs_client *find_client_with_session(
167 const struct sockaddr *addr, u32 nfsversion,
168 struct nfs4_sessionid *sessionid)
169{
170 struct nfs_client *clp;
171
172 clp = nfs_find_client(addr, 4);
173 if (clp == NULL)
174 return NULL;
175
176 do {
177 struct nfs_client *prev = clp;
178
179 if (clp->cl_session != NULL) {
180 if (memcmp(clp->cl_session->sess_id.data,
181 sessionid->data,
182 NFS4_MAX_SESSIONID_LEN) == 0) {
183 /* Returns a held reference to clp */
184 return clp;
185 }
186 }
187 clp = nfs_find_client_next(prev);
188 nfs_put_client(prev);
189 } while (clp != NULL);
190
191 return NULL;
192}
193
194/* FIXME: referring calls should be processed */
195unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
196 struct cb_sequenceres *res)
197{
198 struct nfs_client *clp;
199 int i, status;
200
201 for (i = 0; i < args->csa_nrclists; i++)
202 kfree(args->csa_rclists[i].rcl_refcalls);
203 kfree(args->csa_rclists);
204
205 status = htonl(NFS4ERR_BADSESSION);
206 clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
207 if (clp == NULL)
208 goto out;
209
210 status = validate_seqid(&clp->cl_session->bc_slot_table,
211 args->csa_slotid, args->csa_sequenceid);
212 if (status)
213 goto out_putclient;
214
215 memcpy(&res->csr_sessionid, &args->csa_sessionid,
216 sizeof(res->csr_sessionid));
217 res->csr_sequenceid = args->csa_sequenceid;
218 res->csr_slotid = args->csa_slotid;
219 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
220 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
221
222out_putclient:
223 nfs_put_client(clp);
224out:
225 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
226 res->csr_status = status;
227 return res->csr_status;
228}
229
230#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index dd0ef34b5845..e5a2dac5f715 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -20,6 +20,11 @@
20 2 + 2 + 3 + 3) 20 2 + 2 + 3 + 3)
21#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) 21#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
22 22
23#if defined(CONFIG_NFS_V4_1)
24#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
25 4 + 1 + 3)
26#endif /* CONFIG_NFS_V4_1 */
27
23#define NFSDBG_FACILITY NFSDBG_CALLBACK 28#define NFSDBG_FACILITY NFSDBG_CALLBACK
24 29
25typedef __be32 (*callback_process_op_t)(void *, void *); 30typedef __be32 (*callback_process_op_t)(void *, void *);
@@ -132,7 +137,6 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
132static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) 137static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
133{ 138{
134 __be32 *p; 139 __be32 *p;
135 unsigned int minor_version;
136 __be32 status; 140 __be32 status;
137 141
138 status = decode_string(xdr, &hdr->taglen, &hdr->tag); 142 status = decode_string(xdr, &hdr->taglen, &hdr->tag);
@@ -147,15 +151,19 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
147 p = read_buf(xdr, 12); 151 p = read_buf(xdr, 12);
148 if (unlikely(p == NULL)) 152 if (unlikely(p == NULL))
149 return htonl(NFS4ERR_RESOURCE); 153 return htonl(NFS4ERR_RESOURCE);
150 minor_version = ntohl(*p++); 154 hdr->minorversion = ntohl(*p++);
151 /* Check minor version is zero. */ 155 /* Check minor version is zero or one. */
152 if (minor_version != 0) { 156 if (hdr->minorversion <= 1) {
153 printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", 157 p++; /* skip callback_ident */
154 __func__, minor_version); 158 } else {
159 printk(KERN_WARNING "%s: NFSv4 server callback with "
160 "illegal minor version %u!\n",
161 __func__, hdr->minorversion);
155 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 162 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
156 } 163 }
157 hdr->callback_ident = ntohl(*p++);
158 hdr->nops = ntohl(*p); 164 hdr->nops = ntohl(*p);
165 dprintk("%s: minorversion %d nops %d\n", __func__,
166 hdr->minorversion, hdr->nops);
159 return 0; 167 return 0;
160} 168}
161 169
@@ -204,6 +212,122 @@ out:
204 return status; 212 return status;
205} 213}
206 214
215#if defined(CONFIG_NFS_V4_1)
216
217static unsigned decode_sessionid(struct xdr_stream *xdr,
218 struct nfs4_sessionid *sid)
219{
220 uint32_t *p;
221 int len = NFS4_MAX_SESSIONID_LEN;
222
223 p = read_buf(xdr, len);
224 if (unlikely(p == NULL))
 225 return htonl(NFS4ERR_RESOURCE);

226
227 memcpy(sid->data, p, len);
228 return 0;
229}
230
231static unsigned decode_rc_list(struct xdr_stream *xdr,
232 struct referring_call_list *rc_list)
233{
234 uint32_t *p;
235 int i;
236 unsigned status;
237
238 status = decode_sessionid(xdr, &rc_list->rcl_sessionid);
239 if (status)
240 goto out;
241
242 status = htonl(NFS4ERR_RESOURCE);
243 p = read_buf(xdr, sizeof(uint32_t));
244 if (unlikely(p == NULL))
245 goto out;
246
247 rc_list->rcl_nrefcalls = ntohl(*p++);
248 if (rc_list->rcl_nrefcalls) {
249 p = read_buf(xdr,
250 rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
251 if (unlikely(p == NULL))
252 goto out;
253 rc_list->rcl_refcalls = kmalloc(rc_list->rcl_nrefcalls *
254 sizeof(*rc_list->rcl_refcalls),
255 GFP_KERNEL);
256 if (unlikely(rc_list->rcl_refcalls == NULL))
257 goto out;
258 for (i = 0; i < rc_list->rcl_nrefcalls; i++) {
259 rc_list->rcl_refcalls[i].rc_sequenceid = ntohl(*p++);
260 rc_list->rcl_refcalls[i].rc_slotid = ntohl(*p++);
261 }
262 }
263 status = 0;
264
265out:
266 return status;
267}
268
269static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp,
270 struct xdr_stream *xdr,
271 struct cb_sequenceargs *args)
272{
273 uint32_t *p;
274 int i;
275 unsigned status;
276
277 status = decode_sessionid(xdr, &args->csa_sessionid);
278 if (status)
279 goto out;
280
281 status = htonl(NFS4ERR_RESOURCE);
282 p = read_buf(xdr, 5 * sizeof(uint32_t));
283 if (unlikely(p == NULL))
284 goto out;
285
286 args->csa_addr = svc_addr(rqstp);
287 args->csa_sequenceid = ntohl(*p++);
288 args->csa_slotid = ntohl(*p++);
289 args->csa_highestslotid = ntohl(*p++);
290 args->csa_cachethis = ntohl(*p++);
291 args->csa_nrclists = ntohl(*p++);
292 args->csa_rclists = NULL;
293 if (args->csa_nrclists) {
294 args->csa_rclists = kmalloc(args->csa_nrclists *
295 sizeof(*args->csa_rclists),
296 GFP_KERNEL);
297 if (unlikely(args->csa_rclists == NULL))
298 goto out;
299
300 for (i = 0; i < args->csa_nrclists; i++) {
301 status = decode_rc_list(xdr, &args->csa_rclists[i]);
302 if (status)
303 goto out_free;
304 }
305 }
306 status = 0;
307
308 dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
309 "highestslotid %u cachethis %d nrclists %u\n",
310 __func__,
311 ((u32 *)&args->csa_sessionid)[0],
312 ((u32 *)&args->csa_sessionid)[1],
313 ((u32 *)&args->csa_sessionid)[2],
314 ((u32 *)&args->csa_sessionid)[3],
315 args->csa_sequenceid, args->csa_slotid,
316 args->csa_highestslotid, args->csa_cachethis,
317 args->csa_nrclists);
318out:
319 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
320 return status;
321
322out_free:
323 for (i = 0; i < args->csa_nrclists; i++)
324 kfree(args->csa_rclists[i].rcl_refcalls);
325 kfree(args->csa_rclists);
326 goto out;
327}
328
329#endif /* CONFIG_NFS_V4_1 */
330
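
The decoders above all follow the same shape: read_buf() reserves the raw XDR words, then the fields are pulled out with ntohl(). A hypothetical decoder showing just that shape, reusing this file's static read_buf() helper (not part of the patch):

static unsigned decode_example_triple(struct xdr_stream *xdr,
				      uint32_t *a, uint32_t *b, uint32_t *c)
{
	__be32 *p = read_buf(xdr, 3 * sizeof(uint32_t));

	if (unlikely(p == NULL))
		return htonl(NFS4ERR_RESOURCE);
	*a = ntohl(*p++);
	*b = ntohl(*p++);
	*c = ntohl(*p);
	return 0;
}
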
207static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 331static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
208{ 332{
209 __be32 *p; 333 __be32 *p;
@@ -353,31 +477,134 @@ out:
353 return status; 477 return status;
354} 478}
355 479
356static __be32 process_op(struct svc_rqst *rqstp, 480#if defined(CONFIG_NFS_V4_1)
481
482static unsigned encode_sessionid(struct xdr_stream *xdr,
483 const struct nfs4_sessionid *sid)
484{
485 uint32_t *p;
486 int len = NFS4_MAX_SESSIONID_LEN;
487
488 p = xdr_reserve_space(xdr, len);
489 if (unlikely(p == NULL))
490 return htonl(NFS4ERR_RESOURCE);
491
492 memcpy(p, sid, len);
493 return 0;
494}
495
496static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp,
497 struct xdr_stream *xdr,
498 const struct cb_sequenceres *res)
499{
500 uint32_t *p;
501 unsigned status = res->csr_status;
502
503 if (unlikely(status != 0))
504 goto out;
505
506 encode_sessionid(xdr, &res->csr_sessionid);
507
508 p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
509 if (unlikely(p == NULL))
510 return htonl(NFS4ERR_RESOURCE);
511
512 *p++ = htonl(res->csr_sequenceid);
513 *p++ = htonl(res->csr_slotid);
514 *p++ = htonl(res->csr_highestslotid);
515 *p++ = htonl(res->csr_target_highestslotid);
516out:
517 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
518 return status;
519}
520
521static __be32
522preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
523{
524 if (op_nr == OP_CB_SEQUENCE) {
525 if (nop != 0)
526 return htonl(NFS4ERR_SEQUENCE_POS);
527 } else {
528 if (nop == 0)
529 return htonl(NFS4ERR_OP_NOT_IN_SESSION);
530 }
531
532 switch (op_nr) {
533 case OP_CB_GETATTR:
534 case OP_CB_RECALL:
535 case OP_CB_SEQUENCE:
536 *op = &callback_ops[op_nr];
537 break;
538
539 case OP_CB_LAYOUTRECALL:
540 case OP_CB_NOTIFY_DEVICEID:
541 case OP_CB_NOTIFY:
542 case OP_CB_PUSH_DELEG:
543 case OP_CB_RECALL_ANY:
544 case OP_CB_RECALLABLE_OBJ_AVAIL:
545 case OP_CB_RECALL_SLOT:
546 case OP_CB_WANTS_CANCELLED:
547 case OP_CB_NOTIFY_LOCK:
548 return htonl(NFS4ERR_NOTSUPP);
549
550 default:
551 return htonl(NFS4ERR_OP_ILLEGAL);
552 }
553
554 return htonl(NFS_OK);
555}
556
557#else /* CONFIG_NFS_V4_1 */
558
559static __be32
560preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
561{
562 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
563}
564
565#endif /* CONFIG_NFS_V4_1 */
566
567static __be32
568preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
569{
570 switch (op_nr) {
571 case OP_CB_GETATTR:
572 case OP_CB_RECALL:
573 *op = &callback_ops[op_nr];
574 break;
575 default:
576 return htonl(NFS4ERR_OP_ILLEGAL);
577 }
578
579 return htonl(NFS_OK);
580}
581
582static __be32 process_op(uint32_t minorversion, int nop,
583 struct svc_rqst *rqstp,
357 struct xdr_stream *xdr_in, void *argp, 584 struct xdr_stream *xdr_in, void *argp,
358 struct xdr_stream *xdr_out, void *resp) 585 struct xdr_stream *xdr_out, void *resp)
359{ 586{
360 struct callback_op *op = &callback_ops[0]; 587 struct callback_op *op = &callback_ops[0];
361 unsigned int op_nr = OP_CB_ILLEGAL; 588 unsigned int op_nr = OP_CB_ILLEGAL;
362 __be32 status = 0; 589 __be32 status;
363 long maxlen; 590 long maxlen;
364 __be32 res; 591 __be32 res;
365 592
366 dprintk("%s: start\n", __func__); 593 dprintk("%s: start\n", __func__);
367 status = decode_op_hdr(xdr_in, &op_nr); 594 status = decode_op_hdr(xdr_in, &op_nr);
368 if (likely(status == 0)) { 595 if (unlikely(status)) {
369 switch (op_nr) { 596 status = htonl(NFS4ERR_OP_ILLEGAL);
370 case OP_CB_GETATTR: 597 goto out;
371 case OP_CB_RECALL:
372 op = &callback_ops[op_nr];
373 break;
374 default:
375 op_nr = OP_CB_ILLEGAL;
376 op = &callback_ops[0];
377 status = htonl(NFS4ERR_OP_ILLEGAL);
378 }
379 } 598 }
380 599
600 dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
601 __func__, minorversion, nop, op_nr);
602
603 status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) :
604 preprocess_nfs4_op(op_nr, &op);
605 if (status == htonl(NFS4ERR_OP_ILLEGAL))
606 op_nr = OP_CB_ILLEGAL;
607out:
381 maxlen = xdr_out->end - xdr_out->p; 608 maxlen = xdr_out->end - xdr_out->p;
382 if (maxlen > 0 && maxlen < PAGE_SIZE) { 609 if (maxlen > 0 && maxlen < PAGE_SIZE) {
383 if (likely(status == 0 && op->decode_args != NULL)) 610 if (likely(status == 0 && op->decode_args != NULL))
@@ -425,7 +652,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
425 return rpc_system_err; 652 return rpc_system_err;
426 653
427 while (status == 0 && nops != hdr_arg.nops) { 654 while (status == 0 && nops != hdr_arg.nops) {
428 status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); 655 status = process_op(hdr_arg.minorversion, nops,
656 rqstp, &xdr_in, argp, &xdr_out, resp);
429 nops++; 657 nops++;
430 } 658 }
431 659
@@ -452,7 +680,15 @@ static struct callback_op callback_ops[] = {
452 .process_op = (callback_process_op_t)nfs4_callback_recall, 680 .process_op = (callback_process_op_t)nfs4_callback_recall,
453 .decode_args = (callback_decode_arg_t)decode_recall_args, 681 .decode_args = (callback_decode_arg_t)decode_recall_args,
454 .res_maxsize = CB_OP_RECALL_RES_MAXSZ, 682 .res_maxsize = CB_OP_RECALL_RES_MAXSZ,
455 } 683 },
684#if defined(CONFIG_NFS_V4_1)
685 [OP_CB_SEQUENCE] = {
686 .process_op = (callback_process_op_t)nfs4_callback_sequence,
687 .decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
688 .encode_res = (callback_encode_res_t)encode_cb_sequence_res,
689 .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
690 },
691#endif /* CONFIG_NFS_V4_1 */
456}; 692};
457 693
458/* 694/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 75c9cd2aa119..c2d061675d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -37,6 +37,7 @@
37#include <linux/in6.h> 37#include <linux/in6.h>
38#include <net/ipv6.h> 38#include <net/ipv6.h>
39#include <linux/nfs_xdr.h> 39#include <linux/nfs_xdr.h>
40#include <linux/sunrpc/bc_xprt.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42 43
@@ -102,6 +103,7 @@ struct nfs_client_initdata {
102 size_t addrlen; 103 size_t addrlen;
103 const struct nfs_rpc_ops *rpc_ops; 104 const struct nfs_rpc_ops *rpc_ops;
104 int proto; 105 int proto;
106 u32 minorversion;
105}; 107};
106 108
107/* 109/*
@@ -114,18 +116,13 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
114{ 116{
115 struct nfs_client *clp; 117 struct nfs_client *clp;
116 struct rpc_cred *cred; 118 struct rpc_cred *cred;
119 int err = -ENOMEM;
117 120
118 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) 121 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
119 goto error_0; 122 goto error_0;
120 123
121 clp->rpc_ops = cl_init->rpc_ops; 124 clp->rpc_ops = cl_init->rpc_ops;
122 125
123 if (cl_init->rpc_ops->version == 4) {
124 if (nfs_callback_up() < 0)
125 goto error_2;
126 __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
127 }
128
129 atomic_set(&clp->cl_count, 1); 126 atomic_set(&clp->cl_count, 1);
130 clp->cl_cons_state = NFS_CS_INITING; 127 clp->cl_cons_state = NFS_CS_INITING;
131 128
@@ -133,9 +130,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
133 clp->cl_addrlen = cl_init->addrlen; 130 clp->cl_addrlen = cl_init->addrlen;
134 131
135 if (cl_init->hostname) { 132 if (cl_init->hostname) {
133 err = -ENOMEM;
136 clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); 134 clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
137 if (!clp->cl_hostname) 135 if (!clp->cl_hostname)
138 goto error_3; 136 goto error_cleanup;
139 } 137 }
140 138
141 INIT_LIST_HEAD(&clp->cl_superblocks); 139 INIT_LIST_HEAD(&clp->cl_superblocks);
@@ -150,6 +148,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
150 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); 148 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
151 clp->cl_boot_time = CURRENT_TIME; 149 clp->cl_boot_time = CURRENT_TIME;
152 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 150 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
151 clp->cl_minorversion = cl_init->minorversion;
153#endif 152#endif
154 cred = rpc_lookup_machine_cred(); 153 cred = rpc_lookup_machine_cred();
155 if (!IS_ERR(cred)) 154 if (!IS_ERR(cred))
@@ -159,13 +158,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
159 158
160 return clp; 159 return clp;
161 160
162error_3: 161error_cleanup:
163 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
164 nfs_callback_down();
165error_2:
166 kfree(clp); 162 kfree(clp);
167error_0: 163error_0:
168 return NULL; 164 return ERR_PTR(err);
169} 165}
170 166
171static void nfs4_shutdown_client(struct nfs_client *clp) 167static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -182,12 +178,42 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
182} 178}
183 179
184/* 180/*
181 * Destroy the NFS4 callback service
182 */
183static void nfs4_destroy_callback(struct nfs_client *clp)
184{
185#ifdef CONFIG_NFS_V4
186 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
187 nfs_callback_down(clp->cl_minorversion);
188#endif /* CONFIG_NFS_V4 */
189}
190
191/*
192 * Clears/puts all minor version specific parts from an nfs_client struct
193 * reverting it to minorversion 0.
194 */
195static void nfs4_clear_client_minor_version(struct nfs_client *clp)
196{
197#ifdef CONFIG_NFS_V4_1
198 if (nfs4_has_session(clp)) {
199 nfs4_destroy_session(clp->cl_session);
200 clp->cl_session = NULL;
201 }
202
203 clp->cl_call_sync = _nfs4_call_sync;
204#endif /* CONFIG_NFS_V4_1 */
205
206 nfs4_destroy_callback(clp);
207}
208
209/*
185 * Destroy a shared client record 210 * Destroy a shared client record
186 */ 211 */
187static void nfs_free_client(struct nfs_client *clp) 212static void nfs_free_client(struct nfs_client *clp)
188{ 213{
189 dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); 214 dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
190 215
216 nfs4_clear_client_minor_version(clp);
191 nfs4_shutdown_client(clp); 217 nfs4_shutdown_client(clp);
192 218
193 nfs_fscache_release_client_cookie(clp); 219 nfs_fscache_release_client_cookie(clp);
@@ -196,9 +222,6 @@ static void nfs_free_client(struct nfs_client *clp)
196 if (!IS_ERR(clp->cl_rpcclient)) 222 if (!IS_ERR(clp->cl_rpcclient))
197 rpc_shutdown_client(clp->cl_rpcclient); 223 rpc_shutdown_client(clp->cl_rpcclient);
198 224
199 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
200 nfs_callback_down();
201
202 if (clp->cl_machine_cred != NULL) 225 if (clp->cl_machine_cred != NULL)
203 put_rpccred(clp->cl_machine_cred); 226 put_rpccred(clp->cl_machine_cred);
204 227
@@ -347,7 +370,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
347 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 370 struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
348 371
349 /* Don't match clients that failed to initialise properly */ 372 /* Don't match clients that failed to initialise properly */
350 if (clp->cl_cons_state != NFS_CS_READY) 373 if (!(clp->cl_cons_state == NFS_CS_READY ||
374 clp->cl_cons_state == NFS_CS_SESSION_INITING))
351 continue; 375 continue;
352 376
353 /* Different NFS versions cannot share the same nfs_client */ 377 /* Different NFS versions cannot share the same nfs_client */
@@ -420,7 +444,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
420 444
421 if (clp->cl_proto != data->proto) 445 if (clp->cl_proto != data->proto)
422 continue; 446 continue;
423 447 /* Match nfsv4 minorversion */
448 if (clp->cl_minorversion != data->minorversion)
449 continue;
424 /* Match the full socket address */ 450 /* Match the full socket address */
425 if (!nfs_sockaddr_cmp(sap, clap)) 451 if (!nfs_sockaddr_cmp(sap, clap))
426 continue; 452 continue;
@@ -456,9 +482,10 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
456 spin_unlock(&nfs_client_lock); 482 spin_unlock(&nfs_client_lock);
457 483
458 new = nfs_alloc_client(cl_init); 484 new = nfs_alloc_client(cl_init);
459 } while (new); 485 } while (!IS_ERR(new));
460 486
461 return ERR_PTR(-ENOMEM); 487 dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new));
488 return new;
462 489
463 /* install a new client and return with it unready */ 490 /* install a new client and return with it unready */
464install_client: 491install_client:
@@ -478,7 +505,7 @@ found_client:
478 nfs_free_client(new); 505 nfs_free_client(new);
479 506
480 error = wait_event_killable(nfs_client_active_wq, 507 error = wait_event_killable(nfs_client_active_wq,
481 clp->cl_cons_state != NFS_CS_INITING); 508 clp->cl_cons_state < NFS_CS_INITING);
482 if (error < 0) { 509 if (error < 0) {
483 nfs_put_client(clp); 510 nfs_put_client(clp);
484 return ERR_PTR(-ERESTARTSYS); 511 return ERR_PTR(-ERESTARTSYS);
@@ -499,13 +526,29 @@ found_client:
499/* 526/*
500 * Mark a server as ready or failed 527 * Mark a server as ready or failed
501 */ 528 */
502static void nfs_mark_client_ready(struct nfs_client *clp, int state) 529void nfs_mark_client_ready(struct nfs_client *clp, int state)
503{ 530{
504 clp->cl_cons_state = state; 531 clp->cl_cons_state = state;
505 wake_up_all(&nfs_client_active_wq); 532 wake_up_all(&nfs_client_active_wq);
506} 533}
507 534
508/* 535/*
536 * With sessions, the client is not marked ready until after a
537 * successful EXCHANGE_ID and CREATE_SESSION.
538 *
 539 * Map cl_cons_state errors to EPROTONOSUPPORT to indicate
540 * other versions of NFS can be tried.
541 */
542int nfs4_check_client_ready(struct nfs_client *clp)
543{
544 if (!nfs4_has_session(clp))
545 return 0;
546 if (clp->cl_cons_state < NFS_CS_READY)
547 return -EPROTONOSUPPORT;
548 return 0;
549}
550
551/*
509 * Initialise the timeout values for a connection 552 * Initialise the timeout values for a connection
510 */ 553 */
511static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, 554static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -1050,6 +1093,61 @@ error:
1050 1093
1051#ifdef CONFIG_NFS_V4 1094#ifdef CONFIG_NFS_V4
1052/* 1095/*
1096 * Initialize the NFS4 callback service
1097 */
1098static int nfs4_init_callback(struct nfs_client *clp)
1099{
1100 int error;
1101
1102 if (clp->rpc_ops->version == 4) {
1103 if (nfs4_has_session(clp)) {
1104 error = xprt_setup_backchannel(
1105 clp->cl_rpcclient->cl_xprt,
1106 NFS41_BC_MIN_CALLBACKS);
1107 if (error < 0)
1108 return error;
1109 }
1110
1111 error = nfs_callback_up(clp->cl_minorversion,
1112 clp->cl_rpcclient->cl_xprt);
1113 if (error < 0) {
1114 dprintk("%s: failed to start callback. Error = %d\n",
1115 __func__, error);
1116 return error;
1117 }
1118 __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
1119 }
1120 return 0;
1121}
1122
1123/*
1124 * Initialize the minor version specific parts of an NFS4 client record
1125 */
1126static int nfs4_init_client_minor_version(struct nfs_client *clp)
1127{
1128 clp->cl_call_sync = _nfs4_call_sync;
1129
1130#if defined(CONFIG_NFS_V4_1)
1131 if (clp->cl_minorversion) {
1132 struct nfs4_session *session = NULL;
1133 /*
1134 * Create the session and mark it expired.
1135 * When a SEQUENCE operation encounters the expired session
1136 * it will do session recovery to initialize it.
1137 */
1138 session = nfs4_alloc_session(clp);
1139 if (!session)
1140 return -ENOMEM;
1141
1142 clp->cl_session = session;
1143 clp->cl_call_sync = _nfs4_call_sync_session;
1144 }
1145#endif /* CONFIG_NFS_V4_1 */
1146
1147 return nfs4_init_callback(clp);
1148}
1149
1150/*
1053 * Initialise an NFS4 client record 1151 * Initialise an NFS4 client record
1054 */ 1152 */
1055static int nfs4_init_client(struct nfs_client *clp, 1153static int nfs4_init_client(struct nfs_client *clp,
@@ -1083,7 +1181,12 @@ static int nfs4_init_client(struct nfs_client *clp,
1083 } 1181 }
1084 __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); 1182 __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
1085 1183
1086 nfs_mark_client_ready(clp, NFS_CS_READY); 1184 error = nfs4_init_client_minor_version(clp);
1185 if (error < 0)
1186 goto error;
1187
1188 if (!nfs4_has_session(clp))
1189 nfs_mark_client_ready(clp, NFS_CS_READY);
1087 return 0; 1190 return 0;
1088 1191
1089error: 1192error:
@@ -1101,7 +1204,8 @@ static int nfs4_set_client(struct nfs_server *server,
1101 const size_t addrlen, 1204 const size_t addrlen,
1102 const char *ip_addr, 1205 const char *ip_addr,
1103 rpc_authflavor_t authflavour, 1206 rpc_authflavor_t authflavour,
1104 int proto, const struct rpc_timeout *timeparms) 1207 int proto, const struct rpc_timeout *timeparms,
1208 u32 minorversion)
1105{ 1209{
1106 struct nfs_client_initdata cl_init = { 1210 struct nfs_client_initdata cl_init = {
1107 .hostname = hostname, 1211 .hostname = hostname,
@@ -1109,6 +1213,7 @@ static int nfs4_set_client(struct nfs_server *server,
1109 .addrlen = addrlen, 1213 .addrlen = addrlen,
1110 .rpc_ops = &nfs_v4_clientops, 1214 .rpc_ops = &nfs_v4_clientops,
1111 .proto = proto, 1215 .proto = proto,
1216 .minorversion = minorversion,
1112 }; 1217 };
1113 struct nfs_client *clp; 1218 struct nfs_client *clp;
1114 int error; 1219 int error;
@@ -1138,6 +1243,36 @@ error:
1138} 1243}
1139 1244
1140/* 1245/*
1246 * Initialize a session.
1247 * Note: save the mount rsize and wsize for create_server negotiation.
1248 */
1249static void nfs4_init_session(struct nfs_client *clp,
1250 unsigned int wsize, unsigned int rsize)
1251{
1252#if defined(CONFIG_NFS_V4_1)
1253 if (nfs4_has_session(clp)) {
1254 clp->cl_session->fc_attrs.max_rqst_sz = wsize;
1255 clp->cl_session->fc_attrs.max_resp_sz = rsize;
1256 }
1257#endif /* CONFIG_NFS_V4_1 */
1258}
1259
1260/*
1261 * Session has been established, and the client marked ready.
1262 * Set the mount rsize and wsize with negotiated fore channel
 1263 * attributes, which will be bounds-checked in nfs_server_set_fsinfo.
1264 */
1265static void nfs4_session_set_rwsize(struct nfs_server *server)
1266{
1267#ifdef CONFIG_NFS_V4_1
1268 if (!nfs4_has_session(server->nfs_client))
1269 return;
1270 server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
1271 server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz;
1272#endif /* CONFIG_NFS_V4_1 */
1273}
1274
1275/*
1141 * Create a version 4 volume record 1276 * Create a version 4 volume record
1142 */ 1277 */
1143static int nfs4_init_server(struct nfs_server *server, 1278static int nfs4_init_server(struct nfs_server *server,
@@ -1164,7 +1299,8 @@ static int nfs4_init_server(struct nfs_server *server,
1164 data->client_address, 1299 data->client_address,
1165 data->auth_flavors[0], 1300 data->auth_flavors[0],
1166 data->nfs_server.protocol, 1301 data->nfs_server.protocol,
1167 &timeparms); 1302 &timeparms,
1303 data->minorversion);
1168 if (error < 0) 1304 if (error < 0)
1169 goto error; 1305 goto error;
1170 1306
@@ -1214,6 +1350,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1214 BUG_ON(!server->nfs_client->rpc_ops); 1350 BUG_ON(!server->nfs_client->rpc_ops);
1215 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1351 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1216 1352
1353 nfs4_init_session(server->nfs_client, server->wsize, server->rsize);
1354
1217 /* Probe the root fh to retrieve its FSID */ 1355 /* Probe the root fh to retrieve its FSID */
1218 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); 1356 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
1219 if (error < 0) 1357 if (error < 0)
@@ -1224,6 +1362,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1224 (unsigned long long) server->fsid.minor); 1362 (unsigned long long) server->fsid.minor);
1225 dprintk("Mount FH: %d\n", mntfh->size); 1363 dprintk("Mount FH: %d\n", mntfh->size);
1226 1364
1365 nfs4_session_set_rwsize(server);
1366
1227 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1367 error = nfs_probe_fsinfo(server, mntfh, &fattr);
1228 if (error < 0) 1368 if (error < 0)
1229 goto error; 1369 goto error;
@@ -1282,7 +1422,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1282 parent_client->cl_ipaddr, 1422 parent_client->cl_ipaddr,
1283 data->authflavor, 1423 data->authflavor,
1284 parent_server->client->cl_xprt->prot, 1424 parent_server->client->cl_xprt->prot,
1285 parent_server->client->cl_timeout); 1425 parent_server->client->cl_timeout,
1426 parent_client->cl_minorversion);
1286 if (error < 0) 1427 if (error < 0)
1287 goto error; 1428 goto error;
1288 1429
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 968225a88015..af05b918cb5b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -68,29 +68,26 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
68{ 68{
69 struct inode *inode = state->inode; 69 struct inode *inode = state->inode;
70 struct file_lock *fl; 70 struct file_lock *fl;
71 int status; 71 int status = 0;
72
73 if (inode->i_flock == NULL)
74 goto out;
72 75
76 /* Protect inode->i_flock using the BKL */
77 lock_kernel();
73 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 78 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
74 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 79 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
75 continue; 80 continue;
76 if (nfs_file_open_context(fl->fl_file) != ctx) 81 if (nfs_file_open_context(fl->fl_file) != ctx)
77 continue; 82 continue;
83 unlock_kernel();
78 status = nfs4_lock_delegation_recall(state, fl); 84 status = nfs4_lock_delegation_recall(state, fl);
79 if (status >= 0) 85 if (status < 0)
80 continue; 86 goto out;
81 switch (status) { 87 lock_kernel();
82 default:
83 printk(KERN_ERR "%s: unhandled error %d.\n",
84 __func__, status);
85 case -NFS4ERR_EXPIRED:
86 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
87 case -NFS4ERR_STALE_CLIENTID:
88 nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
89 goto out_err;
90 }
91 } 88 }
92 return 0; 89 unlock_kernel();
93out_err: 90out:
94 return status; 91 return status;
95} 92}
96 93
@@ -268,7 +265,10 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat
268 struct nfs_inode *nfsi = NFS_I(inode); 265 struct nfs_inode *nfsi = NFS_I(inode);
269 266
270 nfs_msync_inode(inode); 267 nfs_msync_inode(inode);
271 /* Guard against new delegated open calls */ 268 /*
269 * Guard against new delegated open/lock/unlock calls and against
270 * state recovery
271 */
272 down_write(&nfsi->rwsem); 272 down_write(&nfsi->rwsem);
273 nfs_delegation_claim_opens(inode, &delegation->stateid); 273 nfs_delegation_claim_opens(inode, &delegation->stateid);
274 up_write(&nfsi->rwsem); 274 up_write(&nfsi->rwsem);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 08f6b040d289..489fc01a3204 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -259,6 +259,9 @@ static void nfs_direct_read_release(void *calldata)
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
262#if defined(CONFIG_NFS_V4_1)
263 .rpc_call_prepare = nfs_read_prepare,
264#endif /* CONFIG_NFS_V4_1 */
262 .rpc_call_done = nfs_direct_read_result, 265 .rpc_call_done = nfs_direct_read_result,
263 .rpc_release = nfs_direct_read_release, 266 .rpc_release = nfs_direct_read_release,
264}; 267};
@@ -535,6 +538,9 @@ static void nfs_direct_commit_release(void *calldata)
535} 538}
536 539
537static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
541#if defined(CONFIG_NFS_V4_1)
542 .rpc_call_prepare = nfs_write_prepare,
543#endif /* CONFIG_NFS_V4_1 */
538 .rpc_call_done = nfs_direct_commit_result, 544 .rpc_call_done = nfs_direct_commit_result,
539 .rpc_release = nfs_direct_commit_release, 545 .rpc_release = nfs_direct_commit_release,
540}; 546};
@@ -673,6 +679,9 @@ out_unlock:
673} 679}
674 680
675static const struct rpc_call_ops nfs_write_direct_ops = { 681static const struct rpc_call_ops nfs_write_direct_ops = {
682#if defined(CONFIG_NFS_V4_1)
683 .rpc_call_prepare = nfs_write_prepare,
684#endif /* CONFIG_NFS_V4_1 */
676 .rpc_call_done = nfs_direct_write_result, 685 .rpc_call_done = nfs_direct_write_result,
677 .rpc_release = nfs_direct_write_release, 686 .rpc_release = nfs_direct_write_release,
678}; 687};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ec7e27d00bc6..0055b813ec2c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -48,6 +48,9 @@ static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
48 size_t count, unsigned int flags); 48 size_t count, unsigned int flags);
49static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, 49static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
50 unsigned long nr_segs, loff_t pos); 50 unsigned long nr_segs, loff_t pos);
51static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
52 struct file *filp, loff_t *ppos,
53 size_t count, unsigned int flags);
51static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 54static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
52 unsigned long nr_segs, loff_t pos); 55 unsigned long nr_segs, loff_t pos);
53static int nfs_file_flush(struct file *, fl_owner_t id); 56static int nfs_file_flush(struct file *, fl_owner_t id);
@@ -73,6 +76,7 @@ const struct file_operations nfs_file_operations = {
73 .lock = nfs_lock, 76 .lock = nfs_lock,
74 .flock = nfs_flock, 77 .flock = nfs_flock,
75 .splice_read = nfs_file_splice_read, 78 .splice_read = nfs_file_splice_read,
79 .splice_write = nfs_file_splice_write,
76 .check_flags = nfs_check_flags, 80 .check_flags = nfs_check_flags,
77 .setlease = nfs_setlease, 81 .setlease = nfs_setlease,
78}; 82};
@@ -587,12 +591,38 @@ out_swapfile:
587 goto out; 591 goto out;
588} 592}
589 593
594static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
595 struct file *filp, loff_t *ppos,
596 size_t count, unsigned int flags)
597{
598 struct dentry *dentry = filp->f_path.dentry;
599 struct inode *inode = dentry->d_inode;
600 ssize_t ret;
601
602 dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
603 dentry->d_parent->d_name.name, dentry->d_name.name,
604 (unsigned long) count, (unsigned long long) *ppos);
605
606 /*
607 * The combination of splice and an O_APPEND destination is disallowed.
608 */
609
610 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
611
612 ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
613 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
614 int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
615 if (err < 0)
616 ret = err;
617 }
618 return ret;
619}
620
590static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 621static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
591{ 622{
592 struct inode *inode = filp->f_mapping->host; 623 struct inode *inode = filp->f_mapping->host;
593 int status = 0; 624 int status = 0;
594 625
595 lock_kernel();
596 /* Try local locking first */ 626 /* Try local locking first */
597 posix_test_lock(filp, fl); 627 posix_test_lock(filp, fl);
598 if (fl->fl_type != F_UNLCK) { 628 if (fl->fl_type != F_UNLCK) {
@@ -608,7 +638,6 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
608 638
609 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 639 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
610out: 640out:
611 unlock_kernel();
612 return status; 641 return status;
613out_noconflict: 642out_noconflict:
614 fl->fl_type = F_UNLCK; 643 fl->fl_type = F_UNLCK;
@@ -650,13 +679,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
650 * If we're signalled while cleaning up locks on process exit, we 679 * If we're signalled while cleaning up locks on process exit, we
651 * still need to complete the unlock. 680 * still need to complete the unlock.
652 */ 681 */
653 lock_kernel();
654 /* Use local locking if mounted with "-onolock" */ 682 /* Use local locking if mounted with "-onolock" */
655 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 683 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
656 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 684 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
657 else 685 else
658 status = do_vfs_lock(filp, fl); 686 status = do_vfs_lock(filp, fl);
659 unlock_kernel();
660 return status; 687 return status;
661} 688}
662 689
@@ -673,13 +700,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
673 if (status != 0) 700 if (status != 0)
674 goto out; 701 goto out;
675 702
676 lock_kernel();
677 /* Use local locking if mounted with "-onolock" */ 703 /* Use local locking if mounted with "-onolock" */
678 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 704 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
679 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 705 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
680 else 706 else
681 status = do_vfs_lock(filp, fl); 707 status = do_vfs_lock(filp, fl);
682 unlock_kernel();
683 if (status < 0) 708 if (status < 0)
684 goto out; 709 goto out;
685 /* 710 /*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4d6a8348adf..7dd90a6769d0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -2,6 +2,7 @@
2 * NFS internal definitions 2 * NFS internal definitions
3 */ 3 */
4 4
5#include "nfs4_fs.h"
5#include <linux/mount.h> 6#include <linux/mount.h>
6#include <linux/security.h> 7#include <linux/security.h>
7 8
@@ -17,6 +18,18 @@ struct nfs_string;
17 */ 18 */
18#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) 19#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
19 20
21/*
22 * Determine if sessions are in use.
23 */
24static inline int nfs4_has_session(const struct nfs_client *clp)
25{
26#ifdef CONFIG_NFS_V4_1
27 if (clp->cl_session)
28 return 1;
29#endif /* CONFIG_NFS_V4_1 */
30 return 0;
31}
32
20struct nfs_clone_mount { 33struct nfs_clone_mount {
21 const struct super_block *sb; 34 const struct super_block *sb;
22 const struct dentry *dentry; 35 const struct dentry *dentry;
@@ -30,6 +43,12 @@ struct nfs_clone_mount {
30}; 43};
31 44
32/* 45/*
46 * Note: RFC 1813 doesn't limit the number of auth flavors that
47 * a server can return, so make something up.
48 */
49#define NFS_MAX_SECFLAVORS (12)
50
51/*
33 * In-kernel mount arguments 52 * In-kernel mount arguments
34 */ 53 */
35struct nfs_parsed_mount_data { 54struct nfs_parsed_mount_data {
@@ -44,6 +63,7 @@ struct nfs_parsed_mount_data {
44 unsigned int auth_flavor_len; 63 unsigned int auth_flavor_len;
45 rpc_authflavor_t auth_flavors[1]; 64 rpc_authflavor_t auth_flavors[1];
46 char *client_address; 65 char *client_address;
66 unsigned int minorversion;
47 char *fscache_uniq; 67 char *fscache_uniq;
48 68
49 struct { 69 struct {
@@ -77,6 +97,8 @@ struct nfs_mount_request {
77 unsigned short protocol; 97 unsigned short protocol;
78 struct nfs_fh *fh; 98 struct nfs_fh *fh;
79 int noresvport; 99 int noresvport;
100 unsigned int *auth_flav_len;
101 rpc_authflavor_t *auth_flavs;
80}; 102};
81 103
82extern int nfs_mount(struct nfs_mount_request *info); 104extern int nfs_mount(struct nfs_mount_request *info);
@@ -99,6 +121,8 @@ extern void nfs_free_server(struct nfs_server *server);
99extern struct nfs_server *nfs_clone_server(struct nfs_server *, 121extern struct nfs_server *nfs_clone_server(struct nfs_server *,
100 struct nfs_fh *, 122 struct nfs_fh *,
101 struct nfs_fattr *); 123 struct nfs_fattr *);
124extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
125extern int nfs4_check_client_ready(struct nfs_client *clp);
102#ifdef CONFIG_PROC_FS 126#ifdef CONFIG_PROC_FS
103extern int __init nfs_fs_proc_init(void); 127extern int __init nfs_fs_proc_init(void);
104extern void nfs_fs_proc_exit(void); 128extern void nfs_fs_proc_exit(void);
@@ -146,6 +170,20 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int);
146extern struct rpc_procinfo nfs3_procedures[]; 170extern struct rpc_procinfo nfs3_procedures[];
147extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 171extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int);
148 172
173/* nfs4proc.c */
174static inline void nfs4_restart_rpc(struct rpc_task *task,
175 const struct nfs_client *clp)
176{
177#ifdef CONFIG_NFS_V4_1
178 if (nfs4_has_session(clp) &&
179 test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
180 rpc_restart_call_prepare(task);
181 return;
182 }
183#endif /* CONFIG_NFS_V4_1 */
184 rpc_restart_call(task);
185}
186
149/* nfs4xdr.c */ 187/* nfs4xdr.c */
150#ifdef CONFIG_NFS_V4 188#ifdef CONFIG_NFS_V4
151extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 189extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
@@ -205,6 +243,38 @@ extern int nfs4_path_walk(struct nfs_server *server,
205 const char *path); 243 const char *path);
206#endif 244#endif
207 245
246/* read.c */
247extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
248
249/* write.c */
250extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
251
252/* nfs4proc.c */
253extern int _nfs4_call_sync(struct nfs_server *server,
254 struct rpc_message *msg,
255 struct nfs4_sequence_args *args,
256 struct nfs4_sequence_res *res,
257 int cache_reply);
258extern int _nfs4_call_sync_session(struct nfs_server *server,
259 struct rpc_message *msg,
260 struct nfs4_sequence_args *args,
261 struct nfs4_sequence_res *res,
262 int cache_reply);
263
264#ifdef CONFIG_NFS_V4_1
265extern void nfs41_sequence_free_slot(const struct nfs_client *,
266 struct nfs4_sequence_res *res);
267#endif /* CONFIG_NFS_V4_1 */
268
269static inline void nfs4_sequence_free_slot(const struct nfs_client *clp,
270 struct nfs4_sequence_res *res)
271{
272#ifdef CONFIG_NFS_V4_1
273 if (nfs4_has_session(clp))
274 nfs41_sequence_free_slot(clp, res);
275#endif /* CONFIG_NFS_V4_1 */
276}
277
208/* 278/*
209 * Determine the device name as a string 279 * Determine the device name as a string
210 */ 280 */
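The _nfs4_call_sync()/_nfs4_call_sync_session() pair declared above feeds the per-client cl_call_sync hook that the nfs4_call_sync() wrapper in fs/nfs/nfs4proc.c dereferences. A minimal sketch of how that hook could be wired up at client-initialisation time; the function name, the init site and the minorversion parameter are illustrative assumptions, not part of this patch:

	/* Sketch only: pick the synchronous-call path per minor version */
	static void example_init_call_sync(struct nfs_client *clp,
					   unsigned int minorversion)
	{
		/* minor version 0 keeps the plain rpc_call_sync() path */
		clp->cl_call_sync = _nfs4_call_sync;
	#if defined(CONFIG_NFS_V4_1)
		/* v4.1 routes synchronous calls through the session slot table */
		if (minorversion == 1)
			clp->cl_call_sync = _nfs4_call_sync_session;
	#endif
	}
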
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index a2ab2529b5ca..ceda50aad73c 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -31,7 +31,7 @@ static inline void nfs_inc_server_stats(const struct nfs_server *server,
31 cpu = get_cpu(); 31 cpu = get_cpu();
32 iostats = per_cpu_ptr(server->io_stats, cpu); 32 iostats = per_cpu_ptr(server->io_stats, cpu);
33 iostats->events[stat]++; 33 iostats->events[stat]++;
34 put_cpu_no_resched(); 34 put_cpu();
35} 35}
36 36
37static inline void nfs_inc_stats(const struct inode *inode, 37static inline void nfs_inc_stats(const struct inode *inode,
@@ -50,7 +50,7 @@ static inline void nfs_add_server_stats(const struct nfs_server *server,
50 cpu = get_cpu(); 50 cpu = get_cpu();
51 iostats = per_cpu_ptr(server->io_stats, cpu); 51 iostats = per_cpu_ptr(server->io_stats, cpu);
52 iostats->bytes[stat] += addend; 52 iostats->bytes[stat] += addend;
53 put_cpu_no_resched(); 53 put_cpu();
54} 54}
55 55
56static inline void nfs_add_stats(const struct inode *inode, 56static inline void nfs_add_stats(const struct inode *inode,
@@ -71,7 +71,7 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
71 cpu = get_cpu(); 71 cpu = get_cpu();
72 iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); 72 iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
73 iostats->fscache[stat] += addend; 73 iostats->fscache[stat] += addend;
74 put_cpu_no_resched(); 74 put_cpu();
75} 75}
76#endif 76#endif
77 77
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca905a5bb1ba..38ef9eaec407 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -20,8 +20,116 @@
20# define NFSDBG_FACILITY NFSDBG_MOUNT 20# define NFSDBG_FACILITY NFSDBG_MOUNT
21#endif 21#endif
22 22
23/*
24 * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4
25 */
26#define MNTPATHLEN (1024)
27
28/*
29 * XDR data type sizes
30 */
31#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN))
32#define MNT_status_sz (1)
33#define MNT_fhs_status_sz (1)
34#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE)
35#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE))
36#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS)
37
38/*
39 * XDR argument and result sizes
40 */
41#define MNT_enc_dirpath_sz encode_dirpath_sz
42#define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz)
43#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle_sz + \
44 MNT_authflav3_sz)
45
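/*
 * A worked check of the reply sizes above (illustration only; assumes
 * NFS2_FHSIZE is 32 bytes, so XDR_QUADLEN(NFS2_FHSIZE) == 8):
 *
 *	MNT_dec_mountres_sz  = 1 + 8            =  9 XDR words (36 bytes)
 *	MNT_dec_mountres3_sz = 1 + 8 + (1 + 12) = 22 XDR words (88 bytes)
 *
 * These totals become the .p_replen values in the procedure tables below,
 * which the RPC client uses when sizing each MNT reply buffer.
 */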
46/*
47 * Defined by RFC 1094, section A.5
48 */
49enum {
50 MOUNTPROC_NULL = 0,
51 MOUNTPROC_MNT = 1,
52 MOUNTPROC_DUMP = 2,
53 MOUNTPROC_UMNT = 3,
54 MOUNTPROC_UMNTALL = 4,
55 MOUNTPROC_EXPORT = 5,
56};
57
58/*
59 * Defined by RFC 1813, section 5.2
60 */
61enum {
62 MOUNTPROC3_NULL = 0,
63 MOUNTPROC3_MNT = 1,
64 MOUNTPROC3_DUMP = 2,
65 MOUNTPROC3_UMNT = 3,
66 MOUNTPROC3_UMNTALL = 4,
67 MOUNTPROC3_EXPORT = 5,
68};
69
23static struct rpc_program mnt_program; 70static struct rpc_program mnt_program;
24 71
72/*
73 * Defined by OpenGroup XNFS Version 3W, chapter 8
74 */
75enum mountstat {
76 MNT_OK = 0,
77 MNT_EPERM = 1,
78 MNT_ENOENT = 2,
79 MNT_EACCES = 13,
80 MNT_EINVAL = 22,
81};
82
83static struct {
84 u32 status;
85 int errno;
86} mnt_errtbl[] = {
87 { .status = MNT_OK, .errno = 0, },
88 { .status = MNT_EPERM, .errno = -EPERM, },
89 { .status = MNT_ENOENT, .errno = -ENOENT, },
90 { .status = MNT_EACCES, .errno = -EACCES, },
91 { .status = MNT_EINVAL, .errno = -EINVAL, },
92};
93
94/*
95 * Defined by RFC 1813, section 5.1.5
96 */
97enum mountstat3 {
98 MNT3_OK = 0, /* no error */
99 MNT3ERR_PERM = 1, /* Not owner */
100 MNT3ERR_NOENT = 2, /* No such file or directory */
101 MNT3ERR_IO = 5, /* I/O error */
102 MNT3ERR_ACCES = 13, /* Permission denied */
103 MNT3ERR_NOTDIR = 20, /* Not a directory */
104 MNT3ERR_INVAL = 22, /* Invalid argument */
105 MNT3ERR_NAMETOOLONG = 63, /* Filename too long */
106 MNT3ERR_NOTSUPP = 10004, /* Operation not supported */
107 MNT3ERR_SERVERFAULT = 10006, /* A failure on the server */
108};
109
110static struct {
111 u32 status;
112 int errno;
113} mnt3_errtbl[] = {
114 { .status = MNT3_OK, .errno = 0, },
115 { .status = MNT3ERR_PERM, .errno = -EPERM, },
116 { .status = MNT3ERR_NOENT, .errno = -ENOENT, },
117 { .status = MNT3ERR_IO, .errno = -EIO, },
118 { .status = MNT3ERR_ACCES, .errno = -EACCES, },
119 { .status = MNT3ERR_NOTDIR, .errno = -ENOTDIR, },
120 { .status = MNT3ERR_INVAL, .errno = -EINVAL, },
121 { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, },
122 { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, },
123 { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, },
124};
125
126struct mountres {
127 int errno;
128 struct nfs_fh *fh;
129 unsigned int *auth_count;
130 rpc_authflavor_t *auth_flavors;
131};
132
25struct mnt_fhstatus { 133struct mnt_fhstatus {
26 u32 status; 134 u32 status;
27 struct nfs_fh *fh; 135 struct nfs_fh *fh;
@@ -35,8 +143,10 @@ struct mnt_fhstatus {
35 */ 143 */
36int nfs_mount(struct nfs_mount_request *info) 144int nfs_mount(struct nfs_mount_request *info)
37{ 145{
38 struct mnt_fhstatus result = { 146 struct mountres result = {
39 .fh = info->fh 147 .fh = info->fh,
148 .auth_count = info->auth_flav_len,
149 .auth_flavors = info->auth_flavs,
40 }; 150 };
41 struct rpc_message msg = { 151 struct rpc_message msg = {
42 .rpc_argp = info->dirpath, 152 .rpc_argp = info->dirpath,
@@ -68,14 +178,14 @@ int nfs_mount(struct nfs_mount_request *info)
68 if (info->version == NFS_MNT3_VERSION) 178 if (info->version == NFS_MNT3_VERSION)
69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 179 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
70 else 180 else
71 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 181 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
72 182
73 status = rpc_call_sync(mnt_clnt, &msg, 0); 183 status = rpc_call_sync(mnt_clnt, &msg, 0);
74 rpc_shutdown_client(mnt_clnt); 184 rpc_shutdown_client(mnt_clnt);
75 185
76 if (status < 0) 186 if (status < 0)
77 goto out_call_err; 187 goto out_call_err;
78 if (result.status != 0) 188 if (result.errno != 0)
79 goto out_mnt_err; 189 goto out_mnt_err;
80 190
81 dprintk("NFS: MNT request succeeded\n"); 191 dprintk("NFS: MNT request succeeded\n");
@@ -86,72 +196,215 @@ out:
86 196
87out_clnt_err: 197out_clnt_err:
88 status = PTR_ERR(mnt_clnt); 198 status = PTR_ERR(mnt_clnt);
89 dprintk("NFS: failed to create RPC client, status=%d\n", status); 199 dprintk("NFS: failed to create MNT RPC client, status=%d\n", status);
90 goto out; 200 goto out;
91 201
92out_call_err: 202out_call_err:
93 dprintk("NFS: failed to start MNT request, status=%d\n", status); 203 dprintk("NFS: MNT request failed, status=%d\n", status);
94 goto out; 204 goto out;
95 205
96out_mnt_err: 206out_mnt_err:
97 dprintk("NFS: MNT server returned result %d\n", result.status); 207 dprintk("NFS: MNT server returned result %d\n", result.errno);
98 status = nfs_stat_to_errno(result.status); 208 status = result.errno;
99 goto out; 209 goto out;
100} 210}
101 211
102/* 212/*
103 * XDR encode/decode functions for MOUNT 213 * XDR encode/decode functions for MOUNT
104 */ 214 */
105static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, 215
106 const char *path) 216static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
217{
218 const u32 pathname_len = strlen(pathname);
219 __be32 *p;
220
221 if (unlikely(pathname_len > MNTPATHLEN))
222 return -EIO;
223
224 p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
225 if (unlikely(p == NULL))
226 return -EIO;
227 xdr_encode_opaque(p, pathname, pathname_len);
228
229 return 0;
230}
231
232static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
233 const char *dirpath)
234{
235 struct xdr_stream xdr;
236
237 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
238 return encode_mntdirpath(&xdr, dirpath);
239}
240
241/*
242 * RFC 1094: "A non-zero status indicates some sort of error. In this
243 * case, the status is a UNIX error number." This can be problematic
244 * if the server and client use different errno values for the same
245 * error.
246 *
247 * However, the OpenGroup XNFS spec provides a simple mapping that is
248 * independent of local errno values on the server and the client.
249 */
250static int decode_status(struct xdr_stream *xdr, struct mountres *res)
107{ 251{
108 p = xdr_encode_string(p, path); 252 unsigned int i;
253 u32 status;
254 __be32 *p;
255
256 p = xdr_inline_decode(xdr, sizeof(status));
257 if (unlikely(p == NULL))
258 return -EIO;
259 status = ntohl(*p);
109 260
 110 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 261 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
262 if (mnt_errtbl[i].status == status) {
263 res->errno = mnt_errtbl[i].errno;
264 return 0;
265 }
266 }
267
268 dprintk("NFS: unrecognized MNT status code: %u\n", status);
269 res->errno = -EACCES;
111 return 0; 270 return 0;
112} 271}
113 272
114static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, 273static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
115 struct mnt_fhstatus *res)
116{ 274{
117 struct nfs_fh *fh = res->fh; 275 struct nfs_fh *fh = res->fh;
276 __be32 *p;
277
278 p = xdr_inline_decode(xdr, NFS2_FHSIZE);
279 if (unlikely(p == NULL))
280 return -EIO;
281
282 fh->size = NFS2_FHSIZE;
283 memcpy(fh->data, p, NFS2_FHSIZE);
284 return 0;
285}
286
287static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
288 struct mountres *res)
289{
290 struct xdr_stream xdr;
291 int status;
292
293 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
294
295 status = decode_status(&xdr, res);
296 if (unlikely(status != 0 || res->errno != 0))
297 return status;
298 return decode_fhandle(&xdr, res);
299}
300
301static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
302{
303 unsigned int i;
304 u32 status;
305 __be32 *p;
118 306
119 if ((res->status = ntohl(*p++)) == 0) { 307 p = xdr_inline_decode(xdr, sizeof(status));
120 fh->size = NFS2_FHSIZE; 308 if (unlikely(p == NULL))
121 memcpy(fh->data, p, NFS2_FHSIZE); 309 return -EIO;
310 status = ntohl(*p);
311
 312 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
313 if (mnt3_errtbl[i].status == status) {
314 res->errno = mnt3_errtbl[i].errno;
315 return 0;
316 }
122 } 317 }
318
319 dprintk("NFS: unrecognized MNT3 status code: %u\n", status);
320 res->errno = -EACCES;
123 return 0; 321 return 0;
124} 322}
125 323
126static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, 324static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
127 struct mnt_fhstatus *res)
128{ 325{
129 struct nfs_fh *fh = res->fh; 326 struct nfs_fh *fh = res->fh;
130 unsigned size; 327 u32 size;
131 328 __be32 *p;
132 if ((res->status = ntohl(*p++)) == 0) { 329
133 size = ntohl(*p++); 330 p = xdr_inline_decode(xdr, sizeof(size));
134 if (size <= NFS3_FHSIZE && size != 0) { 331 if (unlikely(p == NULL))
135 fh->size = size; 332 return -EIO;
136 memcpy(fh->data, p, size); 333
137 } else 334 size = ntohl(*p++);
138 res->status = -EBADHANDLE; 335 if (size > NFS3_FHSIZE || size == 0)
336 return -EIO;
337
338 p = xdr_inline_decode(xdr, size);
339 if (unlikely(p == NULL))
340 return -EIO;
341
342 fh->size = size;
343 memcpy(fh->data, p, size);
344 return 0;
345}
346
347static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
348{
349 rpc_authflavor_t *flavors = res->auth_flavors;
350 unsigned int *count = res->auth_count;
351 u32 entries, i;
352 __be32 *p;
353
354 if (*count == 0)
355 return 0;
356
357 p = xdr_inline_decode(xdr, sizeof(entries));
358 if (unlikely(p == NULL))
359 return -EIO;
360 entries = ntohl(*p);
361 dprintk("NFS: received %u auth flavors\n", entries);
362 if (entries > NFS_MAX_SECFLAVORS)
363 entries = NFS_MAX_SECFLAVORS;
364
365 p = xdr_inline_decode(xdr, sizeof(u32) * entries);
366 if (unlikely(p == NULL))
367 return -EIO;
368
369 if (entries > *count)
370 entries = *count;
371
372 for (i = 0; i < entries; i++) {
373 flavors[i] = ntohl(*p++);
374 dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]);
139 } 375 }
376 *count = i;
377
140 return 0; 378 return 0;
141} 379}
142 380
143#define MNT_dirpath_sz (1 + 256) 381static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
144#define MNT_fhstatus_sz (1 + 8) 382 struct mountres *res)
145#define MNT_fhstatus3_sz (1 + 16) 383{
384 struct xdr_stream xdr;
385 int status;
386
387 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
388
389 status = decode_fhs_status(&xdr, res);
390 if (unlikely(status != 0 || res->errno != 0))
391 return status;
392 status = decode_fhandle3(&xdr, res);
393 if (unlikely(status != 0)) {
394 res->errno = -EBADHANDLE;
395 return 0;
396 }
397 return decode_auth_flavors(&xdr, res);
398}
146 399
147static struct rpc_procinfo mnt_procedures[] = { 400static struct rpc_procinfo mnt_procedures[] = {
148 [MNTPROC_MNT] = { 401 [MOUNTPROC_MNT] = {
149 .p_proc = MNTPROC_MNT, 402 .p_proc = MOUNTPROC_MNT,
150 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 403 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
151 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 404 .p_decode = (kxdrproc_t)mnt_dec_mountres,
152 .p_arglen = MNT_dirpath_sz, 405 .p_arglen = MNT_enc_dirpath_sz,
153 .p_replen = MNT_fhstatus_sz, 406 .p_replen = MNT_dec_mountres_sz,
154 .p_statidx = MNTPROC_MNT, 407 .p_statidx = MOUNTPROC_MNT,
155 .p_name = "MOUNT", 408 .p_name = "MOUNT",
156 }, 409 },
157}; 410};
@@ -159,10 +412,10 @@ static struct rpc_procinfo mnt_procedures[] = {
159static struct rpc_procinfo mnt3_procedures[] = { 412static struct rpc_procinfo mnt3_procedures[] = {
160 [MOUNTPROC3_MNT] = { 413 [MOUNTPROC3_MNT] = {
161 .p_proc = MOUNTPROC3_MNT, 414 .p_proc = MOUNTPROC3_MNT,
162 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 415 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
163 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 416 .p_decode = (kxdrproc_t)mnt_dec_mountres3,
164 .p_arglen = MNT_dirpath_sz, 417 .p_arglen = MNT_enc_dirpath_sz,
165 .p_replen = MNT_fhstatus3_sz, 418 .p_replen = MNT_dec_mountres3_sz,
166 .p_statidx = MOUNTPROC3_MNT, 419 .p_statidx = MOUNTPROC3_MNT,
167 .p_name = "MOUNT", 420 .p_name = "MOUNT",
168 }, 421 },
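
With the mount result now carrying the server's auth flavor list back to the caller, a mount-time probe can hand nfs_mount() a buffer to receive it. A minimal caller-side sketch; it assumes the address and transport fields of struct nfs_mount_request are filled in elsewhere, and the function name is made up for illustration:

	static int example_probe_auth_flavors(char *export_path,
					      struct nfs_fh *mntfh)
	{
		rpc_authflavor_t flavs[NFS_MAX_SECFLAVORS];
		unsigned int flav_len = ARRAY_SIZE(flavs);
		struct nfs_mount_request request = {
			.dirpath	= export_path,
			.version	= NFS_MNT3_VERSION,
			.fh		= mntfh,
			.auth_flav_len	= &flav_len,
			.auth_flavs	= flavs,
		};
		unsigned int i;
		int status;

		status = nfs_mount(&request);
		if (status != 0)
			return status;

		/* flav_len now says how many entries of flavs[] are valid */
		for (i = 0; i < flav_len; i++)
			dprintk("NFS: usable auth flavor: %d\n", flavs[i]);
		return 0;
	}
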
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f01caec84463..40c766782891 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -65,6 +65,11 @@ char *nfs_path(const char *base,
65 dentry = dentry->d_parent; 65 dentry = dentry->d_parent;
66 } 66 }
67 spin_unlock(&dcache_lock); 67 spin_unlock(&dcache_lock);
68 if (*end != '/') {
69 if (--buflen < 0)
70 goto Elong;
71 *--end = '/';
72 }
68 namelen = strlen(base); 73 namelen = strlen(base);
69 /* Strip off excess slashes in base string */ 74 /* Strip off excess slashes in base string */
70 while (namelen > 0 && base[namelen - 1] == '/') 75 while (namelen > 0 && base[namelen - 1] == '/')
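
A short worked trace of the added check: with base "/export" and the mount's own root dentry, the loop above contributes nothing, so *end is '\0'; the new test prepends a '/', the stripped base is then copied in front, and nfs_path() returns

	"/export/"	(previously "/export")

For any deeper dentry the loop has already left a leading '/', so the check changes nothing.
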
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6bbf0e6daad2..bac60515a4b3 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -207,8 +207,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
207 status = nfs_revalidate_inode(server, inode); 207 status = nfs_revalidate_inode(server, inode);
208 if (status < 0) 208 if (status < 0)
209 return ERR_PTR(status); 209 return ERR_PTR(status);
210 if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
211 nfs_zap_acl_cache(inode);
212 acl = nfs3_get_cached_acl(inode, type); 210 acl = nfs3_get_cached_acl(inode, type);
213 if (acl != ERR_PTR(-EAGAIN)) 211 if (acl != ERR_PTR(-EAGAIN))
214 return acl; 212 return acl;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 84345deab26f..61bc3a32e1e2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
44 NFS4CLNT_RECLAIM_REBOOT, 44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE, 45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN, 46 NFS4CLNT_DELEGRETURN,
47 NFS4CLNT_SESSION_SETUP,
47}; 48};
48 49
49/* 50/*
@@ -177,6 +178,14 @@ struct nfs4_state_recovery_ops {
177 int state_flag_bit; 178 int state_flag_bit;
178 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); 179 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
179 int (*recover_lock)(struct nfs4_state *, struct file_lock *); 180 int (*recover_lock)(struct nfs4_state *, struct file_lock *);
181 int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
182 struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
183};
184
185struct nfs4_state_maintenance_ops {
186 int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *);
187 struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
188 int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
180}; 189};
181 190
182extern const struct dentry_operations nfs4_dentry_operations; 191extern const struct dentry_operations nfs4_dentry_operations;
@@ -193,6 +202,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
193extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 202extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
194extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 203extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
195extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 204extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
205extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
196extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); 206extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
197extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 207extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
198extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 208extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -200,8 +210,26 @@ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh
200extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 210extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
201 struct nfs4_fs_locations *fs_locations, struct page *page); 211 struct nfs4_fs_locations *fs_locations, struct page *page);
202 212
203extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; 213extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
204extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops; 214extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
215#if defined(CONFIG_NFS_V4_1)
216extern int nfs4_setup_sequence(struct nfs_client *clp,
217 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
218 int cache_reply, struct rpc_task *task);
219extern void nfs4_destroy_session(struct nfs4_session *session);
220extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
221extern int nfs4_proc_create_session(struct nfs_client *, int reset);
222extern int nfs4_proc_destroy_session(struct nfs4_session *);
223#else /* CONFIG_NFS_v4_1 */
224static inline int nfs4_setup_sequence(struct nfs_client *clp,
225 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
226 int cache_reply, struct rpc_task *task)
227{
228 return 0;
229}
230#endif /* CONFIG_NFS_V4_1 */
231
232extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
205 233
206extern const u32 nfs4_fattr_bitmap[2]; 234extern const u32 nfs4_fattr_bitmap[2];
207extern const u32 nfs4_statfs_bitmap[2]; 235extern const u32 nfs4_statfs_bitmap[2];
@@ -216,7 +244,12 @@ extern void nfs4_kill_renewd(struct nfs_client *);
216extern void nfs4_renew_state(struct work_struct *); 244extern void nfs4_renew_state(struct work_struct *);
217 245
218/* nfs4state.c */ 246/* nfs4state.c */
247struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
219struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); 248struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
249#if defined(CONFIG_NFS_V4_1)
250struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
251struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
252#endif /* CONFIG_NFS_V4_1 */
220 253
221extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 254extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
222extern void nfs4_put_state_owner(struct nfs4_state_owner *); 255extern void nfs4_put_state_owner(struct nfs4_state_owner *);
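
The new nfs4_state_maintenance_ops table splits lease renewal by minor version. A rough sketch of how the renewal path is expected to pick its operations, assuming the nfs4_state_renewal_ops[] array declared above is indexed by the client's minor version (the cl_minorversion field is an assumption here, not shown in this patch):

	static void example_renew_state(struct nfs_client *clp)
	{
		struct nfs4_state_maintenance_ops *ops =
			nfs4_state_renewal_ops[clp->cl_minorversion];
		struct rpc_cred *cred;

		spin_lock(&clp->cl_lock);
		cred = ops->get_state_renewal_cred_locked(clp);
		spin_unlock(&clp->cl_lock);
		if (cred != NULL)
			ops->sched_state_renewal(clp, cred);
	}
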
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4674f8092da8..92ce43517814 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -48,11 +48,14 @@
48#include <linux/smp_lock.h> 48#include <linux/smp_lock.h>
49#include <linux/namei.h> 49#include <linux/namei.h>
50#include <linux/mount.h> 50#include <linux/mount.h>
51#include <linux/module.h>
52#include <linux/sunrpc/bc_xprt.h>
51 53
52#include "nfs4_fs.h" 54#include "nfs4_fs.h"
53#include "delegation.h" 55#include "delegation.h"
54#include "internal.h" 56#include "internal.h"
55#include "iostat.h" 57#include "iostat.h"
58#include "callback.h"
56 59
57#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
58 61
@@ -247,7 +250,25 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
247 ret = nfs4_wait_clnt_recover(clp); 250 ret = nfs4_wait_clnt_recover(clp);
248 if (ret == 0) 251 if (ret == 0)
249 exception->retry = 1; 252 exception->retry = 1;
253#if !defined(CONFIG_NFS_V4_1)
250 break; 254 break;
255#else /* !defined(CONFIG_NFS_V4_1) */
256 if (!nfs4_has_session(server->nfs_client))
257 break;
258 /* FALLTHROUGH */
259 case -NFS4ERR_BADSESSION:
260 case -NFS4ERR_BADSLOT:
261 case -NFS4ERR_BAD_HIGH_SLOT:
262 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
263 case -NFS4ERR_DEADSESSION:
264 case -NFS4ERR_SEQ_FALSE_RETRY:
265 case -NFS4ERR_SEQ_MISORDERED:
266 dprintk("%s ERROR: %d Reset session\n", __func__,
267 errorcode);
268 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
269 exception->retry = 1;
270 /* FALLTHROUGH */
271#endif /* !defined(CONFIG_NFS_V4_1) */
251 case -NFS4ERR_FILE_OPEN: 272 case -NFS4ERR_FILE_OPEN:
252 case -NFS4ERR_GRACE: 273 case -NFS4ERR_GRACE:
253 case -NFS4ERR_DELAY: 274 case -NFS4ERR_DELAY:
@@ -271,6 +292,353 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
271 spin_unlock(&clp->cl_lock); 292 spin_unlock(&clp->cl_lock);
272} 293}
273 294
295#if defined(CONFIG_NFS_V4_1)
296
297/*
298 * nfs4_free_slot - free a slot and efficiently update slot table.
299 *
300 * freeing a slot is trivially done by clearing its respective bit
301 * in the bitmap.
302 * If the freed slotid equals highest_used_slotid we want to update it
303 * so that the server would be able to size down the slot table if needed,
304 * otherwise we know that the highest_used_slotid is still in use.
305 * When updating highest_used_slotid there may be "holes" in the bitmap
306 * so we need to scan down from highest_used_slotid to 0 looking for the now
307 * highest slotid in use.
308 * If none found, highest_used_slotid is set to -1.
309 */
310static void
311nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
312{
313 int slotid = free_slotid;
314
315 spin_lock(&tbl->slot_tbl_lock);
316 /* clear used bit in bitmap */
317 __clear_bit(slotid, tbl->used_slots);
318
319 /* update highest_used_slotid when it is freed */
320 if (slotid == tbl->highest_used_slotid) {
321 slotid = find_last_bit(tbl->used_slots, tbl->max_slots);
322 if (slotid >= 0 && slotid < tbl->max_slots)
323 tbl->highest_used_slotid = slotid;
324 else
325 tbl->highest_used_slotid = -1;
326 }
327 rpc_wake_up_next(&tbl->slot_tbl_waitq);
328 spin_unlock(&tbl->slot_tbl_lock);
329 dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__,
330 free_slotid, tbl->highest_used_slotid);
331}
332
333void nfs41_sequence_free_slot(const struct nfs_client *clp,
334 struct nfs4_sequence_res *res)
335{
336 struct nfs4_slot_table *tbl;
337
338 if (!nfs4_has_session(clp)) {
339 dprintk("%s: No session\n", __func__);
340 return;
341 }
342 tbl = &clp->cl_session->fc_slot_table;
343 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
344 dprintk("%s: No slot\n", __func__);
345 /* just wake up the next guy waiting since
346 * we may have not consumed a slot after all */
347 rpc_wake_up_next(&tbl->slot_tbl_waitq);
348 return;
349 }
350 nfs4_free_slot(tbl, res->sr_slotid);
351 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
352}
353
354static void nfs41_sequence_done(struct nfs_client *clp,
355 struct nfs4_sequence_res *res,
356 int rpc_status)
357{
358 unsigned long timestamp;
359 struct nfs4_slot_table *tbl;
360 struct nfs4_slot *slot;
361
362 /*
363 * sr_status remains 1 if an RPC level error occurred. The server
 364 * may or may not have processed the sequence operation.
365 * Proceed as if the server received and processed the sequence
366 * operation.
367 */
368 if (res->sr_status == 1)
369 res->sr_status = NFS_OK;
370
371 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
372 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
373 goto out;
374
375 tbl = &clp->cl_session->fc_slot_table;
376 slot = tbl->slots + res->sr_slotid;
377
378 if (res->sr_status == 0) {
379 /* Update the slot's sequence and clientid lease timer */
380 ++slot->seq_nr;
381 timestamp = res->sr_renewal_time;
382 spin_lock(&clp->cl_lock);
383 if (time_before(clp->cl_last_renewal, timestamp))
384 clp->cl_last_renewal = timestamp;
385 spin_unlock(&clp->cl_lock);
386 return;
387 }
388out:
389 /* The session may be reset by one of the error handlers. */
390 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
391 nfs41_sequence_free_slot(clp, res);
392}
393
394/*
395 * nfs4_find_slot - efficiently look for a free slot
396 *
397 * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
398 * If found, we mark the slot as used, update the highest_used_slotid,
399 * and respectively set up the sequence operation args.
400 * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise.
401 *
 402 * Note: must be called while holding the slot_tbl_lock.
403 */
404static u8
405nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
406{
407 int slotid;
408 u8 ret_id = NFS4_MAX_SLOT_TABLE;
409 BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
410
411 dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n",
412 __func__, tbl->used_slots[0], tbl->highest_used_slotid,
413 tbl->max_slots);
414 slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
415 if (slotid >= tbl->max_slots)
416 goto out;
417 __set_bit(slotid, tbl->used_slots);
418 if (slotid > tbl->highest_used_slotid)
419 tbl->highest_used_slotid = slotid;
420 ret_id = slotid;
421out:
422 dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
423 __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id);
424 return ret_id;
425}
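/*
 * A worked illustration of the slot bookkeeping above (not part of the
 * patch): assume max_slots = 4, used_slots = 0b0101 (slots 0 and 2 busy)
 * and highest_used_slotid = 2.
 *
 *	nfs4_find_slot()   -> returns 1; used_slots = 0b0111, highest stays 2
 *	nfs4_free_slot(2)  -> used_slots = 0b0011; find_last_bit() gives 1,
 *	                      so highest_used_slotid drops to 1
 *	nfs4_free_slot(1), then nfs4_free_slot(0)
 *	                   -> bitmap empty, highest_used_slotid = -1, which is
 *	                      the state nfs41_setup_sequence() waits for while
 *	                      a session reset drains outstanding requests
 */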
426
427static int nfs4_recover_session(struct nfs4_session *session)
428{
429 struct nfs_client *clp = session->clp;
430 int ret;
431
432 for (;;) {
433 ret = nfs4_wait_clnt_recover(clp);
434 if (ret != 0)
435 return ret;
436 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
437 break;
438 nfs4_schedule_state_manager(clp);
439 }
440 return 0;
441}
442
443static int nfs41_setup_sequence(struct nfs4_session *session,
444 struct nfs4_sequence_args *args,
445 struct nfs4_sequence_res *res,
446 int cache_reply,
447 struct rpc_task *task)
448{
449 struct nfs4_slot *slot;
450 struct nfs4_slot_table *tbl;
451 int status = 0;
452 u8 slotid;
453
454 dprintk("--> %s\n", __func__);
455 /* slot already allocated? */
456 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
457 return 0;
458
459 memset(res, 0, sizeof(*res));
460 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
461 tbl = &session->fc_slot_table;
462
463 spin_lock(&tbl->slot_tbl_lock);
464 if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) {
465 if (tbl->highest_used_slotid != -1) {
466 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
467 spin_unlock(&tbl->slot_tbl_lock);
468 dprintk("<-- %s: Session reset: draining\n", __func__);
469 return -EAGAIN;
470 }
471
472 /* The slot table is empty; start the reset thread */
473 dprintk("%s Session Reset\n", __func__);
474 spin_unlock(&tbl->slot_tbl_lock);
475 status = nfs4_recover_session(session);
476 if (status)
477 return status;
478 spin_lock(&tbl->slot_tbl_lock);
479 }
480
481 slotid = nfs4_find_slot(tbl, task);
482 if (slotid == NFS4_MAX_SLOT_TABLE) {
483 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
484 spin_unlock(&tbl->slot_tbl_lock);
485 dprintk("<-- %s: no free slots\n", __func__);
486 return -EAGAIN;
487 }
488 spin_unlock(&tbl->slot_tbl_lock);
489
490 slot = tbl->slots + slotid;
491 args->sa_session = session;
492 args->sa_slotid = slotid;
493 args->sa_cache_this = cache_reply;
494
495 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
496
497 res->sr_session = session;
498 res->sr_slotid = slotid;
499 res->sr_renewal_time = jiffies;
500 /*
501 * sr_status is only set in decode_sequence, and so will remain
502 * set to 1 if an rpc level failure occurs.
503 */
504 res->sr_status = 1;
505 return 0;
506}
507
508int nfs4_setup_sequence(struct nfs_client *clp,
509 struct nfs4_sequence_args *args,
510 struct nfs4_sequence_res *res,
511 int cache_reply,
512 struct rpc_task *task)
513{
514 int ret = 0;
515
516 dprintk("--> %s clp %p session %p sr_slotid %d\n",
517 __func__, clp, clp->cl_session, res->sr_slotid);
518
519 if (!nfs4_has_session(clp))
520 goto out;
521 ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
522 task);
523 if (ret != -EAGAIN) {
524 /* terminate rpc task */
525 task->tk_status = ret;
526 task->tk_action = NULL;
527 }
528out:
529 dprintk("<-- %s status=%d\n", __func__, ret);
530 return ret;
531}
532
533struct nfs41_call_sync_data {
534 struct nfs_client *clp;
535 struct nfs4_sequence_args *seq_args;
536 struct nfs4_sequence_res *seq_res;
537 int cache_reply;
538};
539
540static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
541{
542 struct nfs41_call_sync_data *data = calldata;
543
544 dprintk("--> %s data->clp->cl_session %p\n", __func__,
545 data->clp->cl_session);
546 if (nfs4_setup_sequence(data->clp, data->seq_args,
547 data->seq_res, data->cache_reply, task))
548 return;
549 rpc_call_start(task);
550}
551
552static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
553{
554 struct nfs41_call_sync_data *data = calldata;
555
556 nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
557 nfs41_sequence_free_slot(data->clp, data->seq_res);
558}
559
560struct rpc_call_ops nfs41_call_sync_ops = {
561 .rpc_call_prepare = nfs41_call_sync_prepare,
562 .rpc_call_done = nfs41_call_sync_done,
563};
564
565static int nfs4_call_sync_sequence(struct nfs_client *clp,
566 struct rpc_clnt *clnt,
567 struct rpc_message *msg,
568 struct nfs4_sequence_args *args,
569 struct nfs4_sequence_res *res,
570 int cache_reply)
571{
572 int ret;
573 struct rpc_task *task;
574 struct nfs41_call_sync_data data = {
575 .clp = clp,
576 .seq_args = args,
577 .seq_res = res,
578 .cache_reply = cache_reply,
579 };
580 struct rpc_task_setup task_setup = {
581 .rpc_client = clnt,
582 .rpc_message = msg,
583 .callback_ops = &nfs41_call_sync_ops,
584 .callback_data = &data
585 };
586
587 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
588 task = rpc_run_task(&task_setup);
589 if (IS_ERR(task))
590 ret = PTR_ERR(task);
591 else {
592 ret = task->tk_status;
593 rpc_put_task(task);
594 }
595 return ret;
596}
597
598int _nfs4_call_sync_session(struct nfs_server *server,
599 struct rpc_message *msg,
600 struct nfs4_sequence_args *args,
601 struct nfs4_sequence_res *res,
602 int cache_reply)
603{
604 return nfs4_call_sync_sequence(server->nfs_client, server->client,
605 msg, args, res, cache_reply);
606}
607
608#endif /* CONFIG_NFS_V4_1 */
609
610int _nfs4_call_sync(struct nfs_server *server,
611 struct rpc_message *msg,
612 struct nfs4_sequence_args *args,
613 struct nfs4_sequence_res *res,
614 int cache_reply)
615{
616 args->sa_session = res->sr_session = NULL;
617 return rpc_call_sync(server->client, msg, 0);
618}
619
620#define nfs4_call_sync(server, msg, args, res, cache_reply) \
621 (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
622 &(res)->seq_res, (cache_reply))
623
624static void nfs4_sequence_done(const struct nfs_server *server,
625 struct nfs4_sequence_res *res, int rpc_status)
626{
627#ifdef CONFIG_NFS_V4_1
628 if (nfs4_has_session(server->nfs_client))
629 nfs41_sequence_done(server->nfs_client, res, rpc_status);
630#endif /* CONFIG_NFS_V4_1 */
631}
632
633/* no restart, therefore free slot here */
634static void nfs4_sequence_done_free_slot(const struct nfs_server *server,
635 struct nfs4_sequence_res *res,
636 int rpc_status)
637{
638 nfs4_sequence_done(server, res, rpc_status);
639 nfs4_sequence_free_slot(server->nfs_client, res);
640}
641
274static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 642static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
275{ 643{
276 struct nfs_inode *nfsi = NFS_I(dir); 644 struct nfs_inode *nfsi = NFS_I(dir);
@@ -312,6 +680,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
312 p->o_res.server = p->o_arg.server; 680 p->o_res.server = p->o_arg.server;
313 nfs_fattr_init(&p->f_attr); 681 nfs_fattr_init(&p->f_attr);
314 nfs_fattr_init(&p->dir_attr); 682 nfs_fattr_init(&p->dir_attr);
683 p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
315} 684}
316 685
317static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 686static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -804,16 +1173,30 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
804 err = _nfs4_open_delegation_recall(ctx, state, stateid); 1173 err = _nfs4_open_delegation_recall(ctx, state, stateid);
805 switch (err) { 1174 switch (err) {
806 case 0: 1175 case 0:
807 return err; 1176 case -ENOENT:
1177 case -ESTALE:
1178 goto out;
808 case -NFS4ERR_STALE_CLIENTID: 1179 case -NFS4ERR_STALE_CLIENTID:
809 case -NFS4ERR_STALE_STATEID: 1180 case -NFS4ERR_STALE_STATEID:
810 case -NFS4ERR_EXPIRED: 1181 case -NFS4ERR_EXPIRED:
811 /* Don't recall a delegation if it was lost */ 1182 /* Don't recall a delegation if it was lost */
812 nfs4_schedule_state_recovery(server->nfs_client); 1183 nfs4_schedule_state_recovery(server->nfs_client);
813 return err; 1184 goto out;
1185 case -ERESTARTSYS:
1186 /*
1187 * The show must go on: exit, but mark the
1188 * stateid as needing recovery.
1189 */
1190 case -NFS4ERR_ADMIN_REVOKED:
1191 case -NFS4ERR_BAD_STATEID:
1192 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
1193 case -ENOMEM:
1194 err = 0;
1195 goto out;
814 } 1196 }
815 err = nfs4_handle_exception(server, err, &exception); 1197 err = nfs4_handle_exception(server, err, &exception);
816 } while (exception.retry); 1198 } while (exception.retry);
1199out:
817 return err; 1200 return err;
818} 1201}
819 1202
@@ -929,6 +1312,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
929 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); 1312 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
930 } 1313 }
931 data->timestamp = jiffies; 1314 data->timestamp = jiffies;
1315 if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
1316 &data->o_arg.seq_args,
1317 &data->o_res.seq_res, 1, task))
1318 return;
932 rpc_call_start(task); 1319 rpc_call_start(task);
933 return; 1320 return;
934out_no_action: 1321out_no_action:
@@ -941,6 +1328,10 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
941 struct nfs4_opendata *data = calldata; 1328 struct nfs4_opendata *data = calldata;
942 1329
943 data->rpc_status = task->tk_status; 1330 data->rpc_status = task->tk_status;
1331
1332 nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res,
1333 task->tk_status);
1334
944 if (RPC_ASSASSINATED(task)) 1335 if (RPC_ASSASSINATED(task))
945 return; 1336 return;
946 if (task->tk_status == 0) { 1337 if (task->tk_status == 0) {
@@ -1269,7 +1660,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1269 } else 1660 } else
1270 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1661 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1271 1662
1272 status = rpc_call_sync(server->client, &msg, 0); 1663 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
1273 if (status == 0 && state != NULL) 1664 if (status == 0 && state != NULL)
1274 renew_lease(server, timestamp); 1665 renew_lease(server, timestamp);
1275 return status; 1666 return status;
@@ -1318,6 +1709,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1318 struct nfs4_state *state = calldata->state; 1709 struct nfs4_state *state = calldata->state;
1319 struct nfs_server *server = NFS_SERVER(calldata->inode); 1710 struct nfs_server *server = NFS_SERVER(calldata->inode);
1320 1711
1712 nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
1321 if (RPC_ASSASSINATED(task)) 1713 if (RPC_ASSASSINATED(task))
1322 return; 1714 return;
1323 /* hmm. we are done with the inode, and in the process of freeing 1715 /* hmm. we are done with the inode, and in the process of freeing
@@ -1336,10 +1728,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1336 break; 1728 break;
1337 default: 1729 default:
1338 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { 1730 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
1339 rpc_restart_call(task); 1731 nfs4_restart_rpc(task, server->nfs_client);
1340 return; 1732 return;
1341 } 1733 }
1342 } 1734 }
1735 nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res);
1343 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 1736 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
1344} 1737}
1345 1738
@@ -1380,6 +1773,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1380 calldata->arg.fmode = FMODE_WRITE; 1773 calldata->arg.fmode = FMODE_WRITE;
1381 } 1774 }
1382 calldata->timestamp = jiffies; 1775 calldata->timestamp = jiffies;
1776 if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
1777 &calldata->arg.seq_args, &calldata->res.seq_res,
1778 1, task))
1779 return;
1383 rpc_call_start(task); 1780 rpc_call_start(task);
1384} 1781}
1385 1782
@@ -1419,13 +1816,15 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1419 }; 1816 };
1420 int status = -ENOMEM; 1817 int status = -ENOMEM;
1421 1818
1422 calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); 1819 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
1423 if (calldata == NULL) 1820 if (calldata == NULL)
1424 goto out; 1821 goto out;
1425 calldata->inode = state->inode; 1822 calldata->inode = state->inode;
1426 calldata->state = state; 1823 calldata->state = state;
1427 calldata->arg.fh = NFS_FH(state->inode); 1824 calldata->arg.fh = NFS_FH(state->inode);
1428 calldata->arg.stateid = &state->open_stateid; 1825 calldata->arg.stateid = &state->open_stateid;
1826 if (nfs4_has_session(server->nfs_client))
1827 memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
1429 /* Serialization for the sequence id */ 1828 /* Serialization for the sequence id */
1430 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1829 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1431 if (calldata->arg.seqid == NULL) 1830 if (calldata->arg.seqid == NULL)
@@ -1435,6 +1834,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1435 calldata->res.fattr = &calldata->fattr; 1834 calldata->res.fattr = &calldata->fattr;
1436 calldata->res.seqid = calldata->arg.seqid; 1835 calldata->res.seqid = calldata->arg.seqid;
1437 calldata->res.server = server; 1836 calldata->res.server = server;
1837 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1438 calldata->path.mnt = mntget(path->mnt); 1838 calldata->path.mnt = mntget(path->mnt);
1439 calldata->path.dentry = dget(path->dentry); 1839 calldata->path.dentry = dget(path->dentry);
1440 1840
@@ -1584,15 +1984,18 @@ void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
1584 1984
1585static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 1985static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
1586{ 1986{
1987 struct nfs4_server_caps_arg args = {
1988 .fhandle = fhandle,
1989 };
1587 struct nfs4_server_caps_res res = {}; 1990 struct nfs4_server_caps_res res = {};
1588 struct rpc_message msg = { 1991 struct rpc_message msg = {
1589 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], 1992 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
1590 .rpc_argp = fhandle, 1993 .rpc_argp = &args,
1591 .rpc_resp = &res, 1994 .rpc_resp = &res,
1592 }; 1995 };
1593 int status; 1996 int status;
1594 1997
1595 status = rpc_call_sync(server->client, &msg, 0); 1998 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1596 if (status == 0) { 1999 if (status == 0) {
1597 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2000 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
1598 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) 2001 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
@@ -1606,6 +2009,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1606 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2009 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1607 server->acl_bitmask = res.acl_bitmask; 2010 server->acl_bitmask = res.acl_bitmask;
1608 } 2011 }
2012
1609 return status; 2013 return status;
1610} 2014}
1611 2015
@@ -1637,8 +2041,15 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
1637 .rpc_argp = &args, 2041 .rpc_argp = &args,
1638 .rpc_resp = &res, 2042 .rpc_resp = &res,
1639 }; 2043 };
2044 int status;
2045
1640 nfs_fattr_init(info->fattr); 2046 nfs_fattr_init(info->fattr);
1641 return rpc_call_sync(server->client, &msg, 0); 2047 status = nfs4_recover_expired_lease(server);
2048 if (!status)
2049 status = nfs4_check_client_ready(server->nfs_client);
2050 if (!status)
2051 status = nfs4_call_sync(server, &msg, &args, &res, 0);
2052 return status;
1642} 2053}
1643 2054
1644static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2055static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -1728,7 +2139,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
1728 }; 2139 };
1729 2140
1730 nfs_fattr_init(fattr); 2141 nfs_fattr_init(fattr);
1731 return rpc_call_sync(server->client, &msg, 0); 2142 return nfs4_call_sync(server, &msg, &args, &res, 0);
1732} 2143}
1733 2144
1734static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2145static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -1812,7 +2223,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
1812 nfs_fattr_init(fattr); 2223 nfs_fattr_init(fattr);
1813 2224
1814 dprintk("NFS call lookupfh %s\n", name->name); 2225 dprintk("NFS call lookupfh %s\n", name->name);
1815 status = rpc_call_sync(server->client, &msg, 0); 2226 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1816 dprintk("NFS reply lookupfh: %d\n", status); 2227 dprintk("NFS reply lookupfh: %d\n", status);
1817 return status; 2228 return status;
1818} 2229}
@@ -1898,7 +2309,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
1898 args.access |= NFS4_ACCESS_EXECUTE; 2309 args.access |= NFS4_ACCESS_EXECUTE;
1899 } 2310 }
1900 nfs_fattr_init(&fattr); 2311 nfs_fattr_init(&fattr);
1901 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 2312 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1902 if (!status) { 2313 if (!status) {
1903 entry->mask = 0; 2314 entry->mask = 0;
1904 if (res.access & NFS4_ACCESS_READ) 2315 if (res.access & NFS4_ACCESS_READ)
@@ -1957,13 +2368,14 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
1957 .pglen = pglen, 2368 .pglen = pglen,
1958 .pages = &page, 2369 .pages = &page,
1959 }; 2370 };
2371 struct nfs4_readlink_res res;
1960 struct rpc_message msg = { 2372 struct rpc_message msg = {
1961 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], 2373 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
1962 .rpc_argp = &args, 2374 .rpc_argp = &args,
1963 .rpc_resp = NULL, 2375 .rpc_resp = &res,
1964 }; 2376 };
1965 2377
1966 return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 2378 return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
1967} 2379}
1968 2380
1969static int nfs4_proc_readlink(struct inode *inode, struct page *page, 2381static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2057,7 +2469,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2057 int status; 2469 int status;
2058 2470
2059 nfs_fattr_init(&res.dir_attr); 2471 nfs_fattr_init(&res.dir_attr);
2060 status = rpc_call_sync(server->client, &msg, 0); 2472 status = nfs4_call_sync(server, &msg, &args, &res, 1);
2061 if (status == 0) { 2473 if (status == 0) {
2062 update_changeattr(dir, &res.cinfo); 2474 update_changeattr(dir, &res.cinfo);
2063 nfs_post_op_update_inode(dir, &res.dir_attr); 2475 nfs_post_op_update_inode(dir, &res.dir_attr);
@@ -2092,8 +2504,10 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2092{ 2504{
2093 struct nfs_removeres *res = task->tk_msg.rpc_resp; 2505 struct nfs_removeres *res = task->tk_msg.rpc_resp;
2094 2506
2507 nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
2095 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2508 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2096 return 0; 2509 return 0;
2510 nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res);
2097 update_changeattr(dir, &res->cinfo); 2511 update_changeattr(dir, &res->cinfo);
2098 nfs_post_op_update_inode(dir, &res->dir_attr); 2512 nfs_post_op_update_inode(dir, &res->dir_attr);
2099 return 1; 2513 return 1;
@@ -2125,7 +2539,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2125 2539
2126 nfs_fattr_init(res.old_fattr); 2540 nfs_fattr_init(res.old_fattr);
2127 nfs_fattr_init(res.new_fattr); 2541 nfs_fattr_init(res.new_fattr);
2128 status = rpc_call_sync(server->client, &msg, 0); 2542 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2129 2543
2130 if (!status) { 2544 if (!status) {
2131 update_changeattr(old_dir, &res.old_cinfo); 2545 update_changeattr(old_dir, &res.old_cinfo);
@@ -2174,7 +2588,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2174 2588
2175 nfs_fattr_init(res.fattr); 2589 nfs_fattr_init(res.fattr);
2176 nfs_fattr_init(res.dir_attr); 2590 nfs_fattr_init(res.dir_attr);
2177 status = rpc_call_sync(server->client, &msg, 0); 2591 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2178 if (!status) { 2592 if (!status) {
2179 update_changeattr(dir, &res.cinfo); 2593 update_changeattr(dir, &res.cinfo);
2180 nfs_post_op_update_inode(dir, res.dir_attr); 2594 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2235,7 +2649,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
2235 2649
2236static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) 2650static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
2237{ 2651{
2238 int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); 2652 int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg,
2653 &data->arg, &data->res, 1);
2239 if (status == 0) { 2654 if (status == 0) {
2240 update_changeattr(dir, &data->res.dir_cinfo); 2655 update_changeattr(dir, &data->res.dir_cinfo);
2241 nfs_post_op_update_inode(dir, data->res.dir_fattr); 2656 nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2344,7 +2759,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2344 (unsigned long long)cookie); 2759 (unsigned long long)cookie);
2345 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2760 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2346 res.pgbase = args.pgbase; 2761 res.pgbase = args.pgbase;
2347 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 2762 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
2348 if (status == 0) 2763 if (status == 0)
2349 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2764 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2350 2765
@@ -2422,14 +2837,17 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
2422 .fh = fhandle, 2837 .fh = fhandle,
2423 .bitmask = server->attr_bitmask, 2838 .bitmask = server->attr_bitmask,
2424 }; 2839 };
2840 struct nfs4_statfs_res res = {
2841 .fsstat = fsstat,
2842 };
2425 struct rpc_message msg = { 2843 struct rpc_message msg = {
2426 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], 2844 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
2427 .rpc_argp = &args, 2845 .rpc_argp = &args,
2428 .rpc_resp = fsstat, 2846 .rpc_resp = &res,
2429 }; 2847 };
2430 2848
2431 nfs_fattr_init(fsstat->fattr); 2849 nfs_fattr_init(fsstat->fattr);
2432 return rpc_call_sync(server->client, &msg, 0); 2850 return nfs4_call_sync(server, &msg, &args, &res, 0);
2433} 2851}
2434 2852
2435static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 2853static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -2451,13 +2869,16 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
2451 .fh = fhandle, 2869 .fh = fhandle,
2452 .bitmask = server->attr_bitmask, 2870 .bitmask = server->attr_bitmask,
2453 }; 2871 };
2872 struct nfs4_fsinfo_res res = {
2873 .fsinfo = fsinfo,
2874 };
2454 struct rpc_message msg = { 2875 struct rpc_message msg = {
2455 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], 2876 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
2456 .rpc_argp = &args, 2877 .rpc_argp = &args,
2457 .rpc_resp = fsinfo, 2878 .rpc_resp = &res,
2458 }; 2879 };
2459 2880
2460 return rpc_call_sync(server->client, &msg, 0); 2881 return nfs4_call_sync(server, &msg, &args, &res, 0);
2461} 2882}
2462 2883
2463static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 2884static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -2486,10 +2907,13 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
2486 .fh = fhandle, 2907 .fh = fhandle,
2487 .bitmask = server->attr_bitmask, 2908 .bitmask = server->attr_bitmask,
2488 }; 2909 };
2910 struct nfs4_pathconf_res res = {
2911 .pathconf = pathconf,
2912 };
2489 struct rpc_message msg = { 2913 struct rpc_message msg = {
2490 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], 2914 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
2491 .rpc_argp = &args, 2915 .rpc_argp = &args,
2492 .rpc_resp = pathconf, 2916 .rpc_resp = &res,
2493 }; 2917 };
2494 2918
2495 /* None of the pathconf attributes are mandatory to implement */ 2919 /* None of the pathconf attributes are mandatory to implement */
@@ -2499,7 +2923,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
2499 } 2923 }
2500 2924
2501 nfs_fattr_init(pathconf->fattr); 2925 nfs_fattr_init(pathconf->fattr);
2502 return rpc_call_sync(server->client, &msg, 0); 2926 return nfs4_call_sync(server, &msg, &args, &res, 0);
2503} 2927}
2504 2928
2505static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 2929static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2520,8 +2944,13 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2520{ 2944{
2521 struct nfs_server *server = NFS_SERVER(data->inode); 2945 struct nfs_server *server = NFS_SERVER(data->inode);
2522 2946
2947 dprintk("--> %s\n", __func__);
2948
2949 /* nfs4_sequence_free_slot called in the read rpc_call_done */
2950 nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
2951
2523 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 2952 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
2524 rpc_restart_call(task); 2953 nfs4_restart_rpc(task, server->nfs_client);
2525 return -EAGAIN; 2954 return -EAGAIN;
2526 } 2955 }
2527 2956
@@ -2541,8 +2970,12 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2541{ 2970{
2542 struct inode *inode = data->inode; 2971 struct inode *inode = data->inode;
2543 2972
2973 /* slot is freed in nfs_writeback_done */
2974 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
2975 task->tk_status);
2976
2544 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 2977 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
2545 rpc_restart_call(task); 2978 nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
2546 return -EAGAIN; 2979 return -EAGAIN;
2547 } 2980 }
2548 if (task->tk_status >= 0) { 2981 if (task->tk_status >= 0) {
@@ -2567,10 +3000,14 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
2567{ 3000{
2568 struct inode *inode = data->inode; 3001 struct inode *inode = data->inode;
2569 3002
3003 nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
3004 task->tk_status);
2570 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3005 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
2571 rpc_restart_call(task); 3006 nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
2572 return -EAGAIN; 3007 return -EAGAIN;
2573 } 3008 }
3009 nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client,
3010 &data->res.seq_res);
2574 nfs_refresh_inode(inode, data->res.fattr); 3011 nfs_refresh_inode(inode, data->res.fattr);
2575 return 0; 3012 return 0;
2576} 3013}
@@ -2603,6 +3040,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
2603 if (time_before(clp->cl_last_renewal,timestamp)) 3040 if (time_before(clp->cl_last_renewal,timestamp))
2604 clp->cl_last_renewal = timestamp; 3041 clp->cl_last_renewal = timestamp;
2605 spin_unlock(&clp->cl_lock); 3042 spin_unlock(&clp->cl_lock);
3043 dprintk("%s calling put_rpccred on rpc_cred %p\n", __func__,
3044 task->tk_msg.rpc_cred);
3045 put_rpccred(task->tk_msg.rpc_cred);
2606} 3046}
2607 3047
2608static const struct rpc_call_ops nfs4_renew_ops = { 3048static const struct rpc_call_ops nfs4_renew_ops = {
@@ -2742,12 +3182,14 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
2742 .acl_pages = pages, 3182 .acl_pages = pages,
2743 .acl_len = buflen, 3183 .acl_len = buflen,
2744 }; 3184 };
2745 size_t resp_len = buflen; 3185 struct nfs_getaclres res = {
3186 .acl_len = buflen,
3187 };
2746 void *resp_buf; 3188 void *resp_buf;
2747 struct rpc_message msg = { 3189 struct rpc_message msg = {
2748 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], 3190 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
2749 .rpc_argp = &args, 3191 .rpc_argp = &args,
2750 .rpc_resp = &resp_len, 3192 .rpc_resp = &res,
2751 }; 3193 };
2752 struct page *localpage = NULL; 3194 struct page *localpage = NULL;
2753 int ret; 3195 int ret;
@@ -2761,26 +3203,26 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
2761 return -ENOMEM; 3203 return -ENOMEM;
2762 args.acl_pages[0] = localpage; 3204 args.acl_pages[0] = localpage;
2763 args.acl_pgbase = 0; 3205 args.acl_pgbase = 0;
2764 resp_len = args.acl_len = PAGE_SIZE; 3206 args.acl_len = PAGE_SIZE;
2765 } else { 3207 } else {
2766 resp_buf = buf; 3208 resp_buf = buf;
2767 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 3209 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
2768 } 3210 }
2769 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 3211 ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
2770 if (ret) 3212 if (ret)
2771 goto out_free; 3213 goto out_free;
2772 if (resp_len > args.acl_len) 3214 if (res.acl_len > args.acl_len)
2773 nfs4_write_cached_acl(inode, NULL, resp_len); 3215 nfs4_write_cached_acl(inode, NULL, res.acl_len);
2774 else 3216 else
2775 nfs4_write_cached_acl(inode, resp_buf, resp_len); 3217 nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
2776 if (buf) { 3218 if (buf) {
2777 ret = -ERANGE; 3219 ret = -ERANGE;
2778 if (resp_len > buflen) 3220 if (res.acl_len > buflen)
2779 goto out_free; 3221 goto out_free;
2780 if (localpage) 3222 if (localpage)
2781 memcpy(buf, resp_buf, resp_len); 3223 memcpy(buf, resp_buf, res.acl_len);
2782 } 3224 }
2783 ret = resp_len; 3225 ret = res.acl_len;
2784out_free: 3226out_free:
2785 if (localpage) 3227 if (localpage)
2786 __free_page(localpage); 3228 __free_page(localpage);
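[Illustration] The hunk above moves the returned ACL length out of a bare size_t and into nfs_getaclres, but keeps the existing flow: if the reply overflows the request buffer only the length is cached, and -ERANGE is returned when the caller's buffer is too small. A minimal user-space sketch of that length negotiation follows; the struct and helper below are made up for illustration, not kernel structures.

	/* Illustrative sketch of the acl-length handling above; "acl_reply"
	 * and copy_acl() are hypothetical, not kernel interfaces. */
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/types.h>

	struct acl_reply {
		const char *data;   /* ACL bytes actually returned */
		size_t acl_len;     /* full ACL length reported by the server */
	};

	/* Copy the ACL into buf; mirror the probe / -ERANGE behaviour. */
	static ssize_t copy_acl(const struct acl_reply *res, char *buf, size_t buflen)
	{
		if (buf == NULL)                /* probe call: report size only */
			return (ssize_t)res->acl_len;
		if (res->acl_len > buflen)      /* caller's buffer is too small */
			return -ERANGE;
		memcpy(buf, res->data, res->acl_len);
		return (ssize_t)res->acl_len;
	}

	int main(void)
	{
		struct acl_reply res = { .data = "A::OWNER@:rwatTnNcCy" };
		char small[8], big[64];

		res.acl_len = strlen(res.data);
		printf("probe: %zd\n", copy_acl(&res, NULL, 0));              /* 20 */
		printf("small: %zd\n", copy_acl(&res, small, sizeof(small))); /* -ERANGE */
		printf("big:   %zd\n", copy_acl(&res, big, sizeof(big)));     /* 20 */
		return 0;
	}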
@@ -2810,8 +3252,6 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
2810 ret = nfs_revalidate_inode(server, inode); 3252 ret = nfs_revalidate_inode(server, inode);
2811 if (ret < 0) 3253 if (ret < 0)
2812 return ret; 3254 return ret;
2813 if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
2814 nfs_zap_acl_cache(inode);
2815 ret = nfs4_read_cached_acl(inode, buf, buflen); 3255 ret = nfs4_read_cached_acl(inode, buf, buflen);
2816 if (ret != -ENOENT) 3256 if (ret != -ENOENT)
2817 return ret; 3257 return ret;
@@ -2827,10 +3267,11 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2827 .acl_pages = pages, 3267 .acl_pages = pages,
2828 .acl_len = buflen, 3268 .acl_len = buflen,
2829 }; 3269 };
3270 struct nfs_setaclres res;
2830 struct rpc_message msg = { 3271 struct rpc_message msg = {
2831 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], 3272 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL],
2832 .rpc_argp = &arg, 3273 .rpc_argp = &arg,
2833 .rpc_resp = NULL, 3274 .rpc_resp = &res,
2834 }; 3275 };
2835 int ret; 3276 int ret;
2836 3277
@@ -2838,7 +3279,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2838 return -EOPNOTSUPP; 3279 return -EOPNOTSUPP;
2839 nfs_inode_return_delegation(inode); 3280 nfs_inode_return_delegation(inode);
2840 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3281 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2841 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 3282 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
2842 nfs_access_zap_cache(inode); 3283 nfs_access_zap_cache(inode);
2843 nfs_zap_acl_cache(inode); 3284 nfs_zap_acl_cache(inode);
2844 return ret; 3285 return ret;
@@ -2857,10 +3298,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2857} 3298}
2858 3299
2859static int 3300static int
2860nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) 3301_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
2861{ 3302{
2862 struct nfs_client *clp = server->nfs_client;
2863
2864 if (!clp || task->tk_status >= 0) 3303 if (!clp || task->tk_status >= 0)
2865 return 0; 3304 return 0;
2866 switch(task->tk_status) { 3305 switch(task->tk_status) {
@@ -2879,8 +3318,23 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
2879 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3318 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
2880 task->tk_status = 0; 3319 task->tk_status = 0;
2881 return -EAGAIN; 3320 return -EAGAIN;
3321#if defined(CONFIG_NFS_V4_1)
3322 case -NFS4ERR_BADSESSION:
3323 case -NFS4ERR_BADSLOT:
3324 case -NFS4ERR_BAD_HIGH_SLOT:
3325 case -NFS4ERR_DEADSESSION:
3326 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
3327 case -NFS4ERR_SEQ_FALSE_RETRY:
3328 case -NFS4ERR_SEQ_MISORDERED:
3329 dprintk("%s ERROR %d, Reset session\n", __func__,
3330 task->tk_status);
3331 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
3332 task->tk_status = 0;
3333 return -EAGAIN;
3334#endif /* CONFIG_NFS_V4_1 */
2882 case -NFS4ERR_DELAY: 3335 case -NFS4ERR_DELAY:
2883 nfs_inc_server_stats(server, NFSIOS_DELAY); 3336 if (server)
3337 nfs_inc_server_stats(server, NFSIOS_DELAY);
2884 case -NFS4ERR_GRACE: 3338 case -NFS4ERR_GRACE:
2885 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3339 rpc_delay(task, NFS4_POLL_RETRY_MAX);
2886 task->tk_status = 0; 3340 task->tk_status = 0;
@@ -2893,6 +3347,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
2893 return 0; 3347 return 0;
2894} 3348}
2895 3349
3350static int
3351nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
3352{
3353 return _nfs4_async_handle_error(task, server, server->nfs_client, state);
3354}
3355
2896int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 3356int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
2897{ 3357{
2898 nfs4_verifier sc_verifier; 3358 nfs4_verifier sc_verifier;
@@ -3000,6 +3460,10 @@ struct nfs4_delegreturndata {
3000static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) 3460static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
3001{ 3461{
3002 struct nfs4_delegreturndata *data = calldata; 3462 struct nfs4_delegreturndata *data = calldata;
3463
3464 nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res,
3465 task->tk_status);
3466
3003 data->rpc_status = task->tk_status; 3467 data->rpc_status = task->tk_status;
3004 if (data->rpc_status == 0) 3468 if (data->rpc_status == 0)
3005 renew_lease(data->res.server, data->timestamp); 3469 renew_lease(data->res.server, data->timestamp);
@@ -3010,7 +3474,25 @@ static void nfs4_delegreturn_release(void *calldata)
3010 kfree(calldata); 3474 kfree(calldata);
3011} 3475}
3012 3476
3477#if defined(CONFIG_NFS_V4_1)
3478static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
3479{
3480 struct nfs4_delegreturndata *d_data;
3481
3482 d_data = (struct nfs4_delegreturndata *)data;
3483
3484 if (nfs4_setup_sequence(d_data->res.server->nfs_client,
3485 &d_data->args.seq_args,
3486 &d_data->res.seq_res, 1, task))
3487 return;
3488 rpc_call_start(task);
3489}
3490#endif /* CONFIG_NFS_V4_1 */
3491
3013static const struct rpc_call_ops nfs4_delegreturn_ops = { 3492static const struct rpc_call_ops nfs4_delegreturn_ops = {
3493#if defined(CONFIG_NFS_V4_1)
3494 .rpc_call_prepare = nfs4_delegreturn_prepare,
3495#endif /* CONFIG_NFS_V4_1 */
3014 .rpc_call_done = nfs4_delegreturn_done, 3496 .rpc_call_done = nfs4_delegreturn_done,
3015 .rpc_release = nfs4_delegreturn_release, 3497 .rpc_release = nfs4_delegreturn_release,
3016}; 3498};
@@ -3032,7 +3514,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3032 }; 3514 };
3033 int status = 0; 3515 int status = 0;
3034 3516
3035 data = kmalloc(sizeof(*data), GFP_KERNEL); 3517 data = kzalloc(sizeof(*data), GFP_KERNEL);
3036 if (data == NULL) 3518 if (data == NULL)
3037 return -ENOMEM; 3519 return -ENOMEM;
3038 data->args.fhandle = &data->fh; 3520 data->args.fhandle = &data->fh;
@@ -3042,6 +3524,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3042 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 3524 memcpy(&data->stateid, stateid, sizeof(data->stateid));
3043 data->res.fattr = &data->fattr; 3525 data->res.fattr = &data->fattr;
3044 data->res.server = server; 3526 data->res.server = server;
3527 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3045 nfs_fattr_init(data->res.fattr); 3528 nfs_fattr_init(data->res.fattr);
3046 data->timestamp = jiffies; 3529 data->timestamp = jiffies;
3047 data->rpc_status = 0; 3530 data->rpc_status = 0;
@@ -3127,7 +3610,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3127 goto out; 3610 goto out;
3128 lsp = request->fl_u.nfs4_fl.owner; 3611 lsp = request->fl_u.nfs4_fl.owner;
3129 arg.lock_owner.id = lsp->ls_id.id; 3612 arg.lock_owner.id = lsp->ls_id.id;
3130 status = rpc_call_sync(server->client, &msg, 0); 3613 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
3131 switch (status) { 3614 switch (status) {
3132 case 0: 3615 case 0:
3133 request->fl_type = F_UNLCK; 3616 request->fl_type = F_UNLCK;
@@ -3187,13 +3670,14 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3187 struct nfs4_unlockdata *p; 3670 struct nfs4_unlockdata *p;
3188 struct inode *inode = lsp->ls_state->inode; 3671 struct inode *inode = lsp->ls_state->inode;
3189 3672
3190 p = kmalloc(sizeof(*p), GFP_KERNEL); 3673 p = kzalloc(sizeof(*p), GFP_KERNEL);
3191 if (p == NULL) 3674 if (p == NULL)
3192 return NULL; 3675 return NULL;
3193 p->arg.fh = NFS_FH(inode); 3676 p->arg.fh = NFS_FH(inode);
3194 p->arg.fl = &p->fl; 3677 p->arg.fl = &p->fl;
3195 p->arg.seqid = seqid; 3678 p->arg.seqid = seqid;
3196 p->res.seqid = seqid; 3679 p->res.seqid = seqid;
3680 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3197 p->arg.stateid = &lsp->ls_stateid; 3681 p->arg.stateid = &lsp->ls_stateid;
3198 p->lsp = lsp; 3682 p->lsp = lsp;
3199 atomic_inc(&lsp->ls_count); 3683 atomic_inc(&lsp->ls_count);
@@ -3217,6 +3701,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3217{ 3701{
3218 struct nfs4_unlockdata *calldata = data; 3702 struct nfs4_unlockdata *calldata = data;
3219 3703
3704 nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
3705 task->tk_status);
3220 if (RPC_ASSASSINATED(task)) 3706 if (RPC_ASSASSINATED(task))
3221 return; 3707 return;
3222 switch (task->tk_status) { 3708 switch (task->tk_status) {
@@ -3233,8 +3719,11 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3233 break; 3719 break;
3234 default: 3720 default:
3235 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) 3721 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
3236 rpc_restart_call(task); 3722 nfs4_restart_rpc(task,
3723 calldata->server->nfs_client);
3237 } 3724 }
3725 nfs4_sequence_free_slot(calldata->server->nfs_client,
3726 &calldata->res.seq_res);
3238} 3727}
3239 3728
3240static void nfs4_locku_prepare(struct rpc_task *task, void *data) 3729static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -3249,6 +3738,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
3249 return; 3738 return;
3250 } 3739 }
3251 calldata->timestamp = jiffies; 3740 calldata->timestamp = jiffies;
3741 if (nfs4_setup_sequence(calldata->server->nfs_client,
3742 &calldata->arg.seq_args,
3743 &calldata->res.seq_res, 1, task))
3744 return;
3252 rpc_call_start(task); 3745 rpc_call_start(task);
3253} 3746}
3254 3747
@@ -3341,6 +3834,7 @@ struct nfs4_lockdata {
3341 unsigned long timestamp; 3834 unsigned long timestamp;
3342 int rpc_status; 3835 int rpc_status;
3343 int cancelled; 3836 int cancelled;
3837 struct nfs_server *server;
3344}; 3838};
3345 3839
3346static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, 3840static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
@@ -3366,7 +3860,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3366 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3860 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3367 p->arg.lock_owner.id = lsp->ls_id.id; 3861 p->arg.lock_owner.id = lsp->ls_id.id;
3368 p->res.lock_seqid = p->arg.lock_seqid; 3862 p->res.lock_seqid = p->arg.lock_seqid;
3863 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3369 p->lsp = lsp; 3864 p->lsp = lsp;
3865 p->server = server;
3370 atomic_inc(&lsp->ls_count); 3866 atomic_inc(&lsp->ls_count);
3371 p->ctx = get_nfs_open_context(ctx); 3867 p->ctx = get_nfs_open_context(ctx);
3372 memcpy(&p->fl, fl, sizeof(p->fl)); 3868 memcpy(&p->fl, fl, sizeof(p->fl));
@@ -3396,6 +3892,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3396 } else 3892 } else
3397 data->arg.new_lock_owner = 0; 3893 data->arg.new_lock_owner = 0;
3398 data->timestamp = jiffies; 3894 data->timestamp = jiffies;
3895 if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
3896 &data->res.seq_res, 1, task))
3897 return;
3399 rpc_call_start(task); 3898 rpc_call_start(task);
3400 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 3899 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
3401} 3900}
@@ -3406,6 +3905,9 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3406 3905
3407 dprintk("%s: begin!\n", __func__); 3906 dprintk("%s: begin!\n", __func__);
3408 3907
3908 nfs4_sequence_done_free_slot(data->server, &data->res.seq_res,
3909 task->tk_status);
3910
3409 data->rpc_status = task->tk_status; 3911 data->rpc_status = task->tk_status;
3410 if (RPC_ASSASSINATED(task)) 3912 if (RPC_ASSASSINATED(task))
3411 goto out; 3913 goto out;
@@ -3487,8 +3989,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
3487 ret = nfs4_wait_for_completion_rpc_task(task); 3989 ret = nfs4_wait_for_completion_rpc_task(task);
3488 if (ret == 0) { 3990 if (ret == 0) {
3489 ret = data->rpc_status; 3991 ret = data->rpc_status;
3490 if (ret == -NFS4ERR_DENIED)
3491 ret = -EAGAIN;
3492 } else 3992 } else
3493 data->cancelled = 1; 3993 data->cancelled = 1;
3494 rpc_put_task(task); 3994 rpc_put_task(task);
@@ -3576,9 +4076,11 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
3576 int err; 4076 int err;
3577 4077
3578 do { 4078 do {
4079 err = _nfs4_proc_setlk(state, cmd, request);
4080 if (err == -NFS4ERR_DENIED)
4081 err = -EAGAIN;
3579 err = nfs4_handle_exception(NFS_SERVER(state->inode), 4082 err = nfs4_handle_exception(NFS_SERVER(state->inode),
3580 _nfs4_proc_setlk(state, cmd, request), 4083 err, &exception);
3581 &exception);
3582 } while (exception.retry); 4084 } while (exception.retry);
3583 return err; 4085 return err;
3584} 4086}
@@ -3630,8 +4132,37 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
3630 goto out; 4132 goto out;
3631 do { 4133 do {
3632 err = _nfs4_do_setlk(state, F_SETLK, fl, 0); 4134 err = _nfs4_do_setlk(state, F_SETLK, fl, 0);
3633 if (err != -NFS4ERR_DELAY) 4135 switch (err) {
3634 break; 4136 default:
4137 printk(KERN_ERR "%s: unhandled error %d.\n",
4138 __func__, err);
4139 case 0:
4140 case -ESTALE:
4141 goto out;
4142 case -NFS4ERR_EXPIRED:
4143 case -NFS4ERR_STALE_CLIENTID:
4144 case -NFS4ERR_STALE_STATEID:
4145 nfs4_schedule_state_recovery(server->nfs_client);
4146 goto out;
4147 case -ERESTARTSYS:
4148 /*
4149 * The show must go on: exit, but mark the
4150 * stateid as needing recovery.
4151 */
4152 case -NFS4ERR_ADMIN_REVOKED:
4153 case -NFS4ERR_BAD_STATEID:
4154 case -NFS4ERR_OPENMODE:
4155 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
4156 err = 0;
4157 goto out;
4158 case -ENOMEM:
4159 case -NFS4ERR_DENIED:
4160 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4161 err = 0;
4162 goto out;
4163 case -NFS4ERR_DELAY:
4164 break;
4165 }
3635 err = nfs4_handle_exception(server, err, &exception); 4166 err = nfs4_handle_exception(server, err, &exception);
3636 } while (exception.retry); 4167 } while (exception.retry);
3637out: 4168out:
@@ -3706,10 +4237,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3706 .page = page, 4237 .page = page,
3707 .bitmask = bitmask, 4238 .bitmask = bitmask,
3708 }; 4239 };
4240 struct nfs4_fs_locations_res res = {
4241 .fs_locations = fs_locations,
4242 };
3709 struct rpc_message msg = { 4243 struct rpc_message msg = {
3710 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], 4244 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
3711 .rpc_argp = &args, 4245 .rpc_argp = &args,
3712 .rpc_resp = fs_locations, 4246 .rpc_resp = &res,
3713 }; 4247 };
3714 int status; 4248 int status;
3715 4249
@@ -3717,24 +4251,720 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3717 nfs_fattr_init(&fs_locations->fattr); 4251 nfs_fattr_init(&fs_locations->fattr);
3718 fs_locations->server = server; 4252 fs_locations->server = server;
3719 fs_locations->nlocations = 0; 4253 fs_locations->nlocations = 0;
3720 status = rpc_call_sync(server->client, &msg, 0); 4254 status = nfs4_call_sync(server, &msg, &args, &res, 0);
3721 nfs_fixup_referral_attributes(&fs_locations->fattr); 4255 nfs_fixup_referral_attributes(&fs_locations->fattr);
3722 dprintk("%s: returned status = %d\n", __func__, status); 4256 dprintk("%s: returned status = %d\n", __func__, status);
3723 return status; 4257 return status;
3724} 4258}
3725 4259
3726struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { 4260#ifdef CONFIG_NFS_V4_1
4261/*
4262 * nfs4_proc_exchange_id()
4263 *
4264 * Since the clientid has expired, all compounds using sessions
4265 * associated with the stale clientid will be returning
4266 * NFS4ERR_BADSESSION in the sequence operation, and will therefore
4267 * be in some phase of session reset.
4268 */
4269static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4270{
4271 nfs4_verifier verifier;
4272 struct nfs41_exchange_id_args args = {
4273 .client = clp,
4274 .flags = clp->cl_exchange_flags,
4275 };
4276 struct nfs41_exchange_id_res res = {
4277 .client = clp,
4278 };
4279 int status;
4280 struct rpc_message msg = {
4281 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID],
4282 .rpc_argp = &args,
4283 .rpc_resp = &res,
4284 .rpc_cred = cred,
4285 };
4286 __be32 *p;
4287
4288 dprintk("--> %s\n", __func__);
4289 BUG_ON(clp == NULL);
4290
4291 p = (u32 *)verifier.data;
4292 *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
4293 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
4294 args.verifier = &verifier;
4295
4296 while (1) {
4297 args.id_len = scnprintf(args.id, sizeof(args.id),
4298 "%s/%s %u",
4299 clp->cl_ipaddr,
4300 rpc_peeraddr2str(clp->cl_rpcclient,
4301 RPC_DISPLAY_ADDR),
4302 clp->cl_id_uniquifier);
4303
4304 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4305
4306 if (status != NFS4ERR_CLID_INUSE)
4307 break;
4308
4309 if (signalled())
4310 break;
4311
4312 if (++clp->cl_id_uniquifier == 0)
4313 break;
4314 }
4315
4316 dprintk("<-- %s status= %d\n", __func__, status);
4317 return status;
4318}
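[Illustration] nfs4_proc_exchange_id() above builds the client-owner verifier from the boot time and retries with an incremented uniquifier while the server answers NFS4ERR_CLID_INUSE. A small user-space sketch of that identifier construction is shown below; the boot time, addresses and uniquifier are placeholder values, not taken from a live client.

	/* Sketch of the EXCHANGE_ID owner/verifier construction above. */
	#include <arpa/inet.h>   /* htonl */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct verifier { unsigned char data[8]; };

	static void build_verifier(struct verifier *v, long boot_sec, long boot_nsec)
	{
		/* Same packing as the hunk above: seconds then nanoseconds,
		 * truncated to 32 bits and stored in network byte order. */
		uint32_t sec = htonl((uint32_t)boot_sec);
		uint32_t nsec = htonl((uint32_t)boot_nsec);

		memcpy(v->data, &sec, 4);
		memcpy(v->data + 4, &nsec, 4);
	}

	int main(void)
	{
		struct verifier v;
		char id[128];
		unsigned uniquifier = 0;

		build_verifier(&v, 1246320000L, 123456789L);

		/* co_ownerid string: "<client ip>/<server addr> <uniquifier>" */
		int id_len = snprintf(id, sizeof(id), "%s/%s %u",
				      "192.0.2.10", "198.51.100.7", uniquifier);

		printf("id (%d bytes): %s\n", id_len, id);
		printf("verifier: %02x%02x%02x%02x %02x%02x%02x%02x\n",
		       v.data[0], v.data[1], v.data[2], v.data[3],
		       v.data[4], v.data[5], v.data[6], v.data[7]);
		return 0;
	}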
4319
4320struct nfs4_get_lease_time_data {
4321 struct nfs4_get_lease_time_args *args;
4322 struct nfs4_get_lease_time_res *res;
4323 struct nfs_client *clp;
4324};
4325
4326static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4327 void *calldata)
4328{
4329 int ret;
4330 struct nfs4_get_lease_time_data *data =
4331 (struct nfs4_get_lease_time_data *)calldata;
4332
4333 dprintk("--> %s\n", __func__);
4334 /* just setup sequence, do not trigger session recovery
4335 since we're invoked within one */
4336 ret = nfs41_setup_sequence(data->clp->cl_session,
4337 &data->args->la_seq_args,
4338 &data->res->lr_seq_res, 0, task);
4339
4340 BUG_ON(ret == -EAGAIN);
4341 rpc_call_start(task);
4342 dprintk("<-- %s\n", __func__);
4343}
4344
4345/*
4346 * Called from nfs4_state_manager thread for session setup, so don't recover
4347 * from sequence operation or clientid errors.
4348 */
4349static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4350{
4351 struct nfs4_get_lease_time_data *data =
4352 (struct nfs4_get_lease_time_data *)calldata;
4353
4354 dprintk("--> %s\n", __func__);
4355 nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
4356 switch (task->tk_status) {
4357 case -NFS4ERR_DELAY:
4358 case -NFS4ERR_GRACE:
4359 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4360 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4361 task->tk_status = 0;
4362 nfs4_restart_rpc(task, data->clp);
4363 return;
4364 }
4365 nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res);
4366 dprintk("<-- %s\n", __func__);
4367}
4368
4369struct rpc_call_ops nfs4_get_lease_time_ops = {
4370 .rpc_call_prepare = nfs4_get_lease_time_prepare,
4371 .rpc_call_done = nfs4_get_lease_time_done,
4372};
4373
4374int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4375{
4376 struct rpc_task *task;
4377 struct nfs4_get_lease_time_args args;
4378 struct nfs4_get_lease_time_res res = {
4379 .lr_fsinfo = fsinfo,
4380 };
4381 struct nfs4_get_lease_time_data data = {
4382 .args = &args,
4383 .res = &res,
4384 .clp = clp,
4385 };
4386 struct rpc_message msg = {
4387 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME],
4388 .rpc_argp = &args,
4389 .rpc_resp = &res,
4390 };
4391 struct rpc_task_setup task_setup = {
4392 .rpc_client = clp->cl_rpcclient,
4393 .rpc_message = &msg,
4394 .callback_ops = &nfs4_get_lease_time_ops,
4395 .callback_data = &data
4396 };
4397 int status;
4398
4399 res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4400 dprintk("--> %s\n", __func__);
4401 task = rpc_run_task(&task_setup);
4402
4403 if (IS_ERR(task))
4404 status = PTR_ERR(task);
4405 else {
4406 status = task->tk_status;
4407 rpc_put_task(task);
4408 }
4409 dprintk("<-- %s return %d\n", __func__, status);
4410
4411 return status;
4412}
4413
4414/*
4415 * Reset a slot table
4416 */
4417static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots,
4418 int old_max_slots, int ivalue)
4419{
4420 int i;
4421 int ret = 0;
4422
4423 dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl);
4424
4425 /*
4426 * Until we have dynamic slot table adjustment, insist
4427 * upon the same slot table size
4428 */
4429 if (max_slots != old_max_slots) {
 4430		dprintk("%s reset slot table doesn't match old\n",
4431 __func__);
4432 ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */
4433 goto out;
4434 }
4435 spin_lock(&tbl->slot_tbl_lock);
4436 for (i = 0; i < max_slots; ++i)
4437 tbl->slots[i].seq_nr = ivalue;
4438 tbl->highest_used_slotid = -1;
4439 spin_unlock(&tbl->slot_tbl_lock);
4440 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
4441 tbl, tbl->slots, tbl->max_slots);
4442out:
4443 dprintk("<-- %s: return %d\n", __func__, ret);
4444 return ret;
4445}
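[Illustration] nfs4_reset_slot_table() above re-seeds every slot's sequence number and clears highest_used_slotid under the table lock, and refuses a size change until dynamic slot-table resizing exists. A simplified user-space sketch of the same idea, with a pthread mutex standing in for the kernel spinlock and a toy structure in place of nfs4_slot_table:

	/* Simplified sketch of the slot-table reset logic above. */
	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	#define MAX_SLOTS 4

	struct slot { unsigned int seq_nr; };

	struct slot_table {
		pthread_mutex_t lock;
		struct slot slots[MAX_SLOTS];
		int max_slots;
		int highest_used_slotid;
	};

	static int reset_slot_table(struct slot_table *tbl, int new_max,
				    unsigned int ivalue)
	{
		int i;

		/* No dynamic resizing yet: insist on the old table size. */
		if (new_max != tbl->max_slots)
			return -EINVAL;

		pthread_mutex_lock(&tbl->lock);
		for (i = 0; i < tbl->max_slots; i++)
			tbl->slots[i].seq_nr = ivalue;
		tbl->highest_used_slotid = -1;	/* no slot currently in use */
		pthread_mutex_unlock(&tbl->lock);
		return 0;
	}

	int main(void)
	{
		struct slot_table tbl = {
			.lock = PTHREAD_MUTEX_INITIALIZER,
			.max_slots = MAX_SLOTS,
			.highest_used_slotid = 2,
		};

		printf("reset ok: %d\n", reset_slot_table(&tbl, MAX_SLOTS, 1));
		printf("size mismatch: %d\n", reset_slot_table(&tbl, MAX_SLOTS + 1, 1));
		return 0;
	}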
4446
4447/*
4448 * Reset the forechannel and backchannel slot tables
4449 */
4450static int nfs4_reset_slot_tables(struct nfs4_session *session)
4451{
4452 int status;
4453
4454 status = nfs4_reset_slot_table(&session->fc_slot_table,
4455 session->fc_attrs.max_reqs,
4456 session->fc_slot_table.max_slots,
4457 1);
4458 if (status)
4459 return status;
4460
4461 status = nfs4_reset_slot_table(&session->bc_slot_table,
4462 session->bc_attrs.max_reqs,
4463 session->bc_slot_table.max_slots,
4464 0);
4465 return status;
4466}
4467
4468/* Destroy the slot table */
4469static void nfs4_destroy_slot_tables(struct nfs4_session *session)
4470{
4471 if (session->fc_slot_table.slots != NULL) {
4472 kfree(session->fc_slot_table.slots);
4473 session->fc_slot_table.slots = NULL;
4474 }
4475 if (session->bc_slot_table.slots != NULL) {
4476 kfree(session->bc_slot_table.slots);
4477 session->bc_slot_table.slots = NULL;
4478 }
4479 return;
4480}
4481
4482/*
4483 * Initialize slot table
4484 */
4485static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4486 int max_slots, int ivalue)
4487{
4488 int i;
4489 struct nfs4_slot *slot;
4490 int ret = -ENOMEM;
4491
4492 BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
4493
4494 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
4495
4496 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
4497 if (!slot)
4498 goto out;
4499 for (i = 0; i < max_slots; ++i)
4500 slot[i].seq_nr = ivalue;
4501 ret = 0;
4502
4503 spin_lock(&tbl->slot_tbl_lock);
4504 if (tbl->slots != NULL) {
4505 spin_unlock(&tbl->slot_tbl_lock);
4506 dprintk("%s: slot table already initialized. tbl=%p slots=%p\n",
4507 __func__, tbl, tbl->slots);
4508 WARN_ON(1);
4509 goto out_free;
4510 }
4511 tbl->max_slots = max_slots;
4512 tbl->slots = slot;
4513 tbl->highest_used_slotid = -1; /* no slot is currently used */
4514 spin_unlock(&tbl->slot_tbl_lock);
4515 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
4516 tbl, tbl->slots, tbl->max_slots);
4517out:
4518 dprintk("<-- %s: return %d\n", __func__, ret);
4519 return ret;
4520
4521out_free:
4522 kfree(slot);
4523 goto out;
4524}
4525
4526/*
4527 * Initialize the forechannel and backchannel tables
4528 */
4529static int nfs4_init_slot_tables(struct nfs4_session *session)
4530{
4531 int status;
4532
4533 status = nfs4_init_slot_table(&session->fc_slot_table,
4534 session->fc_attrs.max_reqs, 1);
4535 if (status)
4536 return status;
4537
4538 status = nfs4_init_slot_table(&session->bc_slot_table,
4539 session->bc_attrs.max_reqs, 0);
4540 if (status)
4541 nfs4_destroy_slot_tables(session);
4542
4543 return status;
4544}
4545
4546struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4547{
4548 struct nfs4_session *session;
4549 struct nfs4_slot_table *tbl;
4550
4551 session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
4552 if (!session)
4553 return NULL;
4554
4555 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
4556 /*
4557 * The create session reply races with the server back
4558 * channel probe. Mark the client NFS_CS_SESSION_INITING
4559 * so that the client back channel can find the
4560 * nfs_client struct
4561 */
4562 clp->cl_cons_state = NFS_CS_SESSION_INITING;
4563
4564 tbl = &session->fc_slot_table;
4565 spin_lock_init(&tbl->slot_tbl_lock);
4566 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4567
4568 tbl = &session->bc_slot_table;
4569 spin_lock_init(&tbl->slot_tbl_lock);
4570 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
4571
4572 session->clp = clp;
4573 return session;
4574}
4575
4576void nfs4_destroy_session(struct nfs4_session *session)
4577{
4578 nfs4_proc_destroy_session(session);
4579 dprintk("%s Destroy backchannel for xprt %p\n",
4580 __func__, session->clp->cl_rpcclient->cl_xprt);
4581 xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt,
4582 NFS41_BC_MIN_CALLBACKS);
4583 nfs4_destroy_slot_tables(session);
4584 kfree(session);
4585}
4586
4587/*
4588 * Initialize the values to be used by the client in CREATE_SESSION
4589 * If nfs4_init_session set the fore channel request and response sizes,
4590 * use them.
4591 *
4592 * Set the back channel max_resp_sz_cached to zero to force the client to
4593 * always set csa_cachethis to FALSE because the current implementation
4594 * of the back channel DRC only supports caching the CB_SEQUENCE operation.
4595 */
4596static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4597{
4598 struct nfs4_session *session = args->client->cl_session;
4599 unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
4600 mxresp_sz = session->fc_attrs.max_resp_sz;
4601
4602 if (mxrqst_sz == 0)
4603 mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
4604 if (mxresp_sz == 0)
4605 mxresp_sz = NFS_MAX_FILE_IO_SIZE;
4606 /* Fore channel attributes */
4607 args->fc_attrs.headerpadsz = 0;
4608 args->fc_attrs.max_rqst_sz = mxrqst_sz;
4609 args->fc_attrs.max_resp_sz = mxresp_sz;
4610 args->fc_attrs.max_resp_sz_cached = mxresp_sz;
4611 args->fc_attrs.max_ops = NFS4_MAX_OPS;
4612 args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
4613
4614 dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
4615 "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
4616 __func__,
4617 args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz,
4618 args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops,
4619 args->fc_attrs.max_reqs);
4620
4621 /* Back channel attributes */
4622 args->bc_attrs.headerpadsz = 0;
4623 args->bc_attrs.max_rqst_sz = PAGE_SIZE;
4624 args->bc_attrs.max_resp_sz = PAGE_SIZE;
4625 args->bc_attrs.max_resp_sz_cached = 0;
4626 args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
4627 args->bc_attrs.max_reqs = 1;
4628
4629 dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
4630 "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
4631 __func__,
4632 args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz,
4633 args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops,
4634 args->bc_attrs.max_reqs);
4635}
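[Illustration] nfs4_init_channel_attrs() above falls back to a large default when no fore-channel request/response size was configured, and pins the back channel to one page each way with max_resp_sz_cached forced to zero so the client never asks the server to cache replies. A condensed sketch of that defaulting; the sizes below are placeholder values, not the kernel constants.

	/* Condensed sketch of the CREATE_SESSION channel-attribute defaults above. */
	#include <stdio.h>

	#define IO_SIZE_DEFAULT  (1024 * 1024)	/* stand-in for NFS_MAX_FILE_IO_SIZE */
	#define PAGE_SZ          4096

	struct channel_attrs {
		unsigned int max_rqst_sz;
		unsigned int max_resp_sz;
		unsigned int max_resp_sz_cached;
	};

	static void init_channel_attrs(struct channel_attrs *fc,
				       struct channel_attrs *bc,
				       unsigned int cfg_rqst, unsigned int cfg_resp)
	{
		/* Fore channel: use configured sizes, or a large default if unset. */
		fc->max_rqst_sz = cfg_rqst ? cfg_rqst : IO_SIZE_DEFAULT;
		fc->max_resp_sz = cfg_resp ? cfg_resp : IO_SIZE_DEFAULT;
		fc->max_resp_sz_cached = fc->max_resp_sz;

		/* Back channel: one page each way, never request reply caching. */
		bc->max_rqst_sz = PAGE_SZ;
		bc->max_resp_sz = PAGE_SZ;
		bc->max_resp_sz_cached = 0;	/* forces csa_cachethis = FALSE */
	}

	int main(void)
	{
		struct channel_attrs fc, bc;

		init_channel_attrs(&fc, &bc, 0, 32768);
		printf("fore: rqst=%u resp=%u cached=%u\n",
		       fc.max_rqst_sz, fc.max_resp_sz, fc.max_resp_sz_cached);
		printf("back: rqst=%u resp=%u cached=%u\n",
		       bc.max_rqst_sz, bc.max_resp_sz, bc.max_resp_sz_cached);
		return 0;
	}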
4636
4637static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd)
4638{
4639 if (rcvd <= sent)
4640 return 0;
4641 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. "
4642 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd);
4643 return -EINVAL;
4644}
4645
4646#define _verify_fore_channel_attr(_name_) \
4647 _verify_channel_attr("fore", #_name_, \
4648 args->fc_attrs._name_, \
4649 session->fc_attrs._name_)
4650
4651#define _verify_back_channel_attr(_name_) \
4652 _verify_channel_attr("back", #_name_, \
4653 args->bc_attrs._name_, \
4654 session->bc_attrs._name_)
4655
4656/*
4657 * The server is not allowed to increase the fore channel header pad size,
4658 * maximum response size, or maximum number of operations.
4659 *
 4660 * The back channel attributes are only negotiated down: we send what the
4661 * (back channel) server insists upon.
4662 */
4663static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4664 struct nfs4_session *session)
4665{
4666 int ret = 0;
4667
4668 ret |= _verify_fore_channel_attr(headerpadsz);
4669 ret |= _verify_fore_channel_attr(max_resp_sz);
4670 ret |= _verify_fore_channel_attr(max_ops);
4671
4672 ret |= _verify_back_channel_attr(headerpadsz);
4673 ret |= _verify_back_channel_attr(max_rqst_sz);
4674 ret |= _verify_back_channel_attr(max_resp_sz);
4675 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4676 ret |= _verify_back_channel_attr(max_ops);
4677 ret |= _verify_back_channel_attr(max_reqs);
4678
4679 return ret;
4680}
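[Illustration] nfs4_verify_channel_attrs() above ORs together one check per attribute, and the _verify_*_channel_attr() macros use `#_name_` stringification so each warning names the field it validates. The pattern in isolation looks like the sketch below; the attrs structure and values are toys, not the kernel's channel attributes.

	/* Isolated illustration of the _verify_channel_attr() macro pattern above. */
	#include <stdio.h>

	struct attrs { unsigned int max_resp_sz, max_ops; };

	static int verify_attr(const char *chan, const char *name,
			       unsigned int sent, unsigned int rcvd)
	{
		if (rcvd <= sent)
			return 0;
		fprintf(stderr, "%s channel %s increased: sent=%u rcvd=%u\n",
			chan, name, sent, rcvd);
		return -1;
	}

	/* #_name_ turns the field name into the string used in the warning. */
	#define verify_fore(_name_) \
		verify_attr("fore", #_name_, sent._name_, rcvd._name_)

	int main(void)
	{
		struct attrs sent = { .max_resp_sz = 65536, .max_ops = 8 };
		struct attrs rcvd = { .max_resp_sz = 65536, .max_ops = 16 };
		int ret = 0;

		ret |= verify_fore(max_resp_sz);	/* ok: not increased */
		ret |= verify_fore(max_ops);		/* warns: 16 > 8 */
		printf("verify result: %d\n", ret);
		return 0;
	}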
4681
4682static int _nfs4_proc_create_session(struct nfs_client *clp)
4683{
4684 struct nfs4_session *session = clp->cl_session;
4685 struct nfs41_create_session_args args = {
4686 .client = clp,
4687 .cb_program = NFS4_CALLBACK,
4688 };
4689 struct nfs41_create_session_res res = {
4690 .client = clp,
4691 };
4692 struct rpc_message msg = {
4693 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
4694 .rpc_argp = &args,
4695 .rpc_resp = &res,
4696 };
4697 int status;
4698
4699 nfs4_init_channel_attrs(&args);
4700 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
4701
4702 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
4703
4704 if (!status)
4705 /* Verify the session's negotiated channel_attrs values */
4706 status = nfs4_verify_channel_attrs(&args, session);
4707 if (!status) {
4708 /* Increment the clientid slot sequence id */
4709 clp->cl_seqid++;
4710 }
4711
4712 return status;
4713}
4714
4715/*
4716 * Issues a CREATE_SESSION operation to the server.
4717 * It is the responsibility of the caller to verify the session is
4718 * expired before calling this routine.
4719 */
4720int nfs4_proc_create_session(struct nfs_client *clp, int reset)
4721{
4722 int status;
4723 unsigned *ptr;
4724 struct nfs_fsinfo fsinfo;
4725 struct nfs4_session *session = clp->cl_session;
4726
4727 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4728
4729 status = _nfs4_proc_create_session(clp);
4730 if (status)
4731 goto out;
4732
4733 /* Init or reset the fore channel */
4734 if (reset)
4735 status = nfs4_reset_slot_tables(session);
4736 else
4737 status = nfs4_init_slot_tables(session);
4738 dprintk("fore channel slot table initialization returned %d\n", status);
4739 if (status)
4740 goto out;
4741
4742 ptr = (unsigned *)&session->sess_id.data[0];
4743 dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
4744 clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
4745
4746 if (reset)
 4747		/* Lease time is already set */
4748 goto out;
4749
4750 /* Get the lease time */
4751 status = nfs4_proc_get_lease_time(clp, &fsinfo);
4752 if (status == 0) {
4753 /* Update lease time and schedule renewal */
4754 spin_lock(&clp->cl_lock);
4755 clp->cl_lease_time = fsinfo.lease_time * HZ;
4756 clp->cl_last_renewal = jiffies;
4757 clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
4758 spin_unlock(&clp->cl_lock);
4759
4760 nfs4_schedule_state_renewal(clp);
4761 }
4762out:
4763 dprintk("<-- %s\n", __func__);
4764 return status;
4765}
4766
4767/*
4768 * Issue the over-the-wire RPC DESTROY_SESSION.
4769 * The caller must serialize access to this routine.
4770 */
4771int nfs4_proc_destroy_session(struct nfs4_session *session)
4772{
4773 int status = 0;
4774 struct rpc_message msg;
4775
4776 dprintk("--> nfs4_proc_destroy_session\n");
4777
4778 /* session is still being setup */
4779 if (session->clp->cl_cons_state != NFS_CS_READY)
4780 return status;
4781
4782 msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
4783 msg.rpc_argp = session;
4784 msg.rpc_resp = NULL;
4785 msg.rpc_cred = NULL;
4786 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
4787
4788 if (status)
4789 printk(KERN_WARNING
4790 "Got error %d from the server on DESTROY_SESSION. "
4791 "Session has been destroyed regardless...\n", status);
4792
4793 dprintk("<-- nfs4_proc_destroy_session\n");
4794 return status;
4795}
4796
4797/*
4798 * Renew the cl_session lease.
4799 */
4800static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
4801{
4802 struct nfs4_sequence_args args;
4803 struct nfs4_sequence_res res;
4804
4805 struct rpc_message msg = {
4806 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
4807 .rpc_argp = &args,
4808 .rpc_resp = &res,
4809 .rpc_cred = cred,
4810 };
4811
4812 args.sa_cache_this = 0;
4813
4814 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
4815 &res, 0);
4816}
4817
4818void nfs41_sequence_call_done(struct rpc_task *task, void *data)
4819{
4820 struct nfs_client *clp = (struct nfs_client *)data;
4821
4822 nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
4823
4824 if (task->tk_status < 0) {
4825 dprintk("%s ERROR %d\n", __func__, task->tk_status);
4826
4827 if (_nfs4_async_handle_error(task, NULL, clp, NULL)
4828 == -EAGAIN) {
4829 nfs4_restart_rpc(task, clp);
4830 return;
4831 }
4832 }
4833 nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp);
4834 dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
4835
4836 put_rpccred(task->tk_msg.rpc_cred);
4837 kfree(task->tk_msg.rpc_argp);
4838 kfree(task->tk_msg.rpc_resp);
4839
4840 dprintk("<-- %s\n", __func__);
4841}
4842
4843static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
4844{
4845 struct nfs_client *clp;
4846 struct nfs4_sequence_args *args;
4847 struct nfs4_sequence_res *res;
4848
4849 clp = (struct nfs_client *)data;
4850 args = task->tk_msg.rpc_argp;
4851 res = task->tk_msg.rpc_resp;
4852
4853 if (nfs4_setup_sequence(clp, args, res, 0, task))
4854 return;
4855 rpc_call_start(task);
4856}
4857
4858static const struct rpc_call_ops nfs41_sequence_ops = {
4859 .rpc_call_done = nfs41_sequence_call_done,
4860 .rpc_call_prepare = nfs41_sequence_prepare,
4861};
4862
4863static int nfs41_proc_async_sequence(struct nfs_client *clp,
4864 struct rpc_cred *cred)
4865{
4866 struct nfs4_sequence_args *args;
4867 struct nfs4_sequence_res *res;
4868 struct rpc_message msg = {
4869 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
4870 .rpc_cred = cred,
4871 };
4872
4873 args = kzalloc(sizeof(*args), GFP_KERNEL);
4874 if (!args)
4875 return -ENOMEM;
4876 res = kzalloc(sizeof(*res), GFP_KERNEL);
4877 if (!res) {
4878 kfree(args);
4879 return -ENOMEM;
4880 }
4881 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
4882 msg.rpc_argp = args;
4883 msg.rpc_resp = res;
4884
4885 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
4886 &nfs41_sequence_ops, (void *)clp);
4887}
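[Illustration] nfs41_proc_async_sequence() above heap-allocates the args/res pair because the RPC completes asynchronously, and nfs41_sequence_call_done() later frees both through task->tk_msg. The ownership pattern (submitter allocates, completion callback frees) is sketched below with a trivial synchronous stand-in for the RPC layer; none of these names are kernel interfaces.

	/* Sketch of the alloc-in-submit / free-in-completion ownership above. */
	#include <stdio.h>
	#include <stdlib.h>

	struct seq_args { int cache_this; };
	struct seq_res  { int slotid; };

	struct call {
		struct seq_args *argp;
		struct seq_res *resp;
		void (*done)(struct call *c, int status);
	};

	static void sequence_done(struct call *c, int status)
	{
		printf("sequence finished, status=%d, slotid=%d\n",
		       status, c->resp->slotid);
		/* The completion path owns and releases both allocations. */
		free(c->argp);
		free(c->resp);
	}

	/* Stand-in for rpc_call_async(): here it simply completes at once. */
	static void run_async(struct call *c)
	{
		c->resp->slotid = 0;
		c->done(c, 0);
	}

	static int submit_sequence(void)
	{
		struct call c = { .done = sequence_done };

		c.argp = calloc(1, sizeof(*c.argp));
		if (!c.argp)
			return -1;
		c.resp = calloc(1, sizeof(*c.resp));
		if (!c.resp) {
			free(c.argp);
			return -1;
		}
		run_async(&c);	/* from here on the callback owns argp/resp */
		return 0;
	}

	int main(void)
	{
		return submit_sequence() ? 1 : 0;
	}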
4888
4889#endif /* CONFIG_NFS_V4_1 */
4890
4891struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
3727 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, 4892 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
3728 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, 4893 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
3729 .recover_open = nfs4_open_reclaim, 4894 .recover_open = nfs4_open_reclaim,
3730 .recover_lock = nfs4_lock_reclaim, 4895 .recover_lock = nfs4_lock_reclaim,
4896 .establish_clid = nfs4_init_clientid,
4897 .get_clid_cred = nfs4_get_setclientid_cred,
4898};
4899
4900#if defined(CONFIG_NFS_V4_1)
4901struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
4902 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
4903 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
4904 .recover_open = nfs4_open_reclaim,
4905 .recover_lock = nfs4_lock_reclaim,
4906 .establish_clid = nfs4_proc_exchange_id,
4907 .get_clid_cred = nfs4_get_exchange_id_cred,
4908};
4909#endif /* CONFIG_NFS_V4_1 */
4910
4911struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
4912 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
4913 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
4914 .recover_open = nfs4_open_expired,
4915 .recover_lock = nfs4_lock_expired,
4916 .establish_clid = nfs4_init_clientid,
4917 .get_clid_cred = nfs4_get_setclientid_cred,
3731}; 4918};
3732 4919
3733struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = { 4920#if defined(CONFIG_NFS_V4_1)
4921struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
3734 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 4922 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
3735 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 4923 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
3736 .recover_open = nfs4_open_expired, 4924 .recover_open = nfs4_open_expired,
3737 .recover_lock = nfs4_lock_expired, 4925 .recover_lock = nfs4_lock_expired,
4926 .establish_clid = nfs4_proc_exchange_id,
4927 .get_clid_cred = nfs4_get_exchange_id_cred,
4928};
4929#endif /* CONFIG_NFS_V4_1 */
4930
4931struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
4932 .sched_state_renewal = nfs4_proc_async_renew,
4933 .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
4934 .renew_lease = nfs4_proc_renew,
4935};
4936
4937#if defined(CONFIG_NFS_V4_1)
4938struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
4939 .sched_state_renewal = nfs41_proc_async_sequence,
4940 .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
4941 .renew_lease = nfs4_proc_sequence,
4942};
4943#endif
4944
4945/*
4946 * Per minor version reboot and network partition recovery ops
4947 */
4948
4949struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
4950 &nfs40_reboot_recovery_ops,
4951#if defined(CONFIG_NFS_V4_1)
4952 &nfs41_reboot_recovery_ops,
4953#endif
4954};
4955
4956struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
4957 &nfs40_nograce_recovery_ops,
4958#if defined(CONFIG_NFS_V4_1)
4959 &nfs41_nograce_recovery_ops,
4960#endif
4961};
4962
4963struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
4964 &nfs40_state_renewal_ops,
4965#if defined(CONFIG_NFS_V4_1)
4966 &nfs41_state_renewal_ops,
4967#endif
3738}; 4968};
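[Illustration] The ops arrays above let callers select the NFSv4.0 or NFSv4.1 implementation by indexing with cl_minorversion; the nfs4renewd.c and nfs4state.c hunks further down use exactly that dispatch for renewal and recovery. A user-space sketch of the pattern, with illustrative function and structure names:

	/* Sketch of the per-minorversion ops dispatch introduced above. */
	#include <stdio.h>

	struct renewal_ops {
		const char *name;
		int (*renew_lease)(void);
	};

	static int renew_v40(void) { puts("RENEW");    return 0; }
	static int renew_v41(void) { puts("SEQUENCE"); return 0; }

	static const struct renewal_ops v40_ops = { "v4.0", renew_v40 };
	static const struct renewal_ops v41_ops = { "v4.1", renew_v41 };

	/* Indexed by minor version, like nfs4_state_renewal_ops[]. */
	static const struct renewal_ops *renewal_ops[] = { &v40_ops, &v41_ops };

	int main(void)
	{
		for (unsigned int minor = 0; minor < 2; minor++) {
			const struct renewal_ops *ops = renewal_ops[minor];

			printf("minorversion %u uses %s: ", minor, ops->name);
			ops->renew_lease();
		}
		return 0;
	}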
3739 4969
3740static const struct inode_operations nfs4_file_inode_operations = { 4970static const struct inode_operations nfs4_file_inode_operations = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index f524e932ff7b..e27c6cef18f2 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -59,12 +59,14 @@
59void 59void
60nfs4_renew_state(struct work_struct *work) 60nfs4_renew_state(struct work_struct *work)
61{ 61{
62 struct nfs4_state_maintenance_ops *ops;
62 struct nfs_client *clp = 63 struct nfs_client *clp =
63 container_of(work, struct nfs_client, cl_renewd.work); 64 container_of(work, struct nfs_client, cl_renewd.work);
64 struct rpc_cred *cred; 65 struct rpc_cred *cred;
65 long lease, timeout; 66 long lease, timeout;
66 unsigned long last, now; 67 unsigned long last, now;
67 68
69 ops = nfs4_state_renewal_ops[clp->cl_minorversion];
68 dprintk("%s: start\n", __func__); 70 dprintk("%s: start\n", __func__);
69 /* Are there any active superblocks? */ 71 /* Are there any active superblocks? */
70 if (list_empty(&clp->cl_superblocks)) 72 if (list_empty(&clp->cl_superblocks))
@@ -76,7 +78,7 @@ nfs4_renew_state(struct work_struct *work)
76 timeout = (2 * lease) / 3 + (long)last - (long)now; 78 timeout = (2 * lease) / 3 + (long)last - (long)now;
77 /* Are we close to a lease timeout? */ 79 /* Are we close to a lease timeout? */
78 if (time_after(now, last + lease/3)) { 80 if (time_after(now, last + lease/3)) {
79 cred = nfs4_get_renew_cred_locked(clp); 81 cred = ops->get_state_renewal_cred_locked(clp);
80 spin_unlock(&clp->cl_lock); 82 spin_unlock(&clp->cl_lock);
81 if (cred == NULL) { 83 if (cred == NULL) {
82 if (list_empty(&clp->cl_delegations)) { 84 if (list_empty(&clp->cl_delegations)) {
@@ -86,7 +88,7 @@ nfs4_renew_state(struct work_struct *work)
86 nfs_expire_all_delegations(clp); 88 nfs_expire_all_delegations(clp);
87 } else { 89 } else {
88 /* Queue an asynchronous RENEW. */ 90 /* Queue an asynchronous RENEW. */
89 nfs4_proc_async_renew(clp, cred); 91 ops->sched_state_renewal(clp, cred);
90 put_rpccred(cred); 92 put_rpccred(cred);
91 } 93 }
92 timeout = (2 * lease) / 3; 94 timeout = (2 * lease) / 3;
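[Illustration] The nfs4renewd.c hunk above keeps the existing timing rule while switching to the per-minorversion ops: renew once more than a third of the lease has elapsed, otherwise sleep until the two-thirds point. A tiny worked example of that arithmetic (in seconds rather than jiffies, with made-up values):

	/* Worked example of the renewal timing used above. */
	#include <stdio.h>

	int main(void)
	{
		long lease = 90;	/* lease period */
		long last  = 1000;	/* time of last renewal */
		long now   = 1040;	/* current time: 40s elapsed */

		long timeout = (2 * lease) / 3 + last - now;	/* 60 + 1000 - 1040 = 20 */

		if (now > last + lease / 3)			/* 1040 > 1030: renew now */
			printf("renewing; next check in %ld s\n", (2 * lease) / 3);
		else
			printf("no renewal needed; recheck in %ld s\n", timeout);
		return 0;
	}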
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0298e909559f..b73c5a728655 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -60,7 +60,7 @@ const nfs4_stateid zero_stateid;
60 60
61static LIST_HEAD(nfs4_clientid_list); 61static LIST_HEAD(nfs4_clientid_list);
62 62
63static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) 63int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
64{ 64{
65 unsigned short port; 65 unsigned short port;
66 int status; 66 int status;
@@ -77,7 +77,7 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
77 return status; 77 return status;
78} 78}
79 79
80static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) 80struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
81{ 81{
82 struct rpc_cred *cred = NULL; 82 struct rpc_cred *cred = NULL;
83 83
@@ -114,17 +114,21 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
114 return cred; 114 return cred;
115} 115}
116 116
117static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 117#if defined(CONFIG_NFS_V4_1)
118
119struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
118{ 120{
119 struct rpc_cred *cred; 121 struct rpc_cred *cred;
120 122
121 spin_lock(&clp->cl_lock); 123 spin_lock(&clp->cl_lock);
122 cred = nfs4_get_renew_cred_locked(clp); 124 cred = nfs4_get_machine_cred_locked(clp);
123 spin_unlock(&clp->cl_lock); 125 spin_unlock(&clp->cl_lock);
124 return cred; 126 return cred;
125} 127}
126 128
127static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 129#endif /* CONFIG_NFS_V4_1 */
130
131struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
128{ 132{
129 struct nfs4_state_owner *sp; 133 struct nfs4_state_owner *sp;
130 struct rb_node *pos; 134 struct rb_node *pos;
@@ -738,12 +742,14 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
738 742
739void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) 743void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
740{ 744{
741 if (status == -NFS4ERR_BAD_SEQID) { 745 struct nfs4_state_owner *sp = container_of(seqid->sequence,
742 struct nfs4_state_owner *sp = container_of(seqid->sequence, 746 struct nfs4_state_owner, so_seqid);
743 struct nfs4_state_owner, so_seqid); 747 struct nfs_server *server = sp->so_server;
748
749 if (status == -NFS4ERR_BAD_SEQID)
744 nfs4_drop_state_owner(sp); 750 nfs4_drop_state_owner(sp);
745 } 751 if (!nfs4_has_session(server->nfs_client))
746 nfs_increment_seqid(status, seqid); 752 nfs_increment_seqid(status, seqid);
747} 753}
748 754
749/* 755/*
@@ -847,32 +853,45 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
847 struct file_lock *fl; 853 struct file_lock *fl;
848 int status = 0; 854 int status = 0;
849 855
856 if (inode->i_flock == NULL)
857 return 0;
858
859 /* Guard against delegation returns and new lock/unlock calls */
850 down_write(&nfsi->rwsem); 860 down_write(&nfsi->rwsem);
861 /* Protect inode->i_flock using the BKL */
862 lock_kernel();
851 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 863 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
852 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 864 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
853 continue; 865 continue;
854 if (nfs_file_open_context(fl->fl_file)->state != state) 866 if (nfs_file_open_context(fl->fl_file)->state != state)
855 continue; 867 continue;
868 unlock_kernel();
856 status = ops->recover_lock(state, fl); 869 status = ops->recover_lock(state, fl);
857 if (status >= 0)
858 continue;
859 switch (status) { 870 switch (status) {
871 case 0:
872 break;
873 case -ESTALE:
874 case -NFS4ERR_ADMIN_REVOKED:
875 case -NFS4ERR_STALE_STATEID:
876 case -NFS4ERR_BAD_STATEID:
877 case -NFS4ERR_EXPIRED:
878 case -NFS4ERR_NO_GRACE:
879 case -NFS4ERR_STALE_CLIENTID:
880 goto out;
860 default: 881 default:
861 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 882 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
862 __func__, status); 883 __func__, status);
863 case -NFS4ERR_EXPIRED: 884 case -ENOMEM:
864 case -NFS4ERR_NO_GRACE: 885 case -NFS4ERR_DENIED:
865 case -NFS4ERR_RECLAIM_BAD: 886 case -NFS4ERR_RECLAIM_BAD:
866 case -NFS4ERR_RECLAIM_CONFLICT: 887 case -NFS4ERR_RECLAIM_CONFLICT:
867 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 888 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
868 break; 889 status = 0;
869 case -NFS4ERR_STALE_CLIENTID:
870 goto out_err;
871 } 890 }
891 lock_kernel();
872 } 892 }
873 up_write(&nfsi->rwsem); 893 unlock_kernel();
874 return 0; 894out:
875out_err:
876 up_write(&nfsi->rwsem); 895 up_write(&nfsi->rwsem);
877 return status; 896 return status;
878} 897}
@@ -918,6 +937,7 @@ restart:
918 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 937 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
919 __func__, status); 938 __func__, status);
920 case -ENOENT: 939 case -ENOENT:
940 case -ENOMEM:
921 case -ESTALE: 941 case -ESTALE:
922 /* 942 /*
923 * Open state on this file cannot be recovered 943 * Open state on this file cannot be recovered
@@ -928,6 +948,9 @@ restart:
928 /* Mark the file as being 'closed' */ 948 /* Mark the file as being 'closed' */
929 state->state = 0; 949 state->state = 0;
930 break; 950 break;
951 case -NFS4ERR_ADMIN_REVOKED:
952 case -NFS4ERR_STALE_STATEID:
953 case -NFS4ERR_BAD_STATEID:
931 case -NFS4ERR_RECLAIM_BAD: 954 case -NFS4ERR_RECLAIM_BAD:
932 case -NFS4ERR_RECLAIM_CONFLICT: 955 case -NFS4ERR_RECLAIM_CONFLICT:
933 nfs4_state_mark_reclaim_nograce(sp->so_client, state); 956 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
@@ -1042,6 +1065,14 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1042 case -NFS4ERR_EXPIRED: 1065 case -NFS4ERR_EXPIRED:
1043 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1066 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1044 nfs4_state_start_reclaim_nograce(clp); 1067 nfs4_state_start_reclaim_nograce(clp);
1068 case -NFS4ERR_BADSESSION:
1069 case -NFS4ERR_BADSLOT:
1070 case -NFS4ERR_BAD_HIGH_SLOT:
1071 case -NFS4ERR_DEADSESSION:
1072 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1073 case -NFS4ERR_SEQ_FALSE_RETRY:
1074 case -NFS4ERR_SEQ_MISORDERED:
1075 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
1045 } 1076 }
1046} 1077}
1047 1078
@@ -1075,18 +1106,22 @@ restart:
1075static int nfs4_check_lease(struct nfs_client *clp) 1106static int nfs4_check_lease(struct nfs_client *clp)
1076{ 1107{
1077 struct rpc_cred *cred; 1108 struct rpc_cred *cred;
1109 struct nfs4_state_maintenance_ops *ops =
1110 nfs4_state_renewal_ops[clp->cl_minorversion];
1078 int status = -NFS4ERR_EXPIRED; 1111 int status = -NFS4ERR_EXPIRED;
1079 1112
1080 /* Is the client already known to have an expired lease? */ 1113 /* Is the client already known to have an expired lease? */
1081 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1114 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1082 return 0; 1115 return 0;
1083 cred = nfs4_get_renew_cred(clp); 1116 spin_lock(&clp->cl_lock);
1117 cred = ops->get_state_renewal_cred_locked(clp);
1118 spin_unlock(&clp->cl_lock);
1084 if (cred == NULL) { 1119 if (cred == NULL) {
1085 cred = nfs4_get_setclientid_cred(clp); 1120 cred = nfs4_get_setclientid_cred(clp);
1086 if (cred == NULL) 1121 if (cred == NULL)
1087 goto out; 1122 goto out;
1088 } 1123 }
1089 status = nfs4_proc_renew(clp, cred); 1124 status = ops->renew_lease(clp, cred);
1090 put_rpccred(cred); 1125 put_rpccred(cred);
1091out: 1126out:
1092 nfs4_recovery_handle_error(clp, status); 1127 nfs4_recovery_handle_error(clp, status);
@@ -1096,21 +1131,98 @@ out:
1096static int nfs4_reclaim_lease(struct nfs_client *clp) 1131static int nfs4_reclaim_lease(struct nfs_client *clp)
1097{ 1132{
1098 struct rpc_cred *cred; 1133 struct rpc_cred *cred;
1134 struct nfs4_state_recovery_ops *ops =
1135 nfs4_reboot_recovery_ops[clp->cl_minorversion];
1099 int status = -ENOENT; 1136 int status = -ENOENT;
1100 1137
1101 cred = nfs4_get_setclientid_cred(clp); 1138 cred = ops->get_clid_cred(clp);
1102 if (cred != NULL) { 1139 if (cred != NULL) {
1103 status = nfs4_init_client(clp, cred); 1140 status = ops->establish_clid(clp, cred);
1104 put_rpccred(cred); 1141 put_rpccred(cred);
1105 /* Handle case where the user hasn't set up machine creds */ 1142 /* Handle case where the user hasn't set up machine creds */
1106 if (status == -EACCES && cred == clp->cl_machine_cred) { 1143 if (status == -EACCES && cred == clp->cl_machine_cred) {
1107 nfs4_clear_machine_cred(clp); 1144 nfs4_clear_machine_cred(clp);
1108 status = -EAGAIN; 1145 status = -EAGAIN;
1109 } 1146 }
1147 if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
1148 status = -EPROTONOSUPPORT;
1149 }
1150 return status;
1151}
1152
1153#ifdef CONFIG_NFS_V4_1
1154static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err)
1155{
1156 switch (err) {
1157 case -NFS4ERR_STALE_CLIENTID:
1158 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1159 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
1160 }
1161}
1162
1163static int nfs4_reset_session(struct nfs_client *clp)
1164{
1165 int status;
1166
1167 status = nfs4_proc_destroy_session(clp->cl_session);
1168 if (status && status != -NFS4ERR_BADSESSION &&
1169 status != -NFS4ERR_DEADSESSION) {
1170 nfs4_session_recovery_handle_error(clp, status);
1171 goto out;
1110 } 1172 }
1173
1174 memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
1175 status = nfs4_proc_create_session(clp, 1);
1176 if (status)
1177 nfs4_session_recovery_handle_error(clp, status);
 1178	/* fall through */
1179out:
1180 /* Wake up the next rpc task even on error */
1181 rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq);
1111 return status; 1182 return status;
1112} 1183}
1113 1184
1185static int nfs4_initialize_session(struct nfs_client *clp)
1186{
1187 int status;
1188
1189 status = nfs4_proc_create_session(clp, 0);
1190 if (!status) {
1191 nfs_mark_client_ready(clp, NFS_CS_READY);
1192 } else if (status == -NFS4ERR_STALE_CLIENTID) {
1193 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1194 set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
1195 } else {
1196 nfs_mark_client_ready(clp, status);
1197 }
1198 return status;
1199}
1200#else /* CONFIG_NFS_V4_1 */
1201static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1202static int nfs4_initialize_session(struct nfs_client *clp) { return 0; }
1203#endif /* CONFIG_NFS_V4_1 */
1204
1205/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
1206 * on EXCHANGE_ID for v4.1
1207 */
1208static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1209{
1210 if (nfs4_has_session(clp)) {
1211 switch (status) {
1212 case -NFS4ERR_DELAY:
1213 case -NFS4ERR_CLID_INUSE:
1214 case -EAGAIN:
1215 break;
1216
1217 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1218 * in nfs4_exchange_id */
1219 default:
1220 return;
1221 }
1222 }
1223 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1224}
1225
1114static void nfs4_state_manager(struct nfs_client *clp) 1226static void nfs4_state_manager(struct nfs_client *clp)
1115{ 1227{
1116 int status = 0; 1228 int status = 0;
@@ -1121,9 +1233,12 @@ static void nfs4_state_manager(struct nfs_client *clp)
1121 /* We're going to have to re-establish a clientid */ 1233 /* We're going to have to re-establish a clientid */
1122 status = nfs4_reclaim_lease(clp); 1234 status = nfs4_reclaim_lease(clp);
1123 if (status) { 1235 if (status) {
1124 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1236 nfs4_set_lease_expired(clp, status);
1125 if (status == -EAGAIN) 1237 if (status == -EAGAIN)
1126 continue; 1238 continue;
1239 if (clp->cl_cons_state ==
1240 NFS_CS_SESSION_INITING)
1241 nfs_mark_client_ready(clp, status);
1127 goto out_error; 1242 goto out_error;
1128 } 1243 }
1129 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1244 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
@@ -1134,25 +1249,44 @@ static void nfs4_state_manager(struct nfs_client *clp)
1134 if (status != 0) 1249 if (status != 0)
1135 continue; 1250 continue;
1136 } 1251 }
1137 1252 /* Initialize or reset the session */
1253 if (nfs4_has_session(clp) &&
1254 test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1256 status = nfs4_initialize_session(clp);
1257 else
1258 status = nfs4_reset_session(clp);
1259 if (status) {
1260 if (status == -NFS4ERR_STALE_CLIENTID)
1261 continue;
1262 goto out_error;
1263 }
1264 }
1138 /* First recover reboot state... */ 1265 /* First recover reboot state... */
1139 if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { 1266 if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1140 status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); 1267 status = nfs4_do_reclaim(clp,
1268 nfs4_reboot_recovery_ops[clp->cl_minorversion]);
1141 if (status == -NFS4ERR_STALE_CLIENTID) 1269 if (status == -NFS4ERR_STALE_CLIENTID)
1142 continue; 1270 continue;
1271 if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
1272 continue;
1143 nfs4_state_end_reclaim_reboot(clp); 1273 nfs4_state_end_reclaim_reboot(clp);
1144 continue; 1274 continue;
1145 } 1275 }
1146 1276
1147 /* Now recover expired state... */ 1277 /* Now recover expired state... */
1148 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { 1278 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1149 status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops); 1279 status = nfs4_do_reclaim(clp,
1280 nfs4_nograce_recovery_ops[clp->cl_minorversion]);
1150 if (status < 0) { 1281 if (status < 0) {
1151 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); 1282 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1152 if (status == -NFS4ERR_STALE_CLIENTID) 1283 if (status == -NFS4ERR_STALE_CLIENTID)
1153 continue; 1284 continue;
1154 if (status == -NFS4ERR_EXPIRED) 1285 if (status == -NFS4ERR_EXPIRED)
1155 continue; 1286 continue;
1287 if (test_bit(NFS4CLNT_SESSION_SETUP,
1288 &clp->cl_state))
1289 continue;
1156 goto out_error; 1290 goto out_error;
1157 } else 1291 } else
1158 nfs4_state_end_reclaim_nograce(clp); 1292 nfs4_state_end_reclaim_nograce(clp);
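nfs4_do_reclaim() is now handed a per-minor-version recovery-ops pointer instead of a single fixed table. A hedged sketch of the array layout this indexing assumes (the nfs40_*/nfs41_* names come from the rest of the series, not from this hunk):

struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
	&nfs40_reboot_recovery_ops,		/* minorversion 0 */
#if defined(CONFIG_NFS_V4_1)
	&nfs41_reboot_recovery_ops,		/* minorversion 1: session-aware */
#endif
};

struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
	&nfs40_nograce_recovery_ops,
#if defined(CONFIG_NFS_V4_1)
	&nfs41_nograce_recovery_ops,
#endif
};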
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1690f0e44b91..617273e7d47f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -192,12 +192,16 @@ static int nfs4_stat_to_errno(int);
192 decode_verifier_maxsz) 192 decode_verifier_maxsz)
193#define encode_remove_maxsz (op_encode_hdr_maxsz + \ 193#define encode_remove_maxsz (op_encode_hdr_maxsz + \
194 nfs4_name_maxsz) 194 nfs4_name_maxsz)
195#define decode_remove_maxsz (op_decode_hdr_maxsz + \
196 decode_change_info_maxsz)
195#define encode_rename_maxsz (op_encode_hdr_maxsz + \ 197#define encode_rename_maxsz (op_encode_hdr_maxsz + \
196 2 * nfs4_name_maxsz) 198 2 * nfs4_name_maxsz)
197#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5) 199#define decode_rename_maxsz (op_decode_hdr_maxsz + \
200 decode_change_info_maxsz + \
201 decode_change_info_maxsz)
198#define encode_link_maxsz (op_encode_hdr_maxsz + \ 202#define encode_link_maxsz (op_encode_hdr_maxsz + \
199 nfs4_name_maxsz) 203 nfs4_name_maxsz)
200#define decode_link_maxsz (op_decode_hdr_maxsz + 5) 204#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
201#define encode_lock_maxsz (op_encode_hdr_maxsz + \ 205#define encode_lock_maxsz (op_encode_hdr_maxsz + \
202 7 + \ 206 7 + \
203 1 + encode_stateid_maxsz + 8) 207 1 + encode_stateid_maxsz + 8)
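The bare 5s being replaced above are the XDR size of a change_info4 result; spelling that out (the macro itself is defined earlier in this file and is shown here only as a reminder of where the number comes from):

/* change_info4: atomic flag (1 word) + before/after change attributes
 * (one 64-bit value = 2 words each): 1 + 2 + 2 = 5 XDR words */
#define decode_change_info_maxsz	(5)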
@@ -240,43 +244,115 @@ static int nfs4_stat_to_errno(int);
240 (encode_getattr_maxsz) 244 (encode_getattr_maxsz)
241#define decode_fs_locations_maxsz \ 245#define decode_fs_locations_maxsz \
242 (0) 246 (0)
247
248#if defined(CONFIG_NFS_V4_1)
249#define NFS4_MAX_MACHINE_NAME_LEN (64)
250
251#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
252 encode_verifier_maxsz + \
253 1 /* co_ownerid.len */ + \
254 XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
255 1 /* flags */ + \
256 1 /* spa_how */ + \
257 0 /* SP4_NONE (for now) */ + \
258 1 /* zero implementation id array */)
259#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
260 2 /* eir_clientid */ + \
261 1 /* eir_sequenceid */ + \
262 1 /* eir_flags */ + \
263 1 /* spr_how */ + \
264 0 /* SP4_NONE (for now) */ + \
265 2 /* eir_server_owner.so_minor_id */ + \
266 /* eir_server_owner.so_major_id<> */ \
267 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
268 /* eir_server_scope<> */ \
269 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
270 1 /* eir_server_impl_id array length */ + \
271 0 /* ignored eir_server_impl_id contents */)
272#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */)
273#define decode_channel_attrs_maxsz (6 + \
274 1 /* ca_rdma_ird.len */ + \
275 1 /* ca_rdma_ird */)
276#define encode_create_session_maxsz (op_encode_hdr_maxsz + \
277 2 /* csa_clientid */ + \
278 1 /* csa_sequence */ + \
279 1 /* csa_flags */ + \
280 encode_channel_attrs_maxsz + \
281 encode_channel_attrs_maxsz + \
282 1 /* csa_cb_program */ + \
283 1 /* csa_sec_parms.len (1) */ + \
284 1 /* cb_secflavor (AUTH_SYS) */ + \
285 1 /* stamp */ + \
286 1 /* machinename.len */ + \
287 XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \
288 1 /* uid */ + \
289 1 /* gid */ + \
290 1 /* gids.len (0) */)
291#define decode_create_session_maxsz (op_decode_hdr_maxsz + \
292 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
293 1 /* csr_sequence */ + \
294 1 /* csr_flags */ + \
295 decode_channel_attrs_maxsz + \
296 decode_channel_attrs_maxsz)
297#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
298#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
299#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
300 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
301#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
302 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
303#else /* CONFIG_NFS_V4_1 */
304#define encode_sequence_maxsz 0
305#define decode_sequence_maxsz 0
306#endif /* CONFIG_NFS_V4_1 */
307
243#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ 308#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
244#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ 309#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
245#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ 310#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
311 encode_sequence_maxsz + \
246 encode_putfh_maxsz + \ 312 encode_putfh_maxsz + \
247 encode_read_maxsz) 313 encode_read_maxsz)
248#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ 314#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \
315 decode_sequence_maxsz + \
249 decode_putfh_maxsz + \ 316 decode_putfh_maxsz + \
250 decode_read_maxsz) 317 decode_read_maxsz)
251#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \ 318#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \
319 encode_sequence_maxsz + \
252 encode_putfh_maxsz + \ 320 encode_putfh_maxsz + \
253 encode_readlink_maxsz) 321 encode_readlink_maxsz)
254#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \ 322#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \
323 decode_sequence_maxsz + \
255 decode_putfh_maxsz + \ 324 decode_putfh_maxsz + \
256 decode_readlink_maxsz) 325 decode_readlink_maxsz)
257#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \ 326#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \
327 encode_sequence_maxsz + \
258 encode_putfh_maxsz + \ 328 encode_putfh_maxsz + \
259 encode_readdir_maxsz) 329 encode_readdir_maxsz)
260#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \ 330#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \
331 decode_sequence_maxsz + \
261 decode_putfh_maxsz + \ 332 decode_putfh_maxsz + \
262 decode_readdir_maxsz) 333 decode_readdir_maxsz)
263#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ 334#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \
335 encode_sequence_maxsz + \
264 encode_putfh_maxsz + \ 336 encode_putfh_maxsz + \
265 encode_write_maxsz + \ 337 encode_write_maxsz + \
266 encode_getattr_maxsz) 338 encode_getattr_maxsz)
267#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ 339#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \
340 decode_sequence_maxsz + \
268 decode_putfh_maxsz + \ 341 decode_putfh_maxsz + \
269 decode_write_maxsz + \ 342 decode_write_maxsz + \
270 decode_getattr_maxsz) 343 decode_getattr_maxsz)
271#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ 344#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
345 encode_sequence_maxsz + \
272 encode_putfh_maxsz + \ 346 encode_putfh_maxsz + \
273 encode_commit_maxsz + \ 347 encode_commit_maxsz + \
274 encode_getattr_maxsz) 348 encode_getattr_maxsz)
275#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ 349#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
350 decode_sequence_maxsz + \
276 decode_putfh_maxsz + \ 351 decode_putfh_maxsz + \
277 decode_commit_maxsz + \ 352 decode_commit_maxsz + \
278 decode_getattr_maxsz) 353 decode_getattr_maxsz)
279#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ 354#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
355 encode_sequence_maxsz + \
280 encode_putfh_maxsz + \ 356 encode_putfh_maxsz + \
281 encode_savefh_maxsz + \ 357 encode_savefh_maxsz + \
282 encode_open_maxsz + \ 358 encode_open_maxsz + \
@@ -285,6 +361,7 @@ static int nfs4_stat_to_errno(int);
285 encode_restorefh_maxsz + \ 361 encode_restorefh_maxsz + \
286 encode_getattr_maxsz) 362 encode_getattr_maxsz)
287#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 363#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
364 decode_sequence_maxsz + \
288 decode_putfh_maxsz + \ 365 decode_putfh_maxsz + \
289 decode_savefh_maxsz + \ 366 decode_savefh_maxsz + \
290 decode_open_maxsz + \ 367 decode_open_maxsz + \
@@ -301,43 +378,53 @@ static int nfs4_stat_to_errno(int);
301 decode_putfh_maxsz + \ 378 decode_putfh_maxsz + \
302 decode_open_confirm_maxsz) 379 decode_open_confirm_maxsz)
303#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ 380#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
381 encode_sequence_maxsz + \
304 encode_putfh_maxsz + \ 382 encode_putfh_maxsz + \
305 encode_open_maxsz + \ 383 encode_open_maxsz + \
306 encode_getattr_maxsz) 384 encode_getattr_maxsz)
307#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 385#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
386 decode_sequence_maxsz + \
308 decode_putfh_maxsz + \ 387 decode_putfh_maxsz + \
309 decode_open_maxsz + \ 388 decode_open_maxsz + \
310 decode_getattr_maxsz) 389 decode_getattr_maxsz)
311#define NFS4_enc_open_downgrade_sz \ 390#define NFS4_enc_open_downgrade_sz \
312 (compound_encode_hdr_maxsz + \ 391 (compound_encode_hdr_maxsz + \
392 encode_sequence_maxsz + \
313 encode_putfh_maxsz + \ 393 encode_putfh_maxsz + \
314 encode_open_downgrade_maxsz + \ 394 encode_open_downgrade_maxsz + \
315 encode_getattr_maxsz) 395 encode_getattr_maxsz)
316#define NFS4_dec_open_downgrade_sz \ 396#define NFS4_dec_open_downgrade_sz \
317 (compound_decode_hdr_maxsz + \ 397 (compound_decode_hdr_maxsz + \
398 decode_sequence_maxsz + \
318 decode_putfh_maxsz + \ 399 decode_putfh_maxsz + \
319 decode_open_downgrade_maxsz + \ 400 decode_open_downgrade_maxsz + \
320 decode_getattr_maxsz) 401 decode_getattr_maxsz)
321#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ 402#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \
403 encode_sequence_maxsz + \
322 encode_putfh_maxsz + \ 404 encode_putfh_maxsz + \
323 encode_close_maxsz + \ 405 encode_close_maxsz + \
324 encode_getattr_maxsz) 406 encode_getattr_maxsz)
325#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ 407#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \
408 decode_sequence_maxsz + \
326 decode_putfh_maxsz + \ 409 decode_putfh_maxsz + \
327 decode_close_maxsz + \ 410 decode_close_maxsz + \
328 decode_getattr_maxsz) 411 decode_getattr_maxsz)
329#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ 412#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
413 encode_sequence_maxsz + \
330 encode_putfh_maxsz + \ 414 encode_putfh_maxsz + \
331 encode_setattr_maxsz + \ 415 encode_setattr_maxsz + \
332 encode_getattr_maxsz) 416 encode_getattr_maxsz)
333#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ 417#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \
418 decode_sequence_maxsz + \
334 decode_putfh_maxsz + \ 419 decode_putfh_maxsz + \
335 decode_setattr_maxsz + \ 420 decode_setattr_maxsz + \
336 decode_getattr_maxsz) 421 decode_getattr_maxsz)
337#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ 422#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \
423 encode_sequence_maxsz + \
338 encode_putfh_maxsz + \ 424 encode_putfh_maxsz + \
339 encode_fsinfo_maxsz) 425 encode_fsinfo_maxsz)
340#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ 426#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \
427 decode_sequence_maxsz + \
341 decode_putfh_maxsz + \ 428 decode_putfh_maxsz + \
342 decode_fsinfo_maxsz) 429 decode_fsinfo_maxsz)
343#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ 430#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \
@@ -359,64 +446,81 @@ static int nfs4_stat_to_errno(int);
359 decode_putrootfh_maxsz + \ 446 decode_putrootfh_maxsz + \
360 decode_fsinfo_maxsz) 447 decode_fsinfo_maxsz)
361#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ 448#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \
449 encode_sequence_maxsz + \
362 encode_putfh_maxsz + \ 450 encode_putfh_maxsz + \
363 encode_lock_maxsz) 451 encode_lock_maxsz)
364#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ 452#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
453 decode_sequence_maxsz + \
365 decode_putfh_maxsz + \ 454 decode_putfh_maxsz + \
366 decode_lock_maxsz) 455 decode_lock_maxsz)
367#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ 456#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
457 encode_sequence_maxsz + \
368 encode_putfh_maxsz + \ 458 encode_putfh_maxsz + \
369 encode_lockt_maxsz) 459 encode_lockt_maxsz)
370#define NFS4_dec_lockt_sz (compound_decode_hdr_maxsz + \ 460#define NFS4_dec_lockt_sz (compound_decode_hdr_maxsz + \
461 decode_sequence_maxsz + \
371 decode_putfh_maxsz + \ 462 decode_putfh_maxsz + \
372 decode_lockt_maxsz) 463 decode_lockt_maxsz)
373#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ 464#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
465 encode_sequence_maxsz + \
374 encode_putfh_maxsz + \ 466 encode_putfh_maxsz + \
375 encode_locku_maxsz) 467 encode_locku_maxsz)
376#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ 468#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \
469 decode_sequence_maxsz + \
377 decode_putfh_maxsz + \ 470 decode_putfh_maxsz + \
378 decode_locku_maxsz) 471 decode_locku_maxsz)
379#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ 472#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
473 encode_sequence_maxsz + \
380 encode_putfh_maxsz + \ 474 encode_putfh_maxsz + \
381 encode_access_maxsz + \ 475 encode_access_maxsz + \
382 encode_getattr_maxsz) 476 encode_getattr_maxsz)
383#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ 477#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
478 decode_sequence_maxsz + \
384 decode_putfh_maxsz + \ 479 decode_putfh_maxsz + \
385 decode_access_maxsz + \ 480 decode_access_maxsz + \
386 decode_getattr_maxsz) 481 decode_getattr_maxsz)
387#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ 482#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
483 encode_sequence_maxsz + \
388 encode_putfh_maxsz + \ 484 encode_putfh_maxsz + \
389 encode_getattr_maxsz) 485 encode_getattr_maxsz)
390#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ 486#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
487 decode_sequence_maxsz + \
391 decode_putfh_maxsz + \ 488 decode_putfh_maxsz + \
392 decode_getattr_maxsz) 489 decode_getattr_maxsz)
393#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ 490#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
491 encode_sequence_maxsz + \
394 encode_putfh_maxsz + \ 492 encode_putfh_maxsz + \
395 encode_lookup_maxsz + \ 493 encode_lookup_maxsz + \
396 encode_getattr_maxsz + \ 494 encode_getattr_maxsz + \
397 encode_getfh_maxsz) 495 encode_getfh_maxsz)
398#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ 496#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
497 decode_sequence_maxsz + \
399 decode_putfh_maxsz + \ 498 decode_putfh_maxsz + \
400 decode_lookup_maxsz + \ 499 decode_lookup_maxsz + \
401 decode_getattr_maxsz + \ 500 decode_getattr_maxsz + \
402 decode_getfh_maxsz) 501 decode_getfh_maxsz)
403#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ 502#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
503 encode_sequence_maxsz + \
404 encode_putrootfh_maxsz + \ 504 encode_putrootfh_maxsz + \
405 encode_getattr_maxsz + \ 505 encode_getattr_maxsz + \
406 encode_getfh_maxsz) 506 encode_getfh_maxsz)
407#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \ 507#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \
508 decode_sequence_maxsz + \
408 decode_putrootfh_maxsz + \ 509 decode_putrootfh_maxsz + \
409 decode_getattr_maxsz + \ 510 decode_getattr_maxsz + \
410 decode_getfh_maxsz) 511 decode_getfh_maxsz)
411#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ 512#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
513 encode_sequence_maxsz + \
412 encode_putfh_maxsz + \ 514 encode_putfh_maxsz + \
413 encode_remove_maxsz + \ 515 encode_remove_maxsz + \
414 encode_getattr_maxsz) 516 encode_getattr_maxsz)
415#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ 517#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
518 decode_sequence_maxsz + \
416 decode_putfh_maxsz + \ 519 decode_putfh_maxsz + \
417 op_decode_hdr_maxsz + 5 + \ 520 decode_remove_maxsz + \
418 decode_getattr_maxsz) 521 decode_getattr_maxsz)
419#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ 522#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
523 encode_sequence_maxsz + \
420 encode_putfh_maxsz + \ 524 encode_putfh_maxsz + \
421 encode_savefh_maxsz + \ 525 encode_savefh_maxsz + \
422 encode_putfh_maxsz + \ 526 encode_putfh_maxsz + \
@@ -425,6 +529,7 @@ static int nfs4_stat_to_errno(int);
425 encode_restorefh_maxsz + \ 529 encode_restorefh_maxsz + \
426 encode_getattr_maxsz) 530 encode_getattr_maxsz)
427#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ 531#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
532 decode_sequence_maxsz + \
428 decode_putfh_maxsz + \ 533 decode_putfh_maxsz + \
429 decode_savefh_maxsz + \ 534 decode_savefh_maxsz + \
430 decode_putfh_maxsz + \ 535 decode_putfh_maxsz + \
@@ -433,6 +538,7 @@ static int nfs4_stat_to_errno(int);
433 decode_restorefh_maxsz + \ 538 decode_restorefh_maxsz + \
434 decode_getattr_maxsz) 539 decode_getattr_maxsz)
435#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ 540#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
541 encode_sequence_maxsz + \
436 encode_putfh_maxsz + \ 542 encode_putfh_maxsz + \
437 encode_savefh_maxsz + \ 543 encode_savefh_maxsz + \
438 encode_putfh_maxsz + \ 544 encode_putfh_maxsz + \
@@ -441,6 +547,7 @@ static int nfs4_stat_to_errno(int);
441 encode_restorefh_maxsz + \ 547 encode_restorefh_maxsz + \
442 decode_getattr_maxsz) 548 decode_getattr_maxsz)
443#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ 549#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
550 decode_sequence_maxsz + \
444 decode_putfh_maxsz + \ 551 decode_putfh_maxsz + \
445 decode_savefh_maxsz + \ 552 decode_savefh_maxsz + \
446 decode_putfh_maxsz + \ 553 decode_putfh_maxsz + \
@@ -449,16 +556,19 @@ static int nfs4_stat_to_errno(int);
449 decode_restorefh_maxsz + \ 556 decode_restorefh_maxsz + \
450 decode_getattr_maxsz) 557 decode_getattr_maxsz)
451#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ 558#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
559 encode_sequence_maxsz + \
452 encode_putfh_maxsz + \ 560 encode_putfh_maxsz + \
453 encode_symlink_maxsz + \ 561 encode_symlink_maxsz + \
454 encode_getattr_maxsz + \ 562 encode_getattr_maxsz + \
455 encode_getfh_maxsz) 563 encode_getfh_maxsz)
456#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ 564#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \
565 decode_sequence_maxsz + \
457 decode_putfh_maxsz + \ 566 decode_putfh_maxsz + \
458 decode_symlink_maxsz + \ 567 decode_symlink_maxsz + \
459 decode_getattr_maxsz + \ 568 decode_getattr_maxsz + \
460 decode_getfh_maxsz) 569 decode_getfh_maxsz)
461#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ 570#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
571 encode_sequence_maxsz + \
462 encode_putfh_maxsz + \ 572 encode_putfh_maxsz + \
463 encode_savefh_maxsz + \ 573 encode_savefh_maxsz + \
464 encode_create_maxsz + \ 574 encode_create_maxsz + \
@@ -467,6 +577,7 @@ static int nfs4_stat_to_errno(int);
467 encode_restorefh_maxsz + \ 577 encode_restorefh_maxsz + \
468 encode_getattr_maxsz) 578 encode_getattr_maxsz)
469#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ 579#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
580 decode_sequence_maxsz + \
470 decode_putfh_maxsz + \ 581 decode_putfh_maxsz + \
471 decode_savefh_maxsz + \ 582 decode_savefh_maxsz + \
472 decode_create_maxsz + \ 583 decode_create_maxsz + \
@@ -475,52 +586,98 @@ static int nfs4_stat_to_errno(int);
475 decode_restorefh_maxsz + \ 586 decode_restorefh_maxsz + \
476 decode_getattr_maxsz) 587 decode_getattr_maxsz)
477#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ 588#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
589 encode_sequence_maxsz + \
478 encode_putfh_maxsz + \ 590 encode_putfh_maxsz + \
479 encode_getattr_maxsz) 591 encode_getattr_maxsz)
480#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \ 592#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \
593 decode_sequence_maxsz + \
481 decode_putfh_maxsz + \ 594 decode_putfh_maxsz + \
482 decode_getattr_maxsz) 595 decode_getattr_maxsz)
483#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \ 596#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \
597 encode_sequence_maxsz + \
484 encode_putfh_maxsz + \ 598 encode_putfh_maxsz + \
485 encode_statfs_maxsz) 599 encode_statfs_maxsz)
486#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \ 600#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \
601 decode_sequence_maxsz + \
487 decode_putfh_maxsz + \ 602 decode_putfh_maxsz + \
488 decode_statfs_maxsz) 603 decode_statfs_maxsz)
489#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ 604#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
605 encode_sequence_maxsz + \
490 encode_putfh_maxsz + \ 606 encode_putfh_maxsz + \
491 encode_getattr_maxsz) 607 encode_getattr_maxsz)
492#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ 608#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
609 decode_sequence_maxsz + \
493 decode_putfh_maxsz + \ 610 decode_putfh_maxsz + \
494 decode_getattr_maxsz) 611 decode_getattr_maxsz)
495#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ 612#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \
613 encode_sequence_maxsz + \
496 encode_putfh_maxsz + \ 614 encode_putfh_maxsz + \
497 encode_delegreturn_maxsz + \ 615 encode_delegreturn_maxsz + \
498 encode_getattr_maxsz) 616 encode_getattr_maxsz)
499#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ 617#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
618 decode_sequence_maxsz + \
500 decode_delegreturn_maxsz + \ 619 decode_delegreturn_maxsz + \
501 decode_getattr_maxsz) 620 decode_getattr_maxsz)
502#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ 621#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
622 encode_sequence_maxsz + \
503 encode_putfh_maxsz + \ 623 encode_putfh_maxsz + \
504 encode_getacl_maxsz) 624 encode_getacl_maxsz)
505#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ 625#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \
626 decode_sequence_maxsz + \
506 decode_putfh_maxsz + \ 627 decode_putfh_maxsz + \
507 decode_getacl_maxsz) 628 decode_getacl_maxsz)
508#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ 629#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \
630 encode_sequence_maxsz + \
509 encode_putfh_maxsz + \ 631 encode_putfh_maxsz + \
510 encode_setacl_maxsz) 632 encode_setacl_maxsz)
511#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ 633#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \
634 decode_sequence_maxsz + \
512 decode_putfh_maxsz + \ 635 decode_putfh_maxsz + \
513 decode_setacl_maxsz) 636 decode_setacl_maxsz)
514#define NFS4_enc_fs_locations_sz \ 637#define NFS4_enc_fs_locations_sz \
515 (compound_encode_hdr_maxsz + \ 638 (compound_encode_hdr_maxsz + \
639 encode_sequence_maxsz + \
516 encode_putfh_maxsz + \ 640 encode_putfh_maxsz + \
517 encode_lookup_maxsz + \ 641 encode_lookup_maxsz + \
518 encode_fs_locations_maxsz) 642 encode_fs_locations_maxsz)
519#define NFS4_dec_fs_locations_sz \ 643#define NFS4_dec_fs_locations_sz \
520 (compound_decode_hdr_maxsz + \ 644 (compound_decode_hdr_maxsz + \
645 decode_sequence_maxsz + \
521 decode_putfh_maxsz + \ 646 decode_putfh_maxsz + \
522 decode_lookup_maxsz + \ 647 decode_lookup_maxsz + \
523 decode_fs_locations_maxsz) 648 decode_fs_locations_maxsz)
649#if defined(CONFIG_NFS_V4_1)
650#define NFS4_enc_exchange_id_sz \
651 (compound_encode_hdr_maxsz + \
652 encode_exchange_id_maxsz)
653#define NFS4_dec_exchange_id_sz \
654 (compound_decode_hdr_maxsz + \
655 decode_exchange_id_maxsz)
656#define NFS4_enc_create_session_sz \
657 (compound_encode_hdr_maxsz + \
658 encode_create_session_maxsz)
659#define NFS4_dec_create_session_sz \
660 (compound_decode_hdr_maxsz + \
661 decode_create_session_maxsz)
662#define NFS4_enc_destroy_session_sz (compound_encode_hdr_maxsz + \
663 encode_destroy_session_maxsz)
664#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
665 decode_destroy_session_maxsz)
666#define NFS4_enc_sequence_sz \
667 (compound_encode_hdr_maxsz + \
668 encode_sequence_maxsz)
669#define NFS4_dec_sequence_sz \
670 (compound_decode_hdr_maxsz + \
671 decode_sequence_maxsz)
672#define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \
673 encode_sequence_maxsz + \
674 encode_putrootfh_maxsz + \
675 encode_fsinfo_maxsz)
676#define NFS4_dec_get_lease_time_sz (compound_decode_hdr_maxsz + \
677 decode_sequence_maxsz + \
678 decode_putrootfh_maxsz + \
679 decode_fsinfo_maxsz)
680#endif /* CONFIG_NFS_V4_1 */
524 681
525static const umode_t nfs_type2fmt[] = { 682static const umode_t nfs_type2fmt[] = {
526 [NF4BAD] = 0, 683 [NF4BAD] = 0,
@@ -541,6 +698,8 @@ struct compound_hdr {
541 __be32 * nops_p; 698 __be32 * nops_p;
542 uint32_t taglen; 699 uint32_t taglen;
543 char * tag; 700 char * tag;
701 uint32_t replen; /* expected reply words */
702 u32 minorversion;
544}; 703};
545 704
546/* 705/*
@@ -576,22 +735,31 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *
576 xdr_encode_opaque(p, str, len); 735 xdr_encode_opaque(p, str, len);
577} 736}
578 737
579static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 738static void encode_compound_hdr(struct xdr_stream *xdr,
739 struct rpc_rqst *req,
740 struct compound_hdr *hdr)
580{ 741{
581 __be32 *p; 742 __be32 *p;
743 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
744
745 /* initialize running count of expected bytes in reply.
746 * NOTE: the replied tag SHOULD be the same as the one sent,
747 * but the server is not REQUIRED to do so. */
748 hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
582 749
583 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); 750 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
584 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 751 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
585 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); 752 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
586 WRITE32(hdr->taglen); 753 WRITE32(hdr->taglen);
587 WRITEMEM(hdr->tag, hdr->taglen); 754 WRITEMEM(hdr->tag, hdr->taglen);
588 WRITE32(NFS4_MINOR_VERSION); 755 WRITE32(hdr->minorversion);
589 hdr->nops_p = p; 756 hdr->nops_p = p;
590 WRITE32(hdr->nops); 757 WRITE32(hdr->nops);
591} 758}
592 759
593static void encode_nops(struct compound_hdr *hdr) 760static void encode_nops(struct compound_hdr *hdr)
594{ 761{
762 BUG_ON(hdr->nops > NFS4_MAX_OPS);
595 *hdr->nops_p = htonl(hdr->nops); 763 *hdr->nops_p = htonl(hdr->nops);
596} 764}
597 765
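The new hdr->replen field keeps a running count, in 32-bit words, of the largest possible reply decoded so far; each encode_*() helper below adds its operation's decode_*_maxsz. That lets encoders which splice page data into the reply compute the byte offset of that data without hand-maintained constants. A hedged sketch of the intended use (the readdir encoder's page setup, not part of this hunk):

	/* reply data for the entries starts hdr.replen words into the
	 * receive buffer, i.e. hdr.replen << 2 bytes */
	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
			 args->pages, args->pgbase, args->count);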
@@ -736,6 +904,7 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
736 WRITE32(OP_ACCESS); 904 WRITE32(OP_ACCESS);
737 WRITE32(access); 905 WRITE32(access);
738 hdr->nops++; 906 hdr->nops++;
907 hdr->replen += decode_access_maxsz;
739} 908}
740 909
741static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 910static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
@@ -747,6 +916,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
747 WRITE32(arg->seqid->sequence->counter); 916 WRITE32(arg->seqid->sequence->counter);
748 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 917 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
749 hdr->nops++; 918 hdr->nops++;
919 hdr->replen += decode_close_maxsz;
750} 920}
751 921
752static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 922static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
@@ -758,6 +928,7 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
758 WRITE64(args->offset); 928 WRITE64(args->offset);
759 WRITE32(args->count); 929 WRITE32(args->count);
760 hdr->nops++; 930 hdr->nops++;
931 hdr->replen += decode_commit_maxsz;
761} 932}
762 933
763static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) 934static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
@@ -789,6 +960,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
789 WRITE32(create->name->len); 960 WRITE32(create->name->len);
790 WRITEMEM(create->name->name, create->name->len); 961 WRITEMEM(create->name->name, create->name->len);
791 hdr->nops++; 962 hdr->nops++;
963 hdr->replen += decode_create_maxsz;
792 964
793 encode_attrs(xdr, create->attrs, create->server); 965 encode_attrs(xdr, create->attrs, create->server);
794} 966}
@@ -802,6 +974,7 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
802 WRITE32(1); 974 WRITE32(1);
803 WRITE32(bitmap); 975 WRITE32(bitmap);
804 hdr->nops++; 976 hdr->nops++;
977 hdr->replen += decode_getattr_maxsz;
805} 978}
806 979
807static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) 980static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
@@ -814,6 +987,7 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
814 WRITE32(bm0); 987 WRITE32(bm0);
815 WRITE32(bm1); 988 WRITE32(bm1);
816 hdr->nops++; 989 hdr->nops++;
990 hdr->replen += decode_getattr_maxsz;
817} 991}
818 992
819static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 993static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -841,6 +1015,7 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
841 RESERVE_SPACE(4); 1015 RESERVE_SPACE(4);
842 WRITE32(OP_GETFH); 1016 WRITE32(OP_GETFH);
843 hdr->nops++; 1017 hdr->nops++;
1018 hdr->replen += decode_getfh_maxsz;
844} 1019}
845 1020
846static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1021static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -852,6 +1027,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
852 WRITE32(name->len); 1027 WRITE32(name->len);
853 WRITEMEM(name->name, name->len); 1028 WRITEMEM(name->name, name->len);
854 hdr->nops++; 1029 hdr->nops++;
1030 hdr->replen += decode_link_maxsz;
855} 1031}
856 1032
857static inline int nfs4_lock_type(struct file_lock *fl, int block) 1033static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -899,6 +1075,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
899 WRITE32(args->lock_seqid->sequence->counter); 1075 WRITE32(args->lock_seqid->sequence->counter);
900 } 1076 }
901 hdr->nops++; 1077 hdr->nops++;
1078 hdr->replen += decode_lock_maxsz;
902} 1079}
903 1080
904static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) 1081static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
@@ -915,6 +1092,7 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
915 WRITEMEM("lock id:", 8); 1092 WRITEMEM("lock id:", 8);
916 WRITE64(args->lock_owner.id); 1093 WRITE64(args->lock_owner.id);
917 hdr->nops++; 1094 hdr->nops++;
1095 hdr->replen += decode_lockt_maxsz;
918} 1096}
919 1097
920static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) 1098static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
@@ -929,6 +1107,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
929 WRITE64(args->fl->fl_start); 1107 WRITE64(args->fl->fl_start);
930 WRITE64(nfs4_lock_length(args->fl)); 1108 WRITE64(nfs4_lock_length(args->fl));
931 hdr->nops++; 1109 hdr->nops++;
1110 hdr->replen += decode_locku_maxsz;
932} 1111}
933 1112
934static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1113static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -941,6 +1120,7 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
941 WRITE32(len); 1120 WRITE32(len);
942 WRITEMEM(name->name, len); 1121 WRITEMEM(name->name, len);
943 hdr->nops++; 1122 hdr->nops++;
1123 hdr->replen += decode_lookup_maxsz;
944} 1124}
945 1125
946static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) 1126static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1080,6 +1260,7 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
1080 BUG(); 1260 BUG();
1081 } 1261 }
1082 hdr->nops++; 1262 hdr->nops++;
1263 hdr->replen += decode_open_maxsz;
1083} 1264}
1084 1265
1085static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) 1266static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
@@ -1091,6 +1272,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
1091 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1272 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1092 WRITE32(arg->seqid->sequence->counter); 1273 WRITE32(arg->seqid->sequence->counter);
1093 hdr->nops++; 1274 hdr->nops++;
1275 hdr->replen += decode_open_confirm_maxsz;
1094} 1276}
1095 1277
1096static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) 1278static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
@@ -1103,6 +1285,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
1103 WRITE32(arg->seqid->sequence->counter); 1285 WRITE32(arg->seqid->sequence->counter);
1104 encode_share_access(xdr, arg->fmode); 1286 encode_share_access(xdr, arg->fmode);
1105 hdr->nops++; 1287 hdr->nops++;
1288 hdr->replen += decode_open_downgrade_maxsz;
1106} 1289}
1107 1290
1108static void 1291static void
@@ -1116,6 +1299,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
1116 WRITE32(len); 1299 WRITE32(len);
1117 WRITEMEM(fh->data, len); 1300 WRITEMEM(fh->data, len);
1118 hdr->nops++; 1301 hdr->nops++;
1302 hdr->replen += decode_putfh_maxsz;
1119} 1303}
1120 1304
1121static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) 1305static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
@@ -1125,6 +1309,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1125 RESERVE_SPACE(4); 1309 RESERVE_SPACE(4);
1126 WRITE32(OP_PUTROOTFH); 1310 WRITE32(OP_PUTROOTFH);
1127 hdr->nops++; 1311 hdr->nops++;
1312 hdr->replen += decode_putrootfh_maxsz;
1128} 1313}
1129 1314
1130static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) 1315static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
@@ -1153,6 +1338,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1153 WRITE64(args->offset); 1338 WRITE64(args->offset);
1154 WRITE32(args->count); 1339 WRITE32(args->count);
1155 hdr->nops++; 1340 hdr->nops++;
1341 hdr->replen += decode_read_maxsz;
1156} 1342}
1157 1343
1158static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1344static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1178,6 +1364,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1178 WRITE32(attrs[0] & readdir->bitmask[0]); 1364 WRITE32(attrs[0] & readdir->bitmask[0]);
1179 WRITE32(attrs[1] & readdir->bitmask[1]); 1365 WRITE32(attrs[1] & readdir->bitmask[1]);
1180 hdr->nops++; 1366 hdr->nops++;
1367 hdr->replen += decode_readdir_maxsz;
1181 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1368 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1182 __func__, 1369 __func__,
1183 (unsigned long long)readdir->cookie, 1370 (unsigned long long)readdir->cookie,
@@ -1194,6 +1381,7 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
1194 RESERVE_SPACE(4); 1381 RESERVE_SPACE(4);
1195 WRITE32(OP_READLINK); 1382 WRITE32(OP_READLINK);
1196 hdr->nops++; 1383 hdr->nops++;
1384 hdr->replen += decode_readlink_maxsz;
1197} 1385}
1198 1386
1199static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) 1387static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -1205,6 +1393,7 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
1205 WRITE32(name->len); 1393 WRITE32(name->len);
1206 WRITEMEM(name->name, name->len); 1394 WRITEMEM(name->name, name->len);
1207 hdr->nops++; 1395 hdr->nops++;
1396 hdr->replen += decode_remove_maxsz;
1208} 1397}
1209 1398
1210static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) 1399static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
@@ -1220,6 +1409,7 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
1220 WRITE32(newname->len); 1409 WRITE32(newname->len);
1221 WRITEMEM(newname->name, newname->len); 1410 WRITEMEM(newname->name, newname->len);
1222 hdr->nops++; 1411 hdr->nops++;
1412 hdr->replen += decode_rename_maxsz;
1223} 1413}
1224 1414
1225static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) 1415static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
@@ -1230,6 +1420,7 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
1230 WRITE32(OP_RENEW); 1420 WRITE32(OP_RENEW);
1231 WRITE64(client_stateid->cl_clientid); 1421 WRITE64(client_stateid->cl_clientid);
1232 hdr->nops++; 1422 hdr->nops++;
1423 hdr->replen += decode_renew_maxsz;
1233} 1424}
1234 1425
1235static void 1426static void
@@ -1240,6 +1431,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1240 RESERVE_SPACE(4); 1431 RESERVE_SPACE(4);
1241 WRITE32(OP_RESTOREFH); 1432 WRITE32(OP_RESTOREFH);
1242 hdr->nops++; 1433 hdr->nops++;
1434 hdr->replen += decode_restorefh_maxsz;
1243} 1435}
1244 1436
1245static int 1437static int
@@ -1259,6 +1451,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1259 WRITE32(arg->acl_len); 1451 WRITE32(arg->acl_len);
1260 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1452 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1261 hdr->nops++; 1453 hdr->nops++;
1454 hdr->replen += decode_setacl_maxsz;
1262 return 0; 1455 return 0;
1263} 1456}
1264 1457
@@ -1270,6 +1463,7 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1270 RESERVE_SPACE(4); 1463 RESERVE_SPACE(4);
1271 WRITE32(OP_SAVEFH); 1464 WRITE32(OP_SAVEFH);
1272 hdr->nops++; 1465 hdr->nops++;
1466 hdr->replen += decode_savefh_maxsz;
1273} 1467}
1274 1468
1275static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) 1469static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
@@ -1280,6 +1474,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1280 WRITE32(OP_SETATTR); 1474 WRITE32(OP_SETATTR);
1281 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); 1475 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1282 hdr->nops++; 1476 hdr->nops++;
1477 hdr->replen += decode_setattr_maxsz;
1283 encode_attrs(xdr, arg->iap, server); 1478 encode_attrs(xdr, arg->iap, server);
1284} 1479}
1285 1480
@@ -1299,6 +1494,7 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1299 RESERVE_SPACE(4); 1494 RESERVE_SPACE(4);
1300 WRITE32(setclientid->sc_cb_ident); 1495 WRITE32(setclientid->sc_cb_ident);
1301 hdr->nops++; 1496 hdr->nops++;
1497 hdr->replen += decode_setclientid_maxsz;
1302} 1498}
1303 1499
1304static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) 1500static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
@@ -1310,6 +1506,7 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
1310 WRITE64(client_state->cl_clientid); 1506 WRITE64(client_state->cl_clientid);
1311 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1507 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1312 hdr->nops++; 1508 hdr->nops++;
1509 hdr->replen += decode_setclientid_confirm_maxsz;
1313} 1510}
1314 1511
1315static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) 1512static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
@@ -1328,6 +1525,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1328 1525
1329 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1526 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1330 hdr->nops++; 1527 hdr->nops++;
1528 hdr->replen += decode_write_maxsz;
1331} 1529}
1332 1530
1333static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) 1531static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
@@ -1339,11 +1537,163 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1339 WRITE32(OP_DELEGRETURN); 1537 WRITE32(OP_DELEGRETURN);
1340 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1538 WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
1341 hdr->nops++; 1539 hdr->nops++;
1540 hdr->replen += decode_delegreturn_maxsz;
1541}
1542
1543#if defined(CONFIG_NFS_V4_1)
1544/* NFSv4.1 operations */
1545static void encode_exchange_id(struct xdr_stream *xdr,
1546 struct nfs41_exchange_id_args *args,
1547 struct compound_hdr *hdr)
1548{
1549 __be32 *p;
1550
1551 RESERVE_SPACE(4 + sizeof(args->verifier->data));
1552 WRITE32(OP_EXCHANGE_ID);
1553 WRITEMEM(args->verifier->data, sizeof(args->verifier->data));
1554
1555 encode_string(xdr, args->id_len, args->id);
1556
1557 RESERVE_SPACE(12);
1558 WRITE32(args->flags);
1559 WRITE32(0); /* spa_how: SP4_NONE (no state protection) */
1560 WRITE32(0); /* zero length implementation id array */
1561 hdr->nops++;
1562 hdr->replen += decode_exchange_id_maxsz;
1563}
1564
1565static void encode_create_session(struct xdr_stream *xdr,
1566 struct nfs41_create_session_args *args,
1567 struct compound_hdr *hdr)
1568{
1569 __be32 *p;
1570 char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
1571 uint32_t len;
1572 struct nfs_client *clp = args->client;
1573
1574 RESERVE_SPACE(4);
1575 WRITE32(OP_CREATE_SESSION);
1576
1577 RESERVE_SPACE(8);
1578 WRITE64(clp->cl_ex_clid);
1579
1580 RESERVE_SPACE(8);
1581 WRITE32(clp->cl_seqid); /* sequence id */
1582 WRITE32(args->flags); /* flags */
1583
1584 RESERVE_SPACE(2*28); /* 2 channel_attrs */
1585 /* Fore Channel */
1586 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */
1587 WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */
1588 WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */
1589 WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1590 WRITE32(args->fc_attrs.max_ops); /* max operations */
1591 WRITE32(args->fc_attrs.max_reqs); /* max requests */
1592 WRITE32(0); /* rdmachannel_attrs */
1593
1594 /* Back Channel */
1595 WRITE32(args->bc_attrs.headerpadsz); /* header padding size */
1596 WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */
1597 WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */
1598 WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1599 WRITE32(args->bc_attrs.max_ops); /* max operations */
1600 WRITE32(args->bc_attrs.max_reqs); /* max requests */
1601 WRITE32(0); /* rdmachannel_attrs */
1602
1603 RESERVE_SPACE(4);
1604 WRITE32(args->cb_program); /* cb_program */
1605
1606 RESERVE_SPACE(4); /* # of security flavors */
1607 WRITE32(1);
1608
1609 RESERVE_SPACE(4);
1610 WRITE32(RPC_AUTH_UNIX); /* auth_sys */
1611
1612 /* authsys_parms rfc1831 */
1613 RESERVE_SPACE(4);
1614 WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
1615 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1616 clp->cl_ipaddr);
1617 RESERVE_SPACE(16 + len);
1618 WRITE32(len);
1619 WRITEMEM(machine_name, len);
1620 WRITE32(0); /* UID */
1621 WRITE32(0); /* GID */
1622 WRITE32(0); /* No more gids */
1623 hdr->nops++;
1624 hdr->replen += decode_create_session_maxsz;
1625}
1626
1627static void encode_destroy_session(struct xdr_stream *xdr,
1628 struct nfs4_session *session,
1629 struct compound_hdr *hdr)
1630{
1631 __be32 *p;
1632 RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN);
1633 WRITE32(OP_DESTROY_SESSION);
1634 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1635 hdr->nops++;
1636 hdr->replen += decode_destroy_session_maxsz;
1342} 1637}
1638#endif /* CONFIG_NFS_V4_1 */
1639
1640static void encode_sequence(struct xdr_stream *xdr,
1641 const struct nfs4_sequence_args *args,
1642 struct compound_hdr *hdr)
1643{
1644#if defined(CONFIG_NFS_V4_1)
1645 struct nfs4_session *session = args->sa_session;
1646 struct nfs4_slot_table *tp;
1647 struct nfs4_slot *slot;
1648 __be32 *p;
1649
1650 if (!session)
1651 return;
1652
1653 tp = &session->fc_slot_table;
1654
1655 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1656 slot = tp->slots + args->sa_slotid;
1657
1658 RESERVE_SPACE(4);
1659 WRITE32(OP_SEQUENCE);
1660
1661 /*
1662 * Sessionid + seqid + slotid + max slotid + cache_this
1663 */
1664 dprintk("%s: sessionid=%u:%u:%u:%u seqid=%d slotid=%d "
1665 "max_slotid=%d cache_this=%d\n",
1666 __func__,
1667 ((u32 *)session->sess_id.data)[0],
1668 ((u32 *)session->sess_id.data)[1],
1669 ((u32 *)session->sess_id.data)[2],
1670 ((u32 *)session->sess_id.data)[3],
1671 slot->seq_nr, args->sa_slotid,
1672 tp->highest_used_slotid, args->sa_cache_this);
1673 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16);
1674 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1675 WRITE32(slot->seq_nr);
1676 WRITE32(args->sa_slotid);
1677 WRITE32(tp->highest_used_slotid);
1678 WRITE32(args->sa_cache_this);
1679 hdr->nops++;
1680 hdr->replen += decode_sequence_maxsz;
1681#endif /* CONFIG_NFS_V4_1 */
1682}
1683
1343/* 1684/*
1344 * END OF "GENERIC" ENCODE ROUTINES. 1685 * END OF "GENERIC" ENCODE ROUTINES.
1345 */ 1686 */
1346 1687
1688static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
1689{
1690#if defined(CONFIG_NFS_V4_1)
1691 if (args->sa_session)
1692 return args->sa_session->clp->cl_minorversion;
1693#endif /* CONFIG_NFS_V4_1 */
1694 return 0;
1695}
1696
1347/* 1697/*
1348 * Encode an ACCESS request 1698 * Encode an ACCESS request
1349 */ 1699 */
@@ -1351,11 +1701,12 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
1351{ 1701{
1352 struct xdr_stream xdr; 1702 struct xdr_stream xdr;
1353 struct compound_hdr hdr = { 1703 struct compound_hdr hdr = {
1354 .nops = 0, 1704 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1355 }; 1705 };
1356 1706
1357 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1707 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1358 encode_compound_hdr(&xdr, &hdr); 1708 encode_compound_hdr(&xdr, req, &hdr);
1709 encode_sequence(&xdr, &args->seq_args, &hdr);
1359 encode_putfh(&xdr, args->fh, &hdr); 1710 encode_putfh(&xdr, args->fh, &hdr);
1360 encode_access(&xdr, args->access, &hdr); 1711 encode_access(&xdr, args->access, &hdr);
1361 encode_getfattr(&xdr, args->bitmask, &hdr); 1712 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1370,11 +1721,12 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs
1370{ 1721{
1371 struct xdr_stream xdr; 1722 struct xdr_stream xdr;
1372 struct compound_hdr hdr = { 1723 struct compound_hdr hdr = {
1373 .nops = 0, 1724 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1374 }; 1725 };
1375 1726
1376 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1727 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1377 encode_compound_hdr(&xdr, &hdr); 1728 encode_compound_hdr(&xdr, req, &hdr);
1729 encode_sequence(&xdr, &args->seq_args, &hdr);
1378 encode_putfh(&xdr, args->dir_fh, &hdr); 1730 encode_putfh(&xdr, args->dir_fh, &hdr);
1379 encode_lookup(&xdr, args->name, &hdr); 1731 encode_lookup(&xdr, args->name, &hdr);
1380 encode_getfh(&xdr, &hdr); 1732 encode_getfh(&xdr, &hdr);
@@ -1390,11 +1742,12 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc
1390{ 1742{
1391 struct xdr_stream xdr; 1743 struct xdr_stream xdr;
1392 struct compound_hdr hdr = { 1744 struct compound_hdr hdr = {
1393 .nops = 0, 1745 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1394 }; 1746 };
1395 1747
1396 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1748 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1397 encode_compound_hdr(&xdr, &hdr); 1749 encode_compound_hdr(&xdr, req, &hdr);
1750 encode_sequence(&xdr, &args->seq_args, &hdr);
1398 encode_putrootfh(&xdr, &hdr); 1751 encode_putrootfh(&xdr, &hdr);
1399 encode_getfh(&xdr, &hdr); 1752 encode_getfh(&xdr, &hdr);
1400 encode_getfattr(&xdr, args->bitmask, &hdr); 1753 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1409,11 +1762,12 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1409{ 1762{
1410 struct xdr_stream xdr; 1763 struct xdr_stream xdr;
1411 struct compound_hdr hdr = { 1764 struct compound_hdr hdr = {
1412 .nops = 0, 1765 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1413 }; 1766 };
1414 1767
1415 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1768 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1416 encode_compound_hdr(&xdr, &hdr); 1769 encode_compound_hdr(&xdr, req, &hdr);
1770 encode_sequence(&xdr, &args->seq_args, &hdr);
1417 encode_putfh(&xdr, args->fh, &hdr); 1771 encode_putfh(&xdr, args->fh, &hdr);
1418 encode_remove(&xdr, &args->name, &hdr); 1772 encode_remove(&xdr, &args->name, &hdr);
1419 encode_getfattr(&xdr, args->bitmask, &hdr); 1773 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1428,11 +1782,12 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs
1428{ 1782{
1429 struct xdr_stream xdr; 1783 struct xdr_stream xdr;
1430 struct compound_hdr hdr = { 1784 struct compound_hdr hdr = {
1431 .nops = 0, 1785 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1432 }; 1786 };
1433 1787
1434 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1788 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1435 encode_compound_hdr(&xdr, &hdr); 1789 encode_compound_hdr(&xdr, req, &hdr);
1790 encode_sequence(&xdr, &args->seq_args, &hdr);
1436 encode_putfh(&xdr, args->old_dir, &hdr); 1791 encode_putfh(&xdr, args->old_dir, &hdr);
1437 encode_savefh(&xdr, &hdr); 1792 encode_savefh(&xdr, &hdr);
1438 encode_putfh(&xdr, args->new_dir, &hdr); 1793 encode_putfh(&xdr, args->new_dir, &hdr);
@@ -1451,11 +1806,12 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_
1451{ 1806{
1452 struct xdr_stream xdr; 1807 struct xdr_stream xdr;
1453 struct compound_hdr hdr = { 1808 struct compound_hdr hdr = {
1454 .nops = 0, 1809 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1455 }; 1810 };
1456 1811
1457 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1812 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1458 encode_compound_hdr(&xdr, &hdr); 1813 encode_compound_hdr(&xdr, req, &hdr);
1814 encode_sequence(&xdr, &args->seq_args, &hdr);
1459 encode_putfh(&xdr, args->fh, &hdr); 1815 encode_putfh(&xdr, args->fh, &hdr);
1460 encode_savefh(&xdr, &hdr); 1816 encode_savefh(&xdr, &hdr);
1461 encode_putfh(&xdr, args->dir_fh, &hdr); 1817 encode_putfh(&xdr, args->dir_fh, &hdr);
@@ -1474,11 +1830,12 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs
1474{ 1830{
1475 struct xdr_stream xdr; 1831 struct xdr_stream xdr;
1476 struct compound_hdr hdr = { 1832 struct compound_hdr hdr = {
1477 .nops = 0, 1833 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1478 }; 1834 };
1479 1835
1480 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1836 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1481 encode_compound_hdr(&xdr, &hdr); 1837 encode_compound_hdr(&xdr, req, &hdr);
1838 encode_sequence(&xdr, &args->seq_args, &hdr);
1482 encode_putfh(&xdr, args->dir_fh, &hdr); 1839 encode_putfh(&xdr, args->dir_fh, &hdr);
1483 encode_savefh(&xdr, &hdr); 1840 encode_savefh(&xdr, &hdr);
1484 encode_create(&xdr, args, &hdr); 1841 encode_create(&xdr, args, &hdr);
@@ -1505,11 +1862,12 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
1505{ 1862{
1506 struct xdr_stream xdr; 1863 struct xdr_stream xdr;
1507 struct compound_hdr hdr = { 1864 struct compound_hdr hdr = {
1508 .nops = 0, 1865 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1509 }; 1866 };
1510 1867
1511 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1868 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1512 encode_compound_hdr(&xdr, &hdr); 1869 encode_compound_hdr(&xdr, req, &hdr);
1870 encode_sequence(&xdr, &args->seq_args, &hdr);
1513 encode_putfh(&xdr, args->fh, &hdr); 1871 encode_putfh(&xdr, args->fh, &hdr);
1514 encode_getfattr(&xdr, args->bitmask, &hdr); 1872 encode_getfattr(&xdr, args->bitmask, &hdr);
1515 encode_nops(&hdr); 1873 encode_nops(&hdr);
@@ -1523,11 +1881,12 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea
1523{ 1881{
1524 struct xdr_stream xdr; 1882 struct xdr_stream xdr;
1525 struct compound_hdr hdr = { 1883 struct compound_hdr hdr = {
1526 .nops = 0, 1884 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1527 }; 1885 };
1528 1886
1529 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1887 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1530 encode_compound_hdr(&xdr, &hdr); 1888 encode_compound_hdr(&xdr, req, &hdr);
1889 encode_sequence(&xdr, &args->seq_args, &hdr);
1531 encode_putfh(&xdr, args->fh, &hdr); 1890 encode_putfh(&xdr, args->fh, &hdr);
1532 encode_close(&xdr, args, &hdr); 1891 encode_close(&xdr, args, &hdr);
1533 encode_getfattr(&xdr, args->bitmask, &hdr); 1892 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1542,11 +1901,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg
1542{ 1901{
1543 struct xdr_stream xdr; 1902 struct xdr_stream xdr;
1544 struct compound_hdr hdr = { 1903 struct compound_hdr hdr = {
1545 .nops = 0, 1904 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1546 }; 1905 };
1547 1906
1548 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1907 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1549 encode_compound_hdr(&xdr, &hdr); 1908 encode_compound_hdr(&xdr, req, &hdr);
1909 encode_sequence(&xdr, &args->seq_args, &hdr);
1550 encode_putfh(&xdr, args->fh, &hdr); 1910 encode_putfh(&xdr, args->fh, &hdr);
1551 encode_savefh(&xdr, &hdr); 1911 encode_savefh(&xdr, &hdr);
1552 encode_open(&xdr, args, &hdr); 1912 encode_open(&xdr, args, &hdr);
@@ -1569,7 +1929,7 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs
1569 }; 1929 };
1570 1930
1571 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1931 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1572 encode_compound_hdr(&xdr, &hdr); 1932 encode_compound_hdr(&xdr, req, &hdr);
1573 encode_putfh(&xdr, args->fh, &hdr); 1933 encode_putfh(&xdr, args->fh, &hdr);
1574 encode_open_confirm(&xdr, args, &hdr); 1934 encode_open_confirm(&xdr, args, &hdr);
1575 encode_nops(&hdr); 1935 encode_nops(&hdr);
@@ -1583,11 +1943,12 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_
1583{ 1943{
1584 struct xdr_stream xdr; 1944 struct xdr_stream xdr;
1585 struct compound_hdr hdr = { 1945 struct compound_hdr hdr = {
1586 .nops = 0, 1946 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1587 }; 1947 };
1588 1948
1589 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1949 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1590 encode_compound_hdr(&xdr, &hdr); 1950 encode_compound_hdr(&xdr, req, &hdr);
1951 encode_sequence(&xdr, &args->seq_args, &hdr);
1591 encode_putfh(&xdr, args->fh, &hdr); 1952 encode_putfh(&xdr, args->fh, &hdr);
1592 encode_open(&xdr, args, &hdr); 1953 encode_open(&xdr, args, &hdr);
1593 encode_getfattr(&xdr, args->bitmask, &hdr); 1954 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1602,11 +1963,12 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n
1602{ 1963{
1603 struct xdr_stream xdr; 1964 struct xdr_stream xdr;
1604 struct compound_hdr hdr = { 1965 struct compound_hdr hdr = {
1605 .nops = 0, 1966 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1606 }; 1967 };
1607 1968
1608 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1969 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1609 encode_compound_hdr(&xdr, &hdr); 1970 encode_compound_hdr(&xdr, req, &hdr);
1971 encode_sequence(&xdr, &args->seq_args, &hdr);
1610 encode_putfh(&xdr, args->fh, &hdr); 1972 encode_putfh(&xdr, args->fh, &hdr);
1611 encode_open_downgrade(&xdr, args, &hdr); 1973 encode_open_downgrade(&xdr, args, &hdr);
1612 encode_getfattr(&xdr, args->bitmask, &hdr); 1974 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1621,11 +1983,12 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar
1621{ 1983{
1622 struct xdr_stream xdr; 1984 struct xdr_stream xdr;
1623 struct compound_hdr hdr = { 1985 struct compound_hdr hdr = {
1624 .nops = 0, 1986 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1625 }; 1987 };
1626 1988
1627 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1989 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1628 encode_compound_hdr(&xdr, &hdr); 1990 encode_compound_hdr(&xdr, req, &hdr);
1991 encode_sequence(&xdr, &args->seq_args, &hdr);
1629 encode_putfh(&xdr, args->fh, &hdr); 1992 encode_putfh(&xdr, args->fh, &hdr);
1630 encode_lock(&xdr, args, &hdr); 1993 encode_lock(&xdr, args, &hdr);
1631 encode_nops(&hdr); 1994 encode_nops(&hdr);
@@ -1639,11 +2002,12 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_
1639{ 2002{
1640 struct xdr_stream xdr; 2003 struct xdr_stream xdr;
1641 struct compound_hdr hdr = { 2004 struct compound_hdr hdr = {
1642 .nops = 0, 2005 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1643 }; 2006 };
1644 2007
1645 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2008 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1646 encode_compound_hdr(&xdr, &hdr); 2009 encode_compound_hdr(&xdr, req, &hdr);
2010 encode_sequence(&xdr, &args->seq_args, &hdr);
1647 encode_putfh(&xdr, args->fh, &hdr); 2011 encode_putfh(&xdr, args->fh, &hdr);
1648 encode_lockt(&xdr, args, &hdr); 2012 encode_lockt(&xdr, args, &hdr);
1649 encode_nops(&hdr); 2013 encode_nops(&hdr);
@@ -1657,11 +2021,12 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
1657{ 2021{
1658 struct xdr_stream xdr; 2022 struct xdr_stream xdr;
1659 struct compound_hdr hdr = { 2023 struct compound_hdr hdr = {
1660 .nops = 0, 2024 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1661 }; 2025 };
1662 2026
1663 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2027 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1664 encode_compound_hdr(&xdr, &hdr); 2028 encode_compound_hdr(&xdr, req, &hdr);
2029 encode_sequence(&xdr, &args->seq_args, &hdr);
1665 encode_putfh(&xdr, args->fh, &hdr); 2030 encode_putfh(&xdr, args->fh, &hdr);
1666 encode_locku(&xdr, args, &hdr); 2031 encode_locku(&xdr, args, &hdr);
1667 encode_nops(&hdr); 2032 encode_nops(&hdr);
@@ -1675,22 +2040,16 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
1675{ 2040{
1676 struct xdr_stream xdr; 2041 struct xdr_stream xdr;
1677 struct compound_hdr hdr = { 2042 struct compound_hdr hdr = {
1678 .nops = 0, 2043 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1679 }; 2044 };
1680 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1681 unsigned int replen;
1682 2045
1683 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2046 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1684 encode_compound_hdr(&xdr, &hdr); 2047 encode_compound_hdr(&xdr, req, &hdr);
2048 encode_sequence(&xdr, &args->seq_args, &hdr);
1685 encode_putfh(&xdr, args->fh, &hdr); 2049 encode_putfh(&xdr, args->fh, &hdr);
1686 encode_readlink(&xdr, args, req, &hdr); 2050 encode_readlink(&xdr, args, req, &hdr);
1687 2051
1688 /* set up reply kvec 2052 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
1689 * toplevel_status + taglen + rescount + OP_PUTFH + status
1690 * + OP_READLINK + status + string length = 8
1691 */
1692 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2;
1693 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
1694 args->pgbase, args->pglen); 2053 args->pgbase, args->pglen);
1695 encode_nops(&hdr); 2054 encode_nops(&hdr);
1696 return 0; 2055 return 0;
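The readlink and readdir hunks here replace the hand-counted reply-size constants (RPC_REPHDRSIZE + au_rslack + NFS4_dec_*_sz) with hdr.replen, which the encode helpers appear to accumulate as each op is added to the compound; the << 2 converts 32-bit XDR words into the byte offset handed to xdr_inline_pages(). A minimal user-space sketch of that accounting follows; the helper name and per-op word counts are made up for illustration and are not the kernel's.

#include <stdint.h>
#include <stdio.h>

struct sketch_hdr { uint32_t replen; };		/* stand-in for compound_hdr */

/* Each encode helper is assumed to add its worst-case reply size,
 * in 32-bit XDR words, to the running total. */
static void add_op_reply(struct sketch_hdr *hdr, uint32_t words)
{
	hdr->replen += words;
}

int main(void)
{
	struct sketch_hdr hdr = { .replen = 0 };

	add_op_reply(&hdr, 2);	/* e.g. compound status + tag (made-up size) */
	add_op_reply(&hdr, 2);	/* e.g. PUTFH op code + status               */
	add_op_reply(&hdr, 3);	/* e.g. READLINK op + status + length word   */

	/* hdr.replen << 2 is the byte offset at which the page data starts. */
	printf("page data starts at byte %u\n", hdr.replen << 2);
	return 0;
}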
@@ -1703,25 +2062,19 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1703{ 2062{
1704 struct xdr_stream xdr; 2063 struct xdr_stream xdr;
1705 struct compound_hdr hdr = { 2064 struct compound_hdr hdr = {
1706 .nops = 0, 2065 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1707 }; 2066 };
1708 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1709 int replen;
1710 2067
1711 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2068 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1712 encode_compound_hdr(&xdr, &hdr); 2069 encode_compound_hdr(&xdr, req, &hdr);
2070 encode_sequence(&xdr, &args->seq_args, &hdr);
1713 encode_putfh(&xdr, args->fh, &hdr); 2071 encode_putfh(&xdr, args->fh, &hdr);
1714 encode_readdir(&xdr, args, req, &hdr); 2072 encode_readdir(&xdr, args, req, &hdr);
1715 2073
1716 /* set up reply kvec 2074 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
1717 * toplevel_status + taglen + rescount + OP_PUTFH + status
1718 * + OP_READDIR + status + verifer(2) = 9
1719 */
1720 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readdir_sz) << 2;
1721 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
1722 args->pgbase, args->count); 2075 args->pgbase, args->count);
1723 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", 2076 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
1724 __func__, replen, args->pages, 2077 __func__, hdr.replen << 2, args->pages,
1725 args->pgbase, args->count); 2078 args->pgbase, args->count);
1726 encode_nops(&hdr); 2079 encode_nops(&hdr);
1727 return 0; 2080 return 0;
@@ -1732,24 +2085,18 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1732 */ 2085 */
1733static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) 2086static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
1734{ 2087{
1735 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1736 struct xdr_stream xdr; 2088 struct xdr_stream xdr;
1737 struct compound_hdr hdr = { 2089 struct compound_hdr hdr = {
1738 .nops = 0, 2090 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1739 }; 2091 };
1740 int replen;
1741 2092
1742 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2093 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1743 encode_compound_hdr(&xdr, &hdr); 2094 encode_compound_hdr(&xdr, req, &hdr);
2095 encode_sequence(&xdr, &args->seq_args, &hdr);
1744 encode_putfh(&xdr, args->fh, &hdr); 2096 encode_putfh(&xdr, args->fh, &hdr);
1745 encode_read(&xdr, args, &hdr); 2097 encode_read(&xdr, args, &hdr);
1746 2098
1747 /* set up reply kvec 2099 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
1748 * toplevel status + taglen=0 + rescount + OP_PUTFH + status
1749 * + OP_READ + status + eof + datalen = 9
1750 */
1751 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
1752 xdr_inline_pages(&req->rq_rcv_buf, replen,
1753 args->pages, args->pgbase, args->count); 2100 args->pages, args->pgbase, args->count);
1754 req->rq_rcv_buf.flags |= XDRBUF_READ; 2101 req->rq_rcv_buf.flags |= XDRBUF_READ;
1755 encode_nops(&hdr); 2102 encode_nops(&hdr);
@@ -1763,11 +2110,12 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta
1763{ 2110{
1764 struct xdr_stream xdr; 2111 struct xdr_stream xdr;
1765 struct compound_hdr hdr = { 2112 struct compound_hdr hdr = {
1766 .nops = 0, 2113 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1767 }; 2114 };
1768 2115
1769 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2116 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1770 encode_compound_hdr(&xdr, &hdr); 2117 encode_compound_hdr(&xdr, req, &hdr);
2118 encode_sequence(&xdr, &args->seq_args, &hdr);
1771 encode_putfh(&xdr, args->fh, &hdr); 2119 encode_putfh(&xdr, args->fh, &hdr);
1772 encode_setattr(&xdr, args, args->server, &hdr); 2120 encode_setattr(&xdr, args, args->server, &hdr);
1773 encode_getfattr(&xdr, args->bitmask, &hdr); 2121 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1783,20 +2131,19 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1783 struct nfs_getaclargs *args) 2131 struct nfs_getaclargs *args)
1784{ 2132{
1785 struct xdr_stream xdr; 2133 struct xdr_stream xdr;
1786 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1787 struct compound_hdr hdr = { 2134 struct compound_hdr hdr = {
1788 .nops = 0, 2135 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1789 }; 2136 };
1790 int replen; 2137 uint32_t replen;
1791 2138
1792 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2139 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1793 encode_compound_hdr(&xdr, &hdr); 2140 encode_compound_hdr(&xdr, req, &hdr);
2141 encode_sequence(&xdr, &args->seq_args, &hdr);
1794 encode_putfh(&xdr, args->fh, &hdr); 2142 encode_putfh(&xdr, args->fh, &hdr);
2143 replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1;
1795 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); 2144 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
1796 2145
1797 /* set up reply buffer: */ 2146 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
1798 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
1799 xdr_inline_pages(&req->rq_rcv_buf, replen,
1800 args->acl_pages, args->acl_pgbase, args->acl_len); 2147 args->acl_pages, args->acl_pgbase, args->acl_len);
1801 encode_nops(&hdr); 2148 encode_nops(&hdr);
1802 return 0; 2149 return 0;
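For GETACL the reply offset is captured before the GETATTR op is encoded: hdr.replen at that point covers everything up to the op, and the extra nfs4_fattr_bitmap_maxsz + 1 words appear to account for the returned attribute bitmask and the attribute-list length that precede the opaque ACL bytes. A tiny sketch of that arithmetic, with assumed word counts (they are not the real maxsz values):

#include <stdio.h>

int main(void)
{
	unsigned int hdr_replen = 7;		/* assumed words up to the GETATTR op */
	unsigned int fattr_bitmap_maxsz = 4;	/* assumed size of the reply bitmask  */

	/* +1 for the attribute-list byte count, as in the patch above. */
	unsigned int replen = hdr_replen + fattr_bitmap_maxsz + 1;

	printf("opaque ACL data expected at byte offset %u\n", replen << 2);
	return 0;
}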
@@ -1809,11 +2156,12 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
1809{ 2156{
1810 struct xdr_stream xdr; 2157 struct xdr_stream xdr;
1811 struct compound_hdr hdr = { 2158 struct compound_hdr hdr = {
1812 .nops = 0, 2159 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1813 }; 2160 };
1814 2161
1815 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2162 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1816 encode_compound_hdr(&xdr, &hdr); 2163 encode_compound_hdr(&xdr, req, &hdr);
2164 encode_sequence(&xdr, &args->seq_args, &hdr);
1817 encode_putfh(&xdr, args->fh, &hdr); 2165 encode_putfh(&xdr, args->fh, &hdr);
1818 encode_write(&xdr, args, &hdr); 2166 encode_write(&xdr, args, &hdr);
1819 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2167 req->rq_snd_buf.flags |= XDRBUF_WRITE;
@@ -1829,11 +2177,12 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write
1829{ 2177{
1830 struct xdr_stream xdr; 2178 struct xdr_stream xdr;
1831 struct compound_hdr hdr = { 2179 struct compound_hdr hdr = {
1832 .nops = 0, 2180 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1833 }; 2181 };
1834 2182
1835 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2183 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1836 encode_compound_hdr(&xdr, &hdr); 2184 encode_compound_hdr(&xdr, req, &hdr);
2185 encode_sequence(&xdr, &args->seq_args, &hdr);
1837 encode_putfh(&xdr, args->fh, &hdr); 2186 encode_putfh(&xdr, args->fh, &hdr);
1838 encode_commit(&xdr, args, &hdr); 2187 encode_commit(&xdr, args, &hdr);
1839 encode_getfattr(&xdr, args->bitmask, &hdr); 2188 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1848,11 +2197,12 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin
1848{ 2197{
1849 struct xdr_stream xdr; 2198 struct xdr_stream xdr;
1850 struct compound_hdr hdr = { 2199 struct compound_hdr hdr = {
1851 .nops = 0, 2200 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1852 }; 2201 };
1853 2202
1854 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2203 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1855 encode_compound_hdr(&xdr, &hdr); 2204 encode_compound_hdr(&xdr, req, &hdr);
2205 encode_sequence(&xdr, &args->seq_args, &hdr);
1856 encode_putfh(&xdr, args->fh, &hdr); 2206 encode_putfh(&xdr, args->fh, &hdr);
1857 encode_fsinfo(&xdr, args->bitmask, &hdr); 2207 encode_fsinfo(&xdr, args->bitmask, &hdr);
1858 encode_nops(&hdr); 2208 encode_nops(&hdr);
@@ -1866,11 +2216,12 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n
1866{ 2216{
1867 struct xdr_stream xdr; 2217 struct xdr_stream xdr;
1868 struct compound_hdr hdr = { 2218 struct compound_hdr hdr = {
1869 .nops = 0, 2219 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1870 }; 2220 };
1871 2221
1872 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2222 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1873 encode_compound_hdr(&xdr, &hdr); 2223 encode_compound_hdr(&xdr, req, &hdr);
2224 encode_sequence(&xdr, &args->seq_args, &hdr);
1874 encode_putfh(&xdr, args->fh, &hdr); 2225 encode_putfh(&xdr, args->fh, &hdr);
1875 encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], 2226 encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
1876 &hdr); 2227 &hdr);
@@ -1885,11 +2236,12 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
1885{ 2236{
1886 struct xdr_stream xdr; 2237 struct xdr_stream xdr;
1887 struct compound_hdr hdr = { 2238 struct compound_hdr hdr = {
1888 .nops = 0, 2239 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1889 }; 2240 };
1890 2241
1891 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2242 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1892 encode_compound_hdr(&xdr, &hdr); 2243 encode_compound_hdr(&xdr, req, &hdr);
2244 encode_sequence(&xdr, &args->seq_args, &hdr);
1893 encode_putfh(&xdr, args->fh, &hdr); 2245 encode_putfh(&xdr, args->fh, &hdr);
1894 encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], 2246 encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
1895 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); 2247 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
@@ -1900,16 +2252,18 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
1900/* 2252/*
1901 * GETATTR_BITMAP request 2253 * GETATTR_BITMAP request
1902 */ 2254 */
1903static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle) 2255static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
2256 struct nfs4_server_caps_arg *args)
1904{ 2257{
1905 struct xdr_stream xdr; 2258 struct xdr_stream xdr;
1906 struct compound_hdr hdr = { 2259 struct compound_hdr hdr = {
1907 .nops = 0, 2260 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1908 }; 2261 };
1909 2262
1910 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2263 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1911 encode_compound_hdr(&xdr, &hdr); 2264 encode_compound_hdr(&xdr, req, &hdr);
1912 encode_putfh(&xdr, fhandle, &hdr); 2265 encode_sequence(&xdr, &args->seq_args, &hdr);
2266 encode_putfh(&xdr, args->fhandle, &hdr);
1913 encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| 2267 encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
1914 FATTR4_WORD0_LINK_SUPPORT| 2268 FATTR4_WORD0_LINK_SUPPORT|
1915 FATTR4_WORD0_SYMLINK_SUPPORT| 2269 FATTR4_WORD0_SYMLINK_SUPPORT|
@@ -1929,7 +2283,7 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client
1929 }; 2283 };
1930 2284
1931 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2285 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1932 encode_compound_hdr(&xdr, &hdr); 2286 encode_compound_hdr(&xdr, req, &hdr);
1933 encode_renew(&xdr, clp, &hdr); 2287 encode_renew(&xdr, clp, &hdr);
1934 encode_nops(&hdr); 2288 encode_nops(&hdr);
1935 return 0; 2289 return 0;
@@ -1946,7 +2300,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
1946 }; 2300 };
1947 2301
1948 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2302 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1949 encode_compound_hdr(&xdr, &hdr); 2303 encode_compound_hdr(&xdr, req, &hdr);
1950 encode_setclientid(&xdr, sc, &hdr); 2304 encode_setclientid(&xdr, sc, &hdr);
1951 encode_nops(&hdr); 2305 encode_nops(&hdr);
1952 return 0; 2306 return 0;
@@ -1964,7 +2318,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
1964 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; 2318 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
1965 2319
1966 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2320 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1967 encode_compound_hdr(&xdr, &hdr); 2321 encode_compound_hdr(&xdr, req, &hdr);
1968 encode_setclientid_confirm(&xdr, clp, &hdr); 2322 encode_setclientid_confirm(&xdr, clp, &hdr);
1969 encode_putrootfh(&xdr, &hdr); 2323 encode_putrootfh(&xdr, &hdr);
1970 encode_fsinfo(&xdr, lease_bitmap, &hdr); 2324 encode_fsinfo(&xdr, lease_bitmap, &hdr);
@@ -1979,11 +2333,12 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc
1979{ 2333{
1980 struct xdr_stream xdr; 2334 struct xdr_stream xdr;
1981 struct compound_hdr hdr = { 2335 struct compound_hdr hdr = {
1982 .nops = 0, 2336 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
1983 }; 2337 };
1984 2338
1985 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2339 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1986 encode_compound_hdr(&xdr, &hdr); 2340 encode_compound_hdr(&xdr, req, &hdr);
2341 encode_sequence(&xdr, &args->seq_args, &hdr);
1987 encode_putfh(&xdr, args->fhandle, &hdr); 2342 encode_putfh(&xdr, args->fhandle, &hdr);
1988 encode_delegreturn(&xdr, args->stateid, &hdr); 2343 encode_delegreturn(&xdr, args->stateid, &hdr);
1989 encode_getfattr(&xdr, args->bitmask, &hdr); 2344 encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1998,28 +2353,119 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
1998{ 2353{
1999 struct xdr_stream xdr; 2354 struct xdr_stream xdr;
2000 struct compound_hdr hdr = { 2355 struct compound_hdr hdr = {
2001 .nops = 0, 2356 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2002 }; 2357 };
2003 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 2358 uint32_t replen;
2004 int replen;
2005 2359
2006 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2360 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2007 encode_compound_hdr(&xdr, &hdr); 2361 encode_compound_hdr(&xdr, req, &hdr);
2362 encode_sequence(&xdr, &args->seq_args, &hdr);
2008 encode_putfh(&xdr, args->dir_fh, &hdr); 2363 encode_putfh(&xdr, args->dir_fh, &hdr);
2009 encode_lookup(&xdr, args->name, &hdr); 2364 encode_lookup(&xdr, args->name, &hdr);
2365 replen = hdr.replen; /* get the attribute into args->page */
2010 encode_fs_locations(&xdr, args->bitmask, &hdr); 2366 encode_fs_locations(&xdr, args->bitmask, &hdr);
2011 2367
2012 /* set up reply 2368 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
2013 * toplevel_status + OP_PUTFH + status
2014 * + OP_LOOKUP + status + OP_GETATTR + status = 7
2015 */
2016 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
2017 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
2018 0, PAGE_SIZE); 2369 0, PAGE_SIZE);
2019 encode_nops(&hdr); 2370 encode_nops(&hdr);
2020 return 0; 2371 return 0;
2021} 2372}
2022 2373
2374#if defined(CONFIG_NFS_V4_1)
2375/*
2376 * EXCHANGE_ID request
2377 */
2378static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
2379 struct nfs41_exchange_id_args *args)
2380{
2381 struct xdr_stream xdr;
2382 struct compound_hdr hdr = {
2383 .minorversion = args->client->cl_minorversion,
2384 };
2385
2386 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2387 encode_compound_hdr(&xdr, req, &hdr);
2388 encode_exchange_id(&xdr, args, &hdr);
2389 encode_nops(&hdr);
2390 return 0;
2391}
2392
2393/*
2394 * a CREATE_SESSION request
2395 */
2396static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
2397 struct nfs41_create_session_args *args)
2398{
2399 struct xdr_stream xdr;
2400 struct compound_hdr hdr = {
2401 .minorversion = args->client->cl_minorversion,
2402 };
2403
2404 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2405 encode_compound_hdr(&xdr, req, &hdr);
2406 encode_create_session(&xdr, args, &hdr);
2407 encode_nops(&hdr);
2408 return 0;
2409}
2410
2411/*
2412 * a DESTROY_SESSION request
2413 */
2414static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
2415 struct nfs4_session *session)
2416{
2417 struct xdr_stream xdr;
2418 struct compound_hdr hdr = {
2419 .minorversion = session->clp->cl_minorversion,
2420 };
2421
2422 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2423 encode_compound_hdr(&xdr, req, &hdr);
2424 encode_destroy_session(&xdr, session, &hdr);
2425 encode_nops(&hdr);
2426 return 0;
2427}
2428
2429/*
2430 * a SEQUENCE request
2431 */
2432static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
2433 struct nfs4_sequence_args *args)
2434{
2435 struct xdr_stream xdr;
2436 struct compound_hdr hdr = {
2437 .minorversion = nfs4_xdr_minorversion(args),
2438 };
2439
2440 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2441 encode_compound_hdr(&xdr, req, &hdr);
2442 encode_sequence(&xdr, args, &hdr);
2443 encode_nops(&hdr);
2444 return 0;
2445}
2446
2447/*
2448 * a GET_LEASE_TIME request
2449 */
2450static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2451 struct nfs4_get_lease_time_args *args)
2452{
2453 struct xdr_stream xdr;
2454 struct compound_hdr hdr = {
2455 .minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
2456 };
2457 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
2458
2459 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2460 encode_compound_hdr(&xdr, req, &hdr);
2461 encode_sequence(&xdr, &args->la_seq_args, &hdr);
2462 encode_putrootfh(&xdr, &hdr);
2463 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2464 encode_nops(&hdr);
2465 return 0;
2466}
2467#endif /* CONFIG_NFS_V4_1 */
2468
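The block above adds the minorversion-1 encoders. Two framing rules stand out: the compound header's minorversion comes from the per-request sequence arguments (or from the client/session for EXCHANGE_ID, CREATE_SESSION and DESTROY_SESSION, which run outside a session), and session-based compounds put a SEQUENCE op first, ahead of PUTFH and the rest. A toy model of that framing; the op numbers, names and fixed-size array below are illustrative only, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

enum toy_op { TOY_SEQUENCE = 53, TOY_PUTFH = 22, TOY_GETATTR = 9 };

struct toy_compound {
	uint32_t minorversion;
	uint32_t nops;
	enum toy_op ops[8];
};

static void toy_add_op(struct toy_compound *c, enum toy_op op)
{
	c->ops[c->nops++] = op;
}

int main(void)
{
	struct toy_compound c = { .minorversion = 1 };	/* NFSv4.1 */

	toy_add_op(&c, TOY_SEQUENCE);	/* always first under a session */
	toy_add_op(&c, TOY_PUTFH);
	toy_add_op(&c, TOY_GETATTR);

	printf("minorversion=%u nops=%u first op=%d\n",
	       c.minorversion, c.nops, c.ops[0]);
	return 0;
}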
2023/* 2469/*
2024 * START OF "GENERIC" DECODE ROUTINES. 2470 * START OF "GENERIC" DECODE ROUTINES.
2025 * These may look a little ugly since they are imported from a "generic" 2471 * These may look a little ugly since they are imported from a "generic"
@@ -3657,7 +4103,7 @@ decode_savefh(struct xdr_stream *xdr)
3657 return decode_op_hdr(xdr, OP_SAVEFH); 4103 return decode_op_hdr(xdr, OP_SAVEFH);
3658} 4104}
3659 4105
3660static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) 4106static int decode_setattr(struct xdr_stream *xdr)
3661{ 4107{
3662 __be32 *p; 4108 __be32 *p;
3663 uint32_t bmlen; 4109 uint32_t bmlen;
@@ -3735,6 +4181,169 @@ static int decode_delegreturn(struct xdr_stream *xdr)
3735 return decode_op_hdr(xdr, OP_DELEGRETURN); 4181 return decode_op_hdr(xdr, OP_DELEGRETURN);
3736} 4182}
3737 4183
4184#if defined(CONFIG_NFS_V4_1)
4185static int decode_exchange_id(struct xdr_stream *xdr,
4186 struct nfs41_exchange_id_res *res)
4187{
4188 __be32 *p;
4189 uint32_t dummy;
4190 int status;
4191 struct nfs_client *clp = res->client;
4192
4193 status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
4194 if (status)
4195 return status;
4196
4197 READ_BUF(8);
4198 READ64(clp->cl_ex_clid);
4199 READ_BUF(12);
4200 READ32(clp->cl_seqid);
4201 READ32(clp->cl_exchange_flags);
4202
4203 /* We ask for SP4_NONE */
4204 READ32(dummy);
4205 if (dummy != SP4_NONE)
4206 return -EIO;
4207
4208 /* Throw away minor_id */
4209 READ_BUF(8);
4210
4211 /* Throw away Major id */
4212 READ_BUF(4);
4213 READ32(dummy);
4214 READ_BUF(dummy);
4215
4216 /* Throw away server_scope */
4217 READ_BUF(4);
4218 READ32(dummy);
4219 READ_BUF(dummy);
4220
4221 /* Throw away Implementation id array */
4222 READ_BUF(4);
4223 READ32(dummy);
4224 READ_BUF(dummy);
4225
4226 return 0;
4227}
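decode_exchange_id() keeps only the clientid, sequence id and exchange flags and discards the variable-length server_owner major id, server scope and implementation-id fields by reading a length word and then skipping that many bytes. The same pattern in self-contained user-space C is sketched below; the cursor type and helpers are invented, and XDR's 4-byte padding of opaques is handled explicitly here while the kernel's READ_BUF/READ32 macros manage buffer positioning their own way.

#include <stdint.h>
#include <stdio.h>

struct cursor { const unsigned char *p; size_t left; };

static int get_u32(struct cursor *c, uint32_t *v)
{
	if (c->left < 4)
		return -1;
	*v = ((uint32_t)c->p[0] << 24) | ((uint32_t)c->p[1] << 16) |
	     ((uint32_t)c->p[2] << 8)  |  (uint32_t)c->p[3];
	c->p += 4;
	c->left -= 4;
	return 0;
}

/* Read a length, then skip the (padded) opaque body it describes. */
static int skip_opaque(struct cursor *c)
{
	uint32_t len;
	size_t padded;

	if (get_u32(c, &len))
		return -1;
	padded = (len + 3) & ~(size_t)3;	/* XDR pads opaques to 4 bytes */
	if (c->left < padded)
		return -1;
	c->p += padded;
	c->left -= padded;
	return 0;
}

int main(void)
{
	/* 5-byte opaque "hello" (padded to 8) followed by one trailing word. */
	unsigned char buf[] = { 0,0,0,5, 'h','e','l','l','o', 0,0,0, 0,0,0,42 };
	struct cursor c = { buf, sizeof(buf) };
	uint32_t tail;

	if (skip_opaque(&c) || get_u32(&c, &tail))
		return 1;
	printf("value after skipped opaque: %u\n", tail);
	return 0;
}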
4228
4229static int decode_chan_attrs(struct xdr_stream *xdr,
4230 struct nfs4_channel_attrs *attrs)
4231{
4232 __be32 *p;
4233 u32 nr_attrs;
4234
4235 READ_BUF(28);
4236 READ32(attrs->headerpadsz);
4237 READ32(attrs->max_rqst_sz);
4238 READ32(attrs->max_resp_sz);
4239 READ32(attrs->max_resp_sz_cached);
4240 READ32(attrs->max_ops);
4241 READ32(attrs->max_reqs);
4242 READ32(nr_attrs);
4243 if (unlikely(nr_attrs > 1)) {
4244 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
4245 __func__, nr_attrs);
4246 return -EINVAL;
4247 }
4248 if (nr_attrs == 1)
4249 READ_BUF(4); /* skip rdma_attrs */
4250 return 0;
4251}
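As decoded above, the channel attributes arrive as six fixed 32-bit fields plus a count (expected to be 0 or 1) of trailing RDMA attribute words, which is why READ_BUF(28) covers seven words. The struct below is only a picture of that wire layout with made-up example values, not kernel code.

#include <stdint.h>
#include <stdio.h>

struct toy_chan_attrs {
	uint32_t headerpadsz;
	uint32_t max_rqst_sz;
	uint32_t max_resp_sz;
	uint32_t max_resp_sz_cached;
	uint32_t max_ops;
	uint32_t max_reqs;
	uint32_t nr_rdma_attrs;		/* anything above 1 is rejected */
};

int main(void)
{
	struct toy_chan_attrs fc = {
		.max_rqst_sz = 65536, .max_resp_sz = 65536,
		.max_ops = 8, .max_reqs = 64, .nr_rdma_attrs = 0,
	};

	printf("fore channel: %u ops/compound, %u slots, rdma attrs: %u\n",
	       fc.max_ops, fc.max_reqs, fc.nr_rdma_attrs);
	return 0;
}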
4252
4253static int decode_create_session(struct xdr_stream *xdr,
4254 struct nfs41_create_session_res *res)
4255{
4256 __be32 *p;
4257 int status;
4258 struct nfs_client *clp = res->client;
4259 struct nfs4_session *session = clp->cl_session;
4260
4261 status = decode_op_hdr(xdr, OP_CREATE_SESSION);
4262
4263 if (status)
4264 return status;
4265
4266 /* sessionid */
4267 READ_BUF(NFS4_MAX_SESSIONID_LEN);
4268 COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
4269
4270 /* seqid, flags */
4271 READ_BUF(8);
4272 READ32(clp->cl_seqid);
4273 READ32(session->flags);
4274
4275 /* Channel attributes */
4276 status = decode_chan_attrs(xdr, &session->fc_attrs);
4277 if (!status)
4278 status = decode_chan_attrs(xdr, &session->bc_attrs);
4279 return status;
4280}
4281
4282static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
4283{
4284 return decode_op_hdr(xdr, OP_DESTROY_SESSION);
4285}
4286#endif /* CONFIG_NFS_V4_1 */
4287
4288static int decode_sequence(struct xdr_stream *xdr,
4289 struct nfs4_sequence_res *res,
4290 struct rpc_rqst *rqstp)
4291{
4292#if defined(CONFIG_NFS_V4_1)
4293 struct nfs4_slot *slot;
4294 struct nfs4_sessionid id;
4295 u32 dummy;
4296 int status;
4297 __be32 *p;
4298
4299 if (!res->sr_session)
4300 return 0;
4301
4302 status = decode_op_hdr(xdr, OP_SEQUENCE);
4303 if (status)
4304 goto out_err;
4305
4306 /*
4307 * If the server returns different values for sessionID, slotID or
4308 * sequence number, the server is looney tunes.
4309 */
4310 status = -ESERVERFAULT;
4311
4312 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4313 READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
4314 COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
4315 if (memcmp(id.data, res->sr_session->sess_id.data,
4316 NFS4_MAX_SESSIONID_LEN)) {
4317 dprintk("%s Invalid session id\n", __func__);
4318 goto out_err;
4319 }
4320 /* seqid */
4321 READ32(dummy);
4322 if (dummy != slot->seq_nr) {
4323 dprintk("%s Invalid sequence number\n", __func__);
4324 goto out_err;
4325 }
4326 /* slot id */
4327 READ32(dummy);
4328 if (dummy != res->sr_slotid) {
4329 dprintk("%s Invalid slot id\n", __func__);
4330 goto out_err;
4331 }
4332 /* highest slot id - currently not processed */
4333 READ32(dummy);
4334 /* target highest slot id - currently not processed */
4335 READ32(dummy);
4336 /* result flags - currently not processed */
4337 READ32(dummy);
4338 status = 0;
4339out_err:
4340 res->sr_status = status;
4341 return status;
4342#else /* CONFIG_NFS_V4_1 */
4343 return 0;
4344#endif /* CONFIG_NFS_V4_1 */
4345}
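decode_sequence() cross-checks the reply against the slot the request was sent on: the session id must match the client's session, the sequence number must match the slot's seq_nr, and the slot id must match sr_slotid, while the highest-slot, target-highest-slot and status-flag words are read but ignored for now. A self-contained model of those checks follows; the structure names and the 16-byte session-id length are stand-ins, not the kernel's definitions.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define TOY_SESSIONID_LEN 16	/* illustrative, not NFS4_MAX_SESSIONID_LEN */

struct toy_slot { uint32_t seq_nr; };

struct toy_seq_reply {
	unsigned char sessionid[TOY_SESSIONID_LEN];
	uint32_t seqid;
	uint32_t slotid;
};

static int toy_check_sequence(const struct toy_seq_reply *r,
			      const unsigned char *sent_sessionid,
			      const struct toy_slot *slot,
			      uint32_t sent_slotid)
{
	if (memcmp(r->sessionid, sent_sessionid, TOY_SESSIONID_LEN))
		return -1;		/* wrong session */
	if (r->seqid != slot->seq_nr)
		return -1;		/* wrong sequence number */
	if (r->slotid != sent_slotid)
		return -1;		/* wrong slot */
	return 0;
}

int main(void)
{
	unsigned char sid[TOY_SESSIONID_LEN] = "abcdefghijklmno";
	struct toy_slot slot = { .seq_nr = 7 };
	struct toy_seq_reply reply = { .seqid = 7, .slotid = 3 };

	memcpy(reply.sessionid, sid, TOY_SESSIONID_LEN);
	printf("sequence reply %s\n",
	       toy_check_sequence(&reply, sid, &slot, 3) ? "rejected" : "accepted");
	return 0;
}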
4346
3738/* 4347/*
3739 * END OF "GENERIC" DECODE ROUTINES. 4348 * END OF "GENERIC" DECODE ROUTINES.
3740 */ 4349 */
@@ -3752,6 +4361,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
3752 status = decode_compound_hdr(&xdr, &hdr); 4361 status = decode_compound_hdr(&xdr, &hdr);
3753 if (status) 4362 if (status)
3754 goto out; 4363 goto out;
4364 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4365 if (status)
4366 goto out;
3755 status = decode_putfh(&xdr); 4367 status = decode_putfh(&xdr);
3756 if (status) 4368 if (status)
3757 goto out; 4369 goto out;
@@ -3773,7 +4385,11 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
3773 int status; 4385 int status;
3774 4386
3775 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4387 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3776 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4388 status = decode_compound_hdr(&xdr, &hdr);
4389 if (status)
4390 goto out;
4391 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4392 if (status)
3777 goto out; 4393 goto out;
3778 status = decode_putfh(&xdr); 4394 status = decode_putfh(&xdr);
3779 if (status != 0) 4395 if (status != 0)
@@ -3796,7 +4412,11 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
3796 int status; 4412 int status;
3797 4413
3798 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4414 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3799 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4415 status = decode_compound_hdr(&xdr, &hdr);
4416 if (status)
4417 goto out;
4418 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4419 if (status)
3800 goto out; 4420 goto out;
3801 if ((status = decode_putfh(&xdr)) != 0) 4421 if ((status = decode_putfh(&xdr)) != 0)
3802 goto out; 4422 goto out;
@@ -3819,7 +4439,11 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
3819 int status; 4439 int status;
3820 4440
3821 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4441 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3822 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4442 status = decode_compound_hdr(&xdr, &hdr);
4443 if (status)
4444 goto out;
4445 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4446 if (status)
3823 goto out; 4447 goto out;
3824 if ((status = decode_putrootfh(&xdr)) != 0) 4448 if ((status = decode_putrootfh(&xdr)) != 0)
3825 goto out; 4449 goto out;
@@ -3839,7 +4463,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
3839 int status; 4463 int status;
3840 4464
3841 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4465 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3842 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4466 status = decode_compound_hdr(&xdr, &hdr);
4467 if (status)
4468 goto out;
4469 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4470 if (status)
3843 goto out; 4471 goto out;
3844 if ((status = decode_putfh(&xdr)) != 0) 4472 if ((status = decode_putfh(&xdr)) != 0)
3845 goto out; 4473 goto out;
@@ -3860,7 +4488,11 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
3860 int status; 4488 int status;
3861 4489
3862 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4490 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3863 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4491 status = decode_compound_hdr(&xdr, &hdr);
4492 if (status)
4493 goto out;
4494 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4495 if (status)
3864 goto out; 4496 goto out;
3865 if ((status = decode_putfh(&xdr)) != 0) 4497 if ((status = decode_putfh(&xdr)) != 0)
3866 goto out; 4498 goto out;
@@ -3890,7 +4522,11 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
3890 int status; 4522 int status;
3891 4523
3892 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4524 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3893 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4525 status = decode_compound_hdr(&xdr, &hdr);
4526 if (status)
4527 goto out;
4528 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4529 if (status)
3894 goto out; 4530 goto out;
3895 if ((status = decode_putfh(&xdr)) != 0) 4531 if ((status = decode_putfh(&xdr)) != 0)
3896 goto out; 4532 goto out;
@@ -3923,7 +4559,11 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
3923 int status; 4559 int status;
3924 4560
3925 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4561 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3926 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 4562 status = decode_compound_hdr(&xdr, &hdr);
4563 if (status)
4564 goto out;
4565 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4566 if (status)
3927 goto out; 4567 goto out;
3928 if ((status = decode_putfh(&xdr)) != 0) 4568 if ((status = decode_putfh(&xdr)) != 0)
3929 goto out; 4569 goto out;
@@ -3963,6 +4603,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
3963 status = decode_compound_hdr(&xdr, &hdr); 4603 status = decode_compound_hdr(&xdr, &hdr);
3964 if (status) 4604 if (status)
3965 goto out; 4605 goto out;
4606 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4607 if (status)
4608 goto out;
3966 status = decode_putfh(&xdr); 4609 status = decode_putfh(&xdr);
3967 if (status) 4610 if (status)
3968 goto out; 4611 goto out;
@@ -3979,12 +4622,13 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args
3979{ 4622{
3980 struct xdr_stream xdr; 4623 struct xdr_stream xdr;
3981 struct compound_hdr hdr = { 4624 struct compound_hdr hdr = {
3982 .nops = 0, 4625 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
3983 }; 4626 };
3984 int status; 4627 int status;
3985 4628
3986 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 4629 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
3987 encode_compound_hdr(&xdr, &hdr); 4630 encode_compound_hdr(&xdr, req, &hdr);
4631 encode_sequence(&xdr, &args->seq_args, &hdr);
3988 encode_putfh(&xdr, args->fh, &hdr); 4632 encode_putfh(&xdr, args->fh, &hdr);
3989 status = encode_setacl(&xdr, args, &hdr); 4633 status = encode_setacl(&xdr, args, &hdr);
3990 encode_nops(&hdr); 4634 encode_nops(&hdr);
@@ -3995,7 +4639,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args
3995 * Decode SETACL response 4639 * Decode SETACL response
3996 */ 4640 */
3997static int 4641static int
3998nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res) 4642nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
4643 struct nfs_setaclres *res)
3999{ 4644{
4000 struct xdr_stream xdr; 4645 struct xdr_stream xdr;
4001 struct compound_hdr hdr; 4646 struct compound_hdr hdr;
@@ -4005,10 +4650,13 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res)
4005 status = decode_compound_hdr(&xdr, &hdr); 4650 status = decode_compound_hdr(&xdr, &hdr);
4006 if (status) 4651 if (status)
4007 goto out; 4652 goto out;
4653 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4654 if (status)
4655 goto out;
4008 status = decode_putfh(&xdr); 4656 status = decode_putfh(&xdr);
4009 if (status) 4657 if (status)
4010 goto out; 4658 goto out;
4011 status = decode_setattr(&xdr, res); 4659 status = decode_setattr(&xdr);
4012out: 4660out:
4013 return status; 4661 return status;
4014} 4662}
@@ -4017,7 +4665,8 @@ out:
4017 * Decode GETACL response 4665 * Decode GETACL response
4018 */ 4666 */
4019static int 4667static int
4020nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len) 4668nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
4669 struct nfs_getaclres *res)
4021{ 4670{
4022 struct xdr_stream xdr; 4671 struct xdr_stream xdr;
4023 struct compound_hdr hdr; 4672 struct compound_hdr hdr;
@@ -4027,10 +4676,13 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
4027 status = decode_compound_hdr(&xdr, &hdr); 4676 status = decode_compound_hdr(&xdr, &hdr);
4028 if (status) 4677 if (status)
4029 goto out; 4678 goto out;
4679 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4680 if (status)
4681 goto out;
4030 status = decode_putfh(&xdr); 4682 status = decode_putfh(&xdr);
4031 if (status) 4683 if (status)
4032 goto out; 4684 goto out;
4033 status = decode_getacl(&xdr, rqstp, acl_len); 4685 status = decode_getacl(&xdr, rqstp, &res->acl_len);
4034 4686
4035out: 4687out:
4036 return status; 4688 return status;
@@ -4049,6 +4701,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4049 status = decode_compound_hdr(&xdr, &hdr); 4701 status = decode_compound_hdr(&xdr, &hdr);
4050 if (status) 4702 if (status)
4051 goto out; 4703 goto out;
4704 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4705 if (status)
4706 goto out;
4052 status = decode_putfh(&xdr); 4707 status = decode_putfh(&xdr);
4053 if (status) 4708 if (status)
4054 goto out; 4709 goto out;
@@ -4079,6 +4734,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4079 status = decode_compound_hdr(&xdr, &hdr); 4734 status = decode_compound_hdr(&xdr, &hdr);
4080 if (status) 4735 if (status)
4081 goto out; 4736 goto out;
4737 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4738 if (status)
4739 goto out;
4082 status = decode_putfh(&xdr); 4740 status = decode_putfh(&xdr);
4083 if (status) 4741 if (status)
4084 goto out; 4742 goto out;
@@ -4133,6 +4791,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
4133 status = decode_compound_hdr(&xdr, &hdr); 4791 status = decode_compound_hdr(&xdr, &hdr);
4134 if (status) 4792 if (status)
4135 goto out; 4793 goto out;
4794 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4795 if (status)
4796 goto out;
4136 status = decode_putfh(&xdr); 4797 status = decode_putfh(&xdr);
4137 if (status) 4798 if (status)
4138 goto out; 4799 goto out;
@@ -4157,10 +4818,13 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4157 status = decode_compound_hdr(&xdr, &hdr); 4818 status = decode_compound_hdr(&xdr, &hdr);
4158 if (status) 4819 if (status)
4159 goto out; 4820 goto out;
4821 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4822 if (status)
4823 goto out;
4160 status = decode_putfh(&xdr); 4824 status = decode_putfh(&xdr);
4161 if (status) 4825 if (status)
4162 goto out; 4826 goto out;
4163 status = decode_setattr(&xdr, res); 4827 status = decode_setattr(&xdr);
4164 if (status) 4828 if (status)
4165 goto out; 4829 goto out;
4166 decode_getfattr(&xdr, res->fattr, res->server); 4830 decode_getfattr(&xdr, res->fattr, res->server);
@@ -4181,6 +4845,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_
4181 status = decode_compound_hdr(&xdr, &hdr); 4845 status = decode_compound_hdr(&xdr, &hdr);
4182 if (status) 4846 if (status)
4183 goto out; 4847 goto out;
4848 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4849 if (status)
4850 goto out;
4184 status = decode_putfh(&xdr); 4851 status = decode_putfh(&xdr);
4185 if (status) 4852 if (status)
4186 goto out; 4853 goto out;
@@ -4202,6 +4869,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock
4202 status = decode_compound_hdr(&xdr, &hdr); 4869 status = decode_compound_hdr(&xdr, &hdr);
4203 if (status) 4870 if (status)
4204 goto out; 4871 goto out;
4872 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4873 if (status)
4874 goto out;
4205 status = decode_putfh(&xdr); 4875 status = decode_putfh(&xdr);
4206 if (status) 4876 if (status)
4207 goto out; 4877 goto out;
@@ -4223,6 +4893,9 @@ static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock
4223 status = decode_compound_hdr(&xdr, &hdr); 4893 status = decode_compound_hdr(&xdr, &hdr);
4224 if (status) 4894 if (status)
4225 goto out; 4895 goto out;
4896 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4897 if (status)
4898 goto out;
4226 status = decode_putfh(&xdr); 4899 status = decode_putfh(&xdr);
4227 if (status) 4900 if (status)
4228 goto out; 4901 goto out;
@@ -4234,7 +4907,8 @@ out:
4234/* 4907/*
4235 * Decode READLINK response 4908 * Decode READLINK response
4236 */ 4909 */
4237static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res) 4910static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
4911 struct nfs4_readlink_res *res)
4238{ 4912{
4239 struct xdr_stream xdr; 4913 struct xdr_stream xdr;
4240 struct compound_hdr hdr; 4914 struct compound_hdr hdr;
@@ -4244,6 +4918,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res)
4244 status = decode_compound_hdr(&xdr, &hdr); 4918 status = decode_compound_hdr(&xdr, &hdr);
4245 if (status) 4919 if (status)
4246 goto out; 4920 goto out;
4921 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4922 if (status)
4923 goto out;
4247 status = decode_putfh(&xdr); 4924 status = decode_putfh(&xdr);
4248 if (status) 4925 if (status)
4249 goto out; 4926 goto out;
@@ -4265,6 +4942,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_r
4265 status = decode_compound_hdr(&xdr, &hdr); 4942 status = decode_compound_hdr(&xdr, &hdr);
4266 if (status) 4943 if (status)
4267 goto out; 4944 goto out;
4945 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4946 if (status)
4947 goto out;
4268 status = decode_putfh(&xdr); 4948 status = decode_putfh(&xdr);
4269 if (status) 4949 if (status)
4270 goto out; 4950 goto out;
@@ -4286,6 +4966,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readr
4286 status = decode_compound_hdr(&xdr, &hdr); 4966 status = decode_compound_hdr(&xdr, &hdr);
4287 if (status) 4967 if (status)
4288 goto out; 4968 goto out;
4969 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4970 if (status)
4971 goto out;
4289 status = decode_putfh(&xdr); 4972 status = decode_putfh(&xdr);
4290 if (status) 4973 if (status)
4291 goto out; 4974 goto out;
@@ -4309,6 +4992,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
4309 status = decode_compound_hdr(&xdr, &hdr); 4992 status = decode_compound_hdr(&xdr, &hdr);
4310 if (status) 4993 if (status)
4311 goto out; 4994 goto out;
4995 status = decode_sequence(&xdr, &res->seq_res, rqstp);
4996 if (status)
4997 goto out;
4312 status = decode_putfh(&xdr); 4998 status = decode_putfh(&xdr);
4313 if (status) 4999 if (status)
4314 goto out; 5000 goto out;
@@ -4335,6 +5021,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
4335 status = decode_compound_hdr(&xdr, &hdr); 5021 status = decode_compound_hdr(&xdr, &hdr);
4336 if (status) 5022 if (status)
4337 goto out; 5023 goto out;
5024 status = decode_sequence(&xdr, &res->seq_res, rqstp);
5025 if (status)
5026 goto out;
4338 status = decode_putfh(&xdr); 5027 status = decode_putfh(&xdr);
4339 if (status) 5028 if (status)
4340 goto out; 5029 goto out;
@@ -4349,7 +5038,8 @@ out:
4349/* 5038/*
4350 * FSINFO request 5039 * FSINFO request
4351 */ 5040 */
4352static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) 5041static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
5042 struct nfs4_fsinfo_res *res)
4353{ 5043{
4354 struct xdr_stream xdr; 5044 struct xdr_stream xdr;
4355 struct compound_hdr hdr; 5045 struct compound_hdr hdr;
@@ -4358,16 +5048,19 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
4358 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5048 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4359 status = decode_compound_hdr(&xdr, &hdr); 5049 status = decode_compound_hdr(&xdr, &hdr);
4360 if (!status) 5050 if (!status)
5051 status = decode_sequence(&xdr, &res->seq_res, req);
5052 if (!status)
4361 status = decode_putfh(&xdr); 5053 status = decode_putfh(&xdr);
4362 if (!status) 5054 if (!status)
4363 status = decode_fsinfo(&xdr, fsinfo); 5055 status = decode_fsinfo(&xdr, res->fsinfo);
4364 return status; 5056 return status;
4365} 5057}
4366 5058
4367/* 5059/*
4368 * PATHCONF request 5060 * PATHCONF request
4369 */ 5061 */
4370static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf) 5062static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
5063 struct nfs4_pathconf_res *res)
4371{ 5064{
4372 struct xdr_stream xdr; 5065 struct xdr_stream xdr;
4373 struct compound_hdr hdr; 5066 struct compound_hdr hdr;
@@ -4376,16 +5069,19 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat
4376 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5069 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4377 status = decode_compound_hdr(&xdr, &hdr); 5070 status = decode_compound_hdr(&xdr, &hdr);
4378 if (!status) 5071 if (!status)
5072 status = decode_sequence(&xdr, &res->seq_res, req);
5073 if (!status)
4379 status = decode_putfh(&xdr); 5074 status = decode_putfh(&xdr);
4380 if (!status) 5075 if (!status)
4381 status = decode_pathconf(&xdr, pathconf); 5076 status = decode_pathconf(&xdr, res->pathconf);
4382 return status; 5077 return status;
4383} 5078}
4384 5079
4385/* 5080/*
4386 * STATFS request 5081 * STATFS request
4387 */ 5082 */
4388static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *fsstat) 5083static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
5084 struct nfs4_statfs_res *res)
4389{ 5085{
4390 struct xdr_stream xdr; 5086 struct xdr_stream xdr;
4391 struct compound_hdr hdr; 5087 struct compound_hdr hdr;
@@ -4394,9 +5090,11 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fssta
4394 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5090 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4395 status = decode_compound_hdr(&xdr, &hdr); 5091 status = decode_compound_hdr(&xdr, &hdr);
4396 if (!status) 5092 if (!status)
5093 status = decode_sequence(&xdr, &res->seq_res, req);
5094 if (!status)
4397 status = decode_putfh(&xdr); 5095 status = decode_putfh(&xdr);
4398 if (!status) 5096 if (!status)
4399 status = decode_statfs(&xdr, fsstat); 5097 status = decode_statfs(&xdr, res->fsstat);
4400 return status; 5098 return status;
4401} 5099}
4402 5100
@@ -4410,7 +5108,11 @@ static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4
4410 int status; 5108 int status;
4411 5109
4412 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5110 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4413 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 5111 status = decode_compound_hdr(&xdr, &hdr);
5112 if (status)
5113 goto out;
5114 status = decode_sequence(&xdr, &res->seq_res, req);
5115 if (status)
4414 goto out; 5116 goto out;
4415 if ((status = decode_putfh(&xdr)) != 0) 5117 if ((status = decode_putfh(&xdr)) != 0)
4416 goto out; 5118 goto out;
@@ -4483,7 +5185,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
4483 5185
4484 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 5186 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4485 status = decode_compound_hdr(&xdr, &hdr); 5187 status = decode_compound_hdr(&xdr, &hdr);
4486 if (status != 0) 5188 if (status)
5189 goto out;
5190 status = decode_sequence(&xdr, &res->seq_res, rqstp);
5191 if (status)
4487 goto out; 5192 goto out;
4488 status = decode_putfh(&xdr); 5193 status = decode_putfh(&xdr);
4489 if (status != 0) 5194 if (status != 0)
@@ -4497,7 +5202,8 @@ out:
4497/* 5202/*
4498 * FS_LOCATIONS request 5203 * FS_LOCATIONS request
4499 */ 5204 */
4500static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res) 5205static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
5206 struct nfs4_fs_locations_res *res)
4501{ 5207{
4502 struct xdr_stream xdr; 5208 struct xdr_stream xdr;
4503 struct compound_hdr hdr; 5209 struct compound_hdr hdr;
@@ -4505,18 +5211,113 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
4505 5211
4506 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5212 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
4507 status = decode_compound_hdr(&xdr, &hdr); 5213 status = decode_compound_hdr(&xdr, &hdr);
4508 if (status != 0) 5214 if (status)
5215 goto out;
5216 status = decode_sequence(&xdr, &res->seq_res, req);
5217 if (status)
4509 goto out; 5218 goto out;
4510 if ((status = decode_putfh(&xdr)) != 0) 5219 if ((status = decode_putfh(&xdr)) != 0)
4511 goto out; 5220 goto out;
4512 if ((status = decode_lookup(&xdr)) != 0) 5221 if ((status = decode_lookup(&xdr)) != 0)
4513 goto out; 5222 goto out;
4514 xdr_enter_page(&xdr, PAGE_SIZE); 5223 xdr_enter_page(&xdr, PAGE_SIZE);
4515 status = decode_getfattr(&xdr, &res->fattr, res->server); 5224 status = decode_getfattr(&xdr, &res->fs_locations->fattr,
5225 res->fs_locations->server);
4516out: 5226out:
4517 return status; 5227 return status;
4518} 5228}
4519 5229
5230#if defined(CONFIG_NFS_V4_1)
5231/*
5232 * EXCHANGE_ID request
5233 */
5234static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
5235 void *res)
5236{
5237 struct xdr_stream xdr;
5238 struct compound_hdr hdr;
5239 int status;
5240
5241 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5242 status = decode_compound_hdr(&xdr, &hdr);
5243 if (!status)
5244 status = decode_exchange_id(&xdr, res);
5245 return status;
5246}
5247
5248/*
5249 * a CREATE_SESSION request
5250 */
5251static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
5252 struct nfs41_create_session_res *res)
5253{
5254 struct xdr_stream xdr;
5255 struct compound_hdr hdr;
5256 int status;
5257
5258 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5259 status = decode_compound_hdr(&xdr, &hdr);
5260 if (!status)
5261 status = decode_create_session(&xdr, res);
5262 return status;
5263}
5264
5265/*
5266 * a DESTROY_SESSION request
5267 */
5268static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
5269 void *dummy)
5270{
5271 struct xdr_stream xdr;
5272 struct compound_hdr hdr;
5273 int status;
5274
5275 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5276 status = decode_compound_hdr(&xdr, &hdr);
5277 if (!status)
5278 status = decode_destroy_session(&xdr, dummy);
5279 return status;
5280}
5281
5282/*
5283 * a SEQUENCE request
5284 */
5285static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
5286 struct nfs4_sequence_res *res)
5287{
5288 struct xdr_stream xdr;
5289 struct compound_hdr hdr;
5290 int status;
5291
5292 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5293 status = decode_compound_hdr(&xdr, &hdr);
5294 if (!status)
5295 status = decode_sequence(&xdr, res, rqstp);
5296 return status;
5297}
5298
5299/*
5300 * a GET_LEASE_TIME request
5301 */
5302static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
5303 struct nfs4_get_lease_time_res *res)
5304{
5305 struct xdr_stream xdr;
5306 struct compound_hdr hdr;
5307 int status;
5308
5309 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
5310 status = decode_compound_hdr(&xdr, &hdr);
5311 if (!status)
5312 status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
5313 if (!status)
5314 status = decode_putrootfh(&xdr);
5315 if (!status)
5316 status = decode_fsinfo(&xdr, res->lr_fsinfo);
5317 return status;
5318}
5319#endif /* CONFIG_NFS_V4_1 */
5320
4520__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 5321__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
4521{ 5322{
4522 uint32_t bitmap[2] = {0}; 5323 uint32_t bitmap[2] = {0};
@@ -4686,6 +5487,13 @@ struct rpc_procinfo nfs4_procedures[] = {
4686 PROC(GETACL, enc_getacl, dec_getacl), 5487 PROC(GETACL, enc_getacl, dec_getacl),
4687 PROC(SETACL, enc_setacl, dec_setacl), 5488 PROC(SETACL, enc_setacl, dec_setacl),
4688 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 5489 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
5490#if defined(CONFIG_NFS_V4_1)
5491 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
5492 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
5493 PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
5494 PROC(SEQUENCE, enc_sequence, dec_sequence),
5495 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5496#endif /* CONFIG_NFS_V4_1 */
4689}; 5497};
4690 5498
4691struct rpc_version nfs_version4 = { 5499struct rpc_version nfs_version4 = {
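The nfs4_procedures[] additions wire each new minor-version-1 procedure to the encode/decode pair defined earlier, so the RPC layer can dispatch by procedure number. The PROC() macro in this file presumably expands to a designated initializer along these lines; the table layout and function types below are invented for illustration.

#include <stddef.h>
#include <stdio.h>

typedef int (*toy_encode_t)(void *req, void *args);
typedef int (*toy_decode_t)(void *req, void *res);

struct toy_procinfo {
	const char   *name;
	toy_encode_t  encode;
	toy_decode_t  decode;
};

static int enc_sequence(void *req, void *args) { (void)req; (void)args; return 0; }
static int dec_sequence(void *req, void *res)  { (void)req; (void)res;  return 0; }

#define TOY_PROC(proc, enc, dec) { .name = #proc, .encode = enc, .decode = dec }

static const struct toy_procinfo toy_procedures[] = {
	TOY_PROC(SEQUENCE, enc_sequence, dec_sequence),
};

int main(void)
{
	void *req = NULL, *argres = NULL;
	const struct toy_procinfo *p = &toy_procedures[0];

	printf("%s encode=%d decode=%d\n", p->name,
	       p->encode(req, argres), p->decode(req, argres));
	return 0;
}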
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index e3ed5908820b..8c55b27c0de4 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -92,6 +92,9 @@
92#undef NFSROOT_DEBUG 92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT 93#define NFSDBG_FACILITY NFSDBG_ROOT
94 94
95/* Default port to use if server is not running a portmapper */
96#define NFS_MNT_PORT 627
97
95/* Default path we try to mount. "%s" gets replaced by our IP address */ 98/* Default path we try to mount. "%s" gets replaced by our IP address */
96#define NFS_ROOT "/tftpboot/%s" 99#define NFS_ROOT "/tftpboot/%s"
97 100
@@ -487,6 +490,7 @@ static int __init root_nfs_get_handle(void)
487{ 490{
488 struct nfs_fh fh; 491 struct nfs_fh fh;
489 struct sockaddr_in sin; 492 struct sockaddr_in sin;
493 unsigned int auth_flav_len = 0;
490 struct nfs_mount_request request = { 494 struct nfs_mount_request request = {
491 .sap = (struct sockaddr *)&sin, 495 .sap = (struct sockaddr *)&sin,
492 .salen = sizeof(sin), 496 .salen = sizeof(sin),
@@ -496,6 +500,7 @@ static int __init root_nfs_get_handle(void)
496 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? 500 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
497 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, 501 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
498 .fh = &fh, 502 .fh = &fh,
503 .auth_flav_len = &auth_flav_len,
499 }; 504 };
500 int status; 505 int status;
501 506
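The nfsroot change gives the mount request an auth_flav_len out-parameter: a local counter (ignored afterwards) is passed so the mount client always has somewhere to store the number of security flavors the server returned. A sketch of that out-parameter pattern, with invented struct and function names:

#include <stdio.h>

struct toy_mount_request {
	unsigned int *auth_flav_len;	/* where the flavor count is written */
};

static int toy_mount(struct toy_mount_request *req)
{
	*req->auth_flav_len = 1;	/* pretend the server offered one flavor */
	return 0;
}

int main(void)
{
	unsigned int auth_flav_len = 0;
	struct toy_mount_request req = { .auth_flav_len = &auth_flav_len };

	toy_mount(&req);
	printf("server returned %u auth flavor(s)\n", auth_flav_len);
	return 0;
}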
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ace3c50a8eb..96c4ebfa46f4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -22,6 +22,7 @@
22 22
23#include <asm/system.h> 23#include <asm/system.h>
24 24
25#include "nfs4_fs.h"
25#include "internal.h" 26#include "internal.h"
26#include "iostat.h" 27#include "iostat.h"
27#include "fscache.h" 28#include "fscache.h"
@@ -46,6 +47,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
46 memset(p, 0, sizeof(*p)); 47 memset(p, 0, sizeof(*p));
47 INIT_LIST_HEAD(&p->pages); 48 INIT_LIST_HEAD(&p->pages);
48 p->npages = pagecount; 49 p->npages = pagecount;
50 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
49 if (pagecount <= ARRAY_SIZE(p->page_array)) 51 if (pagecount <= ARRAY_SIZE(p->page_array))
50 p->pagevec = p->page_array; 52 p->pagevec = p->page_array;
51 else { 53 else {
@@ -357,19 +359,25 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
357 struct nfs_readres *resp = &data->res; 359 struct nfs_readres *resp = &data->res;
358 360
359 if (resp->eof || resp->count == argp->count) 361 if (resp->eof || resp->count == argp->count)
360 return; 362 goto out;
361 363
362 /* This is a short read! */ 364 /* This is a short read! */
363 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 365 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
364 /* Has the server at least made some progress? */ 366 /* Has the server at least made some progress? */
365 if (resp->count == 0) 367 if (resp->count == 0)
366 return; 368 goto out;
367 369
368 /* Yes, so retry the read at the end of the data */ 370 /* Yes, so retry the read at the end of the data */
369 argp->offset += resp->count; 371 argp->offset += resp->count;
370 argp->pgbase += resp->count; 372 argp->pgbase += resp->count;
371 argp->count -= resp->count; 373 argp->count -= resp->count;
372 rpc_restart_call(task); 374 nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
375 return;
376out:
377 nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client,
378 &data->res.seq_res);
379 return;
380
373} 381}
374 382
375/* 383/*
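The retry path above now funnels every exit through out:, where the v4.1 sequence slot is released, and a short read that made progress is resubmitted via nfs4_restart_rpc() rather than rpc_restart_call() so the session state is handled as well. A toy model of the decision (restart with the remaining range versus just freeing the slot) follows; the struct and field names are stand-ins.

#include <stdio.h>

struct toy_read {
	unsigned int offset;
	unsigned int count;	/* bytes requested */
	unsigned int got;	/* bytes the server returned */
	int eof;
};

static const char *toy_read_done(struct toy_read *r)
{
	if (r->eof || r->got == r->count || r->got == 0)
		return "free slot";		/* finished, or no progress made */

	r->offset += r->got;			/* retry the remaining tail */
	r->count  -= r->got;
	return "restart rpc";
}

int main(void)
{
	struct toy_read r = { .offset = 0, .count = 4096, .got = 1024, .eof = 0 };

	printf("%s (next offset=%u count=%u)\n",
	       toy_read_done(&r), r.offset, r.count);
	return 0;
}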
@@ -406,7 +414,23 @@ static void nfs_readpage_release_partial(void *calldata)
406 nfs_readdata_release(calldata); 414 nfs_readdata_release(calldata);
407} 415}
408 416
417#if defined(CONFIG_NFS_V4_1)
418void nfs_read_prepare(struct rpc_task *task, void *calldata)
419{
420 struct nfs_read_data *data = calldata;
421
422 if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
423 &data->args.seq_args, &data->res.seq_res,
424 0, task))
425 return;
426 rpc_call_start(task);
427}
428#endif /* CONFIG_NFS_V4_1 */
429
409static const struct rpc_call_ops nfs_read_partial_ops = { 430static const struct rpc_call_ops nfs_read_partial_ops = {
431#if defined(CONFIG_NFS_V4_1)
432 .rpc_call_prepare = nfs_read_prepare,
433#endif /* CONFIG_NFS_V4_1 */
410 .rpc_call_done = nfs_readpage_result_partial, 434 .rpc_call_done = nfs_readpage_result_partial,
411 .rpc_release = nfs_readpage_release_partial, 435 .rpc_release = nfs_readpage_release_partial,
412}; 436};
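nfs_read_prepare() is installed as ->rpc_call_prepare so that, before a read RPC is transmitted, a session slot is reserved via nfs4_setup_sequence(); if no slot is free the task stays queued and rpc_call_start() is only reached once a slot can be taken. A rough user-space model of that gating, with invented names:

#include <stdio.h>

struct toy_slot_table { unsigned int free_slots; };

static int toy_setup_sequence(struct toy_slot_table *tbl)
{
	if (tbl->free_slots == 0)
		return -1;		/* caller stays queued */
	tbl->free_slots--;
	return 0;			/* slot reserved, call may start */
}

static void toy_read_prepare(struct toy_slot_table *tbl)
{
	if (toy_setup_sequence(tbl))
		return;			/* wait until a slot is freed */
	printf("rpc_call_start\n");
}

int main(void)
{
	struct toy_slot_table tbl = { .free_slots = 1 };

	toy_read_prepare(&tbl);		/* starts: one slot available  */
	toy_read_prepare(&tbl);		/* queued: table is now empty  */
	return 0;
}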
@@ -470,6 +494,9 @@ static void nfs_readpage_release_full(void *calldata)
470} 494}
471 495
472static const struct rpc_call_ops nfs_read_full_ops = { 496static const struct rpc_call_ops nfs_read_full_ops = {
497#if defined(CONFIG_NFS_V4_1)
498 .rpc_call_prepare = nfs_read_prepare,
499#endif /* CONFIG_NFS_V4_1 */
473 .rpc_call_done = nfs_readpage_result_full, 500 .rpc_call_done = nfs_readpage_result_full,
474 .rpc_release = nfs_readpage_release_full, 501 .rpc_release = nfs_readpage_release_full,
475}; 502};
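
[Editor's note] The nfs_readpage_retry() hunk above keeps the existing short-read bookkeeping and only changes how the call is restarted (nfs4_restart_rpc) and how the NFSv4.1 session slot is freed on exit. Below is a minimal userspace sketch of that bookkeeping, using made-up stand-ins for the kernel's nfs_readargs/nfs_readres types; it is not the kernel code, just the arithmetic that advances the request window after a partial read, stopping on EOF, a full read, or zero progress.

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's read args/results; only the
 * fields used by the retry arithmetic are modelled here. */
struct read_args {
	unsigned long long offset;	/* file offset of the next read */
	unsigned int pgbase;		/* offset into the first page */
	unsigned int count;		/* bytes still requested */
};

struct read_res {
	unsigned int count;		/* bytes the server actually returned */
	int eof;			/* server reached end of file */
};

/* Returns 1 if the request should be resent for the remaining bytes,
 * 0 if it is complete (full read, EOF, or no progress to build on). */
static int advance_after_short_read(struct read_args *argp, struct read_res *resp)
{
	if (resp->eof || resp->count == argp->count)
		return 0;		/* nothing left to do */
	if (resp->count == 0)
		return 0;		/* no progress; retrying would loop forever */

	/* Retry the read at the end of the data already received. */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count  -= resp->count;
	return 1;
}

int main(void)
{
	struct read_args args = { .offset = 4096, .pgbase = 0, .count = 16384 };
	struct read_res res = { .count = 4096, .eof = 0 };

	/* Each pass resends only the bytes the server has not yet returned. */
	while (advance_after_short_read(&args, &res))
		printf("resend: offset=%llu pgbase=%u count=%u\n",
		       args.offset, args.pgbase, args.count);
	return 0;
}
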
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 26127b69a275..0b4cbdc60abd 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -42,6 +42,8 @@
42#include <linux/smp_lock.h> 42#include <linux/smp_lock.h>
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/mount.h> 44#include <linux/mount.h>
45#include <linux/mnt_namespace.h>
46#include <linux/namei.h>
45#include <linux/nfs_idmap.h> 47#include <linux/nfs_idmap.h>
46#include <linux/vfs.h> 48#include <linux/vfs.h>
47#include <linux/inet.h> 49#include <linux/inet.h>
@@ -90,6 +92,7 @@ enum {
90 Opt_mountport, 92 Opt_mountport,
91 Opt_mountvers, 93 Opt_mountvers,
92 Opt_nfsvers, 94 Opt_nfsvers,
95 Opt_minorversion,
93 96
94 /* Mount options that take string arguments */ 97 /* Mount options that take string arguments */
95 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 98 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
@@ -139,22 +142,23 @@ static const match_table_t nfs_mount_option_tokens = {
139 { Opt_fscache_uniq, "fsc=%s" }, 142 { Opt_fscache_uniq, "fsc=%s" },
140 { Opt_nofscache, "nofsc" }, 143 { Opt_nofscache, "nofsc" },
141 144
142 { Opt_port, "port=%u" }, 145 { Opt_port, "port=%s" },
143 { Opt_rsize, "rsize=%u" }, 146 { Opt_rsize, "rsize=%s" },
144 { Opt_wsize, "wsize=%u" }, 147 { Opt_wsize, "wsize=%s" },
145 { Opt_bsize, "bsize=%u" }, 148 { Opt_bsize, "bsize=%s" },
146 { Opt_timeo, "timeo=%u" }, 149 { Opt_timeo, "timeo=%s" },
147 { Opt_retrans, "retrans=%u" }, 150 { Opt_retrans, "retrans=%s" },
148 { Opt_acregmin, "acregmin=%u" }, 151 { Opt_acregmin, "acregmin=%s" },
149 { Opt_acregmax, "acregmax=%u" }, 152 { Opt_acregmax, "acregmax=%s" },
150 { Opt_acdirmin, "acdirmin=%u" }, 153 { Opt_acdirmin, "acdirmin=%s" },
151 { Opt_acdirmax, "acdirmax=%u" }, 154 { Opt_acdirmax, "acdirmax=%s" },
152 { Opt_actimeo, "actimeo=%u" }, 155 { Opt_actimeo, "actimeo=%s" },
153 { Opt_namelen, "namlen=%u" }, 156 { Opt_namelen, "namlen=%s" },
154 { Opt_mountport, "mountport=%u" }, 157 { Opt_mountport, "mountport=%s" },
155 { Opt_mountvers, "mountvers=%u" }, 158 { Opt_mountvers, "mountvers=%s" },
156 { Opt_nfsvers, "nfsvers=%u" }, 159 { Opt_nfsvers, "nfsvers=%s" },
157 { Opt_nfsvers, "vers=%u" }, 160 { Opt_nfsvers, "vers=%s" },
161 { Opt_minorversion, "minorversion=%u" },
158 162
159 { Opt_sec, "sec=%s" }, 163 { Opt_sec, "sec=%s" },
160 { Opt_proto, "proto=%s" }, 164 { Opt_proto, "proto=%s" },
@@ -270,10 +274,14 @@ static const struct super_operations nfs_sops = {
270#ifdef CONFIG_NFS_V4 274#ifdef CONFIG_NFS_V4
271static int nfs4_get_sb(struct file_system_type *fs_type, 275static int nfs4_get_sb(struct file_system_type *fs_type,
272 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 276 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
277static int nfs4_remote_get_sb(struct file_system_type *fs_type,
278 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
273static int nfs4_xdev_get_sb(struct file_system_type *fs_type, 279static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
274 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 280 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
275static int nfs4_referral_get_sb(struct file_system_type *fs_type, 281static int nfs4_referral_get_sb(struct file_system_type *fs_type,
276 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 282 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
283static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
284 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
277static void nfs4_kill_super(struct super_block *sb); 285static void nfs4_kill_super(struct super_block *sb);
278 286
279static struct file_system_type nfs4_fs_type = { 287static struct file_system_type nfs4_fs_type = {
@@ -284,6 +292,14 @@ static struct file_system_type nfs4_fs_type = {
284 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 292 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
285}; 293};
286 294
295static struct file_system_type nfs4_remote_fs_type = {
296 .owner = THIS_MODULE,
297 .name = "nfs4",
298 .get_sb = nfs4_remote_get_sb,
299 .kill_sb = nfs4_kill_super,
300 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
301};
302
287struct file_system_type nfs4_xdev_fs_type = { 303struct file_system_type nfs4_xdev_fs_type = {
288 .owner = THIS_MODULE, 304 .owner = THIS_MODULE,
289 .name = "nfs4", 305 .name = "nfs4",
@@ -292,6 +308,14 @@ struct file_system_type nfs4_xdev_fs_type = {
292 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 308 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
293}; 309};
294 310
311static struct file_system_type nfs4_remote_referral_fs_type = {
312 .owner = THIS_MODULE,
313 .name = "nfs4",
314 .get_sb = nfs4_remote_referral_get_sb,
315 .kill_sb = nfs4_kill_super,
316 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
317};
318
295struct file_system_type nfs4_referral_fs_type = { 319struct file_system_type nfs4_referral_fs_type = {
296 .owner = THIS_MODULE, 320 .owner = THIS_MODULE,
297 .name = "nfs4", 321 .name = "nfs4",
@@ -514,7 +538,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
514 const char *nostr; 538 const char *nostr;
515 } nfs_info[] = { 539 } nfs_info[] = {
516 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 540 { NFS_MOUNT_SOFT, ",soft", ",hard" },
517 { NFS_MOUNT_INTR, ",intr", ",nointr" },
518 { NFS_MOUNT_POSIX, ",posix", "" }, 541 { NFS_MOUNT_POSIX, ",posix", "" },
519 { NFS_MOUNT_NOCTO, ",nocto", "" }, 542 { NFS_MOUNT_NOCTO, ",nocto", "" },
520 { NFS_MOUNT_NOAC, ",noac", "" }, 543 { NFS_MOUNT_NOAC, ",noac", "" },
@@ -943,11 +966,6 @@ static int nfs_parse_security_flavors(char *value,
943 return 1; 966 return 1;
944} 967}
945 968
946static void nfs_parse_invalid_value(const char *option)
947{
948 dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option);
949}
950
951/* 969/*
952 * Error-check and convert a string of mount options from user space into 970 * Error-check and convert a string of mount options from user space into
953 * a data structure. The whole mount string is processed; bad options are 971 * a data structure. The whole mount string is processed; bad options are
@@ -958,7 +976,7 @@ static int nfs_parse_mount_options(char *raw,
958 struct nfs_parsed_mount_data *mnt) 976 struct nfs_parsed_mount_data *mnt)
959{ 977{
960 char *p, *string, *secdata; 978 char *p, *string, *secdata;
961 int rc, sloppy = 0, errors = 0; 979 int rc, sloppy = 0, invalid_option = 0;
962 980
963 if (!raw) { 981 if (!raw) {
964 dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); 982 dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -982,7 +1000,9 @@ static int nfs_parse_mount_options(char *raw,
982 1000
983 while ((p = strsep(&raw, ",")) != NULL) { 1001 while ((p = strsep(&raw, ",")) != NULL) {
984 substring_t args[MAX_OPT_ARGS]; 1002 substring_t args[MAX_OPT_ARGS];
985 int option, token; 1003 unsigned long option;
1004 int int_option;
1005 int token;
986 1006
987 if (!*p) 1007 if (!*p)
988 continue; 1008 continue;
@@ -1091,114 +1111,156 @@ static int nfs_parse_mount_options(char *raw,
1091 * options that take numeric values 1111 * options that take numeric values
1092 */ 1112 */
1093 case Opt_port: 1113 case Opt_port:
1094 if (match_int(args, &option) || 1114 string = match_strdup(args);
1095 option < 0 || option > USHORT_MAX) { 1115 if (string == NULL)
1096 errors++; 1116 goto out_nomem;
1097 nfs_parse_invalid_value("port"); 1117 rc = strict_strtoul(string, 10, &option);
1098 } else 1118 kfree(string);
1099 mnt->nfs_server.port = option; 1119 if (rc != 0 || option > USHORT_MAX)
1120 goto out_invalid_value;
1121 mnt->nfs_server.port = option;
1100 break; 1122 break;
1101 case Opt_rsize: 1123 case Opt_rsize:
1102 if (match_int(args, &option) || option < 0) { 1124 string = match_strdup(args);
1103 errors++; 1125 if (string == NULL)
1104 nfs_parse_invalid_value("rsize"); 1126 goto out_nomem;
1105 } else 1127 rc = strict_strtoul(string, 10, &option);
1106 mnt->rsize = option; 1128 kfree(string);
1129 if (rc != 0)
1130 goto out_invalid_value;
1131 mnt->rsize = option;
1107 break; 1132 break;
1108 case Opt_wsize: 1133 case Opt_wsize:
1109 if (match_int(args, &option) || option < 0) { 1134 string = match_strdup(args);
1110 errors++; 1135 if (string == NULL)
1111 nfs_parse_invalid_value("wsize"); 1136 goto out_nomem;
1112 } else 1137 rc = strict_strtoul(string, 10, &option);
1113 mnt->wsize = option; 1138 kfree(string);
1139 if (rc != 0)
1140 goto out_invalid_value;
1141 mnt->wsize = option;
1114 break; 1142 break;
1115 case Opt_bsize: 1143 case Opt_bsize:
1116 if (match_int(args, &option) || option < 0) { 1144 string = match_strdup(args);
1117 errors++; 1145 if (string == NULL)
1118 nfs_parse_invalid_value("bsize"); 1146 goto out_nomem;
1119 } else 1147 rc = strict_strtoul(string, 10, &option);
1120 mnt->bsize = option; 1148 kfree(string);
1149 if (rc != 0)
1150 goto out_invalid_value;
1151 mnt->bsize = option;
1121 break; 1152 break;
1122 case Opt_timeo: 1153 case Opt_timeo:
1123 if (match_int(args, &option) || option <= 0) { 1154 string = match_strdup(args);
1124 errors++; 1155 if (string == NULL)
1125 nfs_parse_invalid_value("timeo"); 1156 goto out_nomem;
1126 } else 1157 rc = strict_strtoul(string, 10, &option);
1127 mnt->timeo = option; 1158 kfree(string);
1159 if (rc != 0 || option == 0)
1160 goto out_invalid_value;
1161 mnt->timeo = option;
1128 break; 1162 break;
1129 case Opt_retrans: 1163 case Opt_retrans:
1130 if (match_int(args, &option) || option <= 0) { 1164 string = match_strdup(args);
1131 errors++; 1165 if (string == NULL)
1132 nfs_parse_invalid_value("retrans"); 1166 goto out_nomem;
1133 } else 1167 rc = strict_strtoul(string, 10, &option);
1134 mnt->retrans = option; 1168 kfree(string);
1169 if (rc != 0 || option == 0)
1170 goto out_invalid_value;
1171 mnt->retrans = option;
1135 break; 1172 break;
1136 case Opt_acregmin: 1173 case Opt_acregmin:
1137 if (match_int(args, &option) || option < 0) { 1174 string = match_strdup(args);
1138 errors++; 1175 if (string == NULL)
1139 nfs_parse_invalid_value("acregmin"); 1176 goto out_nomem;
1140 } else 1177 rc = strict_strtoul(string, 10, &option);
1141 mnt->acregmin = option; 1178 kfree(string);
1179 if (rc != 0)
1180 goto out_invalid_value;
1181 mnt->acregmin = option;
1142 break; 1182 break;
1143 case Opt_acregmax: 1183 case Opt_acregmax:
1144 if (match_int(args, &option) || option < 0) { 1184 string = match_strdup(args);
1145 errors++; 1185 if (string == NULL)
1146 nfs_parse_invalid_value("acregmax"); 1186 goto out_nomem;
1147 } else 1187 rc = strict_strtoul(string, 10, &option);
1148 mnt->acregmax = option; 1188 kfree(string);
1189 if (rc != 0)
1190 goto out_invalid_value;
1191 mnt->acregmax = option;
1149 break; 1192 break;
1150 case Opt_acdirmin: 1193 case Opt_acdirmin:
1151 if (match_int(args, &option) || option < 0) { 1194 string = match_strdup(args);
1152 errors++; 1195 if (string == NULL)
1153 nfs_parse_invalid_value("acdirmin"); 1196 goto out_nomem;
1154 } else 1197 rc = strict_strtoul(string, 10, &option);
1155 mnt->acdirmin = option; 1198 kfree(string);
1199 if (rc != 0)
1200 goto out_invalid_value;
1201 mnt->acdirmin = option;
1156 break; 1202 break;
1157 case Opt_acdirmax: 1203 case Opt_acdirmax:
1158 if (match_int(args, &option) || option < 0) { 1204 string = match_strdup(args);
1159 errors++; 1205 if (string == NULL)
1160 nfs_parse_invalid_value("acdirmax"); 1206 goto out_nomem;
1161 } else 1207 rc = strict_strtoul(string, 10, &option);
1162 mnt->acdirmax = option; 1208 kfree(string);
1209 if (rc != 0)
1210 goto out_invalid_value;
1211 mnt->acdirmax = option;
1163 break; 1212 break;
1164 case Opt_actimeo: 1213 case Opt_actimeo:
1165 if (match_int(args, &option) || option < 0) { 1214 string = match_strdup(args);
1166 errors++; 1215 if (string == NULL)
1167 nfs_parse_invalid_value("actimeo"); 1216 goto out_nomem;
1168 } else 1217 rc = strict_strtoul(string, 10, &option);
1169 mnt->acregmin = mnt->acregmax = 1218 kfree(string);
1170 mnt->acdirmin = mnt->acdirmax = option; 1219 if (rc != 0)
1220 goto out_invalid_value;
1221 mnt->acregmin = mnt->acregmax =
1222 mnt->acdirmin = mnt->acdirmax = option;
1171 break; 1223 break;
1172 case Opt_namelen: 1224 case Opt_namelen:
1173 if (match_int(args, &option) || option < 0) { 1225 string = match_strdup(args);
1174 errors++; 1226 if (string == NULL)
1175 nfs_parse_invalid_value("namlen"); 1227 goto out_nomem;
1176 } else 1228 rc = strict_strtoul(string, 10, &option);
1177 mnt->namlen = option; 1229 kfree(string);
1230 if (rc != 0)
1231 goto out_invalid_value;
1232 mnt->namlen = option;
1178 break; 1233 break;
1179 case Opt_mountport: 1234 case Opt_mountport:
1180 if (match_int(args, &option) || 1235 string = match_strdup(args);
1181 option < 0 || option > USHORT_MAX) { 1236 if (string == NULL)
1182 errors++; 1237 goto out_nomem;
1183 nfs_parse_invalid_value("mountport"); 1238 rc = strict_strtoul(string, 10, &option);
1184 } else 1239 kfree(string);
1185 mnt->mount_server.port = option; 1240 if (rc != 0 || option > USHORT_MAX)
1241 goto out_invalid_value;
1242 mnt->mount_server.port = option;
1186 break; 1243 break;
1187 case Opt_mountvers: 1244 case Opt_mountvers:
1188 if (match_int(args, &option) || 1245 string = match_strdup(args);
1246 if (string == NULL)
1247 goto out_nomem;
1248 rc = strict_strtoul(string, 10, &option);
1249 kfree(string);
1250 if (rc != 0 ||
1189 option < NFS_MNT_VERSION || 1251 option < NFS_MNT_VERSION ||
1190 option > NFS_MNT3_VERSION) { 1252 option > NFS_MNT3_VERSION)
1191 errors++; 1253 goto out_invalid_value;
1192 nfs_parse_invalid_value("mountvers"); 1254 mnt->mount_server.version = option;
1193 } else
1194 mnt->mount_server.version = option;
1195 break; 1255 break;
1196 case Opt_nfsvers: 1256 case Opt_nfsvers:
1197 if (match_int(args, &option)) { 1257 string = match_strdup(args);
1198 errors++; 1258 if (string == NULL)
1199 nfs_parse_invalid_value("nfsvers"); 1259 goto out_nomem;
1200 break; 1260 rc = strict_strtoul(string, 10, &option);
1201 } 1261 kfree(string);
1262 if (rc != 0)
1263 goto out_invalid_value;
1202 switch (option) { 1264 switch (option) {
1203 case NFS2_VERSION: 1265 case NFS2_VERSION:
1204 mnt->flags &= ~NFS_MOUNT_VER3; 1266 mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1207,10 +1269,16 @@ static int nfs_parse_mount_options(char *raw,
1207 mnt->flags |= NFS_MOUNT_VER3; 1269 mnt->flags |= NFS_MOUNT_VER3;
1208 break; 1270 break;
1209 default: 1271 default:
1210 errors++; 1272 goto out_invalid_value;
1211 nfs_parse_invalid_value("nfsvers");
1212 } 1273 }
1213 break; 1274 break;
1275 case Opt_minorversion:
1276 if (match_int(args, &int_option))
1277 return 0;
1278 if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION)
1279 return 0;
1280 mnt->minorversion = int_option;
1281 break;
1214 1282
1215 /* 1283 /*
1216 * options that take text values 1284 * options that take text values
@@ -1222,9 +1290,9 @@ static int nfs_parse_mount_options(char *raw,
1222 rc = nfs_parse_security_flavors(string, mnt); 1290 rc = nfs_parse_security_flavors(string, mnt);
1223 kfree(string); 1291 kfree(string);
1224 if (!rc) { 1292 if (!rc) {
1225 errors++;
1226 dfprintk(MOUNT, "NFS: unrecognized " 1293 dfprintk(MOUNT, "NFS: unrecognized "
1227 "security flavor\n"); 1294 "security flavor\n");
1295 return 0;
1228 } 1296 }
1229 break; 1297 break;
1230 case Opt_proto: 1298 case Opt_proto:
@@ -1238,23 +1306,25 @@ static int nfs_parse_mount_options(char *raw,
1238 case Opt_xprt_udp: 1306 case Opt_xprt_udp:
1239 mnt->flags &= ~NFS_MOUNT_TCP; 1307 mnt->flags &= ~NFS_MOUNT_TCP;
1240 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1308 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1309 kfree(string);
1241 break; 1310 break;
1242 case Opt_xprt_tcp: 1311 case Opt_xprt_tcp:
1243 mnt->flags |= NFS_MOUNT_TCP; 1312 mnt->flags |= NFS_MOUNT_TCP;
1244 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1313 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1314 kfree(string);
1245 break; 1315 break;
1246 case Opt_xprt_rdma: 1316 case Opt_xprt_rdma:
1247 /* vector side protocols to TCP */ 1317 /* vector side protocols to TCP */
1248 mnt->flags |= NFS_MOUNT_TCP; 1318 mnt->flags |= NFS_MOUNT_TCP;
1249 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1319 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1250 xprt_load_transport(string); 1320 xprt_load_transport(string);
1321 kfree(string);
1251 break; 1322 break;
1252 default: 1323 default:
1253 errors++;
1254 dfprintk(MOUNT, "NFS: unrecognized " 1324 dfprintk(MOUNT, "NFS: unrecognized "
1255 "transport protocol\n"); 1325 "transport protocol\n");
1326 return 0;
1256 } 1327 }
1257 kfree(string);
1258 break; 1328 break;
1259 case Opt_mountproto: 1329 case Opt_mountproto:
1260 string = match_strdup(args); 1330 string = match_strdup(args);
@@ -1273,9 +1343,9 @@ static int nfs_parse_mount_options(char *raw,
1273 break; 1343 break;
1274 case Opt_xprt_rdma: /* not used for side protocols */ 1344 case Opt_xprt_rdma: /* not used for side protocols */
1275 default: 1345 default:
1276 errors++;
1277 dfprintk(MOUNT, "NFS: unrecognized " 1346 dfprintk(MOUNT, "NFS: unrecognized "
1278 "transport protocol\n"); 1347 "transport protocol\n");
1348 return 0;
1279 } 1349 }
1280 break; 1350 break;
1281 case Opt_addr: 1351 case Opt_addr:
@@ -1331,9 +1401,9 @@ static int nfs_parse_mount_options(char *raw,
1331 mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; 1401 mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
1332 break; 1402 break;
1333 default: 1403 default:
1334 errors++;
1335 dfprintk(MOUNT, "NFS: invalid " 1404 dfprintk(MOUNT, "NFS: invalid "
1336 "lookupcache argument\n"); 1405 "lookupcache argument\n");
1406 return 0;
1337 }; 1407 };
1338 break; 1408 break;
1339 1409
@@ -1351,20 +1421,20 @@ static int nfs_parse_mount_options(char *raw,
1351 break; 1421 break;
1352 1422
1353 default: 1423 default:
1354 errors++; 1424 invalid_option = 1;
1355 dfprintk(MOUNT, "NFS: unrecognized mount option " 1425 dfprintk(MOUNT, "NFS: unrecognized mount option "
1356 "'%s'\n", p); 1426 "'%s'\n", p);
1357 } 1427 }
1358 } 1428 }
1359 1429
1360 if (errors > 0) { 1430 if (!sloppy && invalid_option)
1361 dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", 1431 return 0;
1362 errors, (errors == 1 ? "" : "s")); 1432
1363 if (!sloppy)
1364 return 0;
1365 }
1366 return 1; 1433 return 1;
1367 1434
1435out_invalid_value:
1436 printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p);
1437 return 0;
1368out_nomem: 1438out_nomem:
1369 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1439 printk(KERN_INFO "NFS: not enough memory to parse option\n");
1370 return 0; 1440 return 0;
@@ -1381,6 +1451,7 @@ out_security_failure:
1381static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1451static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1382 struct nfs_fh *root_fh) 1452 struct nfs_fh *root_fh)
1383{ 1453{
1454 unsigned int auth_flavor_len = 0;
1384 struct nfs_mount_request request = { 1455 struct nfs_mount_request request = {
1385 .sap = (struct sockaddr *) 1456 .sap = (struct sockaddr *)
1386 &args->mount_server.address, 1457 &args->mount_server.address,
@@ -1388,6 +1459,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1388 .protocol = args->mount_server.protocol, 1459 .protocol = args->mount_server.protocol,
1389 .fh = root_fh, 1460 .fh = root_fh,
1390 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1461 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1462 .auth_flav_len = &auth_flavor_len,
1391 }; 1463 };
1392 int status; 1464 int status;
1393 1465
@@ -2240,6 +2312,11 @@ static void nfs4_fill_super(struct super_block *sb)
2240 nfs_initialise_sb(sb); 2312 nfs_initialise_sb(sb);
2241} 2313}
2242 2314
2315static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2316{
2317 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
2318}
2319
2243/* 2320/*
2244 * Validate NFSv4 mount options 2321 * Validate NFSv4 mount options
2245 */ 2322 */
@@ -2263,6 +2340,7 @@ static int nfs4_validate_mount_data(void *options,
2263 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ 2340 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */
2264 args->auth_flavors[0] = RPC_AUTH_UNIX; 2341 args->auth_flavors[0] = RPC_AUTH_UNIX;
2265 args->auth_flavor_len = 0; 2342 args->auth_flavor_len = 0;
2343 args->minorversion = 0;
2266 2344
2267 switch (data->version) { 2345 switch (data->version) {
2268 case 1: 2346 case 1:
@@ -2336,6 +2414,8 @@ static int nfs4_validate_mount_data(void *options,
2336 2414
2337 nfs_validate_transport_protocol(args); 2415 nfs_validate_transport_protocol(args);
2338 2416
2417 nfs4_validate_mount_flags(args);
2418
2339 if (args->auth_flavor_len > 1) 2419 if (args->auth_flavor_len > 1)
2340 goto out_inval_auth; 2420 goto out_inval_auth;
2341 2421
@@ -2375,12 +2455,12 @@ out_no_client_address:
2375} 2455}
2376 2456
2377/* 2457/*
2378 * Get the superblock for an NFS4 mountpoint 2458 * Get the superblock for the NFS4 root partition
2379 */ 2459 */
2380static int nfs4_get_sb(struct file_system_type *fs_type, 2460static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2381 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2461 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
2382{ 2462{
2383 struct nfs_parsed_mount_data *data; 2463 struct nfs_parsed_mount_data *data = raw_data;
2384 struct super_block *s; 2464 struct super_block *s;
2385 struct nfs_server *server; 2465 struct nfs_server *server;
2386 struct nfs_fh *mntfh; 2466 struct nfs_fh *mntfh;
@@ -2391,18 +2471,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2391 }; 2471 };
2392 int error = -ENOMEM; 2472 int error = -ENOMEM;
2393 2473
2394 data = kzalloc(sizeof(*data), GFP_KERNEL);
2395 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); 2474 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
2396 if (data == NULL || mntfh == NULL) 2475 if (data == NULL || mntfh == NULL)
2397 goto out_free_fh; 2476 goto out_free_fh;
2398 2477
2399 security_init_mnt_opts(&data->lsm_opts); 2478 security_init_mnt_opts(&data->lsm_opts);
2400 2479
2401 /* Validate the mount data */
2402 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2403 if (error < 0)
2404 goto out;
2405
2406 /* Get a volume representation */ 2480 /* Get a volume representation */
2407 server = nfs4_create_server(data, mntfh); 2481 server = nfs4_create_server(data, mntfh);
2408 if (IS_ERR(server)) { 2482 if (IS_ERR(server)) {
@@ -2415,7 +2489,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2415 compare_super = NULL; 2489 compare_super = NULL;
2416 2490
2417 /* Get a superblock - note that we may end up sharing one that already exists */ 2491 /* Get a superblock - note that we may end up sharing one that already exists */
2418 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2492 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2419 if (IS_ERR(s)) { 2493 if (IS_ERR(s)) {
2420 error = PTR_ERR(s); 2494 error = PTR_ERR(s);
2421 goto out_free; 2495 goto out_free;
@@ -2452,14 +2526,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2452 error = 0; 2526 error = 0;
2453 2527
2454out: 2528out:
2455 kfree(data->client_address);
2456 kfree(data->nfs_server.export_path);
2457 kfree(data->nfs_server.hostname);
2458 kfree(data->fscache_uniq);
2459 security_free_mnt_opts(&data->lsm_opts); 2529 security_free_mnt_opts(&data->lsm_opts);
2460out_free_fh: 2530out_free_fh:
2461 kfree(mntfh); 2531 kfree(mntfh);
2462 kfree(data);
2463 return error; 2532 return error;
2464 2533
2465out_free: 2534out_free:
@@ -2473,16 +2542,137 @@ error_splat_super:
2473 goto out; 2542 goto out;
2474} 2543}
2475 2544
2545static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2546 int flags, void *data, const char *hostname)
2547{
2548 struct vfsmount *root_mnt;
2549 char *root_devname;
2550 size_t len;
2551
2552 len = strlen(hostname) + 3;
2553 root_devname = kmalloc(len, GFP_KERNEL);
2554 if (root_devname == NULL)
2555 return ERR_PTR(-ENOMEM);
2556 snprintf(root_devname, len, "%s:/", hostname);
2557 root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
2558 kfree(root_devname);
2559 return root_mnt;
2560}
2561
2562static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2563{
2564 char *page = (char *) __get_free_page(GFP_KERNEL);
2565 char *devname, *tmp;
2566
2567 if (page == NULL)
2568 return;
2569 devname = nfs_path(path->mnt->mnt_devname,
2570 path->mnt->mnt_root, path->dentry,
2571 page, PAGE_SIZE);
2572 if (devname == NULL)
2573 goto out_freepage;
2574 tmp = kstrdup(devname, GFP_KERNEL);
2575 if (tmp == NULL)
2576 goto out_freepage;
2577 kfree(mnt->mnt_devname);
2578 mnt->mnt_devname = tmp;
2579out_freepage:
2580 free_page((unsigned long)page);
2581}
2582
2583static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2584 const char *export_path, struct vfsmount *mnt_target)
2585{
2586 struct mnt_namespace *ns_private;
2587 struct nameidata nd;
2588 struct super_block *s;
2589 int ret;
2590
2591 ns_private = create_mnt_ns(root_mnt);
2592 ret = PTR_ERR(ns_private);
2593 if (IS_ERR(ns_private))
2594 goto out_mntput;
2595
2596 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2597 export_path, LOOKUP_FOLLOW, &nd);
2598
2599 put_mnt_ns(ns_private);
2600
2601 if (ret != 0)
2602 goto out_err;
2603
2604 s = nd.path.mnt->mnt_sb;
2605 atomic_inc(&s->s_active);
2606 mnt_target->mnt_sb = s;
2607 mnt_target->mnt_root = dget(nd.path.dentry);
2608
2609 /* Correct the device pathname */
2610 nfs_fix_devname(&nd.path, mnt_target);
2611
2612 path_put(&nd.path);
2613 down_write(&s->s_umount);
2614 return 0;
2615out_mntput:
2616 mntput(root_mnt);
2617out_err:
2618 return ret;
2619}
2620
2621/*
2622 * Get the superblock for an NFS4 mountpoint
2623 */
2624static int nfs4_get_sb(struct file_system_type *fs_type,
2625 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
2626{
2627 struct nfs_parsed_mount_data *data;
2628 char *export_path;
2629 struct vfsmount *root_mnt;
2630 int error = -ENOMEM;
2631
2632 data = kzalloc(sizeof(*data), GFP_KERNEL);
2633 if (data == NULL)
2634 goto out_free_data;
2635
2636 /* Validate the mount data */
2637 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2638 if (error < 0)
2639 goto out;
2640
2641 export_path = data->nfs_server.export_path;
2642 data->nfs_server.export_path = "/";
2643 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
2644 data->nfs_server.hostname);
2645 data->nfs_server.export_path = export_path;
2646
2647 error = PTR_ERR(root_mnt);
2648 if (IS_ERR(root_mnt))
2649 goto out;
2650
2651 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2652
2653out:
2654 kfree(data->client_address);
2655 kfree(data->nfs_server.export_path);
2656 kfree(data->nfs_server.hostname);
2657 kfree(data->fscache_uniq);
2658out_free_data:
2659 kfree(data);
2660 dprintk("<-- nfs4_get_sb() = %d%s\n", error,
2661 error != 0 ? " [error]" : "");
2662 return error;
2663}
2664
2476static void nfs4_kill_super(struct super_block *sb) 2665static void nfs4_kill_super(struct super_block *sb)
2477{ 2666{
2478 struct nfs_server *server = NFS_SB(sb); 2667 struct nfs_server *server = NFS_SB(sb);
2479 2668
2669 dprintk("--> %s\n", __func__);
2480 nfs_super_return_all_delegations(sb); 2670 nfs_super_return_all_delegations(sb);
2481 kill_anon_super(sb); 2671 kill_anon_super(sb);
2482
2483 nfs4_renewd_prepare_shutdown(server); 2672 nfs4_renewd_prepare_shutdown(server);
2484 nfs_fscache_release_super_cookie(sb); 2673 nfs_fscache_release_super_cookie(sb);
2485 nfs_free_server(server); 2674 nfs_free_server(server);
2675 dprintk("<-- %s\n", __func__);
2486} 2676}
2487 2677
2488/* 2678/*
@@ -2568,12 +2758,9 @@ error_splat_super:
2568 return error; 2758 return error;
2569} 2759}
2570 2760
2571/* 2761static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2572 * Create an NFS4 server record on referral traversal 2762 int flags, const char *dev_name, void *raw_data,
2573 */ 2763 struct vfsmount *mnt)
2574static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2575 const char *dev_name, void *raw_data,
2576 struct vfsmount *mnt)
2577{ 2764{
2578 struct nfs_clone_mount *data = raw_data; 2765 struct nfs_clone_mount *data = raw_data;
2579 struct super_block *s; 2766 struct super_block *s;
@@ -2652,4 +2839,36 @@ error_splat_super:
2652 return error; 2839 return error;
2653} 2840}
2654 2841
2842/*
2843 * Create an NFS4 server record on referral traversal
2844 */
2845static int nfs4_referral_get_sb(struct file_system_type *fs_type,
2846 int flags, const char *dev_name, void *raw_data,
2847 struct vfsmount *mnt)
2848{
2849 struct nfs_clone_mount *data = raw_data;
2850 char *export_path;
2851 struct vfsmount *root_mnt;
2852 int error;
2853
2854 dprintk("--> nfs4_referral_get_sb()\n");
2855
2856 export_path = data->mnt_path;
2857 data->mnt_path = "/";
2858
2859 root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
2860 flags, data, data->hostname);
2861 data->mnt_path = export_path;
2862
2863 error = PTR_ERR(root_mnt);
2864 if (IS_ERR(root_mnt))
2865 goto out;
2866
2867 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2868out:
2869 dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error,
2870 error != 0 ? " [error]" : "");
2871 return error;
2872}
2873
2655#endif /* CONFIG_NFS_V4 */ 2874#endif /* CONFIG_NFS_V4 */
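
[Editor's note] The fs/nfs/super.c hunks above switch the numeric mount options from match_int() (with "%u" tokens) to match_strdup() (with "%s" tokens) followed by strict_strtoul(), so negative or malformed values are rejected instead of being squeezed through a signed int. strict_strtoul() is kernel-only, so the sketch below uses plain strtoul() with the equivalent checks; the helper name and sample values are illustrative, not taken from the patch.

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse an unsigned decimal option value, rejecting empty strings,
 * trailing junk, and anything above 'max'.  Returns 0 on success. */
static int parse_uint_opt(const char *str, unsigned long max, unsigned long *out)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(str, &end, 10);
	/* strtoul() accepts a leading '-' by wrapping the value, so the
	 * range check below also catches negative input. */
	if (errno || end == str || *end != '\0' || val > max)
		return -EINVAL;
	*out = val;
	return 0;
}

int main(void)
{
	const char *samples[] = { "2049", "65536", "-1", "20q49" };
	unsigned long port;

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		if (parse_uint_opt(samples[i], USHRT_MAX, &port) == 0)
			printf("port=%s -> %lu\n", samples[i], port);
		else
			printf("port=%s -> rejected\n", samples[i]);
	}
	return 0;
}
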
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index ecc295347775..1064c91ae810 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -15,6 +15,7 @@
15#include <linux/wait.h> 15#include <linux/wait.h>
16 16
17#include "internal.h" 17#include "internal.h"
18#include "nfs4_fs.h"
18 19
19struct nfs_unlinkdata { 20struct nfs_unlinkdata {
20 struct hlist_node list; 21 struct hlist_node list;
@@ -82,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
82 struct inode *dir = data->dir; 83 struct inode *dir = data->dir;
83 84
84 if (!NFS_PROTO(dir)->unlink_done(task, dir)) 85 if (!NFS_PROTO(dir)->unlink_done(task, dir))
85 rpc_restart_call(task); 86 nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client);
86} 87}
87 88
88/** 89/**
@@ -102,9 +103,25 @@ static void nfs_async_unlink_release(void *calldata)
102 nfs_sb_deactive(sb); 103 nfs_sb_deactive(sb);
103} 104}
104 105
106#if defined(CONFIG_NFS_V4_1)
107void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
108{
109 struct nfs_unlinkdata *data = calldata;
110 struct nfs_server *server = NFS_SERVER(data->dir);
111
112 if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
113 &data->res.seq_res, 1, task))
114 return;
115 rpc_call_start(task);
116}
117#endif /* CONFIG_NFS_V4_1 */
118
105static const struct rpc_call_ops nfs_unlink_ops = { 119static const struct rpc_call_ops nfs_unlink_ops = {
106 .rpc_call_done = nfs_async_unlink_done, 120 .rpc_call_done = nfs_async_unlink_done,
107 .rpc_release = nfs_async_unlink_release, 121 .rpc_release = nfs_async_unlink_release,
122#if defined(CONFIG_NFS_V4_1)
123 .rpc_call_prepare = nfs_unlink_prepare,
124#endif /* CONFIG_NFS_V4_1 */
108}; 125};
109 126
110static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) 127static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
@@ -241,6 +258,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
241 status = PTR_ERR(data->cred); 258 status = PTR_ERR(data->cred);
242 goto out_free; 259 goto out_free;
243 } 260 }
261 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
244 262
245 status = -EBUSY; 263 status = -EBUSY;
246 spin_lock(&dentry->d_lock); 264 spin_lock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e560a78995a3..ce728829f79a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -25,6 +25,7 @@
25#include "delegation.h" 25#include "delegation.h"
26#include "internal.h" 26#include "internal.h"
27#include "iostat.h" 27#include "iostat.h"
28#include "nfs4_fs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -52,6 +53,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
52 if (p) { 53 if (p) {
53 memset(p, 0, sizeof(*p)); 54 memset(p, 0, sizeof(*p));
54 INIT_LIST_HEAD(&p->pages); 55 INIT_LIST_HEAD(&p->pages);
56 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
55 } 57 }
56 return p; 58 return p;
57} 59}
@@ -71,6 +73,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
71 memset(p, 0, sizeof(*p)); 73 memset(p, 0, sizeof(*p));
72 INIT_LIST_HEAD(&p->pages); 74 INIT_LIST_HEAD(&p->pages);
73 p->npages = pagecount; 75 p->npages = pagecount;
76 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
74 if (pagecount <= ARRAY_SIZE(p->page_array)) 77 if (pagecount <= ARRAY_SIZE(p->page_array))
75 p->pagevec = p->page_array; 78 p->pagevec = p->page_array;
76 else { 79 else {
@@ -1048,7 +1051,23 @@ out:
1048 nfs_writedata_release(calldata); 1051 nfs_writedata_release(calldata);
1049} 1052}
1050 1053
1054#if defined(CONFIG_NFS_V4_1)
1055void nfs_write_prepare(struct rpc_task *task, void *calldata)
1056{
1057 struct nfs_write_data *data = calldata;
1058 struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
1059
1060 if (nfs4_setup_sequence(clp, &data->args.seq_args,
1061 &data->res.seq_res, 1, task))
1062 return;
1063 rpc_call_start(task);
1064}
1065#endif /* CONFIG_NFS_V4_1 */
1066
1051static const struct rpc_call_ops nfs_write_partial_ops = { 1067static const struct rpc_call_ops nfs_write_partial_ops = {
1068#if defined(CONFIG_NFS_V4_1)
1069 .rpc_call_prepare = nfs_write_prepare,
1070#endif /* CONFIG_NFS_V4_1 */
1052 .rpc_call_done = nfs_writeback_done_partial, 1071 .rpc_call_done = nfs_writeback_done_partial,
1053 .rpc_release = nfs_writeback_release_partial, 1072 .rpc_release = nfs_writeback_release_partial,
1054}; 1073};
@@ -1111,6 +1130,9 @@ remove_request:
1111} 1130}
1112 1131
1113static const struct rpc_call_ops nfs_write_full_ops = { 1132static const struct rpc_call_ops nfs_write_full_ops = {
1133#if defined(CONFIG_NFS_V4_1)
1134 .rpc_call_prepare = nfs_write_prepare,
1135#endif /* CONFIG_NFS_V4_1 */
1114 .rpc_call_done = nfs_writeback_done_full, 1136 .rpc_call_done = nfs_writeback_done_full,
1115 .rpc_release = nfs_writeback_release_full, 1137 .rpc_release = nfs_writeback_release_full,
1116}; 1138};
@@ -1123,6 +1145,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1123{ 1145{
1124 struct nfs_writeargs *argp = &data->args; 1146 struct nfs_writeargs *argp = &data->args;
1125 struct nfs_writeres *resp = &data->res; 1147 struct nfs_writeres *resp = &data->res;
1148 struct nfs_server *server = NFS_SERVER(data->inode);
1126 int status; 1149 int status;
1127 1150
1128 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1151 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1155,7 +1178,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1155 if (time_before(complain, jiffies)) { 1178 if (time_before(complain, jiffies)) {
1156 dprintk("NFS: faulty NFS server %s:" 1179 dprintk("NFS: faulty NFS server %s:"
1157 " (committed = %d) != (stable = %d)\n", 1180 " (committed = %d) != (stable = %d)\n",
1158 NFS_SERVER(data->inode)->nfs_client->cl_hostname, 1181 server->nfs_client->cl_hostname,
1159 resp->verf->committed, argp->stable); 1182 resp->verf->committed, argp->stable);
1160 complain = jiffies + 300 * HZ; 1183 complain = jiffies + 300 * HZ;
1161 } 1184 }
@@ -1181,7 +1204,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1181 */ 1204 */
1182 argp->stable = NFS_FILE_SYNC; 1205 argp->stable = NFS_FILE_SYNC;
1183 } 1206 }
1184 rpc_restart_call(task); 1207 nfs4_restart_rpc(task, server->nfs_client);
1185 return -EAGAIN; 1208 return -EAGAIN;
1186 } 1209 }
1187 if (time_before(complain, jiffies)) { 1210 if (time_before(complain, jiffies)) {
@@ -1193,6 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1193 /* Can't do anything about it except throw an error. */ 1216 /* Can't do anything about it except throw an error. */
1194 task->tk_status = -EIO; 1217 task->tk_status = -EIO;
1195 } 1218 }
1219 nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res);
1196 return 0; 1220 return 0;
1197} 1221}
1198 1222
@@ -1349,6 +1373,9 @@ static void nfs_commit_release(void *calldata)
1349} 1373}
1350 1374
1351static const struct rpc_call_ops nfs_commit_ops = { 1375static const struct rpc_call_ops nfs_commit_ops = {
1376#if defined(CONFIG_NFS_V4_1)
1377 .rpc_call_prepare = nfs_write_prepare,
1378#endif /* CONFIG_NFS_V4_1 */
1352 .rpc_call_done = nfs_commit_done, 1379 .rpc_call_done = nfs_commit_done,
1353 .rpc_release = nfs_commit_release, 1380 .rpc_release = nfs_commit_release,
1354}; 1381};
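
[Editor's note] The write path above wires an optional .rpc_call_prepare hook into the existing rpc_call_ops tables, guarded by CONFIG_NFS_V4_1. The stand-alone sketch below shows the same pattern with invented types and names (call_ops, write_prepare): designated initializers let the optional member be compiled in or out without disturbing the rest of the table, and the caller simply tests the pointer before using it. It is not the kernel hook itself, which calls nfs4_setup_sequence() to reserve a session slot.

#include <stdio.h>

/* Stand-ins for the kernel's rpc_call_ops hooks; only the two used here. */
struct call_ops {
	void (*prepare)(const char *op);	/* optional: runs before the call is sent */
	void (*done)(const char *op);		/* runs when the reply arrives */
};

#ifdef CONFIG_NFS_V4_1
/* v4.1 only: stands in for the slot-reserving prepare callback. */
static void write_prepare(const char *op)
{
	printf("prepare: session slot reserved for %s\n", op);
}
#endif

static void write_done(const char *op)
{
	printf("done: %s\n", op);
}

/* Designated initializers let the optional hook be compiled in or out
 * without touching the other members of the table. */
static const struct call_ops write_ops = {
#ifdef CONFIG_NFS_V4_1
	.prepare = write_prepare,
#endif
	.done = write_done,
};

int main(void)
{
	if (write_ops.prepare)		/* skipped when the hook is compiled out */
		write_ops.prepare("WRITE");
	write_ops.done("WRITE");
	return 0;
}
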
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8b1f8efb4690..b92a27629fb7 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -464,16 +464,11 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
464 if (err) 464 if (err)
465 return err; 465 return err;
466 /* 466 /*
467 * Just a quick sanity check; we could also try to check 467 * XXX: It would be nice to also check whether this
468 * whether this pseudoflavor is supported, but at worst 468 * pseudoflavor is supported, so we can discover the
469 * an unsupported pseudoflavor on the export would just 469 * problem at export time instead of when a client fails
470 * be a pseudoflavor that won't match the flavor of any 470 * to authenticate.
471 * authenticated request. The administrator will
472 * probably discover the problem when someone fails to
473 * authenticate.
474 */ 471 */
475 if (f->pseudoflavor < 0)
476 return -EINVAL;
477 err = get_int(mesg, &f->flags); 472 err = get_int(mesg, &f->flags);
478 if (err) 473 if (err)
479 return err; 474 return err;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7c9fe838f038..a713c418a922 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -652,8 +652,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
652 * NFSv3 Server procedures. 652 * NFSv3 Server procedures.
653 * Only the results of non-idempotent operations are cached. 653 * Only the results of non-idempotent operations are cached.
654 */ 654 */
655#define nfs3svc_decode_voidargs NULL
656#define nfs3svc_release_void NULL
657#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle 655#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle
658#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat 656#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat
659#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat 657#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat
@@ -686,28 +684,219 @@ struct nfsd3_voidargs { int dummy; };
686#define WC (7+pAT) /* WCC attributes */ 684#define WC (7+pAT) /* WCC attributes */
687 685
688static struct svc_procedure nfsd_procedures3[22] = { 686static struct svc_procedure nfsd_procedures3[22] = {
689 PROC(null, void, void, void, RC_NOCACHE, ST), 687 [NFS3PROC_NULL] = {
690 PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), 688 .pc_func = (svc_procfunc) nfsd3_proc_null,
691 PROC(setattr, sattr, wccstat, fhandle, RC_REPLBUFF, ST+WC), 689 .pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
692 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), 690 .pc_argsize = sizeof(struct nfsd3_voidargs),
693 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), 691 .pc_ressize = sizeof(struct nfsd3_voidres),
694 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), 692 .pc_cachetype = RC_NOCACHE,
695 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4), 693 .pc_xdrressize = ST,
696 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), 694 },
697 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 695 [NFS3PROC_GETATTR] = {
698 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 696 .pc_func = (svc_procfunc) nfsd3_proc_getattr,
699 PROC(symlink, symlink, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 697 .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
700 PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 698 .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
701 PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), 699 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
702 PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), 700 .pc_argsize = sizeof(struct nfsd3_fhandleargs),
703 PROC(rename, rename, rename, fhandle2, RC_REPLBUFF, ST+WC+WC), 701 .pc_ressize = sizeof(struct nfsd3_attrstatres),
704 PROC(link, link, link, fhandle2, RC_REPLBUFF, ST+pAT+WC), 702 .pc_cachetype = RC_NOCACHE,
705 PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE, 0), 703 .pc_xdrressize = ST+AT,
706 PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE, 0), 704 },
707 PROC(fsstat, fhandle, fsstat, void, RC_NOCACHE, ST+pAT+2*6+1), 705 [NFS3PROC_SETATTR] = {
708 PROC(fsinfo, fhandle, fsinfo, void, RC_NOCACHE, ST+pAT+12), 706 .pc_func = (svc_procfunc) nfsd3_proc_setattr,
709 PROC(pathconf, fhandle, pathconf, void, RC_NOCACHE, ST+pAT+6), 707 .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
710 PROC(commit, commit, commit, fhandle, RC_NOCACHE, ST+WC+2), 708 .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
709 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
710 .pc_argsize = sizeof(struct nfsd3_sattrargs),
711 .pc_ressize = sizeof(struct nfsd3_wccstatres),
712 .pc_cachetype = RC_REPLBUFF,
713 .pc_xdrressize = ST+WC,
714 },
715 [NFS3PROC_LOOKUP] = {
716 .pc_func = (svc_procfunc) nfsd3_proc_lookup,
717 .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
718 .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
719 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
720 .pc_argsize = sizeof(struct nfsd3_diropargs),
721 .pc_ressize = sizeof(struct nfsd3_diropres),
722 .pc_cachetype = RC_NOCACHE,
723 .pc_xdrressize = ST+FH+pAT+pAT,
724 },
725 [NFS3PROC_ACCESS] = {
726 .pc_func = (svc_procfunc) nfsd3_proc_access,
727 .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
728 .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
729 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
730 .pc_argsize = sizeof(struct nfsd3_accessargs),
731 .pc_ressize = sizeof(struct nfsd3_accessres),
732 .pc_cachetype = RC_NOCACHE,
733 .pc_xdrressize = ST+pAT+1,
734 },
735 [NFS3PROC_READLINK] = {
736 .pc_func = (svc_procfunc) nfsd3_proc_readlink,
737 .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
738 .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
739 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
740 .pc_argsize = sizeof(struct nfsd3_readlinkargs),
741 .pc_ressize = sizeof(struct nfsd3_readlinkres),
742 .pc_cachetype = RC_NOCACHE,
743 .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
744 },
745 [NFS3PROC_READ] = {
746 .pc_func = (svc_procfunc) nfsd3_proc_read,
747 .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
748 .pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
749 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
750 .pc_argsize = sizeof(struct nfsd3_readargs),
751 .pc_ressize = sizeof(struct nfsd3_readres),
752 .pc_cachetype = RC_NOCACHE,
753 .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
754 },
755 [NFS3PROC_WRITE] = {
756 .pc_func = (svc_procfunc) nfsd3_proc_write,
757 .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
758 .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
759 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
760 .pc_argsize = sizeof(struct nfsd3_writeargs),
761 .pc_ressize = sizeof(struct nfsd3_writeres),
762 .pc_cachetype = RC_REPLBUFF,
763 .pc_xdrressize = ST+WC+4,
764 },
765 [NFS3PROC_CREATE] = {
766 .pc_func = (svc_procfunc) nfsd3_proc_create,
767 .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
768 .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
769 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
770 .pc_argsize = sizeof(struct nfsd3_createargs),
771 .pc_ressize = sizeof(struct nfsd3_createres),
772 .pc_cachetype = RC_REPLBUFF,
773 .pc_xdrressize = ST+(1+FH+pAT)+WC,
774 },
775 [NFS3PROC_MKDIR] = {
776 .pc_func = (svc_procfunc) nfsd3_proc_mkdir,
777 .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
778 .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
779 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
780 .pc_argsize = sizeof(struct nfsd3_mkdirargs),
781 .pc_ressize = sizeof(struct nfsd3_createres),
782 .pc_cachetype = RC_REPLBUFF,
783 .pc_xdrressize = ST+(1+FH+pAT)+WC,
784 },
785 [NFS3PROC_SYMLINK] = {
786 .pc_func = (svc_procfunc) nfsd3_proc_symlink,
787 .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
788 .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
789 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
790 .pc_argsize = sizeof(struct nfsd3_symlinkargs),
791 .pc_ressize = sizeof(struct nfsd3_createres),
792 .pc_cachetype = RC_REPLBUFF,
793 .pc_xdrressize = ST+(1+FH+pAT)+WC,
794 },
795 [NFS3PROC_MKNOD] = {
796 .pc_func = (svc_procfunc) nfsd3_proc_mknod,
797 .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
798 .pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
799 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
800 .pc_argsize = sizeof(struct nfsd3_mknodargs),
801 .pc_ressize = sizeof(struct nfsd3_createres),
802 .pc_cachetype = RC_REPLBUFF,
803 .pc_xdrressize = ST+(1+FH+pAT)+WC,
804 },
805 [NFS3PROC_REMOVE] = {
806 .pc_func = (svc_procfunc) nfsd3_proc_remove,
807 .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
808 .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
809 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
810 .pc_argsize = sizeof(struct nfsd3_diropargs),
811 .pc_ressize = sizeof(struct nfsd3_wccstatres),
812 .pc_cachetype = RC_REPLBUFF,
813 .pc_xdrressize = ST+WC,
814 },
815 [NFS3PROC_RMDIR] = {
816 .pc_func = (svc_procfunc) nfsd3_proc_rmdir,
817 .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
818 .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
819 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
820 .pc_argsize = sizeof(struct nfsd3_diropargs),
821 .pc_ressize = sizeof(struct nfsd3_wccstatres),
822 .pc_cachetype = RC_REPLBUFF,
823 .pc_xdrressize = ST+WC,
824 },
825 [NFS3PROC_RENAME] = {
826 .pc_func = (svc_procfunc) nfsd3_proc_rename,
827 .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
828 .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
829 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
830 .pc_argsize = sizeof(struct nfsd3_renameargs),
831 .pc_ressize = sizeof(struct nfsd3_renameres),
832 .pc_cachetype = RC_REPLBUFF,
833 .pc_xdrressize = ST+WC+WC,
834 },
835 [NFS3PROC_LINK] = {
836 .pc_func = (svc_procfunc) nfsd3_proc_link,
837 .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
838 .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
839 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
840 .pc_argsize = sizeof(struct nfsd3_linkargs),
841 .pc_ressize = sizeof(struct nfsd3_linkres),
842 .pc_cachetype = RC_REPLBUFF,
843 .pc_xdrressize = ST+pAT+WC,
844 },
845 [NFS3PROC_READDIR] = {
846 .pc_func = (svc_procfunc) nfsd3_proc_readdir,
847 .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
848 .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
849 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
850 .pc_argsize = sizeof(struct nfsd3_readdirargs),
851 .pc_ressize = sizeof(struct nfsd3_readdirres),
852 .pc_cachetype = RC_NOCACHE,
853 },
854 [NFS3PROC_READDIRPLUS] = {
855 .pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
856 .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
857 .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
858 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
859 .pc_argsize = sizeof(struct nfsd3_readdirplusargs),
860 .pc_ressize = sizeof(struct nfsd3_readdirres),
861 .pc_cachetype = RC_NOCACHE,
862 },
863 [NFS3PROC_FSSTAT] = {
864 .pc_func = (svc_procfunc) nfsd3_proc_fsstat,
865 .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
866 .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
867 .pc_argsize = sizeof(struct nfsd3_fhandleargs),
868 .pc_ressize = sizeof(struct nfsd3_fsstatres),
869 .pc_cachetype = RC_NOCACHE,
870 .pc_xdrressize = ST+pAT+2*6+1,
871 },
872 [NFS3PROC_FSINFO] = {
873 .pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
874 .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
875 .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
876 .pc_argsize = sizeof(struct nfsd3_fhandleargs),
877 .pc_ressize = sizeof(struct nfsd3_fsinfores),
878 .pc_cachetype = RC_NOCACHE,
879 .pc_xdrressize = ST+pAT+12,
880 },
881 [NFS3PROC_PATHCONF] = {
882 .pc_func = (svc_procfunc) nfsd3_proc_pathconf,
883 .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
884 .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
885 .pc_argsize = sizeof(struct nfsd3_fhandleargs),
886 .pc_ressize = sizeof(struct nfsd3_pathconfres),
887 .pc_cachetype = RC_NOCACHE,
888 .pc_xdrressize = ST+pAT+6,
889 },
890 [NFS3PROC_COMMIT] = {
891 .pc_func = (svc_procfunc) nfsd3_proc_commit,
892 .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
893 .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
894 .pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
895 .pc_argsize = sizeof(struct nfsd3_commitargs),
896 .pc_ressize = sizeof(struct nfsd3_commitres),
897 .pc_cachetype = RC_NOCACHE,
898 .pc_xdrressize = ST+WC+2,
899 },
711}; 900};
712 901
713struct svc_version nfsd_version3 = { 902struct svc_version nfsd_version3 = {
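
[Editor's note] The nfsd_procedures3 conversion above replaces the positional PROC() macro with C99 designated initializers indexed by NFS3PROC_* number, so each slot stays aligned with its protocol procedure and the per-entry argument sizes and cache types are spelled out. The sketch below shows the same table-building pattern with invented names (proc_entry, PROC_GETATTR, ...); it is not nfsd code, only an illustration of the initializer style.

#include <stdio.h>
#include <stddef.h>

/* Illustrative subset of procedure numbers. */
enum { PROC_NULL = 0, PROC_GETATTR = 1, PROC_SETATTR = 2, PROC_MAX };

struct proc_entry {
	const char *name;
	int cache_reply;	/* analogous to pc_cachetype */
	size_t argsize;		/* analogous to pc_argsize */
};

struct getattr_args { int fh; };
struct setattr_args { int fh; int attrs; };

/* Designated initializers index the table by procedure number, so the
 * slot an entry occupies is explicit rather than implied by position. */
static const struct proc_entry procedures[PROC_MAX] = {
	[PROC_NULL] = {
		.name = "NULL",
		.cache_reply = 0,
		.argsize = 0,
	},
	[PROC_GETATTR] = {
		.name = "GETATTR",
		.cache_reply = 0,
		.argsize = sizeof(struct getattr_args),
	},
	[PROC_SETATTR] = {
		.name = "SETATTR",
		.cache_reply = 1,
		.argsize = sizeof(struct setattr_args),
	},
};

int main(void)
{
	for (int i = 0; i < PROC_MAX; i++)
		printf("%d: %s argsize=%zu cached=%d\n", i,
		       procedures[i].name, procedures[i].argsize,
		       procedures[i].cache_reply);
	return 0;
}
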
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 17d0dd997204..01d4ec1c88e0 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp)
272 272
273 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, 273 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
274 &fhp->fh_post_attr); 274 &fhp->fh_post_attr);
275 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
275 if (err) 276 if (err)
276 fhp->fh_post_saved = 0; 277 fhp->fh_post_saved = 0;
277 else 278 else
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 290289bd44f7..3fd23f7aceca 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -140,8 +140,10 @@ struct nfs4_cb_compound_hdr {
140 int status; 140 int status;
141 u32 ident; 141 u32 ident;
142 u32 nops; 142 u32 nops;
143 __be32 *nops_p;
144 u32 minorversion;
143 u32 taglen; 145 u32 taglen;
144 char * tag; 146 char *tag;
145}; 147};
146 148
147static struct { 149static struct {
@@ -201,33 +203,39 @@ nfs_cb_stat_to_errno(int stat)
201 * XDR encode 203 * XDR encode
202 */ 204 */
203 205
204static int 206static void
205encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) 207encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
206{ 208{
207 __be32 * p; 209 __be32 * p;
208 210
209 RESERVE_SPACE(16); 211 RESERVE_SPACE(16);
210 WRITE32(0); /* tag length is always 0 */ 212 WRITE32(0); /* tag length is always 0 */
211 WRITE32(NFS4_MINOR_VERSION); 213 WRITE32(hdr->minorversion);
212 WRITE32(hdr->ident); 214 WRITE32(hdr->ident);
215 hdr->nops_p = p;
213 WRITE32(hdr->nops); 216 WRITE32(hdr->nops);
214 return 0;
215} 217}
216 218
217static int 219static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
218encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) 220{
221 *hdr->nops_p = htonl(hdr->nops);
222}
223
224static void
225encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
226 struct nfs4_cb_compound_hdr *hdr)
219{ 227{
220 __be32 *p; 228 __be32 *p;
221 int len = cb_rec->cbr_fh.fh_size; 229 int len = dp->dl_fh.fh_size;
222 230
223 RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); 231 RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
224 WRITE32(OP_CB_RECALL); 232 WRITE32(OP_CB_RECALL);
225 WRITE32(cb_rec->cbr_stateid.si_generation); 233 WRITE32(dp->dl_stateid.si_generation);
226 WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); 234 WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
227 WRITE32(cb_rec->cbr_trunc); 235 WRITE32(0); /* truncate optimization not implemented */
228 WRITE32(len); 236 WRITE32(len);
229 WRITEMEM(&cb_rec->cbr_fh.fh_base, len); 237 WRITEMEM(&dp->dl_fh.fh_base, len);
230 return 0; 238 hdr->nops++;
231} 239}
232 240
233static int 241static int
@@ -241,17 +249,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
241} 249}
242 250
243static int 251static int
244nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args) 252nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
245{ 253{
246 struct xdr_stream xdr; 254 struct xdr_stream xdr;
247 struct nfs4_cb_compound_hdr hdr = { 255 struct nfs4_cb_compound_hdr hdr = {
248 .ident = args->cbr_ident, 256 .ident = args->dl_ident,
249 .nops = 1,
250 }; 257 };
251 258
252 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 259 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
253 encode_cb_compound_hdr(&xdr, &hdr); 260 encode_cb_compound_hdr(&xdr, &hdr);
254 return (encode_cb_recall(&xdr, args)); 261 encode_cb_recall(&xdr, args, &hdr);
262 encode_cb_nops(&hdr);
263 return 0;
255} 264}
256 265
257 266
@@ -358,18 +367,21 @@ static struct rpc_program cb_program = {
358 .pipe_dir_name = "/nfsd4_cb", 367 .pipe_dir_name = "/nfsd4_cb",
359}; 368};
360 369
370static int max_cb_time(void)
371{
372 return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
373}
374
361/* Reference counting, callback cleanup, etc., all look racy as heck. 375/* Reference counting, callback cleanup, etc., all look racy as heck.
362 * And why is cb_set an atomic? */ 376 * And why is cb_set an atomic? */
363 377
364static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) 378int setup_callback_client(struct nfs4_client *clp)
365{ 379{
366 struct sockaddr_in addr; 380 struct sockaddr_in addr;
367 struct nfs4_callback *cb = &clp->cl_callback; 381 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
368 struct rpc_timeout timeparms = { 382 struct rpc_timeout timeparms = {
369 .to_initval = (NFSD_LEASE_TIME/4) * HZ, 383 .to_initval = max_cb_time(),
370 .to_retries = 5, 384 .to_retries = 0,
371 .to_maxval = (NFSD_LEASE_TIME/2) * HZ,
372 .to_exponential = 1,
373 }; 385 };
374 struct rpc_create_args args = { 386 struct rpc_create_args args = {
375 .protocol = IPPROTO_TCP, 387 .protocol = IPPROTO_TCP,
@@ -386,7 +398,7 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
386 struct rpc_clnt *client; 398 struct rpc_clnt *client;
387 399
388 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 400 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
389 return ERR_PTR(-EINVAL); 401 return -EINVAL;
390 402
391 /* Initialize address */ 403 /* Initialize address */
392 memset(&addr, 0, sizeof(addr)); 404 memset(&addr, 0, sizeof(addr));
@@ -396,48 +408,77 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
396 408
397 /* Create RPC client */ 409 /* Create RPC client */
398 client = rpc_create(&args); 410 client = rpc_create(&args);
399 if (IS_ERR(client)) 411 if (IS_ERR(client)) {
400 dprintk("NFSD: couldn't create callback client: %ld\n", 412 dprintk("NFSD: couldn't create callback client: %ld\n",
401 PTR_ERR(client)); 413 PTR_ERR(client));
402 return client; 414 return PTR_ERR(client);
415 }
416 cb->cb_client = client;
417 return 0;
418
419}
420
421static void warn_no_callback_path(struct nfs4_client *clp, int reason)
422{
423 dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
424 (int)clp->cl_name.len, clp->cl_name.data, reason);
425}
426
427static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
428{
429 struct nfs4_client *clp = calldata;
430
431 if (task->tk_status)
432 warn_no_callback_path(clp, task->tk_status);
433 else
434 atomic_set(&clp->cl_cb_conn.cb_set, 1);
435 put_nfs4_client(clp);
436}
437
438static const struct rpc_call_ops nfsd4_cb_probe_ops = {
439 .rpc_call_done = nfsd4_cb_probe_done,
440};
403 441
442static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
443{
444 struct auth_cred acred = {
445 .machine_cred = 1
446 };
447
448 /*
449 * Note in the gss case this doesn't actually have to wait for a
450 * gss upcall (or any calls to the client); this just creates a
451 * non-uptodate cred which the rpc state machine will fill in with
452 * a refresh_upcall later.
453 */
454 return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
455 RPCAUTH_LOOKUP_NEW);
404} 456}
405 457
406static int do_probe_callback(void *data) 458void do_probe_callback(struct nfs4_client *clp)
407{ 459{
408 struct nfs4_client *clp = data;
460 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
409 struct nfs4_callback *cb = &clp->cl_callback;
410 struct rpc_message msg = { 461 struct rpc_message msg = {
411 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 462 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
412 .rpc_argp = clp, 463 .rpc_argp = clp,
413 }; 464 };
414 struct rpc_clnt *client;
465 struct rpc_cred *cred;
415 int status; 466 int status;
416 467
417 client = setup_callback_client(clp);
468 cred = lookup_cb_cred(cb);
418 if (IS_ERR(client)) {
469 if (IS_ERR(cred)) {
419 status = PTR_ERR(client);
470 status = PTR_ERR(cred);
420 dprintk("NFSD: couldn't create callback client: %d\n",
471 goto out;
421 status);
472 }
422 goto out_err;
473 cb->cb_cred = cred;
474 msg.rpc_cred = cb->cb_cred;
475 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
476 &nfsd4_cb_probe_ops, (void *)clp);
477out:
478 if (status) {
479 warn_no_callback_path(clp, status);
480 put_nfs4_client(clp);
423 } 481 }
424
425 status = rpc_call_sync(client, &msg, RPC_TASK_SOFT);
426
427 if (status)
428 goto out_release_client;
429
430 cb->cb_client = client;
431 atomic_set(&cb->cb_set, 1);
432 put_nfs4_client(clp);
433 return 0;
434out_release_client:
435 rpc_shutdown_client(client);
436out_err:
437 dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
438 (int)clp->cl_name.len, clp->cl_name.data, status);
439 put_nfs4_client(clp);
440 return 0;
441} 482}
442 483
443/* 484/*
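The hunk above replaces the kthread-based probe with an asynchronous RPC: setup_callback_client() now only creates and stores the rpc_clnt, and the CB_NULL probe is issued with rpc_call_async() plus a completion handler (nfsd4_cb_probe_done) that marks the callback path as up and drops the probe's client reference. A minimal sketch of that async pattern follows; it mirrors the code above, but the names probe_done, probe_ops and send_probe are illustrative, not part of nfsd.

    /* Sketch of the async-probe idiom used above (illustrative names). */
    static void probe_done(struct rpc_task *task, void *calldata)
    {
            struct nfs4_client *clp = calldata;

            if (task->tk_status)
                    warn_no_callback_path(clp, task->tk_status); /* probe failed */
            else
                    atomic_set(&clp->cl_cb_conn.cb_set, 1);      /* path verified */
            put_nfs4_client(clp);                                /* drop the probe's reference */
    }

    static const struct rpc_call_ops probe_ops = {
            .rpc_call_done = probe_done,
    };

    /* caller must hold a reference on clp; probe_done releases it */
    static int send_probe(struct rpc_clnt *client, struct rpc_message *msg,
                          struct nfs4_client *clp)
    {
            return rpc_call_async(client, msg, RPC_TASK_SOFT, &probe_ops, clp);
    }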
@@ -446,21 +487,65 @@ out_err:
446void 487void
447nfsd4_probe_callback(struct nfs4_client *clp) 488nfsd4_probe_callback(struct nfs4_client *clp)
448{ 489{
449 struct task_struct *t;
490 int status;
450 491
451 BUG_ON(atomic_read(&clp->cl_callback.cb_set));
492 BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
493
494 status = setup_callback_client(clp);
495 if (status) {
496 warn_no_callback_path(clp, status);
497 return;
498 }
452 499
453 /* the task holds a reference to the nfs4_client struct */ 500 /* the task holds a reference to the nfs4_client struct */
454 atomic_inc(&clp->cl_count); 501 atomic_inc(&clp->cl_count);
455 502
456 t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
503 do_probe_callback(clp);
504}
457 505
458 if (IS_ERR(t))
459 atomic_dec(&clp->cl_count);
506static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
507{
508 struct nfs4_delegation *dp = calldata;
509 struct nfs4_client *clp = dp->dl_client;
460 510
461 return;
511 switch (task->tk_status) {
512 case -EIO:
513 /* Network partition? */
514 atomic_set(&clp->cl_cb_conn.cb_set, 0);
515 warn_no_callback_path(clp, task->tk_status);
516 case -EBADHANDLE:
517 case -NFS4ERR_BAD_STATEID:
518 /* Race: client probably got cb_recall
519 * before open reply granting delegation */
520 break;
521 default:
522 /* success, or error we can't handle */
523 return;
524 }
525 if (dp->dl_retries--) {
526 rpc_delay(task, 2*HZ);
527 task->tk_status = 0;
528 rpc_restart_call(task);
529 } else {
530 atomic_set(&clp->cl_cb_conn.cb_set, 0);
531 warn_no_callback_path(clp, task->tk_status);
532 }
533}
534
535static void nfsd4_cb_recall_release(void *calldata)
536{
537 struct nfs4_delegation *dp = calldata;
538 struct nfs4_client *clp = dp->dl_client;
539
540 nfs4_put_delegation(dp);
541 put_nfs4_client(clp);
462} 542}
463 543
544static const struct rpc_call_ops nfsd4_cb_recall_ops = {
545 .rpc_call_done = nfsd4_cb_recall_done,
546 .rpc_release = nfsd4_cb_recall_release,
547};
548
464/* 549/*
465 * called with dp->dl_count inc'ed. 550 * called with dp->dl_count inc'ed.
466 */ 551 */
@@ -468,41 +553,19 @@ void
468nfsd4_cb_recall(struct nfs4_delegation *dp) 553nfsd4_cb_recall(struct nfs4_delegation *dp)
469{ 554{
470 struct nfs4_client *clp = dp->dl_client; 555 struct nfs4_client *clp = dp->dl_client;
471 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
556 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
472 struct nfs4_cb_recall *cbr = &dp->dl_recall;
473 struct rpc_message msg = { 557 struct rpc_message msg = {
474 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 558 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
475 .rpc_argp = cbr,
559 .rpc_argp = dp,
560 .rpc_cred = clp->cl_cb_conn.cb_cred
476 }; 561 };
477 int retries = 1;
562 int status;
478 int status = 0;
563
479
564 dp->dl_retries = 1;
480 cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
565 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
481 cbr->cbr_dp = dp;
566 &nfsd4_cb_recall_ops, dp);
482
567 if (status) {
483 status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
568 put_nfs4_client(clp);
484 while (retries--) {
569 nfs4_put_delegation(dp);
485 switch (status) {
486 case -EIO:
487 /* Network partition? */
488 atomic_set(&clp->cl_callback.cb_set, 0);
489 case -EBADHANDLE:
490 case -NFS4ERR_BAD_STATEID:
491 /* Race: client probably got cb_recall
492 * before open reply granting delegation */
493 break;
494 default:
495 goto out_put_cred;
496 }
497 ssleep(2);
498 status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
499 } 570 }
500out_put_cred:
501 /*
502 * Success or failure, now we're either waiting for lease expiration
503 * or deleg_return.
504 */
505 put_nfs4_client(clp);
506 nfs4_put_delegation(dp);
507 return;
508} 571}
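nfsd4_cb_recall() is converted the same way: instead of rpc_call_sync() with ssleep(2) between retries, run from a dedicated recall thread, it now uses rpc_call_async() with nfsd4_cb_recall_ops. Retries happen inside the RPC state machine via rpc_delay() and rpc_restart_call(), and the delegation and client references are dropped in the .rpc_release hook once the task finishes. A condensed sketch of the retry idiom (the -EIO handling and the give-up branch from the hunk above are omitted here):

    /* Retry from inside .rpc_call_done (condensed from the hunk above). */
    static void recall_done(struct rpc_task *task, void *calldata)
    {
            struct nfs4_delegation *dp = calldata;

            switch (task->tk_status) {
            case -EBADHANDLE:
            case -NFS4ERR_BAD_STATEID:
                    break;                          /* transient: worth one more try */
            default:
                    return;                         /* success, or nothing we can do */
            }
            if (dp->dl_retries--) {
                    rpc_delay(task, 2 * HZ);        /* sleep inside the rpc state machine */
                    task->tk_status = 0;
                    rpc_restart_call(task);         /* requeue the same call */
            }
    }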
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b2883e9c6381..7c8801769a3c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -51,6 +51,78 @@
51 51
52#define NFSDDBG_FACILITY NFSDDBG_PROC 52#define NFSDDBG_FACILITY NFSDDBG_PROC
53 53
54static u32 nfsd_attrmask[] = {
55 NFSD_WRITEABLE_ATTRS_WORD0,
56 NFSD_WRITEABLE_ATTRS_WORD1,
57 NFSD_WRITEABLE_ATTRS_WORD2
58};
59
60static u32 nfsd41_ex_attrmask[] = {
61 NFSD_SUPPATTR_EXCLCREAT_WORD0,
62 NFSD_SUPPATTR_EXCLCREAT_WORD1,
63 NFSD_SUPPATTR_EXCLCREAT_WORD2
64};
65
66static __be32
67check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
68 u32 *bmval, u32 *writable)
69{
70 struct dentry *dentry = cstate->current_fh.fh_dentry;
71 struct svc_export *exp = cstate->current_fh.fh_export;
72
73 /*
74 * Check whether the requested attributes are supported by the NFSv4 server.
75 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP.
76 */
77 if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) ||
78 (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) ||
79 (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
80 return nfserr_attrnotsupp;
81
82 /*
83 * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported
84 * in current environment or not.
85 */
86 if (bmval[0] & FATTR4_WORD0_ACL) {
87 if (!IS_POSIXACL(dentry->d_inode))
88 return nfserr_attrnotsupp;
89 }
90 if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
91 if (exp->ex_fslocs.locations == NULL)
92 return nfserr_attrnotsupp;
93 }
94
95 /*
96 * According to spec, read-only attributes return ERR_INVAL.
97 */
98 if (writable) {
99 if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
100 (bmval[2] & ~writable[2]))
101 return nfserr_inval;
102 }
103
104 return nfs_ok;
105}
106
107static __be32
108nfsd4_check_open_attributes(struct svc_rqst *rqstp,
109 struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
110{
111 __be32 status = nfs_ok;
112
113 if (open->op_create == NFS4_OPEN_CREATE) {
114 if (open->op_createmode == NFS4_CREATE_UNCHECKED
115 || open->op_createmode == NFS4_CREATE_GUARDED)
116 status = check_attr_support(rqstp, cstate,
117 open->op_bmval, nfsd_attrmask);
118 else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1)
119 status = check_attr_support(rqstp, cstate,
120 open->op_bmval, nfsd41_ex_attrmask);
121 }
122
123 return status;
124}
125
54static inline void 126static inline void
55fh_dup2(struct svc_fh *dst, struct svc_fh *src) 127fh_dup2(struct svc_fh *dst, struct svc_fh *src)
56{ 128{
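The new check_attr_support() centralizes the bitmap validation that previously lived in nfsd4_decode_fattr(): OPEN, CREATE and SETATTR now check the request against the writeable-attribute masks, exclusive create (4.1) against the SUPPATTR_EXCLCREAT masks, and VERIFY/NVERIFY pass a NULL mask so only attribute support is enforced. The core test is a three-word "is this bitmap a subset of that one" check; a generic sketch follows, with bmval_subset() as a made-up helper name:

    /* Illustrative subset test over the three NFSv4 attribute bitmap words. */
    static bool bmval_subset(const u32 *bmval, const u32 *allowed)
    {
            return !(bmval[0] & ~allowed[0]) &&
                   !(bmval[1] & ~allowed[1]) &&
                   !(bmval[2] & ~allowed[2]);
    }
    /* unsupported attrs -> nfserr_attrnotsupp; writes to read-only attrs -> nfserr_inval */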
@@ -225,6 +297,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
225 if (status) 297 if (status)
226 goto out; 298 goto out;
227 299
300 status = nfsd4_check_open_attributes(rqstp, cstate, open);
301 if (status)
302 goto out;
303
228 /* Openowner is now set, so sequence id will get bumped. Now we need 304 /* Openowner is now set, so sequence id will get bumped. Now we need
229 * these checks before we do any creates: */ 305 * these checks before we do any creates: */
230 status = nfserr_grace; 306 status = nfserr_grace;
@@ -395,6 +471,11 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
395 if (status) 471 if (status)
396 return status; 472 return status;
397 473
474 status = check_attr_support(rqstp, cstate, create->cr_bmval,
475 nfsd_attrmask);
476 if (status)
477 return status;
478
398 switch (create->cr_type) { 479 switch (create->cr_type) {
399 case NF4LNK: 480 case NF4LNK:
400 /* ugh! we have to null-terminate the linktext, or 481 /* ugh! we have to null-terminate the linktext, or
@@ -689,6 +770,12 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
689 if (status) 770 if (status)
690 return status; 771 return status;
691 status = nfs_ok; 772 status = nfs_ok;
773
774 status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
775 nfsd_attrmask);
776 if (status)
777 goto out;
778
692 if (setattr->sa_acl != NULL) 779 if (setattr->sa_acl != NULL)
693 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, 780 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
694 setattr->sa_acl); 781 setattr->sa_acl);
@@ -763,10 +850,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
763 if (status) 850 if (status)
764 return status; 851 return status;
765 852
766 if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) 853 status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL);
767 || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) 854 if (status)
768 || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) 855 return status;
769 return nfserr_attrnotsupp; 856
770 if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) 857 if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
771 || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) 858 || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
772 return nfserr_inval; 859 return nfserr_inval;
@@ -1226,24 +1313,9 @@ static const char *nfsd4_op_name(unsigned opnum)
1226 return "unknown_operation"; 1313 return "unknown_operation";
1227} 1314}
1228 1315
1229#define nfs4svc_decode_voidargs NULL
1230#define nfs4svc_release_void NULL
1231#define nfsd4_voidres nfsd4_voidargs 1316#define nfsd4_voidres nfsd4_voidargs
1232#define nfs4svc_release_compound NULL
1233struct nfsd4_voidargs { int dummy; }; 1317struct nfsd4_voidargs { int dummy; };
1234 1318
1235#define PROC(name, argt, rest, relt, cache, respsize) \
1236 { (svc_procfunc) nfsd4_proc_##name, \
1237 (kxdrproc_t) nfs4svc_decode_##argt##args, \
1238 (kxdrproc_t) nfs4svc_encode_##rest##res, \
1239 (kxdrproc_t) nfs4svc_release_##relt, \
1240 sizeof(struct nfsd4_##argt##args), \
1241 sizeof(struct nfsd4_##rest##res), \
1242 0, \
1243 cache, \
1244 respsize, \
1245 }
1246
1247/* 1319/*
1248 * TODO: At the present time, the NFSv4 server does not do XID caching 1320 * TODO: At the present time, the NFSv4 server does not do XID caching
1249 * of requests. Implementing XID caching would not be a serious problem, 1321 * of requests. Implementing XID caching would not be a serious problem,
@@ -1255,8 +1327,23 @@ struct nfsd4_voidargs { int dummy; };
1255 * better XID's. 1327 * better XID's.
1256 */ 1328 */
1257static struct svc_procedure nfsd_procedures4[2] = { 1329static struct svc_procedure nfsd_procedures4[2] = {
1258 PROC(null, void, void, void, RC_NOCACHE, 1), 1330 [NFSPROC4_NULL] = {
1259 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4) 1331 .pc_func = (svc_procfunc) nfsd4_proc_null,
1332 .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
1333 .pc_argsize = sizeof(struct nfsd4_voidargs),
1334 .pc_ressize = sizeof(struct nfsd4_voidres),
1335 .pc_cachetype = RC_NOCACHE,
1336 .pc_xdrressize = 1,
1337 },
1338 [NFSPROC4_COMPOUND] = {
1339 .pc_func = (svc_procfunc) nfsd4_proc_compound,
1340 .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
1341 .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
1342 .pc_argsize = sizeof(struct nfsd4_compoundargs),
1343 .pc_ressize = sizeof(struct nfsd4_compoundres),
1344 .pc_cachetype = RC_NOCACHE,
1345 .pc_xdrressize = NFSD_BUFSIZE/4,
1346 },
1260}; 1347};
1261 1348
1262struct svc_version nfsd_version4 = { 1349struct svc_version nfsd_version4 = {
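The svc_procedure table above drops the positional PROC() macro in favour of C99 designated initializers, so each entry is indexed by its procedure number and only the hooks a procedure actually needs are spelled out (the NULL procedure, for instance, has no .pc_decode). A generic illustration of the pattern, not the actual nfsd structures:

    struct proc_entry {
            int    (*handler)(void *argp);
            size_t argsize;
    };

    enum { PROC_NULL, PROC_COMPOUND, NR_PROCS };

    static int handle_null(void *argp) { return 0; }

    static const struct proc_entry procs[NR_PROCS] = {
            [PROC_NULL] = {
                    .handler = handle_null,
                    .argsize = 0,
            },
            /* fields left out are zero-initialized, so optional hooks
             * can simply be omitted */
    };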
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3b711f5147a7..980a216a48c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
182{ 182{
183 struct nfs4_delegation *dp; 183 struct nfs4_delegation *dp;
184 struct nfs4_file *fp = stp->st_file; 184 struct nfs4_file *fp = stp->st_file;
185 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 185 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
186 186
187 dprintk("NFSD alloc_init_deleg\n"); 187 dprintk("NFSD alloc_init_deleg\n");
188 if (fp->fi_had_conflict) 188 if (fp->fi_had_conflict)
@@ -203,10 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
203 get_file(stp->st_vfs_file); 203 get_file(stp->st_vfs_file);
204 dp->dl_vfs_file = stp->st_vfs_file; 204 dp->dl_vfs_file = stp->st_vfs_file;
205 dp->dl_type = type; 205 dp->dl_type = type;
206 dp->dl_recall.cbr_dp = NULL; 206 dp->dl_ident = cb->cb_ident;
207 dp->dl_recall.cbr_ident = cb->cb_ident; 207 dp->dl_stateid.si_boot = get_seconds();
208 dp->dl_recall.cbr_trunc = 0;
209 dp->dl_stateid.si_boot = boot_time;
210 dp->dl_stateid.si_stateownerid = current_delegid++; 208 dp->dl_stateid.si_stateownerid = current_delegid++;
211 dp->dl_stateid.si_fileid = 0; 209 dp->dl_stateid.si_fileid = 0;
212 dp->dl_stateid.si_generation = 0; 210 dp->dl_stateid.si_generation = 0;
@@ -427,6 +425,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
427{ 425{
428 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; 426 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
429 427
428 if (fchan->maxreqs < 1)
429 return nfserr_inval;
430 else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
431 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
432
430 spin_lock(&nfsd_serv->sv_lock); 433 spin_lock(&nfsd_serv->sv_lock);
431 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) 434 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
432 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; 435 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
@@ -446,8 +449,8 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
446 * fchan holds the client values on input, and the server values on output 449 * fchan holds the client values on input, and the server values on output
447 */ 450 */
448static int init_forechannel_attrs(struct svc_rqst *rqstp, 451static int init_forechannel_attrs(struct svc_rqst *rqstp,
449 struct nfsd4_session *session, 452 struct nfsd4_channel_attrs *session_fchan,
450 struct nfsd4_channel_attrs *fchan) 453 struct nfsd4_channel_attrs *fchan)
451{ 454{
452 int status = 0; 455 int status = 0;
453 __u32 maxcount = svc_max_payload(rqstp); 456 __u32 maxcount = svc_max_payload(rqstp);
@@ -457,21 +460,21 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
457 /* Use the client's max request and max response size if possible */ 460 /* Use the client's max request and max response size if possible */
458 if (fchan->maxreq_sz > maxcount) 461 if (fchan->maxreq_sz > maxcount)
459 fchan->maxreq_sz = maxcount; 462 fchan->maxreq_sz = maxcount;
460 session->se_fmaxreq_sz = fchan->maxreq_sz; 463 session_fchan->maxreq_sz = fchan->maxreq_sz;
461 464
462 if (fchan->maxresp_sz > maxcount) 465 if (fchan->maxresp_sz > maxcount)
463 fchan->maxresp_sz = maxcount; 466 fchan->maxresp_sz = maxcount;
464 session->se_fmaxresp_sz = fchan->maxresp_sz; 467 session_fchan->maxresp_sz = fchan->maxresp_sz;
465 468
466 /* Set the max response cached size our default which is 469 /* Set the max response cached size our default which is
467 * a multiple of PAGE_SIZE and small */ 470 * a multiple of PAGE_SIZE and small */
468 session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; 471 session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
469 fchan->maxresp_cached = session->se_fmaxresp_cached; 472 fchan->maxresp_cached = session_fchan->maxresp_cached;
470 473
471 /* Use the client's maxops if possible */ 474 /* Use the client's maxops if possible */
472 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 475 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
473 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 476 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
474 session->se_fmaxops = fchan->maxops; 477 session_fchan->maxops = fchan->maxops;
475 478
476 /* try to use the client requested number of slots */ 479 /* try to use the client requested number of slots */
477 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) 480 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
@@ -483,7 +486,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
483 */ 486 */
484 status = set_forechannel_maxreqs(fchan); 487 status = set_forechannel_maxreqs(fchan);
485 488
486 session->se_fnumslots = fchan->maxreqs; 489 session_fchan->maxreqs = fchan->maxreqs;
487 return status; 490 return status;
488} 491}
489 492
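init_forechannel_attrs() now negotiates directly into a struct nfsd4_channel_attrs embedded in the session (se_fchannel / se_bchannel) instead of the old se_fmax* scalars, and the slot table is sized from se_fchannel.maxreqs. The negotiation itself is the usual "accept the client's value, clamped to the server limit" step; a generic sketch with hypothetical field and parameter names:

    struct channel_attrs {
            u32 maxreq_sz, maxresp_sz, maxops, maxreqs;
    };

    static void negotiate_channel(struct channel_attrs *res,
                                  const struct channel_attrs *want,
                                  u32 payload_max, u32 ops_max, u32 slots_max)
    {
            res->maxreq_sz  = min(want->maxreq_sz,  payload_max);
            res->maxresp_sz = min(want->maxresp_sz, payload_max);
            res->maxops     = min(want->maxops,     ops_max);
            res->maxreqs    = min(want->maxreqs,    slots_max);
            /* the values actually granted are echoed back to the client */
    }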
@@ -497,12 +500,14 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
497 memset(&tmp, 0, sizeof(tmp)); 500 memset(&tmp, 0, sizeof(tmp));
498 501
499 /* FIXME: For now, we just accept the client back channel attributes. */ 502 /* FIXME: For now, we just accept the client back channel attributes. */
500 status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); 503 tmp.se_bchannel = cses->back_channel;
504 status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
505 &cses->fore_channel);
501 if (status) 506 if (status)
502 goto out; 507 goto out;
503 508
504 /* allocate struct nfsd4_session and slot table in one piece */ 509 /* allocate struct nfsd4_session and slot table in one piece */
505 slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); 510 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot);
506 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 511 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
507 if (!new) 512 if (!new)
508 goto out; 513 goto out;
@@ -576,7 +581,7 @@ free_session(struct kref *kref)
576 int i; 581 int i;
577 582
578 ses = container_of(kref, struct nfsd4_session, se_ref); 583 ses = container_of(kref, struct nfsd4_session, se_ref);
579 for (i = 0; i < ses->se_fnumslots; i++) { 584 for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
580 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 585 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
581 nfsd4_release_respages(e->ce_respages, e->ce_resused); 586 nfsd4_release_respages(e->ce_respages, e->ce_resused);
582 } 587 }
@@ -632,16 +637,20 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
632static void 637static void
633shutdown_callback_client(struct nfs4_client *clp) 638shutdown_callback_client(struct nfs4_client *clp)
634{ 639{
635 struct rpc_clnt *clnt = clp->cl_callback.cb_client; 640 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
636 641
637 if (clnt) { 642 if (clnt) {
638 /* 643 /*
639 * Callback threads take a reference on the client, so there 644 * Callback threads take a reference on the client, so there
640 * should be no outstanding callbacks at this point. 645 * should be no outstanding callbacks at this point.
641 */ 646 */
642 clp->cl_callback.cb_client = NULL; 647 clp->cl_cb_conn.cb_client = NULL;
643 rpc_shutdown_client(clnt); 648 rpc_shutdown_client(clnt);
644 } 649 }
650 if (clp->cl_cb_conn.cb_cred) {
651 put_rpccred(clp->cl_cb_conn.cb_cred);
652 clp->cl_cb_conn.cb_cred = NULL;
653 }
645} 654}
646 655
647static inline void 656static inline void
@@ -714,7 +723,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
714 return NULL; 723 return NULL;
715 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 724 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
716 atomic_set(&clp->cl_count, 1); 725 atomic_set(&clp->cl_count, 1);
717 atomic_set(&clp->cl_callback.cb_set, 0); 726 atomic_set(&clp->cl_cb_conn.cb_set, 0);
718 INIT_LIST_HEAD(&clp->cl_idhash); 727 INIT_LIST_HEAD(&clp->cl_idhash);
719 INIT_LIST_HEAD(&clp->cl_strhash); 728 INIT_LIST_HEAD(&clp->cl_strhash);
720 INIT_LIST_HEAD(&clp->cl_openowners); 729 INIT_LIST_HEAD(&clp->cl_openowners);
@@ -966,7 +975,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
966static void 975static void
967gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 976gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
968{ 977{
969 struct nfs4_callback *cb = &clp->cl_callback; 978 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
970 979
971 /* Currently, we only support tcp for the callback channel */ 980 /* Currently, we only support tcp for the callback channel */
972 if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) 981 if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
@@ -975,6 +984,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
975 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, 984 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
976 &cb->cb_addr, &cb->cb_port))) 985 &cb->cb_addr, &cb->cb_port)))
977 goto out_err; 986 goto out_err;
987 cb->cb_minorversion = 0;
978 cb->cb_prog = se->se_callback_prog; 988 cb->cb_prog = se->se_callback_prog;
979 cb->cb_ident = se->se_callback_ident; 989 cb->cb_ident = se->se_callback_ident;
980 return; 990 return;
@@ -1128,7 +1138,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1128 * is sent (lease renewal). 1138 * is sent (lease renewal).
1129 */ 1139 */
1130 if (seq && nfsd4_not_cached(resp)) { 1140 if (seq && nfsd4_not_cached(resp)) {
1131 seq->maxslots = resp->cstate.session->se_fnumslots; 1141 seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
1132 return nfs_ok; 1142 return nfs_ok;
1133 } 1143 }
1134 1144
@@ -1238,12 +1248,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1238 expire_client(conf); 1248 expire_client(conf);
1239 goto out_new; 1249 goto out_new;
1240 } 1250 }
1241 if (ip_addr != conf->cl_addr &&
1242 !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) {
1243 /* Client collision. 18.35.4 case 3 */
1244 status = nfserr_clid_inuse;
1245 goto out;
1246 }
1247 /* 1251 /*
1248 * Set bit when the owner id and verifier map to an already 1252 * Set bit when the owner id and verifier map to an already
1249 * confirmed client id (18.35.3). 1253 * confirmed client id (18.35.3).
@@ -1257,12 +1261,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1257 copy_verf(conf, &verf); 1261 copy_verf(conf, &verf);
1258 new = conf; 1262 new = conf;
1259 goto out_copy; 1263 goto out_copy;
1260 } else { 1264 }
1261 /* 18.35.4 case 7 */ 1265
1262 if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { 1266 /* 18.35.4 case 7 */
1263 status = nfserr_noent; 1267 if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
1264 goto out; 1268 status = nfserr_noent;
1265 } 1269 goto out;
1266 } 1270 }
1267 1271
1268 unconf = find_unconfirmed_client_by_str(dname, strhashval, true); 1272 unconf = find_unconfirmed_client_by_str(dname, strhashval, true);
@@ -1471,7 +1475,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1471 goto out; 1475 goto out;
1472 1476
1473 status = nfserr_badslot; 1477 status = nfserr_badslot;
1474 if (seq->slotid >= session->se_fnumslots) 1478 if (seq->slotid >= session->se_fchannel.maxreqs)
1475 goto out; 1479 goto out;
1476 1480
1477 slot = &session->se_slots[seq->slotid]; 1481 slot = &session->se_slots[seq->slotid];
@@ -1686,9 +1690,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1686 else { 1690 else {
1687 /* XXX: We just turn off callbacks until we can handle 1691 /* XXX: We just turn off callbacks until we can handle
1688 * change request correctly. */ 1692 * change request correctly. */
1689 atomic_set(&conf->cl_callback.cb_set, 0); 1693 atomic_set(&conf->cl_cb_conn.cb_set, 0);
1690 gen_confirm(conf);
1691 nfsd4_remove_clid_dir(unconf);
1692 expire_client(unconf); 1694 expire_client(unconf);
1693 status = nfs_ok; 1695 status = nfs_ok;
1694 1696
@@ -1882,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1882 stp->st_stateowner = sop; 1884 stp->st_stateowner = sop;
1883 get_nfs4_file(fp); 1885 get_nfs4_file(fp);
1884 stp->st_file = fp; 1886 stp->st_file = fp;
1885 stp->st_stateid.si_boot = boot_time; 1887 stp->st_stateid.si_boot = get_seconds();
1886 stp->st_stateid.si_stateownerid = sop->so_id; 1888 stp->st_stateid.si_stateownerid = sop->so_id;
1887 stp->st_stateid.si_fileid = fp->fi_id; 1889 stp->st_stateid.si_fileid = fp->fi_id;
1888 stp->st_stateid.si_generation = 0; 1890 stp->st_stateid.si_generation = 0;
@@ -2059,19 +2061,6 @@ nfs4_file_downgrade(struct file *filp, unsigned int share_access)
2059} 2061}
2060 2062
2061/* 2063/*
2062 * Recall a delegation
2063 */
2064static int
2065do_recall(void *__dp)
2066{
2067 struct nfs4_delegation *dp = __dp;
2068
2069 dp->dl_file->fi_had_conflict = true;
2070 nfsd4_cb_recall(dp);
2071 return 0;
2072}
2073
2074/*
2075 * Spawn a thread to perform a recall on the delegation represented 2064 * Spawn a thread to perform a recall on the delegation represented
2076 * by the lease (file_lock) 2065 * by the lease (file_lock)
2077 * 2066 *
@@ -2082,8 +2071,7 @@ do_recall(void *__dp)
2082static 2071static
2083void nfsd_break_deleg_cb(struct file_lock *fl) 2072void nfsd_break_deleg_cb(struct file_lock *fl)
2084{ 2073{
2085 struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; 2074 struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2086 struct task_struct *t;
2087 2075
2088 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); 2076 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2089 if (!dp) 2077 if (!dp)
@@ -2111,16 +2099,8 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2111 */ 2099 */
2112 fl->fl_break_time = 0; 2100 fl->fl_break_time = 0;
2113 2101
2114 t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); 2102 dp->dl_file->fi_had_conflict = true;
2115 if (IS_ERR(t)) { 2103 nfsd4_cb_recall(dp);
2116 struct nfs4_client *clp = dp->dl_client;
2117
2118 printk(KERN_INFO "NFSD: Callback thread failed for "
2119 "for client (clientid %08x/%08x)\n",
2120 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
2121 put_nfs4_client(dp->dl_client);
2122 nfs4_put_delegation(dp);
2123 }
2124} 2104}
2125 2105
2126/* 2106/*
@@ -2422,7 +2402,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2422{ 2402{
2423 struct nfs4_delegation *dp; 2403 struct nfs4_delegation *dp;
2424 struct nfs4_stateowner *sop = stp->st_stateowner; 2404 struct nfs4_stateowner *sop = stp->st_stateowner;
2425 struct nfs4_callback *cb = &sop->so_client->cl_callback; 2405 struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
2426 struct file_lock fl, *flp = &fl; 2406 struct file_lock fl, *flp = &fl;
2427 int status, flag = 0; 2407 int status, flag = 0;
2428 2408
@@ -2614,7 +2594,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2614 renew_client(clp); 2594 renew_client(clp);
2615 status = nfserr_cb_path_down; 2595 status = nfserr_cb_path_down;
2616 if (!list_empty(&clp->cl_delegations) 2596 if (!list_empty(&clp->cl_delegations)
2617 && !atomic_read(&clp->cl_callback.cb_set)) 2597 && !atomic_read(&clp->cl_cb_conn.cb_set))
2618 goto out; 2598 goto out;
2619 status = nfs_ok; 2599 status = nfs_ok;
2620out: 2600out:
@@ -2738,12 +2718,42 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
2738static int 2718static int
2739STALE_STATEID(stateid_t *stateid) 2719STALE_STATEID(stateid_t *stateid)
2740{ 2720{
2741 if (stateid->si_boot == boot_time) 2721 if (time_after((unsigned long)boot_time,
2742 return 0; 2722 (unsigned long)stateid->si_boot)) {
2743 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", 2723 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
2744 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, 2724 stateid->si_boot, stateid->si_stateownerid,
2745 stateid->si_generation); 2725 stateid->si_fileid, stateid->si_generation);
2746 return 1; 2726 return 1;
2727 }
2728 return 0;
2729}
2730
2731static int
2732EXPIRED_STATEID(stateid_t *stateid)
2733{
2734 if (time_before((unsigned long)boot_time,
2735 ((unsigned long)stateid->si_boot)) &&
2736 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2737 dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
2738 stateid->si_boot, stateid->si_stateownerid,
2739 stateid->si_fileid, stateid->si_generation);
2740 return 1;
2741 }
2742 return 0;
2743}
2744
2745static __be32
2746stateid_error_map(stateid_t *stateid)
2747{
2748 if (STALE_STATEID(stateid))
2749 return nfserr_stale_stateid;
2750 if (EXPIRED_STATEID(stateid))
2751 return nfserr_expired;
2752
2753 dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
2754 stateid->si_boot, stateid->si_stateownerid,
2755 stateid->si_fileid, stateid->si_generation);
2756 return nfserr_bad_stateid;
2747} 2757}
2748 2758
2749static inline int 2759static inline int
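With si_boot now set from get_seconds() at stateid creation rather than copied from boot_time, a stateid that cannot be found is classified instead of being reported uniformly as bad: older than the current server instance means stale, past its lease means expired, anything else is bad. A simplified sketch of that classification, ignoring the wraparound handling (time_after/time_before) used in the hunk above:

    /* Simplified: assumes monotonically increasing seconds, no wraparound. */
    static __be32 classify_missing_stateid(const stateid_t *stateid,
                                           time_t boot_time, time_t lease_time)
    {
            time_t created = stateid->si_boot;

            if (created < boot_time)
                    return nfserr_stale_stateid;    /* issued by a previous server instance */
            if (created + lease_time < get_seconds())
                    return nfserr_expired;          /* its lease has run out */
            return nfserr_bad_stateid;              /* plausible but unknown */
    }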
@@ -2867,8 +2877,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2867 status = nfserr_bad_stateid; 2877 status = nfserr_bad_stateid;
2868 if (is_delegation_stateid(stateid)) { 2878 if (is_delegation_stateid(stateid)) {
2869 dp = find_delegation_stateid(ino, stateid); 2879 dp = find_delegation_stateid(ino, stateid);
2870 if (!dp) 2880 if (!dp) {
2881 status = stateid_error_map(stateid);
2871 goto out; 2882 goto out;
2883 }
2872 status = check_stateid_generation(stateid, &dp->dl_stateid, 2884 status = check_stateid_generation(stateid, &dp->dl_stateid,
2873 flags); 2885 flags);
2874 if (status) 2886 if (status)
@@ -2881,8 +2893,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2881 *filpp = dp->dl_vfs_file; 2893 *filpp = dp->dl_vfs_file;
2882 } else { /* open or lock stateid */ 2894 } else { /* open or lock stateid */
2883 stp = find_stateid(stateid, flags); 2895 stp = find_stateid(stateid, flags);
2884 if (!stp) 2896 if (!stp) {
2897 status = stateid_error_map(stateid);
2885 goto out; 2898 goto out;
2899 }
2886 if (nfs4_check_fh(current_fh, stp)) 2900 if (nfs4_check_fh(current_fh, stp))
2887 goto out; 2901 goto out;
2888 if (!stp->st_stateowner->so_confirmed) 2902 if (!stp->st_stateowner->so_confirmed)
@@ -2956,7 +2970,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2956 */ 2970 */
2957 sop = search_close_lru(stateid->si_stateownerid, flags); 2971 sop = search_close_lru(stateid->si_stateownerid, flags);
2958 if (sop == NULL) 2972 if (sop == NULL)
2959 return nfserr_bad_stateid; 2973 return stateid_error_map(stateid);
2960 *sopp = sop; 2974 *sopp = sop;
2961 goto check_replay; 2975 goto check_replay;
2962 } 2976 }
@@ -3227,8 +3241,10 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3227 if (!is_delegation_stateid(stateid)) 3241 if (!is_delegation_stateid(stateid))
3228 goto out; 3242 goto out;
3229 dp = find_delegation_stateid(inode, stateid); 3243 dp = find_delegation_stateid(inode, stateid);
3230 if (!dp) 3244 if (!dp) {
3245 status = stateid_error_map(stateid);
3231 goto out; 3246 goto out;
3247 }
3232 status = check_stateid_generation(stateid, &dp->dl_stateid, flags); 3248 status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
3233 if (status) 3249 if (status)
3234 goto out; 3250 goto out;
@@ -3455,7 +3471,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3455 stp->st_stateowner = sop; 3471 stp->st_stateowner = sop;
3456 get_nfs4_file(fp); 3472 get_nfs4_file(fp);
3457 stp->st_file = fp; 3473 stp->st_file = fp;
3458 stp->st_stateid.si_boot = boot_time; 3474 stp->st_stateid.si_boot = get_seconds();
3459 stp->st_stateid.si_stateownerid = sop->so_id; 3475 stp->st_stateid.si_stateownerid = sop->so_id;
3460 stp->st_stateid.si_fileid = fp->fi_id; 3476 stp->st_stateid.si_fileid = fp->fi_id;
3461 stp->st_stateid.si_generation = 0; 3477 stp->st_stateid.si_generation = 0;
@@ -3987,6 +4003,7 @@ nfs4_state_init(void)
3987 INIT_LIST_HEAD(&conf_str_hashtbl[i]); 4003 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
3988 INIT_LIST_HEAD(&unconf_str_hashtbl[i]); 4004 INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
3989 INIT_LIST_HEAD(&unconf_id_hashtbl[i]); 4005 INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
4006 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3990 } 4007 }
3991 for (i = 0; i < SESSION_HASH_SIZE; i++) 4008 for (i = 0; i < SESSION_HASH_SIZE; i++)
3992 INIT_LIST_HEAD(&sessionid_hashtbl[i]); 4009 INIT_LIST_HEAD(&sessionid_hashtbl[i]);
@@ -4009,8 +4026,6 @@ nfs4_state_init(void)
4009 INIT_LIST_HEAD(&close_lru); 4026 INIT_LIST_HEAD(&close_lru);
4010 INIT_LIST_HEAD(&client_lru); 4027 INIT_LIST_HEAD(&client_lru);
4011 INIT_LIST_HEAD(&del_recall_lru); 4028 INIT_LIST_HEAD(&del_recall_lru);
4012 for (i = 0; i < CLIENT_HASH_SIZE; i++)
4013 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
4014 reclaim_str_hashtbl_size = 0; 4029 reclaim_str_hashtbl_size = 0;
4015 return 0; 4030 return 0;
4016} 4031}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b73549d293be..2dcc7feaa6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -83,16 +83,6 @@ check_filename(char *str, int len, __be32 err)
83 return 0; 83 return 0;
84} 84}
85 85
86/*
87 * START OF "GENERIC" DECODE ROUTINES.
88 * These may look a little ugly since they are imported from a "generic"
89 * set of XDR encode/decode routines which are intended to be shared by
90 * all of our NFSv4 implementations (OpenBSD, MacOS X...).
91 *
92 * If the pain of reading these is too great, it should be a straightforward
93 * task to translate them into Linux-specific versions which are more
94 * consistent with the style used in NFSv2/v3...
95 */
96#define DECODE_HEAD \ 86#define DECODE_HEAD \
97 __be32 *p; \ 87 __be32 *p; \
98 __be32 status 88 __be32 status
@@ -254,20 +244,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
254 DECODE_TAIL; 244 DECODE_TAIL;
255} 245}
256 246
257static u32 nfsd_attrmask[] = {
258 NFSD_WRITEABLE_ATTRS_WORD0,
259 NFSD_WRITEABLE_ATTRS_WORD1,
260 NFSD_WRITEABLE_ATTRS_WORD2
261};
262
263static u32 nfsd41_ex_attrmask[] = {
264 NFSD_SUPPATTR_EXCLCREAT_WORD0,
265 NFSD_SUPPATTR_EXCLCREAT_WORD1,
266 NFSD_SUPPATTR_EXCLCREAT_WORD2
267};
268
269static __be32 247static __be32
270nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, 248nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
271 struct iattr *iattr, struct nfs4_acl **acl) 249 struct iattr *iattr, struct nfs4_acl **acl)
272{ 250{
273 int expected_len, len = 0; 251 int expected_len, len = 0;
@@ -280,18 +258,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
280 if ((status = nfsd4_decode_bitmap(argp, bmval))) 258 if ((status = nfsd4_decode_bitmap(argp, bmval)))
281 return status; 259 return status;
282 260
283 /*
284 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP;
285 * read-only attributes return ERR_INVAL.
286 */
287 if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) ||
288 (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) ||
289 (bmval[2] & ~nfsd_suppattrs2(argp->minorversion)))
290 return nfserr_attrnotsupp;
291 if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
292 (bmval[2] & ~writable[2]))
293 return nfserr_inval;
294
295 READ_BUF(4); 261 READ_BUF(4);
296 READ32(expected_len); 262 READ32(expected_len);
297 263
@@ -424,8 +390,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
424 goto xdr_error; 390 goto xdr_error;
425 } 391 }
426 } 392 }
427 BUG_ON(bmval[2]); /* no such writeable attr supported yet */ 393 if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
428 if (len != expected_len) 394 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
395 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
396 READ_BUF(expected_len - len);
397 else if (len != expected_len)
429 goto xdr_error; 398 goto xdr_error;
430 399
431 DECODE_TAIL; 400 DECODE_TAIL;
@@ -518,8 +487,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
518 if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) 487 if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
519 return status; 488 return status;
520 489
521 status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, 490 status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
522 &create->cr_iattr, &create->cr_acl); 491 &create->cr_acl);
523 if (status) 492 if (status)
524 goto out; 493 goto out;
525 494
@@ -682,7 +651,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
682 case NFS4_CREATE_UNCHECKED: 651 case NFS4_CREATE_UNCHECKED:
683 case NFS4_CREATE_GUARDED: 652 case NFS4_CREATE_GUARDED:
684 status = nfsd4_decode_fattr(argp, open->op_bmval, 653 status = nfsd4_decode_fattr(argp, open->op_bmval,
685 nfsd_attrmask, &open->op_iattr, &open->op_acl); 654 &open->op_iattr, &open->op_acl);
686 if (status) 655 if (status)
687 goto out; 656 goto out;
688 break; 657 break;
@@ -696,8 +665,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
696 READ_BUF(8); 665 READ_BUF(8);
697 COPYMEM(open->op_verf.data, 8); 666 COPYMEM(open->op_verf.data, 8);
698 status = nfsd4_decode_fattr(argp, open->op_bmval, 667 status = nfsd4_decode_fattr(argp, open->op_bmval,
699 nfsd41_ex_attrmask, &open->op_iattr, 668 &open->op_iattr, &open->op_acl);
700 &open->op_acl);
701 if (status) 669 if (status)
702 goto out; 670 goto out;
703 break; 671 break;
@@ -893,8 +861,8 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
893 status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); 861 status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
894 if (status) 862 if (status)
895 return status; 863 return status;
896 return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, 864 return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
897 &setattr->sa_iattr, &setattr->sa_acl); 865 &setattr->sa_acl);
898} 866}
899 867
900static __be32 868static __be32
@@ -1328,64 +1296,64 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1328}; 1296};
1329 1297
1330static nfsd4_dec nfsd41_dec_ops[] = { 1298static nfsd4_dec nfsd41_dec_ops[] = {
1331 [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, 1299 [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
1332 [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, 1300 [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
1333 [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, 1301 [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
1334 [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, 1302 [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
1335 [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, 1303 [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
1336 [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, 1304 [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
1337 [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, 1305 [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
1338 [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, 1306 [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
1339 [OP_LINK] (nfsd4_dec)nfsd4_decode_link, 1307 [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
1340 [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, 1308 [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
1341 [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, 1309 [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
1342 [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, 1310 [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
1343 [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, 1311 [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
1344 [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, 1312 [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
1345 [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, 1313 [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
1346 [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, 1314 [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
1347 [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, 1315 [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
1348 [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, 1316 [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp,
1349 [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, 1317 [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
1350 [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, 1318 [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
1351 [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, 1319 [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
1352 [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, 1320 [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
1353 [OP_READ] (nfsd4_dec)nfsd4_decode_read, 1321 [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
1354 [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, 1322 [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
1355 [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, 1323 [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
1356 [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, 1324 [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
1357 [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, 1325 [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
1358 [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, 1326 [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp,
1359 [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, 1327 [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
1360 [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, 1328 [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
1361 [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, 1329 [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
1362 [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, 1330 [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
1363 [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, 1331 [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
1364 [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, 1332 [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp,
1365 [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify, 1333 [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
1366 [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, 1334 [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
1367 [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, 1335 [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp,
1368 1336
1369 /* new operations for NFSv4.1 */ 1337 /* new operations for NFSv4.1 */
1370 [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, 1338 [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp,
1371 [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, 1339 [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp,
1372 [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, 1340 [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
1373 [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, 1341 [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
1374 [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, 1342 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
1375 [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, 1343 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
1376 [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, 1344 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1377 [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, 1345 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
1378 [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, 1346 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1379 [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, 1347 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
1380 [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, 1348 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
1381 [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, 1349 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
1382 [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, 1350 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp,
1383 [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, 1351 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
1384 [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, 1352 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
1385 [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, 1353 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
1386 [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, 1354 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1387 [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, 1355 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
1388 [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, 1356 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp,
1389}; 1357};
1390 1358
1391struct nfsd4_minorversion_ops { 1359struct nfsd4_minorversion_ops {
@@ -1489,21 +1457,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1489 1457
1490 DECODE_TAIL; 1458 DECODE_TAIL;
1491} 1459}
1492/*
1493 * END OF "GENERIC" DECODE ROUTINES.
1494 */
1495
1496/*
1497 * START OF "GENERIC" ENCODE ROUTINES.
1498 * These may look a little ugly since they are imported from a "generic"
1499 * set of XDR encode/decode routines which are intended to be shared by
1500 * all of our NFSv4 implementations (OpenBSD, MacOS X...).
1501 *
1502 * If the pain of reading these is too great, it should be a straightforward
1503 * task to translate them into Linux-specific versions which are more
1504 * consistent with the style used in NFSv2/v3...
1505 */
1506#define ENCODE_HEAD __be32 *p
1507 1460
1508#define WRITE32(n) *p++ = htonl(n) 1461#define WRITE32(n) *p++ = htonl(n)
1509#define WRITE64(n) do { \ 1462#define WRITE64(n) do { \
@@ -1515,13 +1468,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1515 memcpy(p, ptr, nbytes); \ 1468 memcpy(p, ptr, nbytes); \
1516 p += XDR_QUADLEN(nbytes); \ 1469 p += XDR_QUADLEN(nbytes); \
1517}} while (0) 1470}} while (0)
1518#define WRITECINFO(c) do { \ 1471
1519 *p++ = htonl(c.atomic); \ 1472static void write32(__be32 **p, u32 n)
1520 *p++ = htonl(c.before_ctime_sec); \ 1473{
1521 *p++ = htonl(c.before_ctime_nsec); \ 1474 *(*p)++ = n;
1522 *p++ = htonl(c.after_ctime_sec); \ 1475}
1523 *p++ = htonl(c.after_ctime_nsec); \ 1476
1524} while (0) 1477static void write64(__be32 **p, u64 n)
1478{
1479 write32(p, (u32)(n >> 32));
1480 write32(p, (u32)n);
1481}
1482
1483static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
1484{
1485 if (IS_I_VERSION(inode)) {
1486 write64(p, inode->i_version);
1487 } else {
1488 write32(p, stat->ctime.tv_sec);
1489 write32(p, stat->ctime.tv_nsec);
1490 }
1491}
1492
1493static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
1494{
1495 write32(p, c->atomic);
1496 if (c->change_supported) {
1497 write64(p, c->before_change);
1498 write64(p, c->after_change);
1499 } else {
1500 write32(p, c->before_ctime_sec);
1501 write32(p, c->before_ctime_nsec);
1502 write32(p, c->after_ctime_sec);
1503 write32(p, c->after_ctime_nsec);
1504 }
1505}
1525 1506
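WRITECINFO is replaced by typed helpers: write32()/write64() advance the caller's cursor, write_change() emits the real 64-bit change attribute (i_version) when the filesystem maintains one and falls back to ctime otherwise, and write_cinfo() makes the same choice for before/after change_info. As a generic illustration of why the helpers take a __be32 ** cursor (this is a stand-alone XDR-style example in network byte order, not the nfsd helpers themselves):

    static void put32(__be32 **p, u32 n)
    {
            *(*p)++ = htonl(n);             /* store and advance the shared cursor */
    }

    static void put64(__be32 **p, u64 n)
    {
            put32(p, (u32)(n >> 32));       /* high word first, as XDR hyper requires */
            put32(p, (u32)n);
    }

Because the cursor is passed by reference, consecutive calls compose naturally inside a single RESERVE_SPACE()/ADJUST_ARGS() section.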
1526#define RESERVE_SPACE(nbytes) do { \ 1507#define RESERVE_SPACE(nbytes) do { \
1527 p = resp->p; \ 1508 p = resp->p; \
@@ -1874,16 +1855,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1874 WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); 1855 WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
1875 } 1856 }
1876 if (bmval0 & FATTR4_WORD0_CHANGE) { 1857 if (bmval0 & FATTR4_WORD0_CHANGE) {
1877 /*
1878 * Note: This _must_ be consistent with the scheme for writing
1879 * change_info, so any changes made here must be reflected there
1880 * as well. (See xdr4.h:set_change_info() and the WRITECINFO()
1881 * macro above.)
1882 */
1883 if ((buflen -= 8) < 0) 1858 if ((buflen -= 8) < 0)
1884 goto out_resource; 1859 goto out_resource;
1885 WRITE32(stat.ctime.tv_sec); 1860 write_change(&p, &stat, dentry->d_inode);
1886 WRITE32(stat.ctime.tv_nsec);
1887 } 1861 }
1888 if (bmval0 & FATTR4_WORD0_SIZE) { 1862 if (bmval0 & FATTR4_WORD0_SIZE) {
1889 if ((buflen -= 8) < 0) 1863 if ((buflen -= 8) < 0)
@@ -2348,7 +2322,7 @@ fail:
2348static void 2322static void
2349nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) 2323nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
2350{ 2324{
2351 ENCODE_HEAD; 2325 __be32 *p;
2352 2326
2353 RESERVE_SPACE(sizeof(stateid_t)); 2327 RESERVE_SPACE(sizeof(stateid_t));
2354 WRITE32(sid->si_generation); 2328 WRITE32(sid->si_generation);
@@ -2359,7 +2333,7 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
2359static __be32 2333static __be32
2360nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) 2334nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
2361{ 2335{
2362 ENCODE_HEAD; 2336 __be32 *p;
2363 2337
2364 if (!nfserr) { 2338 if (!nfserr) {
2365 RESERVE_SPACE(8); 2339 RESERVE_SPACE(8);
@@ -2386,7 +2360,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
2386static __be32 2360static __be32
2387nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) 2361nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
2388{ 2362{
2389 ENCODE_HEAD; 2363 __be32 *p;
2390 2364
2391 if (!nfserr) { 2365 if (!nfserr) {
2392 RESERVE_SPACE(8); 2366 RESERVE_SPACE(8);
@@ -2399,11 +2373,11 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2399static __be32 2373static __be32
2400nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) 2374nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
2401{ 2375{
2402 ENCODE_HEAD; 2376 __be32 *p;
2403 2377
2404 if (!nfserr) { 2378 if (!nfserr) {
2405 RESERVE_SPACE(32); 2379 RESERVE_SPACE(32);
2406 WRITECINFO(create->cr_cinfo); 2380 write_cinfo(&p, &create->cr_cinfo);
2407 WRITE32(2); 2381 WRITE32(2);
2408 WRITE32(create->cr_bmval[0]); 2382 WRITE32(create->cr_bmval[0]);
2409 WRITE32(create->cr_bmval[1]); 2383 WRITE32(create->cr_bmval[1]);
@@ -2435,7 +2409,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
2435{ 2409{
2436 struct svc_fh *fhp = *fhpp; 2410 struct svc_fh *fhp = *fhpp;
2437 unsigned int len; 2411 unsigned int len;
2438 ENCODE_HEAD; 2412 __be32 *p;
2439 2413
2440 if (!nfserr) { 2414 if (!nfserr) {
2441 len = fhp->fh_handle.fh_size; 2415 len = fhp->fh_handle.fh_size;
@@ -2454,7 +2428,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
2454static void 2428static void
2455nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) 2429nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
2456{ 2430{
2457 ENCODE_HEAD; 2431 __be32 *p;
2458 2432
2459 RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); 2433 RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0));
2460 WRITE64(ld->ld_start); 2434 WRITE64(ld->ld_start);
@@ -2510,11 +2484,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
2510static __be32 2484static __be32
2511nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) 2485nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
2512{ 2486{
2513 ENCODE_HEAD; 2487 __be32 *p;
2514 2488
2515 if (!nfserr) { 2489 if (!nfserr) {
2516 RESERVE_SPACE(20); 2490 RESERVE_SPACE(20);
2517 WRITECINFO(link->li_cinfo); 2491 write_cinfo(&p, &link->li_cinfo);
2518 ADJUST_ARGS(); 2492 ADJUST_ARGS();
2519 } 2493 }
2520 return nfserr; 2494 return nfserr;
@@ -2524,7 +2498,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
2524static __be32 2498static __be32
2525nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) 2499nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
2526{ 2500{
2527 ENCODE_HEAD; 2501 __be32 *p;
2528 ENCODE_SEQID_OP_HEAD; 2502 ENCODE_SEQID_OP_HEAD;
2529 2503
2530 if (nfserr) 2504 if (nfserr)
@@ -2532,7 +2506,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2532 2506
2533 nfsd4_encode_stateid(resp, &open->op_stateid); 2507 nfsd4_encode_stateid(resp, &open->op_stateid);
2534 RESERVE_SPACE(40); 2508 RESERVE_SPACE(40);
2535 WRITECINFO(open->op_cinfo); 2509 write_cinfo(&p, &open->op_cinfo);
2536 WRITE32(open->op_rflags); 2510 WRITE32(open->op_rflags);
2537 WRITE32(2); 2511 WRITE32(2);
2538 WRITE32(open->op_bmval[0]); 2512 WRITE32(open->op_bmval[0]);
@@ -2619,7 +2593,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
2619 int v, pn; 2593 int v, pn;
2620 unsigned long maxcount; 2594 unsigned long maxcount;
2621 long len; 2595 long len;
2622 ENCODE_HEAD; 2596 __be32 *p;
2623 2597
2624 if (nfserr) 2598 if (nfserr)
2625 return nfserr; 2599 return nfserr;
@@ -2681,7 +2655,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
2681{ 2655{
2682 int maxcount; 2656 int maxcount;
2683 char *page; 2657 char *page;
2684 ENCODE_HEAD; 2658 __be32 *p;
2685 2659
2686 if (nfserr) 2660 if (nfserr)
2687 return nfserr; 2661 return nfserr;
@@ -2730,7 +2704,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2730 int maxcount; 2704 int maxcount;
2731 loff_t offset; 2705 loff_t offset;
2732 __be32 *page, *savep, *tailbase; 2706 __be32 *page, *savep, *tailbase;
2733 ENCODE_HEAD; 2707 __be32 *p;
2734 2708
2735 if (nfserr) 2709 if (nfserr)
2736 return nfserr; 2710 return nfserr;
@@ -2806,11 +2780,11 @@ err_no_verf:
2806static __be32 2780static __be32
2807nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) 2781nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
2808{ 2782{
2809 ENCODE_HEAD; 2783 __be32 *p;
2810 2784
2811 if (!nfserr) { 2785 if (!nfserr) {
2812 RESERVE_SPACE(20); 2786 RESERVE_SPACE(20);
2813 WRITECINFO(remove->rm_cinfo); 2787 write_cinfo(&p, &remove->rm_cinfo);
2814 ADJUST_ARGS(); 2788 ADJUST_ARGS();
2815 } 2789 }
2816 return nfserr; 2790 return nfserr;
@@ -2819,12 +2793,12 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2819static __be32 2793static __be32
2820nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) 2794nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
2821{ 2795{
2822 ENCODE_HEAD; 2796 __be32 *p;
2823 2797
2824 if (!nfserr) { 2798 if (!nfserr) {
2825 RESERVE_SPACE(40); 2799 RESERVE_SPACE(40);
2826 WRITECINFO(rename->rn_sinfo); 2800 write_cinfo(&p, &rename->rn_sinfo);
2827 WRITECINFO(rename->rn_tinfo); 2801 write_cinfo(&p, &rename->rn_tinfo);
2828 ADJUST_ARGS(); 2802 ADJUST_ARGS();
2829 } 2803 }
2830 return nfserr; 2804 return nfserr;
@@ -2839,7 +2813,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2839 u32 nflavs; 2813 u32 nflavs;
2840 struct exp_flavor_info *flavs; 2814 struct exp_flavor_info *flavs;
2841 struct exp_flavor_info def_flavs[2]; 2815 struct exp_flavor_info def_flavs[2];
2842 ENCODE_HEAD; 2816 __be32 *p;
2843 2817
2844 if (nfserr) 2818 if (nfserr)
2845 goto out; 2819 goto out;
@@ -2904,7 +2878,7 @@ out:
2904static __be32 2878static __be32
2905nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) 2879nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
2906{ 2880{
2907 ENCODE_HEAD; 2881 __be32 *p;
2908 2882
2909 RESERVE_SPACE(12); 2883 RESERVE_SPACE(12);
2910 if (nfserr) { 2884 if (nfserr) {
@@ -2924,7 +2898,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2924static __be32 2898static __be32
2925nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) 2899nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
2926{ 2900{
2927 ENCODE_HEAD; 2901 __be32 *p;
2928 2902
2929 if (!nfserr) { 2903 if (!nfserr) {
2930 RESERVE_SPACE(8 + sizeof(nfs4_verifier)); 2904 RESERVE_SPACE(8 + sizeof(nfs4_verifier));
@@ -2944,7 +2918,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
2944static __be32 2918static __be32
2945nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) 2919nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
2946{ 2920{
2947 ENCODE_HEAD; 2921 __be32 *p;
2948 2922
2949 if (!nfserr) { 2923 if (!nfserr) {
2950 RESERVE_SPACE(16); 2924 RESERVE_SPACE(16);
@@ -2960,7 +2934,7 @@ static __be32
2960nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, 2934nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr,
2961 struct nfsd4_exchange_id *exid) 2935 struct nfsd4_exchange_id *exid)
2962{ 2936{
2963 ENCODE_HEAD; 2937 __be32 *p;
2964 char *major_id; 2938 char *major_id;
2965 char *server_scope; 2939 char *server_scope;
2966 int major_id_sz; 2940 int major_id_sz;
@@ -3015,7 +2989,7 @@ static __be32
3015nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, 2989nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr,
3016 struct nfsd4_create_session *sess) 2990 struct nfsd4_create_session *sess)
3017{ 2991{
3018 ENCODE_HEAD; 2992 __be32 *p;
3019 2993
3020 if (nfserr) 2994 if (nfserr)
3021 return nfserr; 2995 return nfserr;
@@ -3071,7 +3045,7 @@ __be32
3071nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, 3045nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3072 struct nfsd4_sequence *seq) 3046 struct nfsd4_sequence *seq)
3073{ 3047{
3074 ENCODE_HEAD; 3048 __be32 *p;
3075 3049
3076 if (nfserr) 3050 if (nfserr)
3077 return nfserr; 3051 return nfserr;
@@ -3209,7 +3183,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
3209 dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, 3183 dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
3210 length, xb->page_len, tlen, pad); 3184 length, xb->page_len, tlen, pad);
3211 3185
3212 if (length <= session->se_fmaxresp_cached) 3186 if (length <= session->se_fchannel.maxresp_cached)
3213 return status; 3187 return status;
3214 else 3188 else
3215 return nfserr_rep_too_big_to_cache; 3189 return nfserr_rep_too_big_to_cache;
@@ -3219,7 +3193,7 @@ void
3219nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) 3193nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3220{ 3194{
3221 __be32 *statp; 3195 __be32 *statp;
3222 ENCODE_HEAD; 3196 __be32 *p;
3223 3197
3224 RESERVE_SPACE(8); 3198 RESERVE_SPACE(8);
3225 WRITE32(op->opnum); 3199 WRITE32(op->opnum);
@@ -3253,7 +3227,7 @@ status:
3253void 3227void
3254nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) 3228nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3255{ 3229{
3256 ENCODE_HEAD; 3230 __be32 *p;
3257 struct nfs4_replay *rp = op->replay; 3231 struct nfs4_replay *rp = op->replay;
3258 3232
3259 BUG_ON(!rp); 3233 BUG_ON(!rp);
@@ -3268,10 +3242,6 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3268 ADJUST_ARGS(); 3242 ADJUST_ARGS();
3269} 3243}
3270 3244
3271/*
3272 * END OF "GENERIC" ENCODE ROUTINES.
3273 */
3274
3275int 3245int
3276nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) 3246nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
3277{ 3247{
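These nfs4xdr.c hunks replace the ENCODE_HEAD macro with an explicit "__be32 *p" declaration in each encoder. For orientation, here is a minimal sketch of the companion encode macros as they are used in the hunks above; it is an approximation reconstructed from their call sites, not the verbatim nfs4xdr.c definitions:

/* Sketch only: 'p' is the local XDR cursor that ENCODE_HEAD used to
 * declare.  RESERVE_SPACE() checks that the reply buffer has room and
 * primes the cursor, WRITE32() emits one big-endian word, and
 * ADJUST_ARGS() publishes the new position back into the response. */
#define RESERVE_SPACE(nbytes)	do {					\
	p = resp->p;							\
	BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);			\
} while (0)
#define WRITE32(n)		(*p++ = htonl(n))
#define ADJUST_ARGS()		(resp->p = p)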
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 5bfc2ac60d54..4638635c5d87 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -29,15 +29,24 @@
29 */ 29 */
30#define CACHESIZE 1024 30#define CACHESIZE 1024
31#define HASHSIZE 64 31#define HASHSIZE 64
32#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1))
33 32
34static struct hlist_head * hash_list; 33static struct hlist_head * cache_hash;
35static struct list_head lru_head; 34static struct list_head lru_head;
36static int cache_disabled = 1; 35static int cache_disabled = 1;
37 36
37/*
38 * Calculate the hash index from an XID.
39 */
40static inline u32 request_hash(u32 xid)
41{
42 u32 h = xid;
43 h ^= (xid >> 24);
44 return h & (HASHSIZE-1);
45}
46
38static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); 47static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
39 48
40/* 49/*
41 * locking for the reply cache: 50 * locking for the reply cache:
42 * A cache entry is "single use" if c_state == RC_INPROG 51 * A cache entry is "single use" if c_state == RC_INPROG
43 * Otherwise, it when accessing _prev or _next, the lock must be held. 52 * Otherwise, it when accessing _prev or _next, the lock must be held.
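The REQHASH() macro removed above is replaced by the request_hash() helper, which folds the top byte of the XID into the low bits before masking. A standalone userspace check of the equivalence (the kernel's __force __u32 sparse annotations are dropped here):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define HASHSIZE 64
#define REQHASH(xid) (((((uint32_t)(xid)) >> 24) ^ ((uint32_t)(xid))) & (HASHSIZE - 1))

static inline uint32_t request_hash(uint32_t xid)
{
	uint32_t h = xid;

	h ^= (xid >> 24);
	return h & (HASHSIZE - 1);
}

int main(void)
{
	uint32_t xid;

	/* Both forms must pick the same bucket for any XID. */
	for (xid = 0; xid < 0x01000000u; xid += 0x000f3301u)
		assert(request_hash(xid) == REQHASH(xid));
	printf("request_hash(0x12345678) = %u\n", request_hash(0x12345678u));
	return 0;
}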
@@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void)
62 i--; 71 i--;
63 } 72 }
64 73
65 hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); 74 cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
66 if (!hash_list) 75 if (!cache_hash)
67 goto out_nomem; 76 goto out_nomem;
68 77
69 cache_disabled = 0; 78 cache_disabled = 0;
@@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void)
88 97
89 cache_disabled = 1; 98 cache_disabled = 1;
90 99
91 kfree (hash_list); 100 kfree (cache_hash);
92 hash_list = NULL; 101 cache_hash = NULL;
93} 102}
94 103
95/* 104/*
@@ -108,7 +117,7 @@ static void
108hash_refile(struct svc_cacherep *rp) 117hash_refile(struct svc_cacherep *rp)
109{ 118{
110 hlist_del_init(&rp->c_hash); 119 hlist_del_init(&rp->c_hash);
111 hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); 120 hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
112} 121}
113 122
114/* 123/*
@@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
138 spin_lock(&cache_lock); 147 spin_lock(&cache_lock);
139 rtn = RC_DOIT; 148 rtn = RC_DOIT;
140 149
141 rh = &hash_list[REQHASH(xid)]; 150 rh = &cache_hash[request_hash(xid)];
142 hlist_for_each_entry(rp, hn, rh, c_hash) { 151 hlist_for_each_entry(rp, hn, rh, c_hash) {
143 if (rp->c_state != RC_UNUSED && 152 if (rp->c_state != RC_UNUSED &&
144 xid == rp->c_xid && proc == rp->c_proc && 153 xid == rp->c_xid && proc == rp->c_proc &&
@@ -165,8 +174,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
165 } 174 }
166 } 175 }
167 176
168 /* This should not happen */ 177 /* All entries on the LRU are in-progress. This should not happen */
169 if (rp == NULL) { 178 if (&rp->c_lru == &lru_head) {
170 static int complaints; 179 static int complaints;
171 180
172 printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); 181 printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
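The victim-selection fix above accounts for the list_for_each_entry() walk over lru_head: after such a loop the cursor rp is never NULL, so "nothing usable was found" must be detected by comparing the embedded list_head against the head of the list. A kernel-style sketch of the idiom (the predicate name is hypothetical):

/* Sketch: walk the reply-cache LRU looking for a reusable entry. */
struct svc_cacherep *rp;

list_for_each_entry(rp, &lru_head, c_lru) {
	if (entry_is_reusable(rp))	/* hypothetical predicate */
		break;
}
if (&rp->c_lru == &lru_head) {
	/* Loop completed without a break: every entry is in progress,
	 * so rp points at the container of lru_head, not a real entry. */
}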
@@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
264 273
265 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); 274 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
266 len >>= 2; 275 len >>= 2;
267 276
268 /* Don't cache excessive amounts of data and XDR failures */ 277 /* Don't cache excessive amounts of data and XDR failures */
269 if (!statp || len > (256 >> 2)) { 278 if (!statp || len > (256 >> 2)) {
270 rp->c_state = RC_UNUSED; 279 rp->c_state = RC_UNUSED;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index af16849d243a..1250fb978ac1 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -207,10 +207,14 @@ static struct file_operations pool_stats_operations = {
207static ssize_t write_svc(struct file *file, char *buf, size_t size) 207static ssize_t write_svc(struct file *file, char *buf, size_t size)
208{ 208{
209 struct nfsctl_svc *data; 209 struct nfsctl_svc *data;
210 int err;
210 if (size < sizeof(*data)) 211 if (size < sizeof(*data))
211 return -EINVAL; 212 return -EINVAL;
212 data = (struct nfsctl_svc*) buf; 213 data = (struct nfsctl_svc*) buf;
213 return nfsd_svc(data->svc_port, data->svc_nthreads); 214 err = nfsd_svc(data->svc_port, data->svc_nthreads);
215 if (err < 0)
216 return err;
217 return 0;
214} 218}
215 219
216/** 220/**
@@ -692,11 +696,12 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
692 if (newthreads < 0) 696 if (newthreads < 0)
693 return -EINVAL; 697 return -EINVAL;
694 rv = nfsd_svc(NFS_PORT, newthreads); 698 rv = nfsd_svc(NFS_PORT, newthreads);
695 if (rv) 699 if (rv < 0)
696 return rv; 700 return rv;
697 } 701 } else
698 sprintf(buf, "%d\n", nfsd_nrthreads()); 702 rv = nfsd_nrthreads();
699 return strlen(buf); 703
704 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
700} 705}
701 706
702/** 707/**
@@ -793,7 +798,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
793{ 798{
794 char *mesg = buf; 799 char *mesg = buf;
795 char *vers, *minorp, sign; 800 char *vers, *minorp, sign;
796 int len, num; 801 int len, num, remaining;
797 unsigned minor; 802 unsigned minor;
798 ssize_t tlen = 0; 803 ssize_t tlen = 0;
799 char *sep; 804 char *sep;
@@ -840,32 +845,50 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
840 } 845 }
841 next: 846 next:
842 vers += len + 1; 847 vers += len + 1;
843 tlen += len;
844 } while ((len = qword_get(&mesg, vers, size)) > 0); 848 } while ((len = qword_get(&mesg, vers, size)) > 0);
845 /* If all get turned off, turn them back on, as 849 /* If all get turned off, turn them back on, as
846 * having no versions is BAD 850 * having no versions is BAD
847 */ 851 */
848 nfsd_reset_versions(); 852 nfsd_reset_versions();
849 } 853 }
854
850 /* Now write current state into reply buffer */ 855 /* Now write current state into reply buffer */
851 len = 0; 856 len = 0;
852 sep = ""; 857 sep = "";
858 remaining = SIMPLE_TRANSACTION_LIMIT;
853 for (num=2 ; num <= 4 ; num++) 859 for (num=2 ; num <= 4 ; num++)
854 if (nfsd_vers(num, NFSD_AVAIL)) { 860 if (nfsd_vers(num, NFSD_AVAIL)) {
855 len += sprintf(buf+len, "%s%c%d", sep, 861 len = snprintf(buf, remaining, "%s%c%d", sep,
856 nfsd_vers(num, NFSD_TEST)?'+':'-', 862 nfsd_vers(num, NFSD_TEST)?'+':'-',
857 num); 863 num);
858 sep = " "; 864 sep = " ";
865
866 if (len > remaining)
867 break;
868 remaining -= len;
869 buf += len;
870 tlen += len;
859 } 871 }
860 if (nfsd_vers(4, NFSD_AVAIL)) 872 if (nfsd_vers(4, NFSD_AVAIL))
861 for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) 873 for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
862 len += sprintf(buf+len, " %c4.%u", 874 minor++) {
875 len = snprintf(buf, remaining, " %c4.%u",
863 (nfsd_vers(4, NFSD_TEST) && 876 (nfsd_vers(4, NFSD_TEST) &&
864 nfsd_minorversion(minor, NFSD_TEST)) ? 877 nfsd_minorversion(minor, NFSD_TEST)) ?
865 '+' : '-', 878 '+' : '-',
866 minor); 879 minor);
867 len += sprintf(buf+len, "\n"); 880
868 return len; 881 if (len > remaining)
882 break;
883 remaining -= len;
884 buf += len;
885 tlen += len;
886 }
887
888 len = snprintf(buf, remaining, "\n");
889 if (len > remaining)
890 return -EINVAL;
891 return tlen + len;
869} 892}
870 893
871/** 894/**
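The __write_versions() rewrite above replaces unbounded sprintf() calls with a running "remaining" budget so the reply can never overflow the transaction buffer. A userspace sketch of the same pattern (SIMPLE_TRANSACTION_LIMIT is given an arbitrary value here; in the kernel it is derived from the page size):

#include <stdio.h>

#define SIMPLE_TRANSACTION_LIMIT 512	/* assumed value for the sketch */

/* Emit "+2 +3 +4\n"-style output, stopping cleanly if the buffer fills. */
static int emit_versions(char *buf, const int *vers, int nvers)
{
	int remaining = SIMPLE_TRANSACTION_LIMIT;
	int tlen = 0, len, i;
	const char *sep = "";

	for (i = 0; i < nvers; i++) {
		len = snprintf(buf, remaining, "%s+%d", sep, vers[i]);
		sep = " ";
		if (len > remaining)
			break;		/* would have truncated: stop here */
		remaining -= len;
		buf += len;
		tlen += len;
	}
	len = snprintf(buf, remaining, "\n");
	if (len > remaining)
		return -1;
	return tlen + len;
}

int main(void)
{
	char buf[SIMPLE_TRANSACTION_LIMIT];
	int vers[] = { 2, 3, 4 };
	int n = emit_versions(buf, vers, 3);

	if (n > 0)
		fwrite(buf, 1, n, stdout);
	return 0;
}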
@@ -910,104 +933,143 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
910 return rv; 933 return rv;
911} 934}
912 935
913static ssize_t __write_ports(struct file *file, char *buf, size_t size) 936/*
937 * Zero-length write. Return a list of NFSD's current listener
938 * transports.
939 */
940static ssize_t __write_ports_names(char *buf)
914{ 941{
915 if (size == 0) { 942 if (nfsd_serv == NULL)
916 int len = 0; 943 return 0;
944 return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
945}
917 946
918 if (nfsd_serv) 947/*
919 len = svc_xprt_names(nfsd_serv, buf, 0); 948 * A single 'fd' number was written, in which case it must be for
920 return len; 949 * a socket of a supported family/protocol, and we use it as an
921 } 950 * nfsd listener.
922 /* Either a single 'fd' number is written, in which 951 */
923 * case it must be for a socket of a supported family/protocol, 952static ssize_t __write_ports_addfd(char *buf)
924 * and we use it as an nfsd socket, or 953{
925 * A '-' followed by the 'name' of a socket in which case 954 char *mesg = buf;
926 * we close the socket. 955 int fd, err;
927 */ 956
928 if (isdigit(buf[0])) { 957 err = get_int(&mesg, &fd);
929 char *mesg = buf; 958 if (err != 0 || fd < 0)
930 int fd; 959 return -EINVAL;
931 int err; 960
932 err = get_int(&mesg, &fd); 961 err = nfsd_create_serv();
933 if (err) 962 if (err != 0)
934 return -EINVAL; 963 return err;
935 if (fd < 0) 964
936 return -EINVAL; 965 err = lockd_up();
937 err = nfsd_create_serv(); 966 if (err != 0)
938 if (!err) { 967 goto out;
939 err = svc_addsock(nfsd_serv, fd, buf); 968
940 if (err >= 0) { 969 err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
941 err = lockd_up(); 970 if (err < 0)
942 if (err < 0) 971 lockd_down();
943 svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); 972
944 } 973out:
945 /* Decrease the count, but don't shutdown the 974 /* Decrease the count, but don't shut down the service */
946 * the service 975 nfsd_serv->sv_nrthreads--;
947 */ 976 return err;
948 nfsd_serv->sv_nrthreads--; 977}
949 } 978
950 return err < 0 ? err : 0; 979/*
951 } 980 * A '-' followed by the 'name' of a socket means we close the socket.
952 if (buf[0] == '-' && isdigit(buf[1])) { 981 */
953 char *toclose = kstrdup(buf+1, GFP_KERNEL); 982static ssize_t __write_ports_delfd(char *buf)
954 int len = 0; 983{
955 if (!toclose) 984 char *toclose;
956 return -ENOMEM; 985 int len = 0;
957 if (nfsd_serv) 986
958 len = svc_sock_names(buf, nfsd_serv, toclose); 987 toclose = kstrdup(buf + 1, GFP_KERNEL);
959 if (len >= 0) 988 if (toclose == NULL)
960 lockd_down(); 989 return -ENOMEM;
961 kfree(toclose); 990
962 return len; 991 if (nfsd_serv != NULL)
963 } 992 len = svc_sock_names(nfsd_serv, buf,
964 /* 993 SIMPLE_TRANSACTION_LIMIT, toclose);
965 * Add a transport listener by writing it's transport name 994 if (len >= 0)
966 */ 995 lockd_down();
967 if (isalpha(buf[0])) { 996
968 int err; 997 kfree(toclose);
969 char transport[16]; 998 return len;
970 int port; 999}
971 if (sscanf(buf, "%15s %4d", transport, &port) == 2) { 1000
972 if (port < 1 || port > 65535) 1001/*
973 return -EINVAL; 1002 * A transport listener is added by writing it's transport name and
974 err = nfsd_create_serv(); 1003 * a port number.
975 if (!err) { 1004 */
976 err = svc_create_xprt(nfsd_serv, 1005static ssize_t __write_ports_addxprt(char *buf)
977 transport, PF_INET, port, 1006{
978 SVC_SOCK_ANONYMOUS); 1007 char transport[16];
979 if (err == -ENOENT) 1008 int port, err;
980 /* Give a reasonable perror msg for 1009
981 * bad transport string */ 1010 if (sscanf(buf, "%15s %4u", transport, &port) != 2)
982 err = -EPROTONOSUPPORT; 1011 return -EINVAL;
983 } 1012
984 return err < 0 ? err : 0; 1013 if (port < 1 || port > USHORT_MAX)
985 } 1014 return -EINVAL;
986 } 1015
987 /* 1016 err = nfsd_create_serv();
988 * Remove a transport by writing it's transport name and port number 1017 if (err != 0)
989 */ 1018 return err;
990 if (buf[0] == '-' && isalpha(buf[1])) { 1019
991 struct svc_xprt *xprt; 1020 err = svc_create_xprt(nfsd_serv, transport,
992 int err = -EINVAL; 1021 PF_INET, port, SVC_SOCK_ANONYMOUS);
993 char transport[16]; 1022 if (err < 0) {
994 int port; 1023 /* Give a reasonable perror msg for bad transport string */
995 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { 1024 if (err == -ENOENT)
996 if (port < 1 || port > 65535) 1025 err = -EPROTONOSUPPORT;
997 return -EINVAL; 1026 return err;
998 if (nfsd_serv) {
999 xprt = svc_find_xprt(nfsd_serv, transport,
1000 AF_UNSPEC, port);
1001 if (xprt) {
1002 svc_close_xprt(xprt);
1003 svc_xprt_put(xprt);
1004 err = 0;
1005 } else
1006 err = -ENOTCONN;
1007 }
1008 return err < 0 ? err : 0;
1009 }
1010 } 1027 }
1028 return 0;
1029}
1030
1031/*
1032 * A transport listener is removed by writing a "-", it's transport
1033 * name, and it's port number.
1034 */
1035static ssize_t __write_ports_delxprt(char *buf)
1036{
1037 struct svc_xprt *xprt;
1038 char transport[16];
1039 int port;
1040
1041 if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
1042 return -EINVAL;
1043
1044 if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL)
1045 return -EINVAL;
1046
1047 xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
1048 if (xprt == NULL)
1049 return -ENOTCONN;
1050
1051 svc_close_xprt(xprt);
1052 svc_xprt_put(xprt);
1053 return 0;
1054}
1055
1056static ssize_t __write_ports(struct file *file, char *buf, size_t size)
1057{
1058 if (size == 0)
1059 return __write_ports_names(buf);
1060
1061 if (isdigit(buf[0]))
1062 return __write_ports_addfd(buf);
1063
1064 if (buf[0] == '-' && isdigit(buf[1]))
1065 return __write_ports_delfd(buf);
1066
1067 if (isalpha(buf[0]))
1068 return __write_ports_addxprt(buf);
1069
1070 if (buf[0] == '-' && isalpha(buf[1]))
1071 return __write_ports_delxprt(buf);
1072
1011 return -EINVAL; 1073 return -EINVAL;
1012} 1074}
1013 1075
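The refactored __write_ports() dispatches on the first character of whatever was written to the nfsd "portlist" file. A userspace sketch of the add-transport case (assumes root privileges and the nfsd filesystem mounted at /proc/fs/nfsd, as rpc.nfsd normally arranges):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *req = "tcp 2049\n";	/* handled by __write_ports_addxprt() */
	int fd = open("/proc/fs/nfsd/portlist", O_WRONLY);

	if (fd < 0) {
		perror("open portlist");
		return 1;
	}
	if (write(fd, req, strlen(req)) < 0)
		perror("add tcp listener");
	close(fd);
	return 0;
}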
@@ -1030,7 +1092,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
1030 * buf: C string containing an unsigned 1092 * buf: C string containing an unsigned
1031 * integer value representing a bound 1093 * integer value representing a bound
1032 * but unconnected socket that is to be 1094 * but unconnected socket that is to be
1033 * used as an NFSD listener 1095 * used as an NFSD listener; listen(3)
1096 * must be called for a SOCK_STREAM
1097 * socket, otherwise it is ignored
1034 * size: non-zero length of C string in @buf 1098 * size: non-zero length of C string in @buf
1035 * Output: 1099 * Output:
1036 * On success: NFS service is started; 1100 * On success: NFS service is started;
@@ -1138,7 +1202,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
1138 nfsd_max_blksize = bsize; 1202 nfsd_max_blksize = bsize;
1139 mutex_unlock(&nfsd_mutex); 1203 mutex_unlock(&nfsd_mutex);
1140 } 1204 }
1141 return sprintf(buf, "%d\n", nfsd_max_blksize); 1205
1206 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n",
1207 nfsd_max_blksize);
1142} 1208}
1143 1209
1144#ifdef CONFIG_NFSD_V4 1210#ifdef CONFIG_NFSD_V4
@@ -1162,8 +1228,9 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
1162 return -EINVAL; 1228 return -EINVAL;
1163 nfs4_reset_lease(lease); 1229 nfs4_reset_lease(lease);
1164 } 1230 }
1165 sprintf(buf, "%ld\n", nfs4_lease_time()); 1231
1166 return strlen(buf); 1232 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
1233 nfs4_lease_time());
1167} 1234}
1168 1235
1169/** 1236/**
@@ -1219,8 +1286,9 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
1219 1286
1220 status = nfs4_reset_recoverydir(recdir); 1287 status = nfs4_reset_recoverydir(recdir);
1221 } 1288 }
1222 sprintf(buf, "%s\n", nfs4_recoverydir()); 1289
1223 return strlen(buf); 1290 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n",
1291 nfs4_recoverydir());
1224} 1292}
1225 1293
1226/** 1294/**
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 9f1ca17293d3..8847f3fbfc1e 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -27,9 +27,6 @@
27#define NFSDDBG_FACILITY NFSDDBG_FH 27#define NFSDDBG_FACILITY NFSDDBG_FH
28 28
29 29
30static int nfsd_nr_verified;
31static int nfsd_nr_put;
32
33/* 30/*
34 * our acceptability function. 31 * our acceptability function.
35 * if NOSUBTREECHECK, accept anything 32 * if NOSUBTREECHECK, accept anything
@@ -251,7 +248,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
251 248
252 fhp->fh_dentry = dentry; 249 fhp->fh_dentry = dentry;
253 fhp->fh_export = exp; 250 fhp->fh_export = exp;
254 nfsd_nr_verified++;
255 return 0; 251 return 0;
256out: 252out:
257 exp_put(exp); 253 exp_put(exp);
@@ -552,7 +548,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
552 return nfserr_opnotsupp; 548 return nfserr_opnotsupp;
553 } 549 }
554 550
555 nfsd_nr_verified++;
556 return 0; 551 return 0;
557} 552}
558 553
@@ -609,7 +604,6 @@ fh_put(struct svc_fh *fhp)
609 fhp->fh_pre_saved = 0; 604 fhp->fh_pre_saved = 0;
610 fhp->fh_post_saved = 0; 605 fhp->fh_post_saved = 0;
611#endif 606#endif
612 nfsd_nr_put++;
613 } 607 }
614 if (exp) { 608 if (exp) {
615 cache_put(&exp->h, &svc_export_cache); 609 cache_put(&exp->h, &svc_export_cache);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e298e260b5f1..0eb9c820b7a6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -533,45 +533,179 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
533 * NFSv2 Server procedures. 533 * NFSv2 Server procedures.
534 * Only the results of non-idempotent operations are cached. 534 * Only the results of non-idempotent operations are cached.
535 */ 535 */
536#define nfsd_proc_none NULL
537#define nfssvc_release_none NULL
538struct nfsd_void { int dummy; }; 536struct nfsd_void { int dummy; };
539 537
540#define PROC(name, argt, rest, relt, cache, respsize) \
541 { (svc_procfunc) nfsd_proc_##name, \
542 (kxdrproc_t) nfssvc_decode_##argt, \
543 (kxdrproc_t) nfssvc_encode_##rest, \
544 (kxdrproc_t) nfssvc_release_##relt, \
545 sizeof(struct nfsd_##argt), \
546 sizeof(struct nfsd_##rest), \
547 0, \
548 cache, \
549 respsize, \
550 }
551
552#define ST 1 /* status */ 538#define ST 1 /* status */
553#define FH 8 /* filehandle */ 539#define FH 8 /* filehandle */
554#define AT 18 /* attributes */ 540#define AT 18 /* attributes */
555 541
556static struct svc_procedure nfsd_procedures2[18] = { 542static struct svc_procedure nfsd_procedures2[18] = {
557 PROC(null, void, void, none, RC_NOCACHE, ST), 543 [NFSPROC_NULL] = {
558 PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), 544 .pc_func = (svc_procfunc) nfsd_proc_null,
559 PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 545 .pc_decode = (kxdrproc_t) nfssvc_decode_void,
560 PROC(none, void, void, none, RC_NOCACHE, ST), 546 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
561 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 547 .pc_argsize = sizeof(struct nfsd_void),
562 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 548 .pc_ressize = sizeof(struct nfsd_void),
563 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), 549 .pc_cachetype = RC_NOCACHE,
564 PROC(none, void, void, none, RC_NOCACHE, ST), 550 .pc_xdrressize = ST,
565 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 551 },
566 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 552 [NFSPROC_GETATTR] = {
567 PROC(remove, diropargs, void, none, RC_REPLSTAT, ST), 553 .pc_func = (svc_procfunc) nfsd_proc_getattr,
568 PROC(rename, renameargs, void, none, RC_REPLSTAT, ST), 554 .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
569 PROC(link, linkargs, void, none, RC_REPLSTAT, ST), 555 .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
570 PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST), 556 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
571 PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 557 .pc_argsize = sizeof(struct nfsd_fhandle),
572 PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST), 558 .pc_ressize = sizeof(struct nfsd_attrstat),
573 PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0), 559 .pc_cachetype = RC_NOCACHE,
574 PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5), 560 .pc_xdrressize = ST+AT,
561 },
562 [NFSPROC_SETATTR] = {
563 .pc_func = (svc_procfunc) nfsd_proc_setattr,
564 .pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
565 .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
566 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
567 .pc_argsize = sizeof(struct nfsd_sattrargs),
568 .pc_ressize = sizeof(struct nfsd_attrstat),
569 .pc_cachetype = RC_REPLBUFF,
570 .pc_xdrressize = ST+AT,
571 },
572 [NFSPROC_ROOT] = {
573 .pc_decode = (kxdrproc_t) nfssvc_decode_void,
574 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
575 .pc_argsize = sizeof(struct nfsd_void),
576 .pc_ressize = sizeof(struct nfsd_void),
577 .pc_cachetype = RC_NOCACHE,
578 .pc_xdrressize = ST,
579 },
580 [NFSPROC_LOOKUP] = {
581 .pc_func = (svc_procfunc) nfsd_proc_lookup,
582 .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
583 .pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
584 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
585 .pc_argsize = sizeof(struct nfsd_diropargs),
586 .pc_ressize = sizeof(struct nfsd_diropres),
587 .pc_cachetype = RC_NOCACHE,
588 .pc_xdrressize = ST+FH+AT,
589 },
590 [NFSPROC_READLINK] = {
591 .pc_func = (svc_procfunc) nfsd_proc_readlink,
592 .pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
593 .pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
594 .pc_argsize = sizeof(struct nfsd_readlinkargs),
595 .pc_ressize = sizeof(struct nfsd_readlinkres),
596 .pc_cachetype = RC_NOCACHE,
597 .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
598 },
599 [NFSPROC_READ] = {
600 .pc_func = (svc_procfunc) nfsd_proc_read,
601 .pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
602 .pc_encode = (kxdrproc_t) nfssvc_encode_readres,
603 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
604 .pc_argsize = sizeof(struct nfsd_readargs),
605 .pc_ressize = sizeof(struct nfsd_readres),
606 .pc_cachetype = RC_NOCACHE,
607 .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
608 },
609 [NFSPROC_WRITECACHE] = {
610 .pc_decode = (kxdrproc_t) nfssvc_decode_void,
611 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
612 .pc_argsize = sizeof(struct nfsd_void),
613 .pc_ressize = sizeof(struct nfsd_void),
614 .pc_cachetype = RC_NOCACHE,
615 .pc_xdrressize = ST,
616 },
617 [NFSPROC_WRITE] = {
618 .pc_func = (svc_procfunc) nfsd_proc_write,
619 .pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
620 .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
621 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
622 .pc_argsize = sizeof(struct nfsd_writeargs),
623 .pc_ressize = sizeof(struct nfsd_attrstat),
624 .pc_cachetype = RC_REPLBUFF,
625 .pc_xdrressize = ST+AT,
626 },
627 [NFSPROC_CREATE] = {
628 .pc_func = (svc_procfunc) nfsd_proc_create,
629 .pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
630 .pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
631 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
632 .pc_argsize = sizeof(struct nfsd_createargs),
633 .pc_ressize = sizeof(struct nfsd_diropres),
634 .pc_cachetype = RC_REPLBUFF,
635 .pc_xdrressize = ST+FH+AT,
636 },
637 [NFSPROC_REMOVE] = {
638 .pc_func = (svc_procfunc) nfsd_proc_remove,
639 .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
640 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
641 .pc_argsize = sizeof(struct nfsd_diropargs),
642 .pc_ressize = sizeof(struct nfsd_void),
643 .pc_cachetype = RC_REPLSTAT,
644 .pc_xdrressize = ST,
645 },
646 [NFSPROC_RENAME] = {
647 .pc_func = (svc_procfunc) nfsd_proc_rename,
648 .pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
649 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
650 .pc_argsize = sizeof(struct nfsd_renameargs),
651 .pc_ressize = sizeof(struct nfsd_void),
652 .pc_cachetype = RC_REPLSTAT,
653 .pc_xdrressize = ST,
654 },
655 [NFSPROC_LINK] = {
656 .pc_func = (svc_procfunc) nfsd_proc_link,
657 .pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
658 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
659 .pc_argsize = sizeof(struct nfsd_linkargs),
660 .pc_ressize = sizeof(struct nfsd_void),
661 .pc_cachetype = RC_REPLSTAT,
662 .pc_xdrressize = ST,
663 },
664 [NFSPROC_SYMLINK] = {
665 .pc_func = (svc_procfunc) nfsd_proc_symlink,
666 .pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
667 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
668 .pc_argsize = sizeof(struct nfsd_symlinkargs),
669 .pc_ressize = sizeof(struct nfsd_void),
670 .pc_cachetype = RC_REPLSTAT,
671 .pc_xdrressize = ST,
672 },
673 [NFSPROC_MKDIR] = {
674 .pc_func = (svc_procfunc) nfsd_proc_mkdir,
675 .pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
676 .pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
677 .pc_release = (kxdrproc_t) nfssvc_release_fhandle,
678 .pc_argsize = sizeof(struct nfsd_createargs),
679 .pc_ressize = sizeof(struct nfsd_diropres),
680 .pc_cachetype = RC_REPLBUFF,
681 .pc_xdrressize = ST+FH+AT,
682 },
683 [NFSPROC_RMDIR] = {
684 .pc_func = (svc_procfunc) nfsd_proc_rmdir,
685 .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
686 .pc_encode = (kxdrproc_t) nfssvc_encode_void,
687 .pc_argsize = sizeof(struct nfsd_diropargs),
688 .pc_ressize = sizeof(struct nfsd_void),
689 .pc_cachetype = RC_REPLSTAT,
690 .pc_xdrressize = ST,
691 },
692 [NFSPROC_READDIR] = {
693 .pc_func = (svc_procfunc) nfsd_proc_readdir,
694 .pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
695 .pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
696 .pc_argsize = sizeof(struct nfsd_readdirargs),
697 .pc_ressize = sizeof(struct nfsd_readdirres),
698 .pc_cachetype = RC_NOCACHE,
699 },
700 [NFSPROC_STATFS] = {
701 .pc_func = (svc_procfunc) nfsd_proc_statfs,
702 .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
703 .pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
704 .pc_argsize = sizeof(struct nfsd_fhandle),
705 .pc_ressize = sizeof(struct nfsd_statfsres),
706 .pc_cachetype = RC_NOCACHE,
707 .pc_xdrressize = ST+5,
708 },
575}; 709};
576 710
577 711
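The nfsd_procedures2[] conversion above drops the positional PROC() macro in favour of C99 designated initializers, so entries are keyed by procedure number and unspecified members (such as .pc_release for procedures that have none) default to zero. A small self-contained example of the pattern:

#include <stddef.h>
#include <stdio.h>

struct proc_entry {
	int	(*handler)(void);
	size_t	argsize;
	int	cachetype;
};

enum { PROC_NULL, PROC_GETATTR, PROC_MAX };

static int do_null(void)    { return 0; }
static int do_getattr(void) { return 42; }

/* Slots are named by index; anything not mentioned is zero-filled. */
static struct proc_entry procs[PROC_MAX] = {
	[PROC_NULL] = {
		.handler   = do_null,
		.argsize   = 0,
		.cachetype = 0,
	},
	[PROC_GETATTR] = {
		.handler   = do_getattr,
		.argsize   = sizeof(int),
		.cachetype = 1,
	},
};

int main(void)
{
	printf("getattr -> %d (cachetype %d)\n",
	       procs[PROC_GETATTR].handler(), procs[PROC_GETATTR].cachetype);
	return 0;
}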
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cbba4a935786..d4c9884cd54b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -390,12 +390,14 @@ nfsd_svc(unsigned short port, int nrservs)
390 390
391 mutex_lock(&nfsd_mutex); 391 mutex_lock(&nfsd_mutex);
392 dprintk("nfsd: creating service\n"); 392 dprintk("nfsd: creating service\n");
393 error = -EINVAL;
394 if (nrservs <= 0) 393 if (nrservs <= 0)
395 nrservs = 0; 394 nrservs = 0;
396 if (nrservs > NFSD_MAXSERVS) 395 if (nrservs > NFSD_MAXSERVS)
397 nrservs = NFSD_MAXSERVS; 396 nrservs = NFSD_MAXSERVS;
398 397 error = 0;
398 if (nrservs == 0 && nfsd_serv == NULL)
399 goto out;
400
399 /* Readahead param cache - will no-op if it already exists */ 401 /* Readahead param cache - will no-op if it already exists */
400 error = nfsd_racache_init(2*nrservs); 402 error = nfsd_racache_init(2*nrservs);
401 if (error<0) 403 if (error<0)
@@ -413,6 +415,12 @@ nfsd_svc(unsigned short port, int nrservs)
413 goto failure; 415 goto failure;
414 416
415 error = svc_set_num_threads(nfsd_serv, NULL, nrservs); 417 error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
418 if (error == 0)
419 /* We are holding a reference to nfsd_serv which
420 * we don't want to count in the return value,
421 * so subtract 1
422 */
423 error = nfsd_serv->sv_nrthreads - 1;
416 failure: 424 failure:
417 svc_destroy(nfsd_serv); /* Release server */ 425 svc_destroy(nfsd_serv); /* Release server */
418 out: 426 out:
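With this change nfsd_svc() no longer returns 0 on success: it reports the number of nfsd threads actually running (sv_nrthreads minus the caller's own temporary reference), or a negative errno on failure. A condensed sketch of how the nfsctl writers above consume that value (mirrors write_threads/write_svc, error paths trimmed):

/* Sketch: set the thread count, then report what is actually running. */
static ssize_t set_thread_count(char *buf, int want)
{
	int running = nfsd_svc(NFS_PORT, want);

	if (running < 0)
		return running;			/* negative errno */
	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", running);
}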
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99f835753596..4145083dcf88 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry)
966 mutex_unlock(&dentry->d_inode->i_mutex); 966 mutex_unlock(&dentry->d_inode->i_mutex);
967} 967}
968 968
969/*
970 * Gathered writes: If another process is currently writing to the file,
971 * there's a high chance this is another nfsd (triggered by a bulk write
972 * from a client's biod). Rather than syncing the file with each write
973 * request, we sleep for 10 msec.
974 *
975 * I don't know if this roughly approximates C. Juszak's idea of
976 * gathered writes, but it's a nice and simple solution (IMHO), and it
977 * seems to work:-)
978 *
979 * Note: we do this only in the NFSv2 case, since v3 and higher have a
980 * better tool (separate unstable writes and commits) for solving this
981 * problem.
982 */
983static int wait_for_concurrent_writes(struct file *file)
984{
985 struct inode *inode = file->f_path.dentry->d_inode;
986 static ino_t last_ino;
987 static dev_t last_dev;
988 int err = 0;
989
990 if (atomic_read(&inode->i_writecount) > 1
991 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
992 dprintk("nfsd: write defer %d\n", task_pid_nr(current));
993 msleep(10);
994 dprintk("nfsd: write resume %d\n", task_pid_nr(current));
995 }
996
997 if (inode->i_state & I_DIRTY) {
998 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
999 err = nfsd_sync(file);
1000 }
1001 last_ino = inode->i_ino;
1002 last_dev = inode->i_sb->s_dev;
1003 return err;
1004}
1005
969static __be32 1006static __be32
970nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1007nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
971 loff_t offset, struct kvec *vec, int vlen, 1008 loff_t offset, struct kvec *vec, int vlen,
@@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
978 __be32 err = 0; 1015 __be32 err = 0;
979 int host_err; 1016 int host_err;
980 int stable = *stablep; 1017 int stable = *stablep;
1018 int use_wgather;
981 1019
982#ifdef MSNFS 1020#ifdef MSNFS
983 err = nfserr_perm; 1021 err = nfserr_perm;
@@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
996 * - the sync export option has been set, or 1034 * - the sync export option has been set, or
997 * - the client requested O_SYNC behavior (NFSv3 feature). 1035 * - the client requested O_SYNC behavior (NFSv3 feature).
998 * - The file system doesn't support fsync(). 1036 * - The file system doesn't support fsync().
999 * When gathered writes have been configured for this volume, 1037 * When NFSv2 gathered writes have been configured for this volume,
1000 * flushing the data to disk is handled separately below. 1038 * flushing the data to disk is handled separately below.
1001 */ 1039 */
1040 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
1002 1041
1003 if (!file->f_op->fsync) {/* COMMIT3 cannot work */ 1042 if (!file->f_op->fsync) {/* COMMIT3 cannot work */
1004 stable = 2; 1043 stable = 2;
@@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1007 1046
1008 if (!EX_ISSYNC(exp)) 1047 if (!EX_ISSYNC(exp))
1009 stable = 0; 1048 stable = 0;
1010 if (stable && !EX_WGATHER(exp)) { 1049 if (stable && !use_wgather) {
1011 spin_lock(&file->f_lock); 1050 spin_lock(&file->f_lock);
1012 file->f_flags |= O_SYNC; 1051 file->f_flags |= O_SYNC;
1013 spin_unlock(&file->f_lock); 1052 spin_unlock(&file->f_lock);
@@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1017 oldfs = get_fs(); set_fs(KERNEL_DS); 1056 oldfs = get_fs(); set_fs(KERNEL_DS);
1018 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1057 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1019 set_fs(oldfs); 1058 set_fs(oldfs);
1020 if (host_err >= 0) { 1059 if (host_err < 0)
1021 *cnt = host_err; 1060 goto out_nfserr;
1022 nfsdstats.io_write += host_err; 1061 *cnt = host_err;
1023 fsnotify_modify(file->f_path.dentry); 1062 nfsdstats.io_write += host_err;
1024 } 1063 fsnotify_modify(file->f_path.dentry);
1025 1064
1026 /* clear setuid/setgid flag after write */ 1065 /* clear setuid/setgid flag after write */
1027 if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) 1066 if (inode->i_mode & (S_ISUID | S_ISGID))
1028 kill_suid(dentry); 1067 kill_suid(dentry);
1029 1068
1030 if (host_err >= 0 && stable) { 1069 if (stable && use_wgather)
1031 static ino_t last_ino; 1070 host_err = wait_for_concurrent_writes(file);
1032 static dev_t last_dev;
1033
1034 /*
1035 * Gathered writes: If another process is currently
1036 * writing to the file, there's a high chance
1037 * this is another nfsd (triggered by a bulk write
1038 * from a client's biod). Rather than syncing the
1039 * file with each write request, we sleep for 10 msec.
1040 *
1041 * I don't know if this roughly approximates
1042 * C. Juszak's idea of gathered writes, but it's a
1043 * nice and simple solution (IMHO), and it seems to
1044 * work:-)
1045 */
1046 if (EX_WGATHER(exp)) {
1047 if (atomic_read(&inode->i_writecount) > 1
1048 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
1049 dprintk("nfsd: write defer %d\n", task_pid_nr(current));
1050 msleep(10);
1051 dprintk("nfsd: write resume %d\n", task_pid_nr(current));
1052 }
1053
1054 if (inode->i_state & I_DIRTY) {
1055 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
1056 host_err=nfsd_sync(file);
1057 }
1058#if 0
1059 wake_up(&inode->i_wait);
1060#endif
1061 }
1062 last_ino = inode->i_ino;
1063 last_dev = inode->i_sb->s_dev;
1064 }
1065 1071
1072out_nfserr:
1066 dprintk("nfsd: write complete host_err=%d\n", host_err); 1073 dprintk("nfsd: write complete host_err=%d\n", host_err);
1067 if (host_err >= 0) 1074 if (host_err >= 0)
1068 err = 0; 1075 err = 0;
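After factoring out wait_for_concurrent_writes(), the tail of nfsd_vfs_write() reduces to the sequence below. This is a condensed, not verbatim, sketch; use_wgather is true only for NFSv2 requests on exports carrying the wgather option:

use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);

if (stable && !use_wgather)
	file->f_flags |= O_SYNC;	/* taken under file->f_lock in the real code */

host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
if (host_err < 0)
	goto out_nfserr;
*cnt = host_err;
nfsdstats.io_write += host_err;
fsnotify_modify(file->f_path.dentry);

if (inode->i_mode & (S_ISUID | S_ISGID))
	kill_suid(dentry);

if (stable && use_wgather)
	host_err = wait_for_concurrent_writes(file);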
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 064279e33bbb..36df60b6d8a4 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -31,21 +31,26 @@
31#include "dat.h" 31#include "dat.h"
32#include "alloc.h" 32#include "alloc.h"
33 33
34struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
35{
36 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
37}
38
34int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, 39int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
35 __u64 *ptrp) 40 __u64 *ptrp)
36{ 41{
37 __u64 ptr; 42 sector_t blocknr;
38 int ret; 43 int ret;
39 44
40 down_read(&bmap->b_sem); 45 down_read(&bmap->b_sem);
41 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); 46 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
42 if (ret < 0) 47 if (ret < 0)
43 goto out; 48 goto out;
44 if (bmap->b_pops->bpop_translate != NULL) { 49 if (NILFS_BMAP_USE_VBN(bmap)) {
45 ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); 50 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
46 if (ret < 0) 51 &blocknr);
47 goto out; 52 if (!ret)
48 *ptrp = ptr; 53 *ptrp = blocknr;
49 } 54 }
50 55
51 out: 56 out:
@@ -53,6 +58,16 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
53 return ret; 58 return ret;
54} 59}
55 60
61int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
62 unsigned maxblocks)
63{
64 int ret;
65
66 down_read(&bmap->b_sem);
67 ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
68 up_read(&bmap->b_sem);
69 return ret;
70}
56 71
57/** 72/**
58 * nilfs_bmap_lookup - find a record 73 * nilfs_bmap_lookup - find a record
@@ -101,8 +116,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
101 if (n < 0) 116 if (n < 0)
102 return n; 117 return n;
103 ret = nilfs_btree_convert_and_insert( 118 ret = nilfs_btree_convert_and_insert(
104 bmap, key, ptr, keys, ptrs, n, 119 bmap, key, ptr, keys, ptrs, n);
105 NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH);
106 if (ret == 0) 120 if (ret == 0)
107 bmap->b_u.u_flags |= NILFS_BMAP_LARGE; 121 bmap->b_u.u_flags |= NILFS_BMAP_LARGE;
108 122
@@ -158,8 +172,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
158 if (n < 0) 172 if (n < 0)
159 return n; 173 return n;
160 ret = nilfs_direct_delete_and_convert( 174 ret = nilfs_direct_delete_and_convert(
161 bmap, key, keys, ptrs, n, 175 bmap, key, keys, ptrs, n);
162 NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH);
163 if (ret == 0) 176 if (ret == 0)
164 bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; 177 bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE;
165 178
@@ -417,38 +430,6 @@ void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
417 mark_inode_dirty(bmap->b_inode); 430 mark_inode_dirty(bmap->b_inode);
418} 431}
419 432
420int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr,
421 struct buffer_head **bhp)
422{
423 return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
424 ptr, 0, bhp, 0);
425}
426
427void nilfs_bmap_put_block(const struct nilfs_bmap *bmap,
428 struct buffer_head *bh)
429{
430 brelse(bh);
431}
432
433int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr,
434 struct buffer_head **bhp)
435{
436 int ret;
437
438 ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
439 ptr, 0, bhp, 1);
440 if (ret < 0)
441 return ret;
442 set_buffer_nilfs_volatile(*bhp);
443 return 0;
444}
445
446void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap,
447 struct buffer_head *bh)
448{
449 nilfs_btnode_delete(bh);
450}
451
452__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 433__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
453 const struct buffer_head *bh) 434 const struct buffer_head *bh)
454{ 435{
@@ -476,11 +457,6 @@ __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
476 return NILFS_BMAP_INVALID_PTR; 457 return NILFS_BMAP_INVALID_PTR;
477} 458}
478 459
479static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
480{
481 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
482}
483
484#define NILFS_BMAP_GROUP_DIV 8 460#define NILFS_BMAP_GROUP_DIV 8
485__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) 461__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
486{ 462{
@@ -493,64 +469,51 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
493 (entries_per_group / NILFS_BMAP_GROUP_DIV); 469 (entries_per_group / NILFS_BMAP_GROUP_DIV);
494} 470}
495 471
496static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, 472int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
497 union nilfs_bmap_ptr_req *req) 473 union nilfs_bmap_ptr_req *req)
498{ 474{
499 return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); 475 return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
500} 476}
501 477
502static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, 478void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
503 union nilfs_bmap_ptr_req *req) 479 union nilfs_bmap_ptr_req *req)
504{ 480{
505 nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); 481 nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
506} 482}
507 483
508static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, 484void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
509 union nilfs_bmap_ptr_req *req) 485 union nilfs_bmap_ptr_req *req)
510{ 486{
511 nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); 487 nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
512} 488}
513 489
514static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, 490int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req,
515 union nilfs_bmap_ptr_req *req) 491 sector_t blocknr)
516{ 492{
517 return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); 493 struct inode *dat = nilfs_bmap_get_dat(bmap);
518} 494 int ret;
519
520static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap,
521 union nilfs_bmap_ptr_req *req,
522 sector_t blocknr)
523{
524 nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req,
525 blocknr);
526}
527 495
528static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, 496 ret = nilfs_dat_prepare_start(dat, &req->bpr_req);
529 union nilfs_bmap_ptr_req *req) 497 if (likely(!ret))
530{ 498 nilfs_dat_commit_start(dat, &req->bpr_req, blocknr);
531 nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); 499 return ret;
532} 500}
533 501
534static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, 502int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
535 union nilfs_bmap_ptr_req *req) 503 union nilfs_bmap_ptr_req *req)
536{ 504{
537 return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); 505 return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
538} 506}
539 507
540static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, 508void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
541 union nilfs_bmap_ptr_req *req) 509 union nilfs_bmap_ptr_req *req)
542{
543 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0);
544}
545
546static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap,
547 union nilfs_bmap_ptr_req *req)
548{ 510{
549 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); 511 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req,
512 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
550} 513}
551 514
552static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, 515void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
553 union nilfs_bmap_ptr_req *req) 516 union nilfs_bmap_ptr_req *req)
554{ 517{
555 nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); 518 nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
556} 519}
@@ -566,128 +529,44 @@ int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
566 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); 529 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
567} 530}
568 531
569int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, 532int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap,
570 union nilfs_bmap_ptr_req *oldreq, 533 union nilfs_bmap_ptr_req *oldreq,
571 union nilfs_bmap_ptr_req *newreq) 534 union nilfs_bmap_ptr_req *newreq)
572{ 535{
536 struct inode *dat = nilfs_bmap_get_dat(bmap);
573 int ret; 537 int ret;
574 538
575 ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); 539 ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req);
576 if (ret < 0) 540 if (ret < 0)
577 return ret; 541 return ret;
578 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); 542 ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req);
579 if (ret < 0) 543 if (ret < 0)
580 bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); 544 nilfs_dat_abort_end(dat, &oldreq->bpr_req);
581 545
582 return ret; 546 return ret;
583} 547}
584 548
585void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, 549void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap,
586 union nilfs_bmap_ptr_req *oldreq, 550 union nilfs_bmap_ptr_req *oldreq,
587 union nilfs_bmap_ptr_req *newreq) 551 union nilfs_bmap_ptr_req *newreq)
588{ 552{
589 bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); 553 struct inode *dat = nilfs_bmap_get_dat(bmap);
590 bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq);
591}
592 554
593void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, 555 nilfs_dat_commit_end(dat, &oldreq->bpr_req,
594 union nilfs_bmap_ptr_req *oldreq, 556 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
595 union nilfs_bmap_ptr_req *newreq) 557 nilfs_dat_commit_alloc(dat, &newreq->bpr_req);
596{
597 bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
598 bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq);
599} 558}
600 559
601static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, 560void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
602 __u64 *ptrp) 561 union nilfs_bmap_ptr_req *oldreq,
562 union nilfs_bmap_ptr_req *newreq)
603{ 563{
604 sector_t blocknr; 564 struct inode *dat = nilfs_bmap_get_dat(bmap);
605 int ret;
606
607 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr);
608 if (ret < 0)
609 return ret;
610 if (ptrp != NULL)
611 *ptrp = blocknr;
612 return 0;
613}
614 565
615static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, 566 nilfs_dat_abort_end(dat, &oldreq->bpr_req);
616 union nilfs_bmap_ptr_req *req) 567 nilfs_dat_abort_alloc(dat, &newreq->bpr_req);
617{
618 /* ignore target ptr */
619 req->bpr_ptr = bmap->b_last_allocated_ptr++;
620 return 0;
621} 568}
622 569
623static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap,
624 union nilfs_bmap_ptr_req *req)
625{
626 /* do nothing */
627}
628
629static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap,
630 union nilfs_bmap_ptr_req *req)
631{
632 bmap->b_last_allocated_ptr--;
633}
634
635static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = {
636 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
637 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
638 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
639 .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
640 .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
641 .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
642 .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
643 .bpop_commit_end_ptr = nilfs_bmap_commit_end_v,
644 .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
645
646 .bpop_translate = nilfs_bmap_translate_v,
647};
648
649static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = {
650 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
651 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
652 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
653 .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
654 .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
655 .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
656 .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
657 .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt,
658 .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
659
660 .bpop_translate = nilfs_bmap_translate_v,
661};
662
663static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = {
664 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p,
665 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p,
666 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p,
667 .bpop_prepare_start_ptr = NULL,
668 .bpop_commit_start_ptr = NULL,
669 .bpop_abort_start_ptr = NULL,
670 .bpop_prepare_end_ptr = NULL,
671 .bpop_commit_end_ptr = NULL,
672 .bpop_abort_end_ptr = NULL,
673
674 .bpop_translate = NULL,
675};
676
677static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
678 .bpop_prepare_alloc_ptr = NULL,
679 .bpop_commit_alloc_ptr = NULL,
680 .bpop_abort_alloc_ptr = NULL,
681 .bpop_prepare_start_ptr = NULL,
682 .bpop_commit_start_ptr = NULL,
683 .bpop_abort_start_ptr = NULL,
684 .bpop_prepare_end_ptr = NULL,
685 .bpop_commit_end_ptr = NULL,
686 .bpop_abort_end_ptr = NULL,
687
688 .bpop_translate = NULL,
689};
690
691static struct lock_class_key nilfs_bmap_dat_lock_key; 570static struct lock_class_key nilfs_bmap_dat_lock_key;
692 571
693/** 572/**
@@ -714,31 +593,26 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
714 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; 593 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
715 switch (bmap->b_inode->i_ino) { 594 switch (bmap->b_inode->i_ino) {
716 case NILFS_DAT_INO: 595 case NILFS_DAT_INO:
717 bmap->b_pops = &nilfs_bmap_ptr_ops_p; 596 bmap->b_ptr_type = NILFS_BMAP_PTR_P;
718 bmap->b_last_allocated_key = 0; /* XXX: use macro */ 597 bmap->b_last_allocated_key = 0;
719 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; 598 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
720 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); 599 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
721 break; 600 break;
722 case NILFS_CPFILE_INO: 601 case NILFS_CPFILE_INO:
723 case NILFS_SUFILE_INO: 602 case NILFS_SUFILE_INO:
724 bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; 603 bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
725 bmap->b_last_allocated_key = 0; /* XXX: use macro */ 604 bmap->b_last_allocated_key = 0;
726 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; 605 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
727 break; 606 break;
728 default: 607 default:
729 bmap->b_pops = &nilfs_bmap_ptr_ops_v; 608 bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
730 bmap->b_last_allocated_key = 0; /* XXX: use macro */ 609 bmap->b_last_allocated_key = 0;
731 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; 610 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
732 break; 611 break;
733 } 612 }
734 613
735 return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? 614 return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ?
736 nilfs_btree_init(bmap, 615 nilfs_btree_init(bmap) : nilfs_direct_init(bmap);
737 NILFS_BMAP_LARGE_LOW,
738 NILFS_BMAP_LARGE_HIGH) :
739 nilfs_direct_init(bmap,
740 NILFS_BMAP_SMALL_LOW,
741 NILFS_BMAP_SMALL_HIGH);
742} 616}
743 617
744/** 618/**
@@ -764,7 +638,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
764 memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); 638 memset(&bmap->b_u, 0, NILFS_BMAP_SIZE);
765 init_rwsem(&bmap->b_sem); 639 init_rwsem(&bmap->b_sem);
766 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; 640 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
767 bmap->b_pops = &nilfs_bmap_ptr_ops_gc; 641 bmap->b_ptr_type = NILFS_BMAP_PTR_U;
768 bmap->b_last_allocated_key = 0; 642 bmap->b_last_allocated_key = 0;
769 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; 643 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
770 bmap->b_state = 0; 644 bmap->b_state = 0;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 4f2708abb1ba..b2890cdcef12 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -64,6 +64,8 @@ struct nilfs_bmap_stats {
64 */ 64 */
65struct nilfs_bmap_operations { 65struct nilfs_bmap_operations {
66 int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); 66 int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
67 int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *,
68 unsigned);
67 int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); 69 int (*bop_insert)(struct nilfs_bmap *, __u64, __u64);
68 int (*bop_delete)(struct nilfs_bmap *, __u64); 70 int (*bop_delete)(struct nilfs_bmap *, __u64);
69 void (*bop_clear)(struct nilfs_bmap *); 71 void (*bop_clear)(struct nilfs_bmap *);
@@ -86,34 +88,6 @@ struct nilfs_bmap_operations {
86}; 88};
87 89
88 90
89/**
90 * struct nilfs_bmap_ptr_operations - bmap ptr operation table
91 */
92struct nilfs_bmap_ptr_operations {
93 int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *,
94 union nilfs_bmap_ptr_req *);
95 void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *,
96 union nilfs_bmap_ptr_req *);
97 void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *,
98 union nilfs_bmap_ptr_req *);
99 int (*bpop_prepare_start_ptr)(struct nilfs_bmap *,
100 union nilfs_bmap_ptr_req *);
101 void (*bpop_commit_start_ptr)(struct nilfs_bmap *,
102 union nilfs_bmap_ptr_req *,
103 sector_t);
104 void (*bpop_abort_start_ptr)(struct nilfs_bmap *,
105 union nilfs_bmap_ptr_req *);
106 int (*bpop_prepare_end_ptr)(struct nilfs_bmap *,
107 union nilfs_bmap_ptr_req *);
108 void (*bpop_commit_end_ptr)(struct nilfs_bmap *,
109 union nilfs_bmap_ptr_req *);
110 void (*bpop_abort_end_ptr)(struct nilfs_bmap *,
111 union nilfs_bmap_ptr_req *);
112
113 int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *);
114};
115
116
117#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) 91#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64))
118#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) 92#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */)
119#define NILFS_BMAP_NEW_PTR_INIT \ 93#define NILFS_BMAP_NEW_PTR_INIT \
@@ -131,11 +105,9 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
131 * @b_sem: semaphore 105 * @b_sem: semaphore
132 * @b_inode: owner of bmap 106 * @b_inode: owner of bmap
133 * @b_ops: bmap operation table 107 * @b_ops: bmap operation table
134 * @b_pops: bmap ptr operation table
135 * @b_low: low watermark of conversion
136 * @b_high: high watermark of conversion
137 * @b_last_allocated_key: last allocated key for data block 108 * @b_last_allocated_key: last allocated key for data block
138 * @b_last_allocated_ptr: last allocated ptr for data block 109 * @b_last_allocated_ptr: last allocated ptr for data block
110 * @b_ptr_type: pointer type
139 * @b_state: state 111 * @b_state: state
140 */ 112 */
141struct nilfs_bmap { 113struct nilfs_bmap {
@@ -146,14 +118,22 @@ struct nilfs_bmap {
146 struct rw_semaphore b_sem; 118 struct rw_semaphore b_sem;
147 struct inode *b_inode; 119 struct inode *b_inode;
148 const struct nilfs_bmap_operations *b_ops; 120 const struct nilfs_bmap_operations *b_ops;
149 const struct nilfs_bmap_ptr_operations *b_pops;
150 __u64 b_low;
151 __u64 b_high;
152 __u64 b_last_allocated_key; 121 __u64 b_last_allocated_key;
153 __u64 b_last_allocated_ptr; 122 __u64 b_last_allocated_ptr;
123 int b_ptr_type;
154 int b_state; 124 int b_state;
155}; 125};
156 126
127/* pointer type */
128#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */
129#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single
130 version) */
131#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple
132 versions) */
133#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */
134
135#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0)
136
157/* state */ 137/* state */
158#define NILFS_BMAP_DIRTY 0x00000001 138#define NILFS_BMAP_DIRTY 0x00000001
159 139
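With the b_pops operation tables gone, each bmap carries a b_ptr_type and the helpers dispatch on NILFS_BMAP_USE_VBN(). A small sketch of which pointer type nilfs_bmap_read() assigns and what USE_VBN evaluates to for each:

/* Sketch: mirrors the switch in nilfs_bmap_read() and the USE_VBN test. */
static inline int bmap_uses_vbn(const struct nilfs_bmap *bmap)
{
	switch (bmap->b_ptr_type) {
	case NILFS_BMAP_PTR_VS:		/* cpfile, sufile: single-version VBN */
	case NILFS_BMAP_PTR_VM:		/* regular files: multi-version VBN */
		return 1;		/* same as NILFS_BMAP_USE_VBN(bmap) */
	case NILFS_BMAP_PTR_P:		/* the DAT inode: raw block numbers */
	case NILFS_BMAP_PTR_U:		/* GC bmaps: no pointer bookkeeping */
	default:
		return 0;
	}
}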
@@ -162,6 +142,7 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
162int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); 142int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
163void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); 143void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
164int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); 144int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
145int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
165int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); 146int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
166int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); 147int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
167int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); 148int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
@@ -182,7 +163,67 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
182/* 163/*
183 * Internal use only 164 * Internal use only
184 */ 165 */
166struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
167int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *,
168 union nilfs_bmap_ptr_req *);
169void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *,
170 union nilfs_bmap_ptr_req *);
171void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *,
172 union nilfs_bmap_ptr_req *);
185 173
174static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
175 union nilfs_bmap_ptr_req *req)
176{
177 if (NILFS_BMAP_USE_VBN(bmap))
178 return nilfs_bmap_prepare_alloc_v(bmap, req);
179 /* ignore target ptr */
180 req->bpr_ptr = bmap->b_last_allocated_ptr++;
181 return 0;
182}
183
184static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
185 union nilfs_bmap_ptr_req *req)
186{
187 if (NILFS_BMAP_USE_VBN(bmap))
188 nilfs_bmap_commit_alloc_v(bmap, req);
189}
190
191static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
192 union nilfs_bmap_ptr_req *req)
193{
194 if (NILFS_BMAP_USE_VBN(bmap))
195 nilfs_bmap_abort_alloc_v(bmap, req);
196 else
197 bmap->b_last_allocated_ptr--;
198}
199
200int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
201void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
202void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
203
204static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
205 union nilfs_bmap_ptr_req *req)
206{
207 return NILFS_BMAP_USE_VBN(bmap) ?
208 nilfs_bmap_prepare_end_v(bmap, req) : 0;
209}
210
211static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
212 union nilfs_bmap_ptr_req *req)
213{
214 if (NILFS_BMAP_USE_VBN(bmap))
215 nilfs_bmap_commit_end_v(bmap, req);
216}
217
218static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
219 union nilfs_bmap_ptr_req *req)
220{
221 if (NILFS_BMAP_USE_VBN(bmap))
222 nilfs_bmap_abort_end_v(bmap, req);
223}
224
225int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *,
226 sector_t);
186int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); 227int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
187int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); 228int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
188 229
@@ -193,28 +234,20 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
193__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); 234__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
194__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); 235__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
195 236
196int nilfs_bmap_prepare_update(struct nilfs_bmap *, 237int nilfs_bmap_prepare_update_v(struct nilfs_bmap *,
197 union nilfs_bmap_ptr_req *, 238 union nilfs_bmap_ptr_req *,
198 union nilfs_bmap_ptr_req *); 239 union nilfs_bmap_ptr_req *);
199void nilfs_bmap_commit_update(struct nilfs_bmap *, 240void nilfs_bmap_commit_update_v(struct nilfs_bmap *,
200 union nilfs_bmap_ptr_req *, 241 union nilfs_bmap_ptr_req *,
201 union nilfs_bmap_ptr_req *); 242 union nilfs_bmap_ptr_req *);
202void nilfs_bmap_abort_update(struct nilfs_bmap *, 243void nilfs_bmap_abort_update_v(struct nilfs_bmap *,
203 union nilfs_bmap_ptr_req *, 244 union nilfs_bmap_ptr_req *,
204 union nilfs_bmap_ptr_req *); 245 union nilfs_bmap_ptr_req *);
205 246
206void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); 247void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
207void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); 248void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
208 249
209 250
210int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64,
211 struct buffer_head **);
212void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *);
213int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64,
214 struct buffer_head **);
215void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *);
216
217
218/* Assume that bmap semaphore is locked. */ 251/* Assume that bmap semaphore is locked. */
219static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) 252static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
220{ 253{
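
The bmap.h hunks above drop the per-bmap pointer-operation table (b_pops) and the b_low/b_high conversion watermarks in favour of a single b_ptr_type field; NILFS_BMAP_USE_VBN() then decides at each call site whether the virtual-block-number helpers (nilfs_bmap_*_alloc_v, nilfs_bmap_*_end_v) or the trivial physical path is taken. The fragment below is a stand-alone userspace sketch of that dispatch pattern, not kernel code; the toy_* names and the pretend allocator are invented for illustration.

    #include <stdio.h>

    /* Toy stand-ins for the pointer types introduced in bmap.h. */
    #define TOY_PTR_P  0                      /* physical block number */
    #define TOY_PTR_VS 1                      /* virtual block number, single version */
    #define TOY_USE_VBN(b) ((b)->ptr_type > 0)

    struct toy_bmap {
        int ptr_type;
        unsigned long last_allocated_ptr;
    };

    /* Stands in for nilfs_bmap_prepare_alloc_v(): only reached for VBN bmaps. */
    static int toy_alloc_virtual(struct toy_bmap *b, unsigned long *ptr)
    {
        static unsigned long next_vbn = 1000;  /* pretend DAT allocator */
        (void)b;
        *ptr = next_vbn++;
        return 0;
    }

    /* Models nilfs_bmap_prepare_alloc_ptr(): one branch instead of a b_pops table. */
    static int toy_prepare_alloc(struct toy_bmap *b, unsigned long *ptr)
    {
        if (TOY_USE_VBN(b))
            return toy_alloc_virtual(b, ptr);
        *ptr = b->last_allocated_ptr++;        /* DAT itself: plain running counter */
        return 0;
    }

    int main(void)
    {
        struct toy_bmap dat  = { TOY_PTR_P, 10 };
        struct toy_bmap file = { TOY_PTR_VS, 0 };
        unsigned long p;

        toy_prepare_alloc(&dat, &p);
        printf("physical bmap allocated ptr %lu\n", p);  /* 10 */
        toy_prepare_alloc(&file, &p);
        printf("virtual bmap allocated ptr %lu\n", p);   /* 1000 */
        return 0;
    }

The same branch replaces every b_pops->bpop_* indirection in the btree and direct code further down in this patch.
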
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 4cc07b2c30e0..7e0b61be212e 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -46,15 +46,18 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc)
46 INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); 46 INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
47} 47}
48 48
49static struct address_space_operations def_btnode_aops; 49static struct address_space_operations def_btnode_aops = {
50 .sync_page = block_sync_page,
51};
50 52
51void nilfs_btnode_cache_init(struct address_space *btnc) 53void nilfs_btnode_cache_init(struct address_space *btnc,
54 struct backing_dev_info *bdi)
52{ 55{
53 btnc->host = NULL; /* can safely set to host inode ? */ 56 btnc->host = NULL; /* can safely set to host inode ? */
54 btnc->flags = 0; 57 btnc->flags = 0;
55 mapping_set_gfp_mask(btnc, GFP_NOFS); 58 mapping_set_gfp_mask(btnc, GFP_NOFS);
56 btnc->assoc_mapping = NULL; 59 btnc->assoc_mapping = NULL;
57 btnc->backing_dev_info = &default_backing_dev_info; 60 btnc->backing_dev_info = bdi;
58 btnc->a_ops = &def_btnode_aops; 61 btnc->a_ops = &def_btnode_aops;
59} 62}
60 63
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 35faa86444a7..3e2275172ed6 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -38,7 +38,7 @@ struct nilfs_btnode_chkey_ctxt {
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *); 40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *); 41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 42void nilfs_btnode_cache_clear(struct address_space *);
43int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, 43int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
44 struct buffer_head **, int); 44 struct buffer_head **, int);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 6b37a2767293..aa412724b64e 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -29,6 +29,7 @@
29#include "btnode.h" 29#include "btnode.h"
30#include "btree.h" 30#include "btree.h"
31#include "alloc.h" 31#include "alloc.h"
32#include "dat.h"
32 33
33/** 34/**
34 * struct nilfs_btree_path - A path on which B-tree operations are executed 35 * struct nilfs_btree_path - A path on which B-tree operations are executed
@@ -109,8 +110,7 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
109 level < NILFS_BTREE_LEVEL_MAX; 110 level < NILFS_BTREE_LEVEL_MAX;
110 level++) { 111 level++) {
111 if (path[level].bp_bh != NULL) { 112 if (path[level].bp_bh != NULL) {
112 nilfs_bmap_put_block(&btree->bt_bmap, 113 brelse(path[level].bp_bh);
113 path[level].bp_bh);
114 path[level].bp_bh = NULL; 114 path[level].bp_bh = NULL;
115 } 115 }
116 /* sib_bh is released or deleted by prepare or commit 116 /* sib_bh is released or deleted by prepare or commit
@@ -123,10 +123,29 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
123 } 123 }
124} 124}
125 125
126
127/* 126/*
128 * B-tree node operations 127 * B-tree node operations
129 */ 128 */
129static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
130 struct buffer_head **bhp)
131{
132 struct address_space *btnc =
133 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
134 return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
135}
136
137static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
138 __u64 ptr, struct buffer_head **bhp)
139{
140 struct address_space *btnc =
141 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
142 int ret;
143
144 ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
145 if (!ret)
146 set_buffer_nilfs_volatile(*bhp);
147 return ret;
148}
130 149
131 static inline int 150 static inline int
132nilfs_btree_node_get_flags(const struct nilfs_btree *btree, 151nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
@@ -488,8 +507,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
488 path[level].bp_index = index; 507 path[level].bp_index = index;
489 508
490 for (level--; level >= minlevel; level--) { 509 for (level--; level >= minlevel; level--) {
491 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, 510 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
492 &path[level].bp_bh);
493 if (ret < 0) 511 if (ret < 0)
494 return ret; 512 return ret;
495 node = nilfs_btree_get_nonroot_node(btree, path, level); 513 node = nilfs_btree_get_nonroot_node(btree, path, level);
@@ -535,8 +553,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
535 path[level].bp_index = index; 553 path[level].bp_index = index;
536 554
537 for (level--; level > 0; level--) { 555 for (level--; level > 0; level--) {
538 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, 556 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
539 &path[level].bp_bh);
540 if (ret < 0) 557 if (ret < 0)
541 return ret; 558 return ret;
542 node = nilfs_btree_get_nonroot_node(btree, path, level); 559 node = nilfs_btree_get_nonroot_node(btree, path, level);
@@ -579,6 +596,87 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
579 return ret; 596 return ret;
580} 597}
581 598
599static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
600 __u64 key, __u64 *ptrp, unsigned maxblocks)
601{
602 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
603 struct nilfs_btree_path *path;
604 struct nilfs_btree_node *node;
605 struct inode *dat = NULL;
606 __u64 ptr, ptr2;
607 sector_t blocknr;
608 int level = NILFS_BTREE_LEVEL_NODE_MIN;
609 int ret, cnt, index, maxlevel;
610
611 path = nilfs_btree_alloc_path(btree);
612 if (path == NULL)
613 return -ENOMEM;
614 nilfs_btree_init_path(btree, path);
615 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
616 if (ret < 0)
617 goto out;
618
619 if (NILFS_BMAP_USE_VBN(bmap)) {
620 dat = nilfs_bmap_get_dat(bmap);
621 ret = nilfs_dat_translate(dat, ptr, &blocknr);
622 if (ret < 0)
623 goto out;
624 ptr = blocknr;
625 }
626 cnt = 1;
627 if (cnt == maxblocks)
628 goto end;
629
630 maxlevel = nilfs_btree_height(btree) - 1;
631 node = nilfs_btree_get_node(btree, path, level);
632 index = path[level].bp_index + 1;
633 for (;;) {
634 while (index < nilfs_btree_node_get_nchildren(btree, node)) {
635 if (nilfs_btree_node_get_key(btree, node, index) !=
636 key + cnt)
637 goto end;
638 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
639 if (dat) {
640 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
641 if (ret < 0)
642 goto out;
643 ptr2 = blocknr;
644 }
645 if (ptr2 != ptr + cnt || ++cnt == maxblocks)
646 goto end;
647 index++;
648 continue;
649 }
650 if (level == maxlevel)
651 break;
652
653 /* look-up right sibling node */
654 node = nilfs_btree_get_node(btree, path, level + 1);
655 index = path[level + 1].bp_index + 1;
656 if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
657 nilfs_btree_node_get_key(btree, node, index) != key + cnt)
658 break;
659 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
660 path[level + 1].bp_index = index;
661
662 brelse(path[level].bp_bh);
663 path[level].bp_bh = NULL;
664 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
665 if (ret < 0)
666 goto out;
667 node = nilfs_btree_get_nonroot_node(btree, path, level);
668 index = 0;
669 path[level].bp_index = index;
670 }
671 end:
672 *ptrp = ptr;
673 ret = cnt;
674 out:
675 nilfs_btree_clear_path(btree, path);
676 nilfs_btree_free_path(btree, path);
677 return ret;
678}
679
582static void nilfs_btree_promote_key(struct nilfs_btree *btree, 680static void nilfs_btree_promote_key(struct nilfs_btree *btree,
583 struct nilfs_btree_path *path, 681 struct nilfs_btree_path *path,
584 int level, __u64 key) 682 int level, __u64 key)
@@ -669,13 +767,13 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
669 nilfs_btree_node_get_key(btree, node, 0)); 767 nilfs_btree_node_get_key(btree, node, 0));
670 768
671 if (move) { 769 if (move) {
672 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); 770 brelse(path[level].bp_bh);
673 path[level].bp_bh = path[level].bp_sib_bh; 771 path[level].bp_bh = path[level].bp_sib_bh;
674 path[level].bp_sib_bh = NULL; 772 path[level].bp_sib_bh = NULL;
675 path[level].bp_index += lnchildren; 773 path[level].bp_index += lnchildren;
676 path[level + 1].bp_index--; 774 path[level + 1].bp_index--;
677 } else { 775 } else {
678 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 776 brelse(path[level].bp_sib_bh);
679 path[level].bp_sib_bh = NULL; 777 path[level].bp_sib_bh = NULL;
680 path[level].bp_index -= n; 778 path[level].bp_index -= n;
681 } 779 }
@@ -722,14 +820,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
722 path[level + 1].bp_index--; 820 path[level + 1].bp_index--;
723 821
724 if (move) { 822 if (move) {
725 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); 823 brelse(path[level].bp_bh);
726 path[level].bp_bh = path[level].bp_sib_bh; 824 path[level].bp_bh = path[level].bp_sib_bh;
727 path[level].bp_sib_bh = NULL; 825 path[level].bp_sib_bh = NULL;
728 path[level].bp_index -= 826 path[level].bp_index -=
729 nilfs_btree_node_get_nchildren(btree, node); 827 nilfs_btree_node_get_nchildren(btree, node);
730 path[level + 1].bp_index++; 828 path[level + 1].bp_index++;
731 } else { 829 } else {
732 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 830 brelse(path[level].bp_sib_bh);
733 path[level].bp_sib_bh = NULL; 831 path[level].bp_sib_bh = NULL;
734 } 832 }
735 833
@@ -781,7 +879,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
781 *keyp = nilfs_btree_node_get_key(btree, right, 0); 879 *keyp = nilfs_btree_node_get_key(btree, right, 0);
782 *ptrp = path[level].bp_newreq.bpr_ptr; 880 *ptrp = path[level].bp_newreq.bpr_ptr;
783 881
784 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); 882 brelse(path[level].bp_bh);
785 path[level].bp_bh = path[level].bp_sib_bh; 883 path[level].bp_bh = path[level].bp_sib_bh;
786 path[level].bp_sib_bh = NULL; 884 path[level].bp_sib_bh = NULL;
787 } else { 885 } else {
@@ -790,7 +888,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
790 *keyp = nilfs_btree_node_get_key(btree, right, 0); 888 *keyp = nilfs_btree_node_get_key(btree, right, 0);
791 *ptrp = path[level].bp_newreq.bpr_ptr; 889 *ptrp = path[level].bp_newreq.bpr_ptr;
792 890
793 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 891 brelse(path[level].bp_sib_bh);
794 path[level].bp_sib_bh = NULL; 892 path[level].bp_sib_bh = NULL;
795 } 893 }
796 894
@@ -897,12 +995,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
897 level = NILFS_BTREE_LEVEL_DATA; 995 level = NILFS_BTREE_LEVEL_DATA;
898 996
899 /* allocate a new ptr for data block */ 997 /* allocate a new ptr for data block */
900 if (btree->bt_ops->btop_find_target != NULL) 998 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
901 path[level].bp_newreq.bpr_ptr = 999 path[level].bp_newreq.bpr_ptr =
902 btree->bt_ops->btop_find_target(btree, path, key); 1000 nilfs_btree_find_target_v(btree, path, key);
903 1001
904 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( 1002 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
905 &btree->bt_bmap, &path[level].bp_newreq); 1003 &path[level].bp_newreq);
906 if (ret < 0) 1004 if (ret < 0)
907 goto err_out_data; 1005 goto err_out_data;
908 1006
@@ -924,8 +1022,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
924 if (pindex > 0) { 1022 if (pindex > 0) {
925 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1023 sibptr = nilfs_btree_node_get_ptr(btree, parent,
926 pindex - 1); 1024 pindex - 1);
927 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, 1025 ret = nilfs_btree_get_block(btree, sibptr, &bh);
928 &bh);
929 if (ret < 0) 1026 if (ret < 0)
930 goto err_out_child_node; 1027 goto err_out_child_node;
931 sib = (struct nilfs_btree_node *)bh->b_data; 1028 sib = (struct nilfs_btree_node *)bh->b_data;
@@ -936,7 +1033,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
936 stats->bs_nblocks++; 1033 stats->bs_nblocks++;
937 goto out; 1034 goto out;
938 } else 1035 } else
939 nilfs_bmap_put_block(&btree->bt_bmap, bh); 1036 brelse(bh);
940 } 1037 }
941 1038
942 /* right sibling */ 1039 /* right sibling */
@@ -944,8 +1041,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
944 nilfs_btree_node_get_nchildren(btree, parent) - 1) { 1041 nilfs_btree_node_get_nchildren(btree, parent) - 1) {
945 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1042 sibptr = nilfs_btree_node_get_ptr(btree, parent,
946 pindex + 1); 1043 pindex + 1);
947 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, 1044 ret = nilfs_btree_get_block(btree, sibptr, &bh);
948 &bh);
949 if (ret < 0) 1045 if (ret < 0)
950 goto err_out_child_node; 1046 goto err_out_child_node;
951 sib = (struct nilfs_btree_node *)bh->b_data; 1047 sib = (struct nilfs_btree_node *)bh->b_data;
@@ -956,19 +1052,19 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
956 stats->bs_nblocks++; 1052 stats->bs_nblocks++;
957 goto out; 1053 goto out;
958 } else 1054 } else
959 nilfs_bmap_put_block(&btree->bt_bmap, bh); 1055 brelse(bh);
960 } 1056 }
961 1057
962 /* split */ 1058 /* split */
963 path[level].bp_newreq.bpr_ptr = 1059 path[level].bp_newreq.bpr_ptr =
964 path[level - 1].bp_newreq.bpr_ptr + 1; 1060 path[level - 1].bp_newreq.bpr_ptr + 1;
965 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( 1061 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
966 &btree->bt_bmap, &path[level].bp_newreq); 1062 &path[level].bp_newreq);
967 if (ret < 0) 1063 if (ret < 0)
968 goto err_out_child_node; 1064 goto err_out_child_node;
969 ret = nilfs_bmap_get_new_block(&btree->bt_bmap, 1065 ret = nilfs_btree_get_new_block(btree,
970 path[level].bp_newreq.bpr_ptr, 1066 path[level].bp_newreq.bpr_ptr,
971 &bh); 1067 &bh);
972 if (ret < 0) 1068 if (ret < 0)
973 goto err_out_curr_node; 1069 goto err_out_curr_node;
974 1070
@@ -994,12 +1090,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
994 1090
995 /* grow */ 1091 /* grow */
996 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1092 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
997 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( 1093 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
998 &btree->bt_bmap, &path[level].bp_newreq); 1094 &path[level].bp_newreq);
999 if (ret < 0) 1095 if (ret < 0)
1000 goto err_out_child_node; 1096 goto err_out_child_node;
1001 ret = nilfs_bmap_get_new_block(&btree->bt_bmap, 1097 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1002 path[level].bp_newreq.bpr_ptr, &bh); 1098 &bh);
1003 if (ret < 0) 1099 if (ret < 0)
1004 goto err_out_curr_node; 1100 goto err_out_curr_node;
1005 1101
@@ -1023,18 +1119,16 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1023 1119
1024 /* error */ 1120 /* error */
1025 err_out_curr_node: 1121 err_out_curr_node:
1026 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, 1122 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1027 &path[level].bp_newreq);
1028 err_out_child_node: 1123 err_out_child_node:
1029 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1124 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1030 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); 1125 nilfs_btnode_delete(path[level].bp_sib_bh);
1031 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr( 1126 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1032 &btree->bt_bmap, &path[level].bp_newreq); 1127 &path[level].bp_newreq);
1033 1128
1034 } 1129 }
1035 1130
1036 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, 1131 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1037 &path[level].bp_newreq);
1038 err_out_data: 1132 err_out_data:
1039 *levelp = level; 1133 *levelp = level;
1040 stats->bs_nblocks = 0; 1134 stats->bs_nblocks = 0;
@@ -1049,14 +1143,12 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1049 1143
1050 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1144 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1051 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1145 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1052 if (btree->bt_ops->btop_set_target != NULL) 1146 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
1053 btree->bt_ops->btop_set_target(btree, key, ptr); 1147 nilfs_btree_set_target_v(btree, key, ptr);
1054 1148
1055 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1149 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1056 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { 1150 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1057 btree->bt_bmap.b_pops->bpop_commit_alloc_ptr( 1151 &path[level - 1].bp_newreq);
1058 &btree->bt_bmap, &path[level - 1].bp_newreq);
1059 }
1060 path[level].bp_op(btree, path, level, &key, &ptr); 1152 path[level].bp_op(btree, path, level, &key, &ptr);
1061 } 1153 }
1062 1154
@@ -1153,7 +1245,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1153 nilfs_btree_promote_key(btree, path, level + 1, 1245 nilfs_btree_promote_key(btree, path, level + 1,
1154 nilfs_btree_node_get_key(btree, node, 0)); 1246 nilfs_btree_node_get_key(btree, node, 0));
1155 1247
1156 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 1248 brelse(path[level].bp_sib_bh);
1157 path[level].bp_sib_bh = NULL; 1249 path[level].bp_sib_bh = NULL;
1158 path[level].bp_index += n; 1250 path[level].bp_index += n;
1159} 1251}
@@ -1192,7 +1284,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1192 nilfs_btree_node_get_key(btree, right, 0)); 1284 nilfs_btree_node_get_key(btree, right, 0));
1193 path[level + 1].bp_index--; 1285 path[level + 1].bp_index--;
1194 1286
1195 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 1287 brelse(path[level].bp_sib_bh);
1196 path[level].bp_sib_bh = NULL; 1288 path[level].bp_sib_bh = NULL;
1197} 1289}
1198 1290
@@ -1221,7 +1313,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1221 unlock_buffer(path[level].bp_bh); 1313 unlock_buffer(path[level].bp_bh);
1222 unlock_buffer(path[level].bp_sib_bh); 1314 unlock_buffer(path[level].bp_sib_bh);
1223 1315
1224 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); 1316 nilfs_btnode_delete(path[level].bp_bh);
1225 path[level].bp_bh = path[level].bp_sib_bh; 1317 path[level].bp_bh = path[level].bp_sib_bh;
1226 path[level].bp_sib_bh = NULL; 1318 path[level].bp_sib_bh = NULL;
1227 path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); 1319 path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
@@ -1252,7 +1344,7 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1252 unlock_buffer(path[level].bp_bh); 1344 unlock_buffer(path[level].bp_bh);
1253 unlock_buffer(path[level].bp_sib_bh); 1345 unlock_buffer(path[level].bp_sib_bh);
1254 1346
1255 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); 1347 nilfs_btnode_delete(path[level].bp_sib_bh);
1256 path[level].bp_sib_bh = NULL; 1348 path[level].bp_sib_bh = NULL;
1257 path[level + 1].bp_index++; 1349 path[level + 1].bp_index++;
1258} 1350}
@@ -1276,7 +1368,7 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1276 nilfs_btree_node_move_left(btree, root, child, n); 1368 nilfs_btree_node_move_left(btree, root, child, n);
1277 unlock_buffer(path[level].bp_bh); 1369 unlock_buffer(path[level].bp_bh);
1278 1370
1279 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); 1371 nilfs_btnode_delete(path[level].bp_bh);
1280 path[level].bp_bh = NULL; 1372 path[level].bp_bh = NULL;
1281} 1373}
1282 1374
@@ -1300,12 +1392,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1300 path[level].bp_oldreq.bpr_ptr = 1392 path[level].bp_oldreq.bpr_ptr =
1301 nilfs_btree_node_get_ptr(btree, node, 1393 nilfs_btree_node_get_ptr(btree, node,
1302 path[level].bp_index); 1394 path[level].bp_index);
1303 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { 1395 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1304 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( 1396 &path[level].bp_oldreq);
1305 &btree->bt_bmap, &path[level].bp_oldreq); 1397 if (ret < 0)
1306 if (ret < 0) 1398 goto err_out_child_node;
1307 goto err_out_child_node;
1308 }
1309 1399
1310 if (nilfs_btree_node_get_nchildren(btree, node) > 1400 if (nilfs_btree_node_get_nchildren(btree, node) >
1311 nilfs_btree_node_nchildren_min(btree, node)) { 1401 nilfs_btree_node_nchildren_min(btree, node)) {
@@ -1321,8 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1321 /* left sibling */ 1411 /* left sibling */
1322 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1412 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1323 pindex - 1); 1413 pindex - 1);
1324 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, 1414 ret = nilfs_btree_get_block(btree, sibptr, &bh);
1325 &bh);
1326 if (ret < 0) 1415 if (ret < 0)
1327 goto err_out_curr_node; 1416 goto err_out_curr_node;
1328 sib = (struct nilfs_btree_node *)bh->b_data; 1417 sib = (struct nilfs_btree_node *)bh->b_data;
@@ -1343,8 +1432,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1343 /* right sibling */ 1432 /* right sibling */
1344 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1433 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1345 pindex + 1); 1434 pindex + 1);
1346 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, 1435 ret = nilfs_btree_get_block(btree, sibptr, &bh);
1347 &bh);
1348 if (ret < 0) 1436 if (ret < 0)
1349 goto err_out_curr_node; 1437 goto err_out_curr_node;
1350 sib = (struct nilfs_btree_node *)bh->b_data; 1438 sib = (struct nilfs_btree_node *)bh->b_data;
@@ -1381,12 +1469,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1381 node = nilfs_btree_get_root(btree); 1469 node = nilfs_btree_get_root(btree);
1382 path[level].bp_oldreq.bpr_ptr = 1470 path[level].bp_oldreq.bpr_ptr =
1383 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); 1471 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1384 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { 1472
1385 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( 1473 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1386 &btree->bt_bmap, &path[level].bp_oldreq); 1474 &path[level].bp_oldreq);
1387 if (ret < 0) 1475 if (ret < 0)
1388 goto err_out_child_node; 1476 goto err_out_child_node;
1389 } 1477
1390 /* child of the root node is deleted */ 1478 /* child of the root node is deleted */
1391 path[level].bp_op = nilfs_btree_do_delete; 1479 path[level].bp_op = nilfs_btree_do_delete;
1392 stats->bs_nblocks++; 1480 stats->bs_nblocks++;
@@ -1398,15 +1486,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1398 1486
1399 /* error */ 1487 /* error */
1400 err_out_curr_node: 1488 err_out_curr_node:
1401 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) 1489 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
1402 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1403 &btree->bt_bmap, &path[level].bp_oldreq);
1404 err_out_child_node: 1490 err_out_child_node:
1405 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1491 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1406 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); 1492 brelse(path[level].bp_sib_bh);
1407 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) 1493 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1408 btree->bt_bmap.b_pops->bpop_abort_end_ptr( 1494 &path[level].bp_oldreq);
1409 &btree->bt_bmap, &path[level].bp_oldreq);
1410 } 1495 }
1411 *levelp = level; 1496 *levelp = level;
1412 stats->bs_nblocks = 0; 1497 stats->bs_nblocks = 0;
@@ -1420,9 +1505,8 @@ static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1420 int level; 1505 int level;
1421 1506
1422 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1507 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1423 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) 1508 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1424 btree->bt_bmap.b_pops->bpop_commit_end_ptr( 1509 &path[level].bp_oldreq);
1425 &btree->bt_bmap, &path[level].bp_oldreq);
1426 path[level].bp_op(btree, path, level, NULL, NULL); 1510 path[level].bp_op(btree, path, level, NULL, NULL);
1427 } 1511 }
1428 1512
@@ -1501,7 +1585,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1501 if (nchildren > 1) 1585 if (nchildren > 1)
1502 return 0; 1586 return 0;
1503 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1587 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1504 ret = nilfs_bmap_get_block(bmap, ptr, &bh); 1588 ret = nilfs_btree_get_block(btree, ptr, &bh);
1505 if (ret < 0) 1589 if (ret < 0)
1506 return ret; 1590 return ret;
1507 node = (struct nilfs_btree_node *)bh->b_data; 1591 node = (struct nilfs_btree_node *)bh->b_data;
@@ -1515,9 +1599,9 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1515 nextmaxkey = (nchildren > 1) ? 1599 nextmaxkey = (nchildren > 1) ?
1516 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; 1600 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1517 if (bh != NULL) 1601 if (bh != NULL)
1518 nilfs_bmap_put_block(bmap, bh); 1602 brelse(bh);
1519 1603
1520 return (maxkey == key) && (nextmaxkey < bmap->b_low); 1604 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1521} 1605}
1522 1606
1523static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, 1607static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
@@ -1542,7 +1626,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1542 nchildren = nilfs_btree_node_get_nchildren(btree, root); 1626 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1543 WARN_ON(nchildren > 1); 1627 WARN_ON(nchildren > 1);
1544 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1628 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1545 ret = nilfs_bmap_get_block(bmap, ptr, &bh); 1629 ret = nilfs_btree_get_block(btree, ptr, &bh);
1546 if (ret < 0) 1630 if (ret < 0)
1547 return ret; 1631 return ret;
1548 node = (struct nilfs_btree_node *)bh->b_data; 1632 node = (struct nilfs_btree_node *)bh->b_data;
@@ -1563,7 +1647,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1563 } 1647 }
1564 1648
1565 if (bh != NULL) 1649 if (bh != NULL)
1566 nilfs_bmap_put_block(bmap, bh); 1650 brelse(bh);
1567 1651
1568 return nitems; 1652 return nitems;
1569} 1653}
@@ -1584,10 +1668,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1584 1668
1585 /* for data */ 1669 /* for data */
1586 /* cannot find near ptr */ 1670 /* cannot find near ptr */
1587 if (btree->bt_ops->btop_find_target != NULL) 1671 if (NILFS_BMAP_USE_VBN(bmap))
1588 dreq->bpr_ptr 1672 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1589 = btree->bt_ops->btop_find_target(btree, NULL, key); 1673
1590 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq); 1674 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
1591 if (ret < 0) 1675 if (ret < 0)
1592 return ret; 1676 return ret;
1593 1677
@@ -1595,11 +1679,11 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1595 stats->bs_nblocks++; 1679 stats->bs_nblocks++;
1596 if (nreq != NULL) { 1680 if (nreq != NULL) {
1597 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1681 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1598 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq); 1682 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
1599 if (ret < 0) 1683 if (ret < 0)
1600 goto err_out_dreq; 1684 goto err_out_dreq;
1601 1685
1602 ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh); 1686 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1603 if (ret < 0) 1687 if (ret < 0)
1604 goto err_out_nreq; 1688 goto err_out_nreq;
1605 1689
@@ -1612,9 +1696,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1612 1696
1613 /* error */ 1697 /* error */
1614 err_out_nreq: 1698 err_out_nreq:
1615 bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq); 1699 nilfs_bmap_abort_alloc_ptr(bmap, nreq);
1616 err_out_dreq: 1700 err_out_dreq:
1617 bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq); 1701 nilfs_bmap_abort_alloc_ptr(bmap, dreq);
1618 stats->bs_nblocks = 0; 1702 stats->bs_nblocks = 0;
1619 return ret; 1703 return ret;
1620 1704
@@ -1624,7 +1708,7 @@ static void
1624nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, 1708nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1625 __u64 key, __u64 ptr, 1709 __u64 key, __u64 ptr,
1626 const __u64 *keys, const __u64 *ptrs, 1710 const __u64 *keys, const __u64 *ptrs,
1627 int n, __u64 low, __u64 high, 1711 int n,
1628 union nilfs_bmap_ptr_req *dreq, 1712 union nilfs_bmap_ptr_req *dreq,
1629 union nilfs_bmap_ptr_req *nreq, 1713 union nilfs_bmap_ptr_req *nreq,
1630 struct buffer_head *bh) 1714 struct buffer_head *bh)
@@ -1642,12 +1726,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1642 1726
1643 /* convert and insert */ 1727 /* convert and insert */
1644 btree = (struct nilfs_btree *)bmap; 1728 btree = (struct nilfs_btree *)bmap;
1645 nilfs_btree_init(bmap, low, high); 1729 nilfs_btree_init(bmap);
1646 if (nreq != NULL) { 1730 if (nreq != NULL) {
1647 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { 1731 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1648 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); 1732 nilfs_bmap_commit_alloc_ptr(bmap, nreq);
1649 bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1650 }
1651 1733
1652 /* create child node at level 1 */ 1734 /* create child node at level 1 */
1653 lock_buffer(bh); 1735 lock_buffer(bh);
@@ -1661,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1661 nilfs_bmap_set_dirty(bmap); 1743 nilfs_bmap_set_dirty(bmap);
1662 1744
1663 unlock_buffer(bh); 1745 unlock_buffer(bh);
1664 nilfs_bmap_put_block(bmap, bh); 1746 brelse(bh);
1665 1747
1666 /* create root node at level 2 */ 1748 /* create root node at level 2 */
1667 node = nilfs_btree_get_root(btree); 1749 node = nilfs_btree_get_root(btree);
@@ -1669,8 +1751,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1669 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 1751 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1670 2, 1, &keys[0], &tmpptr); 1752 2, 1, &keys[0], &tmpptr);
1671 } else { 1753 } else {
1672 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) 1754 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1673 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1674 1755
1675 /* create root node at level 1 */ 1756 /* create root node at level 1 */
1676 node = nilfs_btree_get_root(btree); 1757 node = nilfs_btree_get_root(btree);
@@ -1682,8 +1763,8 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1682 nilfs_bmap_set_dirty(bmap); 1763 nilfs_bmap_set_dirty(bmap);
1683 } 1764 }
1684 1765
1685 if (btree->bt_ops->btop_set_target != NULL) 1766 if (NILFS_BMAP_USE_VBN(bmap))
1686 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr); 1767 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1687} 1768}
1688 1769
1689/** 1770/**
@@ -1694,13 +1775,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1694 * @keys: 1775 * @keys:
1695 * @ptrs: 1776 * @ptrs:
1696 * @n: 1777 * @n:
1697 * @low:
1698 * @high:
1699 */ 1778 */
1700int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, 1779int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1701 __u64 key, __u64 ptr, 1780 __u64 key, __u64 ptr,
1702 const __u64 *keys, const __u64 *ptrs, 1781 const __u64 *keys, const __u64 *ptrs, int n)
1703 int n, __u64 low, __u64 high)
1704{ 1782{
1705 struct buffer_head *bh; 1783 struct buffer_head *bh;
1706 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1784 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
@@ -1725,7 +1803,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1725 if (ret < 0) 1803 if (ret < 0)
1726 return ret; 1804 return ret;
1727 nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, 1805 nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1728 low, high, di, ni, bh); 1806 di, ni, bh);
1729 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1807 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1730 return 0; 1808 return 0;
1731} 1809}
@@ -1754,9 +1832,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1754 nilfs_btree_node_get_ptr(btree, parent, 1832 nilfs_btree_node_get_ptr(btree, parent,
1755 path[level + 1].bp_index); 1833 path[level + 1].bp_index);
1756 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1834 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1757 ret = nilfs_bmap_prepare_update(&btree->bt_bmap, 1835 ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
1758 &path[level].bp_oldreq, 1836 &path[level].bp_oldreq,
1759 &path[level].bp_newreq); 1837 &path[level].bp_newreq);
1760 if (ret < 0) 1838 if (ret < 0)
1761 return ret; 1839 return ret;
1762 1840
@@ -1768,9 +1846,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1768 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1846 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1769 &path[level].bp_ctxt); 1847 &path[level].bp_ctxt);
1770 if (ret < 0) { 1848 if (ret < 0) {
1771 nilfs_bmap_abort_update(&btree->bt_bmap, 1849 nilfs_bmap_abort_update_v(&btree->bt_bmap,
1772 &path[level].bp_oldreq, 1850 &path[level].bp_oldreq,
1773 &path[level].bp_newreq); 1851 &path[level].bp_newreq);
1774 return ret; 1852 return ret;
1775 } 1853 }
1776 } 1854 }
@@ -1784,9 +1862,9 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1784{ 1862{
1785 struct nilfs_btree_node *parent; 1863 struct nilfs_btree_node *parent;
1786 1864
1787 nilfs_bmap_commit_update(&btree->bt_bmap, 1865 nilfs_bmap_commit_update_v(&btree->bt_bmap,
1788 &path[level].bp_oldreq, 1866 &path[level].bp_oldreq,
1789 &path[level].bp_newreq); 1867 &path[level].bp_newreq);
1790 1868
1791 if (buffer_nilfs_node(path[level].bp_bh)) { 1869 if (buffer_nilfs_node(path[level].bp_bh)) {
1792 nilfs_btnode_commit_change_key( 1870 nilfs_btnode_commit_change_key(
@@ -1805,9 +1883,9 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1805 struct nilfs_btree_path *path, 1883 struct nilfs_btree_path *path,
1806 int level) 1884 int level)
1807{ 1885{
1808 nilfs_bmap_abort_update(&btree->bt_bmap, 1886 nilfs_bmap_abort_update_v(&btree->bt_bmap,
1809 &path[level].bp_oldreq, 1887 &path[level].bp_oldreq,
1810 &path[level].bp_newreq); 1888 &path[level].bp_newreq);
1811 if (buffer_nilfs_node(path[level].bp_bh)) 1889 if (buffer_nilfs_node(path[level].bp_bh))
1812 nilfs_btnode_abort_change_key( 1890 nilfs_btnode_abort_change_key(
1813 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1891 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
@@ -1930,7 +2008,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1930 goto out; 2008 goto out;
1931 } 2009 }
1932 2010
1933 ret = btree->bt_ops->btop_propagate(btree, path, level, bh); 2011 ret = NILFS_BMAP_USE_VBN(bmap) ?
2012 nilfs_btree_propagate_v(btree, path, level, bh) :
2013 nilfs_btree_propagate_p(btree, path, level, bh);
1934 2014
1935 out: 2015 out:
1936 nilfs_btree_clear_path(btree, path); 2016 nilfs_btree_clear_path(btree, path);
@@ -2066,12 +2146,9 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2066 ptr = nilfs_btree_node_get_ptr(btree, parent, 2146 ptr = nilfs_btree_node_get_ptr(btree, parent,
2067 path[level + 1].bp_index); 2147 path[level + 1].bp_index);
2068 req.bpr_ptr = ptr; 2148 req.bpr_ptr = ptr;
2069 ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap, 2149 ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2070 &req); 2150 if (unlikely(ret < 0))
2071 if (ret < 0)
2072 return ret; 2151 return ret;
2073 btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
2074 &req, blocknr);
2075 2152
2076 key = nilfs_btree_node_get_key(btree, parent, 2153 key = nilfs_btree_node_get_key(btree, parent,
2077 path[level + 1].bp_index); 2154 path[level + 1].bp_index);
@@ -2114,8 +2191,9 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2114 goto out; 2191 goto out;
2115 } 2192 }
2116 2193
2117 ret = btree->bt_ops->btop_assign(btree, path, level, bh, 2194 ret = NILFS_BMAP_USE_VBN(bmap) ?
2118 blocknr, binfo); 2195 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2196 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2119 2197
2120 out: 2198 out:
2121 nilfs_btree_clear_path(btree, path); 2199 nilfs_btree_clear_path(btree, path);
@@ -2171,7 +2249,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2171 WARN_ON(ret == -ENOENT); 2249 WARN_ON(ret == -ENOENT);
2172 goto out; 2250 goto out;
2173 } 2251 }
2174 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); 2252 ret = nilfs_btree_get_block(btree, ptr, &bh);
2175 if (ret < 0) { 2253 if (ret < 0) {
2176 WARN_ON(ret == -ENOENT); 2254 WARN_ON(ret == -ENOENT);
2177 goto out; 2255 goto out;
@@ -2179,7 +2257,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2179 2257
2180 if (!buffer_dirty(bh)) 2258 if (!buffer_dirty(bh))
2181 nilfs_btnode_mark_dirty(bh); 2259 nilfs_btnode_mark_dirty(bh);
2182 nilfs_bmap_put_block(&btree->bt_bmap, bh); 2260 brelse(bh);
2183 if (!nilfs_bmap_dirty(&btree->bt_bmap)) 2261 if (!nilfs_bmap_dirty(&btree->bt_bmap))
2184 nilfs_bmap_set_dirty(&btree->bt_bmap); 2262 nilfs_bmap_set_dirty(&btree->bt_bmap);
2185 2263
@@ -2191,6 +2269,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2191 2269
2192static const struct nilfs_bmap_operations nilfs_btree_ops = { 2270static const struct nilfs_bmap_operations nilfs_btree_ops = {
2193 .bop_lookup = nilfs_btree_lookup, 2271 .bop_lookup = nilfs_btree_lookup,
2272 .bop_lookup_contig = nilfs_btree_lookup_contig,
2194 .bop_insert = nilfs_btree_insert, 2273 .bop_insert = nilfs_btree_insert,
2195 .bop_delete = nilfs_btree_delete, 2274 .bop_delete = nilfs_btree_delete,
2196 .bop_clear = NULL, 2275 .bop_clear = NULL,
@@ -2210,6 +2289,7 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
2210 2289
2211static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2290static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2212 .bop_lookup = NULL, 2291 .bop_lookup = NULL,
2292 .bop_lookup_contig = NULL,
2213 .bop_insert = NULL, 2293 .bop_insert = NULL,
2214 .bop_delete = NULL, 2294 .bop_delete = NULL,
2215 .bop_clear = NULL, 2295 .bop_clear = NULL,
@@ -2227,43 +2307,13 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2227 .bop_gather_data = NULL, 2307 .bop_gather_data = NULL,
2228}; 2308};
2229 2309
2230static const struct nilfs_btree_operations nilfs_btree_ops_v = { 2310int nilfs_btree_init(struct nilfs_bmap *bmap)
2231 .btop_find_target = nilfs_btree_find_target_v,
2232 .btop_set_target = nilfs_btree_set_target_v,
2233 .btop_propagate = nilfs_btree_propagate_v,
2234 .btop_assign = nilfs_btree_assign_v,
2235};
2236
2237static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2238 .btop_find_target = NULL,
2239 .btop_set_target = NULL,
2240 .btop_propagate = nilfs_btree_propagate_p,
2241 .btop_assign = nilfs_btree_assign_p,
2242};
2243
2244int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2245{ 2311{
2246 struct nilfs_btree *btree;
2247
2248 btree = (struct nilfs_btree *)bmap;
2249 bmap->b_ops = &nilfs_btree_ops; 2312 bmap->b_ops = &nilfs_btree_ops;
2250 bmap->b_low = low;
2251 bmap->b_high = high;
2252 switch (bmap->b_inode->i_ino) {
2253 case NILFS_DAT_INO:
2254 btree->bt_ops = &nilfs_btree_ops_p;
2255 break;
2256 default:
2257 btree->bt_ops = &nilfs_btree_ops_v;
2258 break;
2259 }
2260
2261 return 0; 2313 return 0;
2262} 2314}
2263 2315
2264void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2316void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2265{ 2317{
2266 bmap->b_low = NILFS_BMAP_LARGE_LOW;
2267 bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2268 bmap->b_ops = &nilfs_btree_ops_gc; 2318 bmap->b_ops = &nilfs_btree_ops_gc;
2269} 2319}
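
The new nilfs_btree_lookup_contig() above resolves the block mapped at `key`, then keeps scanning the leaf (and, when it runs off the end, the right sibling reached through the parent) for as long as the keys stay consecutive and the translated block numbers stay consecutive on disk, returning the length of that run. Below is a minimal userspace sketch of the same counting loop over a flat table; the sample arrays and the count_contig() name are made up for the example, and the DAT translation and sibling traversal are left out.

    #include <stdio.h>

    /* One (key -> disk block) mapping, a flat stand-in for a leaf node. */
    struct toy_entry {
        unsigned long key;
        unsigned long blocknr;
    };

    /*
     * Count how many blocks starting at `key` are logically consecutive
     * (key, key + 1, ...) and physically consecutive on disk, capped at
     * maxblocks -- the contract nilfs_btree_lookup_contig() implements.
     */
    static int count_contig(const struct toy_entry *e, int n, unsigned long key,
                            unsigned long *blocknr, unsigned maxblocks)
    {
        int i, cnt;

        for (i = 0; i < n && e[i].key != key; i++)
            ;
        if (i == n)
            return -1;                        /* key not mapped */

        *blocknr = e[i].blocknr;
        for (cnt = 1; cnt < (int)maxblocks && i + cnt < n; cnt++) {
            if (e[i + cnt].key != key + cnt ||
                e[i + cnt].blocknr != *blocknr + cnt)
                break;                        /* hole or non-contiguous block */
        }
        return cnt;
    }

    int main(void)
    {
        const struct toy_entry leaf[] = {
            { 10, 200 }, { 11, 201 }, { 12, 202 }, { 13, 300 },
        };
        unsigned long blk;
        int cnt = count_contig(leaf, 4, 10, &blk, 16);

        printf("key 10 -> block %lu, %d contiguous blocks\n", blk, cnt); /* 200, 3 */
        return 0;
    }

The result feeds nilfs_bmap_lookup_contig(), whose prototype is added to bmap.h earlier in this patch.
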
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 4766deb52fb1..0e72bbbc6b64 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -34,28 +34,6 @@ struct nilfs_btree;
34struct nilfs_btree_path; 34struct nilfs_btree_path;
35 35
36/** 36/**
37 * struct nilfs_btree_operations - B-tree operation table
38 */
39struct nilfs_btree_operations {
40 __u64 (*btop_find_target)(const struct nilfs_btree *,
41 const struct nilfs_btree_path *, __u64);
42 void (*btop_set_target)(struct nilfs_btree *, __u64, __u64);
43
44 struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *);
45
46 int (*btop_propagate)(struct nilfs_btree *,
47 struct nilfs_btree_path *,
48 int,
49 struct buffer_head *);
50 int (*btop_assign)(struct nilfs_btree *,
51 struct nilfs_btree_path *,
52 int,
53 struct buffer_head **,
54 sector_t,
55 union nilfs_binfo *);
56};
57
58/**
59 * struct nilfs_btree_node - B-tree node 37 * struct nilfs_btree_node - B-tree node
60 * @bn_flags: flags 38 * @bn_flags: flags
61 * @bn_level: level 39 * @bn_level: level
@@ -80,13 +58,9 @@ struct nilfs_btree_node {
80/** 58/**
81 * struct nilfs_btree - B-tree structure 59 * struct nilfs_btree - B-tree structure
82 * @bt_bmap: bmap base structure 60 * @bt_bmap: bmap base structure
83 * @bt_ops: B-tree operation table
84 */ 61 */
85struct nilfs_btree { 62struct nilfs_btree {
86 struct nilfs_bmap bt_bmap; 63 struct nilfs_bmap bt_bmap;
87
88 /* B-tree-specific members */
89 const struct nilfs_btree_operations *bt_ops;
90}; 64};
91 65
92 66
@@ -108,10 +82,9 @@ struct nilfs_btree {
108 82
109int nilfs_btree_path_cache_init(void); 83int nilfs_btree_path_cache_init(void);
110void nilfs_btree_path_cache_destroy(void); 84void nilfs_btree_path_cache_destroy(void);
111int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); 85int nilfs_btree_init(struct nilfs_bmap *);
112int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, 86int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
113 const __u64 *, const __u64 *, 87 const __u64 *, const __u64 *, int);
114 int, __u64, __u64);
115void nilfs_btree_init_gc(struct nilfs_bmap *); 88void nilfs_btree_init_gc(struct nilfs_bmap *);
116 89
117#endif /* _NILFS_BTREE_H */ 90#endif /* _NILFS_BTREE_H */
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index cadd36b14d07..7d49813f66d6 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -295,10 +295,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
295 return -EINVAL; 295 return -EINVAL;
296 } 296 }
297 297
298 /* cannot delete the latest checkpoint */
299 if (start == nilfs_mdt_cno(cpfile) - 1)
300 return -EPERM;
301
302 down_write(&NILFS_MDT(cpfile)->mi_sem); 298 down_write(&NILFS_MDT(cpfile)->mi_sem);
303 299
304 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 300 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
@@ -384,9 +380,10 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
384} 380}
385 381
386static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, 382static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
387 struct nilfs_cpinfo *ci, size_t nci) 383 void *buf, unsigned cisz, size_t nci)
388{ 384{
389 struct nilfs_checkpoint *cp; 385 struct nilfs_checkpoint *cp;
386 struct nilfs_cpinfo *ci = buf;
390 struct buffer_head *bh; 387 struct buffer_head *bh;
391 size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; 388 size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
392 __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; 389 __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
@@ -410,17 +407,22 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
410 kaddr = kmap_atomic(bh->b_page, KM_USER0); 407 kaddr = kmap_atomic(bh->b_page, KM_USER0);
411 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 408 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
412 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { 409 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
413 if (!nilfs_checkpoint_invalid(cp)) 410 if (!nilfs_checkpoint_invalid(cp)) {
414 nilfs_cpfile_checkpoint_to_cpinfo( 411 nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp,
415 cpfile, cp, &ci[n++]); 412 ci);
413 ci = (void *)ci + cisz;
414 n++;
415 }
416 } 416 }
417 kunmap_atomic(kaddr, KM_USER0); 417 kunmap_atomic(kaddr, KM_USER0);
418 brelse(bh); 418 brelse(bh);
419 } 419 }
420 420
421 ret = n; 421 ret = n;
422 if (n > 0) 422 if (n > 0) {
423 *cnop = ci[n - 1].ci_cno + 1; 423 ci = (void *)ci - cisz;
424 *cnop = ci->ci_cno + 1;
425 }
424 426
425 out: 427 out:
426 up_read(&NILFS_MDT(cpfile)->mi_sem); 428 up_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -428,11 +430,12 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
428} 430}
429 431
430static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, 432static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
431 struct nilfs_cpinfo *ci, size_t nci) 433 void *buf, unsigned cisz, size_t nci)
432{ 434{
433 struct buffer_head *bh; 435 struct buffer_head *bh;
434 struct nilfs_cpfile_header *header; 436 struct nilfs_cpfile_header *header;
435 struct nilfs_checkpoint *cp; 437 struct nilfs_checkpoint *cp;
438 struct nilfs_cpinfo *ci = buf;
436 __u64 curr = *cnop, next; 439 __u64 curr = *cnop, next;
437 unsigned long curr_blkoff, next_blkoff; 440 unsigned long curr_blkoff, next_blkoff;
438 void *kaddr; 441 void *kaddr;
@@ -472,7 +475,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
472 if (unlikely(nilfs_checkpoint_invalid(cp) || 475 if (unlikely(nilfs_checkpoint_invalid(cp) ||
473 !nilfs_checkpoint_snapshot(cp))) 476 !nilfs_checkpoint_snapshot(cp)))
474 break; 477 break;
475 nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]); 478 nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci);
479 ci = (void *)ci + cisz;
480 n++;
476 next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); 481 next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
477 if (next == 0) 482 if (next == 0)
478 break; /* reach end of the snapshot list */ 483 break; /* reach end of the snapshot list */
@@ -511,13 +516,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
511 */ 516 */
512 517
513ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, 518ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
514 struct nilfs_cpinfo *ci, size_t nci) 519 void *buf, unsigned cisz, size_t nci)
515{ 520{
516 switch (mode) { 521 switch (mode) {
517 case NILFS_CHECKPOINT: 522 case NILFS_CHECKPOINT:
518 return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); 523 return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz, nci);
519 case NILFS_SNAPSHOT: 524 case NILFS_SNAPSHOT:
520 return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); 525 return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz, nci);
521 default: 526 default:
522 return -EINVAL; 527 return -EINVAL;
523 } 528 }
@@ -533,20 +538,14 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
533 struct nilfs_cpinfo ci; 538 struct nilfs_cpinfo ci;
534 __u64 tcno = cno; 539 __u64 tcno = cno;
535 ssize_t nci; 540 ssize_t nci;
536 int ret;
537 541
538 nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); 542 nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, sizeof(ci), 1);
539 if (nci < 0) 543 if (nci < 0)
540 return nci; 544 return nci;
541 else if (nci == 0 || ci.ci_cno != cno) 545 else if (nci == 0 || ci.ci_cno != cno)
542 return -ENOENT; 546 return -ENOENT;
543 547 else if (nilfs_cpinfo_snapshot(&ci))
544 /* cannot delete the latest checkpoint nor snapshots */ 548 return -EBUSY;
545 ret = nilfs_cpinfo_snapshot(&ci);
546 if (ret < 0)
547 return ret;
548 else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1)
549 return -EPERM;
550 549
551 return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); 550 return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
552} 551}
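
nilfs_cpfile_do_get_cpinfo() and nilfs_cpfile_do_get_ssinfo() now take an opaque buffer plus an explicit record size (cisz) instead of a struct nilfs_cpinfo array, and advance through it with `ci = (void *)ci + cisz`, so a caller may hand in records that are larger than, but begin with, the kernel's layout. The snippet below is a self-contained userspace illustration of that stride trick; struct small_rec, struct big_rec and fill_records() are invented names, not part of the patch.

    #include <stdio.h>
    #include <string.h>

    struct small_rec { unsigned long cno; };                 /* what the filler knows about */
    struct big_rec   { unsigned long cno; char pad[24]; };   /* what the caller passes in */

    /* Fill n records of `recsz` bytes each, touching only the leading small_rec. */
    static int fill_records(void *buf, unsigned recsz, int n)
    {
        struct small_rec *r = buf;
        int i;

        for (i = 0; i < n; i++) {
            r->cno = 100 + i;               /* pretend checkpoint numbers */
            r = (void *)r + recsz;          /* step by the caller's record size (GNU C idiom, as in the patch) */
        }
        return n;
    }

    int main(void)
    {
        struct big_rec recs[3];

        memset(recs, 0, sizeof(recs));
        fill_records(recs, sizeof(recs[0]), 3);
        printf("%lu %lu %lu\n", recs[0].cno, recs[1].cno, recs[2].cno); /* 100 101 102 */
        return 0;
    }

nilfs_cpfile_get_cpinfo() and the dat.c vinfo path below follow the same convention, which is what the cpfile.h and dat.h prototype changes reflect.
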
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 1a8a1008c342..788a45950197 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -39,7 +39,7 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
39int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); 39int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
40int nilfs_cpfile_is_snapshot(struct inode *, __u64); 40int nilfs_cpfile_is_snapshot(struct inode *, __u64);
41int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); 41int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
42ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, 42ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
43 struct nilfs_cpinfo *, size_t); 43 size_t);
44 44
45#endif /* _NILFS_CPFILE_H */ 45#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index bb8a5818e7f1..0b2710e2d565 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -92,21 +92,6 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
92 nilfs_palloc_abort_alloc_entry(dat, req); 92 nilfs_palloc_abort_alloc_entry(dat, req);
93} 93}
94 94
95int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req)
96{
97 int ret;
98
99 ret = nilfs_palloc_prepare_free_entry(dat, req);
100 if (ret < 0)
101 return ret;
102 ret = nilfs_dat_prepare_entry(dat, req, 0);
103 if (ret < 0) {
104 nilfs_palloc_abort_free_entry(dat, req);
105 return ret;
106 }
107 return 0;
108}
109
110void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) 95void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
111{ 96{
112 struct nilfs_dat_entry *entry; 97 struct nilfs_dat_entry *entry;
@@ -391,36 +376,37 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
391 return ret; 376 return ret;
392} 377}
393 378
394ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo, 379ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
395 size_t nvi) 380 size_t nvi)
396{ 381{
397 struct buffer_head *entry_bh; 382 struct buffer_head *entry_bh;
398 struct nilfs_dat_entry *entry; 383 struct nilfs_dat_entry *entry;
384 struct nilfs_vinfo *vinfo = buf;
399 __u64 first, last; 385 __u64 first, last;
400 void *kaddr; 386 void *kaddr;
401 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; 387 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
402 int i, j, n, ret; 388 int i, j, n, ret;
403 389
404 for (i = 0; i < nvi; i += n) { 390 for (i = 0; i < nvi; i += n) {
405 ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr, 391 ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr,
406 0, &entry_bh); 392 0, &entry_bh);
407 if (ret < 0) 393 if (ret < 0)
408 return ret; 394 return ret;
409 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 395 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
410 /* last virtual block number in this block */ 396 /* last virtual block number in this block */
411 first = vinfo[i].vi_vblocknr; 397 first = vinfo->vi_vblocknr;
412 do_div(first, entries_per_block); 398 do_div(first, entries_per_block);
413 first *= entries_per_block; 399 first *= entries_per_block;
414 last = first + entries_per_block - 1; 400 last = first + entries_per_block - 1;
415 for (j = i, n = 0; 401 for (j = i, n = 0;
416 j < nvi && vinfo[j].vi_vblocknr >= first && 402 j < nvi && vinfo->vi_vblocknr >= first &&
417 vinfo[j].vi_vblocknr <= last; 403 vinfo->vi_vblocknr <= last;
418 j++, n++) { 404 j++, n++, vinfo = (void *)vinfo + visz) {
419 entry = nilfs_palloc_block_get_entry( 405 entry = nilfs_palloc_block_get_entry(
420 dat, vinfo[j].vi_vblocknr, entry_bh, kaddr); 406 dat, vinfo->vi_vblocknr, entry_bh, kaddr);
421 vinfo[j].vi_start = le64_to_cpu(entry->de_start); 407 vinfo->vi_start = le64_to_cpu(entry->de_start);
422 vinfo[j].vi_end = le64_to_cpu(entry->de_end); 408 vinfo->vi_end = le64_to_cpu(entry->de_end);
423 vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr); 409 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
424 } 410 }
425 kunmap_atomic(kaddr, KM_USER0); 411 kunmap_atomic(kaddr, KM_USER0);
426 brelse(entry_bh); 412 brelse(entry_bh);
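The rework above makes nilfs_dat_get_vinfo() walk the caller's buffer by an explicit byte stride (visz) rather than indexing a fixed struct nilfs_vinfo array, so user records that are larger than the kernel's view of them keep working. Below is a minimal userspace sketch of that stride idiom; the names (vinfo_v1, fill_records) are illustrative and not part of the patch.

/* Minimal userspace sketch (not from the patch) of the byte-stride walk
 * used by the reworked nilfs_dat_get_vinfo(): only the leading fields the
 * callee knows about are touched, so a larger caller record still works.
 */
#include <stdio.h>
#include <stdint.h>

struct vinfo_v1 { uint64_t vblocknr; uint64_t start; uint64_t end; };

static void fill_records(void *buf, unsigned recsz, size_t n)
{
	struct vinfo_v1 *vi = buf;
	size_t i;

	for (i = 0; i < n; i++, vi = (void *)vi + recsz)
		vi->start = vi->vblocknr * 2;	/* stand-in for the DAT lookup */
}

int main(void)
{
	struct bigger { struct vinfo_v1 v; uint64_t extra; } recs[3] = {
		{ { 10 } }, { { 11 } }, { { 12 } }
	};

	fill_records(recs, sizeof(recs[0]), 3);	/* stride = caller's record size */
	printf("%llu\n", (unsigned long long)recs[2].v.start);	/* prints 24 */
	return 0;
}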
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index d9560654a4b7..d328b81eead4 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -47,6 +47,6 @@ void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
47int nilfs_dat_mark_dirty(struct inode *, __u64); 47int nilfs_dat_mark_dirty(struct inode *, __u64);
48int nilfs_dat_freev(struct inode *, __u64 *, size_t); 48int nilfs_dat_freev(struct inode *, __u64 *, size_t);
49int nilfs_dat_move(struct inode *, __u64, sector_t); 49int nilfs_dat_move(struct inode *, __u64, sector_t);
50ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); 50ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
51 51
52#endif /* _NILFS_DAT_H */ 52#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index c6379e482781..342d9765df8d 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -25,6 +25,7 @@
25#include "page.h" 25#include "page.h"
26#include "direct.h" 26#include "direct.h"
27#include "alloc.h" 27#include "alloc.h"
28#include "dat.h"
28 29
29static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) 30static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct)
30{ 31{
@@ -62,6 +63,47 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
62 return 0; 63 return 0;
63} 64}
64 65
66static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
67 __u64 key, __u64 *ptrp,
68 unsigned maxblocks)
69{
70 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
71 struct inode *dat = NULL;
72 __u64 ptr, ptr2;
73 sector_t blocknr;
74 int ret, cnt;
75
76 if (key > NILFS_DIRECT_KEY_MAX ||
77 (ptr = nilfs_direct_get_ptr(direct, key)) ==
78 NILFS_BMAP_INVALID_PTR)
79 return -ENOENT;
80
81 if (NILFS_BMAP_USE_VBN(bmap)) {
82 dat = nilfs_bmap_get_dat(bmap);
83 ret = nilfs_dat_translate(dat, ptr, &blocknr);
84 if (ret < 0)
85 return ret;
86 ptr = blocknr;
87 }
88
89 maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1);
90 for (cnt = 1; cnt < maxblocks &&
91 (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) !=
92 NILFS_BMAP_INVALID_PTR;
93 cnt++) {
94 if (dat) {
95 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
96 if (ret < 0)
97 return ret;
98 ptr2 = blocknr;
99 }
100 if (ptr2 != ptr + cnt)
101 break;
102 }
103 *ptrp = ptr;
104 return cnt;
105}
106
65static __u64 107static __u64
66nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) 108nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key)
67{ 109{
@@ -90,10 +132,9 @@ static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
90{ 132{
91 int ret; 133 int ret;
92 134
93 if (direct->d_ops->dop_find_target != NULL) 135 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
94 req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); 136 req->bpr_ptr = nilfs_direct_find_target_v(direct, key);
95 ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, 137 ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req);
96 req);
97 if (ret < 0) 138 if (ret < 0)
98 return ret; 139 return ret;
99 140
@@ -111,16 +152,14 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
111 bh = (struct buffer_head *)((unsigned long)ptr); 152 bh = (struct buffer_head *)((unsigned long)ptr);
112 set_buffer_nilfs_volatile(bh); 153 set_buffer_nilfs_volatile(bh);
113 154
114 if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) 155 nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req);
115 direct->d_bmap.b_pops->bpop_commit_alloc_ptr(
116 &direct->d_bmap, req);
117 nilfs_direct_set_ptr(direct, key, req->bpr_ptr); 156 nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
118 157
119 if (!nilfs_bmap_dirty(&direct->d_bmap)) 158 if (!nilfs_bmap_dirty(&direct->d_bmap))
120 nilfs_bmap_set_dirty(&direct->d_bmap); 159 nilfs_bmap_set_dirty(&direct->d_bmap);
121 160
122 if (direct->d_ops->dop_set_target != NULL) 161 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
123 direct->d_ops->dop_set_target(direct, key, req->bpr_ptr); 162 nilfs_direct_set_target_v(direct, key, req->bpr_ptr);
124} 163}
125 164
126static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 165static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -152,25 +191,18 @@ static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
152{ 191{
153 int ret; 192 int ret;
154 193
155 if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { 194 req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
156 req->bpr_ptr = nilfs_direct_get_ptr(direct, key); 195 ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req);
157 ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr( 196 if (!ret)
158 &direct->d_bmap, req); 197 stats->bs_nblocks = 1;
159 if (ret < 0) 198 return ret;
160 return ret;
161 }
162
163 stats->bs_nblocks = 1;
164 return 0;
165} 199}
166 200
167static void nilfs_direct_commit_delete(struct nilfs_direct *direct, 201static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
168 union nilfs_bmap_ptr_req *req, 202 union nilfs_bmap_ptr_req *req,
169 __u64 key) 203 __u64 key)
170{ 204{
171 if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) 205 nilfs_bmap_commit_end_ptr(&direct->d_bmap, req);
172 direct->d_bmap.b_pops->bpop_commit_end_ptr(
173 &direct->d_bmap, req);
174 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); 206 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
175} 207}
176 208
@@ -244,8 +276,7 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
244} 276}
245 277
246int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, 278int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
247 __u64 key, __u64 *keys, __u64 *ptrs, 279 __u64 key, __u64 *keys, __u64 *ptrs, int n)
248 int n, __u64 low, __u64 high)
249{ 280{
250 struct nilfs_direct *direct; 281 struct nilfs_direct *direct;
251 __le64 *dptrs; 282 __le64 *dptrs;
@@ -275,8 +306,7 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
275 dptrs[i] = NILFS_BMAP_INVALID_PTR; 306 dptrs[i] = NILFS_BMAP_INVALID_PTR;
276 } 307 }
277 308
278 nilfs_direct_init(bmap, low, high); 309 nilfs_direct_init(bmap);
279
280 return 0; 310 return 0;
281} 311}
282 312
@@ -293,11 +323,11 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
293 if (!buffer_nilfs_volatile(bh)) { 323 if (!buffer_nilfs_volatile(bh)) {
294 oldreq.bpr_ptr = ptr; 324 oldreq.bpr_ptr = ptr;
295 newreq.bpr_ptr = ptr; 325 newreq.bpr_ptr = ptr;
296 ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq, 326 ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq,
297 &newreq); 327 &newreq);
298 if (ret < 0) 328 if (ret < 0)
299 return ret; 329 return ret;
300 nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq); 330 nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq);
301 set_buffer_nilfs_volatile(bh); 331 set_buffer_nilfs_volatile(bh);
302 nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); 332 nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
303 } else 333 } else
@@ -309,12 +339,10 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
309static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, 339static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
310 struct buffer_head *bh) 340 struct buffer_head *bh)
311{ 341{
312 struct nilfs_direct *direct; 342 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
313 343
314 direct = (struct nilfs_direct *)bmap; 344 return NILFS_BMAP_USE_VBN(bmap) ?
315 return (direct->d_ops->dop_propagate != NULL) ? 345 nilfs_direct_propagate_v(direct, bh) : 0;
316 direct->d_ops->dop_propagate(direct, bh) :
317 0;
318} 346}
319 347
320static int nilfs_direct_assign_v(struct nilfs_direct *direct, 348static int nilfs_direct_assign_v(struct nilfs_direct *direct,
@@ -327,12 +355,9 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct,
327 int ret; 355 int ret;
328 356
329 req.bpr_ptr = ptr; 357 req.bpr_ptr = ptr;
330 ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr( 358 ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr);
331 &direct->d_bmap, &req); 359 if (unlikely(ret < 0))
332 if (ret < 0)
333 return ret; 360 return ret;
334 direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap,
335 &req, blocknr);
336 361
337 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 362 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
338 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 363 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
@@ -377,12 +402,14 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
377 return -EINVAL; 402 return -EINVAL;
378 } 403 }
379 404
380 return direct->d_ops->dop_assign(direct, key, ptr, bh, 405 return NILFS_BMAP_USE_VBN(bmap) ?
381 blocknr, binfo); 406 nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) :
407 nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo);
382} 408}
383 409
384static const struct nilfs_bmap_operations nilfs_direct_ops = { 410static const struct nilfs_bmap_operations nilfs_direct_ops = {
385 .bop_lookup = nilfs_direct_lookup, 411 .bop_lookup = nilfs_direct_lookup,
412 .bop_lookup_contig = nilfs_direct_lookup_contig,
386 .bop_insert = nilfs_direct_insert, 413 .bop_insert = nilfs_direct_insert,
387 .bop_delete = nilfs_direct_delete, 414 .bop_delete = nilfs_direct_delete,
388 .bop_clear = NULL, 415 .bop_clear = NULL,
@@ -401,36 +428,8 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
401}; 428};
402 429
403 430
404static const struct nilfs_direct_operations nilfs_direct_ops_v = { 431int nilfs_direct_init(struct nilfs_bmap *bmap)
405 .dop_find_target = nilfs_direct_find_target_v,
406 .dop_set_target = nilfs_direct_set_target_v,
407 .dop_propagate = nilfs_direct_propagate_v,
408 .dop_assign = nilfs_direct_assign_v,
409};
410
411static const struct nilfs_direct_operations nilfs_direct_ops_p = {
412 .dop_find_target = NULL,
413 .dop_set_target = NULL,
414 .dop_propagate = NULL,
415 .dop_assign = nilfs_direct_assign_p,
416};
417
418int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
419{ 432{
420 struct nilfs_direct *direct;
421
422 direct = (struct nilfs_direct *)bmap;
423 bmap->b_ops = &nilfs_direct_ops; 433 bmap->b_ops = &nilfs_direct_ops;
424 bmap->b_low = low;
425 bmap->b_high = high;
426 switch (bmap->b_inode->i_ino) {
427 case NILFS_DAT_INO:
428 direct->d_ops = &nilfs_direct_ops_p;
429 break;
430 default:
431 direct->d_ops = &nilfs_direct_ops_v;
432 break;
433 }
434
435 return 0; 434 return 0;
436} 435}
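nilfs_direct_lookup_contig(), added above, returns how many consecutive keys map to consecutive block numbers. The following standalone sketch shows that run-counting logic under the assumption of a simple in-memory slot table; the real code additionally translates virtual block numbers through the DAT when NILFS_BMAP_USE_VBN() is set.

/* Userspace sketch (assumption, not kernel code) of the run-counting logic
 * behind nilfs_direct_lookup_contig(): starting from a hit, keep going while
 * the next key resolves to the block right after the previous one.
 */
#include <stdio.h>

#define INVALID (~0UL)

static unsigned long slots[7] = { 100, 101, 102, 200, INVALID, 0, 0 };

static int lookup_contig(unsigned key, unsigned maxblocks, unsigned long *blk)
{
	unsigned long ptr;
	unsigned cnt;

	if (key >= 7 || (ptr = slots[key]) == INVALID)
		return -1;		/* -ENOENT in the real code */

	if (maxblocks > 7 - key)
		maxblocks = 7 - key;
	for (cnt = 1; cnt < maxblocks && slots[key + cnt] != INVALID &&
	     slots[key + cnt] == ptr + cnt; cnt++)
		;
	*blk = ptr;
	return cnt;			/* number of contiguous blocks */
}

int main(void)
{
	unsigned long blk;
	int n = lookup_contig(0, 16, &blk);

	printf("blk=%lu contiguous=%d\n", blk, n);	/* blk=100 contiguous=3 */
	return 0;
}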
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
index 45d2c5cda812..a5ffd66e25d0 100644
--- a/fs/nilfs2/direct.h
+++ b/fs/nilfs2/direct.h
@@ -31,18 +31,6 @@
31struct nilfs_direct; 31struct nilfs_direct;
32 32
33/** 33/**
34 * struct nilfs_direct_operations - direct mapping operation table
35 */
36struct nilfs_direct_operations {
37 __u64 (*dop_find_target)(const struct nilfs_direct *, __u64);
38 void (*dop_set_target)(struct nilfs_direct *, __u64, __u64);
39 int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *);
40 int (*dop_assign)(struct nilfs_direct *, __u64, __u64,
41 struct buffer_head **, sector_t,
42 union nilfs_binfo *);
43};
44
45/**
46 * struct nilfs_direct_node - direct node 34 * struct nilfs_direct_node - direct node
47 * @dn_flags: flags 35 * @dn_flags: flags
48 * @dn_pad: padding 36 * @dn_pad: padding
@@ -55,13 +43,9 @@ struct nilfs_direct_node {
55/** 43/**
56 * struct nilfs_direct - direct mapping 44 * struct nilfs_direct - direct mapping
57 * @d_bmap: bmap structure 45 * @d_bmap: bmap structure
58 * @d_ops: direct mapping operation table
59 */ 46 */
60struct nilfs_direct { 47struct nilfs_direct {
61 struct nilfs_bmap d_bmap; 48 struct nilfs_bmap d_bmap;
62
63 /* direct-mapping-specific members */
64 const struct nilfs_direct_operations *d_ops;
65}; 49};
66 50
67 51
@@ -70,9 +54,9 @@ struct nilfs_direct {
70#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) 54#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
71 55
72 56
73int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); 57int nilfs_direct_init(struct nilfs_bmap *);
74int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, 58int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *,
75 __u64 *, int, __u64, __u64); 59 __u64 *, int);
76 60
77 61
78#endif /* _NILFS_DIRECT_H */ 62#endif /* _NILFS_DIRECT_H */
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 19d2102b6a69..1b3c2bb20da9 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -52,8 +52,9 @@
52#include "dat.h" 52#include "dat.h"
53#include "ifile.h" 53#include "ifile.h"
54 54
55static struct address_space_operations def_gcinode_aops = {}; 55static struct address_space_operations def_gcinode_aops = {
56/* XXX need def_gcinode_iops/fops? */ 56 .sync_page = block_sync_page,
57};
57 58
58/* 59/*
59 * nilfs_gccache_submit_read_data() - add data buffer and submit read request 60 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 49ab4a49bb4f..fe9d8f2a13f8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -43,22 +43,23 @@
43 * 43 *
44 * This function does not issue actual read request of the specified data 44 * This function does not issue actual read request of the specified data
45 * block. It is done by VFS. 45 * block. It is done by VFS.
46 * Bulk read for direct-io is not supported yet. (should be supported)
47 */ 46 */
48int nilfs_get_block(struct inode *inode, sector_t blkoff, 47int nilfs_get_block(struct inode *inode, sector_t blkoff,
49 struct buffer_head *bh_result, int create) 48 struct buffer_head *bh_result, int create)
50{ 49{
51 struct nilfs_inode_info *ii = NILFS_I(inode); 50 struct nilfs_inode_info *ii = NILFS_I(inode);
52 unsigned long blknum = 0; 51 __u64 blknum = 0;
53 int err = 0, ret; 52 int err = 0, ret;
54 struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); 53 struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));
54 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
55 55
56 /* This exclusion control is a workaround; should be revised */ 56 down_read(&NILFS_MDT(dat)->mi_sem);
57 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 57 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
58 ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum); 58 up_read(&NILFS_MDT(dat)->mi_sem);
59 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 59 if (ret >= 0) { /* found */
60 if (ret == 0) { /* found */
61 map_bh(bh_result, inode->i_sb, blknum); 60 map_bh(bh_result, inode->i_sb, blknum);
61 if (ret > 0)
62 bh_result->b_size = (ret << inode->i_blkbits);
62 goto out; 63 goto out;
63 } 64 }
64 /* data block was not found */ 65 /* data block was not found */
@@ -240,7 +241,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
240struct address_space_operations nilfs_aops = { 241struct address_space_operations nilfs_aops = {
241 .writepage = nilfs_writepage, 242 .writepage = nilfs_writepage,
242 .readpage = nilfs_readpage, 243 .readpage = nilfs_readpage,
243 /* .sync_page = nilfs_sync_page, */ 244 .sync_page = block_sync_page,
244 .writepages = nilfs_writepages, 245 .writepages = nilfs_writepages,
245 .set_page_dirty = nilfs_set_page_dirty, 246 .set_page_dirty = nilfs_set_page_dirty,
246 .readpages = nilfs_readpages, 247 .readpages = nilfs_readpages,
@@ -249,6 +250,7 @@ struct address_space_operations nilfs_aops = {
249 /* .releasepage = nilfs_releasepage, */ 250 /* .releasepage = nilfs_releasepage, */
250 .invalidatepage = block_invalidatepage, 251 .invalidatepage = block_invalidatepage,
251 .direct_IO = nilfs_direct_IO, 252 .direct_IO = nilfs_direct_IO,
253 .is_partially_uptodate = block_is_partially_uptodate,
252}; 254};
253 255
254struct inode *nilfs_new_inode(struct inode *dir, int mode) 256struct inode *nilfs_new_inode(struct inode *dir, int mode)
@@ -307,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
307 /* ii->i_file_acl = 0; */ 309 /* ii->i_file_acl = 0; */
308 /* ii->i_dir_acl = 0; */ 310 /* ii->i_dir_acl = 0; */
309 ii->i_dir_start_lookup = 0; 311 ii->i_dir_start_lookup = 0;
310#ifdef CONFIG_NILFS_FS_POSIX_ACL
311 ii->i_acl = NULL;
312 ii->i_default_acl = NULL;
313#endif
314 ii->i_cno = 0; 312 ii->i_cno = 0;
315 nilfs_set_inode_flags(inode); 313 nilfs_set_inode_flags(inode);
316 spin_lock(&sbi->s_next_gen_lock); 314 spin_lock(&sbi->s_next_gen_lock);
@@ -432,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
432 430
433 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); 431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
434 432
435#ifdef CONFIG_NILFS_FS_POSIX_ACL
436 ii->i_acl = NILFS_ACL_NOT_CACHED;
437 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
438#endif
439 if (nilfs_read_inode_common(inode, raw_inode)) 433 if (nilfs_read_inode_common(inode, raw_inode))
440 goto failed_unmap; 434 goto failed_unmap;
441 435
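The nilfs_get_block() change above adopts the usual multi-block get_block() convention: the requested span arrives encoded in bh_result->b_size, and a contiguous hit is reported back the same way. A tiny sketch of the arithmetic involved (the values are made up for illustration):

/* Sketch of the get_block() size convention the patch adopts (simplified,
 * an assumption about usage, not kernel code).
 */
#include <stdio.h>

int main(void)
{
	unsigned blkbits = 12;			/* 4 KiB blocks */
	unsigned long b_size = 16 << blkbits;	/* caller asks for up to 16 blocks */

	unsigned maxblocks = b_size >> blkbits;	/* 16, passed to lookup_contig */
	int ret = 3;				/* lookup found 3 contiguous blocks */

	if (ret > 0)
		b_size = (unsigned long)ret << blkbits;	/* report 3 * 4096 bytes */

	printf("maxblocks=%u mapped=%lu bytes\n", maxblocks, b_size);
	return 0;
}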
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index d6759b92006f..6ea5f872e2de 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -152,7 +152,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
152 152
153 down_read(&nilfs->ns_segctor_sem); 153 down_read(&nilfs->ns_segctor_sem);
154 ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, 154 ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf,
155 nmembs); 155 size, nmembs);
156 up_read(&nilfs->ns_segctor_sem); 156 up_read(&nilfs->ns_segctor_sem);
157 return ret; 157 return ret;
158} 158}
@@ -182,7 +182,8 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
182 int ret; 182 int ret;
183 183
184 down_read(&nilfs->ns_segctor_sem); 184 down_read(&nilfs->ns_segctor_sem);
185 ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); 185 ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, size,
186 nmembs);
186 up_read(&nilfs->ns_segctor_sem); 187 up_read(&nilfs->ns_segctor_sem);
187 return ret; 188 return ret;
188} 189}
@@ -212,7 +213,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
212 int ret; 213 int ret;
213 214
214 down_read(&nilfs->ns_segctor_sem); 215 down_read(&nilfs->ns_segctor_sem);
215 ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); 216 ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs);
216 up_read(&nilfs->ns_segctor_sem); 217 up_read(&nilfs->ns_segctor_sem);
217 return ret; 218 return ret;
218} 219}
@@ -435,24 +436,6 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
435 return nmembs; 436 return nmembs;
436} 437}
437 438
438static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
439 struct nilfs_argv *argv, void *buf)
440{
441 size_t nmembs = argv->v_nmembs;
442 struct nilfs_sb_info *sbi = nilfs->ns_writer;
443 int ret;
444
445 if (unlikely(!sbi)) {
446 /* never happens because called for a writable mount */
447 WARN_ON(1);
448 return -EROFS;
449 }
450 ret = nilfs_segctor_add_segments_to_be_freed(
451 NILFS_SC(sbi), buf, nmembs);
452
453 return (ret < 0) ? ret : nmembs;
454}
455
456int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, 439int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
457 struct nilfs_argv *argv, void **kbufs) 440 struct nilfs_argv *argv, void **kbufs)
458{ 441{
@@ -491,14 +474,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
491 msg = "cannot mark copying blocks dirty"; 474 msg = "cannot mark copying blocks dirty";
492 goto failed; 475 goto failed;
493 } 476 }
494 ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]);
495 if (ret < 0) {
496 /*
497 * can safely abort because this operation is atomic.
498 */
499 msg = "cannot set segments to be freed";
500 goto failed;
501 }
502 return 0; 477 return 0;
503 478
504 failed: 479 failed:
@@ -615,7 +590,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
615 if (copy_from_user(&argv, argp, sizeof(argv))) 590 if (copy_from_user(&argv, argp, sizeof(argv)))
616 return -EFAULT; 591 return -EFAULT;
617 592
618 if (argv.v_size != membsz) 593 if (argv.v_size < membsz)
619 return -EINVAL; 594 return -EINVAL;
620 595
621 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); 596 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index bb78745a0e30..3d3ddb3f5177 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -430,6 +430,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
430 430
431static struct address_space_operations def_mdt_aops = { 431static struct address_space_operations def_mdt_aops = {
432 .writepage = nilfs_mdt_write_page, 432 .writepage = nilfs_mdt_write_page,
433 .sync_page = block_sync_page,
433}; 434};
434 435
435static struct inode_operations def_mdt_iops; 436static struct inode_operations def_mdt_iops;
@@ -449,7 +450,7 @@ struct inode *
449nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, 450nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
450 ino_t ino, gfp_t gfp_mask) 451 ino_t ino, gfp_t gfp_mask)
451{ 452{
452 struct inode *inode = nilfs_alloc_inode(sb); 453 struct inode *inode = nilfs_alloc_inode_common(nilfs);
453 454
454 if (!inode) 455 if (!inode)
455 return NULL; 456 return NULL;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index da6fc0bba2e5..724c63766e82 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -58,10 +58,6 @@ struct nilfs_inode_info {
58 */ 58 */
59 struct rw_semaphore xattr_sem; 59 struct rw_semaphore xattr_sem;
60#endif 60#endif
61#ifdef CONFIG_NILFS_POSIX_ACL
62 struct posix_acl *i_acl;
63 struct posix_acl *i_default_acl;
64#endif
65 struct buffer_head *i_bh; /* i_bh contains a new or dirty 61 struct buffer_head *i_bh; /* i_bh contains a new or dirty
66 disk inode */ 62 disk inode */
67 struct inode vfs_inode; 63 struct inode vfs_inode;
@@ -263,6 +259,7 @@ extern void nilfs_dirty_inode(struct inode *);
263extern struct dentry *nilfs_get_parent(struct dentry *); 259extern struct dentry *nilfs_get_parent(struct dentry *);
264 260
265/* super.c */ 261/* super.c */
262extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *);
266extern struct inode *nilfs_alloc_inode(struct super_block *); 263extern struct inode *nilfs_alloc_inode(struct super_block *);
267extern void nilfs_destroy_inode(struct inode *); 264extern void nilfs_destroy_inode(struct inode *);
268extern void nilfs_error(struct super_block *, const char *, const char *, ...) 265extern void nilfs_error(struct super_block *, const char *, const char *, ...)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 57afa9d24061..d80cc71be749 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -28,7 +28,6 @@
28#include "segment.h" 28#include "segment.h"
29#include "sufile.h" 29#include "sufile.h"
30#include "page.h" 30#include "page.h"
31#include "seglist.h"
32#include "segbuf.h" 31#include "segbuf.h"
33 32
34/* 33/*
@@ -395,6 +394,24 @@ static void dispose_recovery_list(struct list_head *head)
395 } 394 }
396} 395}
397 396
397struct nilfs_segment_entry {
398 struct list_head list;
399 __u64 segnum;
400};
401
402static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
403{
404 struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
405
406 if (unlikely(!ent))
407 return -ENOMEM;
408
409 ent->segnum = segnum;
410 INIT_LIST_HEAD(&ent->list);
411 list_add_tail(&ent->list, head);
412 return 0;
413}
414
398void nilfs_dispose_segment_list(struct list_head *head) 415void nilfs_dispose_segment_list(struct list_head *head)
399{ 416{
400 while (!list_empty(head)) { 417 while (!list_empty(head)) {
@@ -402,7 +419,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
402 = list_entry(head->next, 419 = list_entry(head->next,
403 struct nilfs_segment_entry, list); 420 struct nilfs_segment_entry, list);
404 list_del(&ent->list); 421 list_del(&ent->list);
405 nilfs_free_segment_entry(ent); 422 kfree(ent);
406 } 423 }
407} 424}
408 425
@@ -431,12 +448,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
431 if (unlikely(err)) 448 if (unlikely(err))
432 goto failed; 449 goto failed;
433 450
434 err = -ENOMEM;
435 for (i = 1; i < 4; i++) { 451 for (i = 1; i < 4; i++) {
436 ent = nilfs_alloc_segment_entry(segnum[i]); 452 err = nilfs_segment_list_add(head, segnum[i]);
437 if (unlikely(!ent)) 453 if (unlikely(err))
438 goto failed; 454 goto failed;
439 list_add_tail(&ent->list, head);
440 } 455 }
441 456
442 /* 457 /*
@@ -450,7 +465,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
450 goto failed; 465 goto failed;
451 } 466 }
452 list_del(&ent->list); 467 list_del(&ent->list);
453 nilfs_free_segment_entry(ent); 468 kfree(ent);
454 } 469 }
455 470
456 /* Allocate new segments for recovery */ 471 /* Allocate new segments for recovery */
@@ -791,7 +806,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
791 u64 seg_seq; 806 u64 seg_seq;
792 __u64 segnum, nextnum = 0; 807 __u64 segnum, nextnum = 0;
793 __u64 cno; 808 __u64 cno;
794 struct nilfs_segment_entry *ent;
795 LIST_HEAD(segments); 809 LIST_HEAD(segments);
796 int empty_seg = 0, scan_newer = 0; 810 int empty_seg = 0, scan_newer = 0;
797 int ret; 811 int ret;
@@ -892,12 +906,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
892 if (empty_seg++) 906 if (empty_seg++)
893 goto super_root_found; /* found a valid super root */ 907 goto super_root_found; /* found a valid super root */
894 908
895 ent = nilfs_alloc_segment_entry(segnum); 909 ret = nilfs_segment_list_add(&segments, segnum);
896 if (unlikely(!ent)) { 910 if (unlikely(ret))
897 ret = -ENOMEM;
898 goto failed; 911 goto failed;
899 }
900 list_add_tail(&ent->list, &segments);
901 912
902 seg_seq++; 913 seg_seq++;
903 segnum = nextnum; 914 segnum = nextnum;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 1e68821b4a9b..9e3fe17bb96b 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -26,7 +26,6 @@
26#include <linux/crc32.h> 26#include <linux/crc32.h>
27#include "page.h" 27#include "page.h"
28#include "segbuf.h" 28#include "segbuf.h"
29#include "seglist.h"
30 29
31 30
32static struct kmem_cache *nilfs_segbuf_cachep; 31static struct kmem_cache *nilfs_segbuf_cachep;
@@ -394,7 +393,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
394 * Last BIO is always sent through the following 393 * Last BIO is always sent through the following
395 * submission. 394 * submission.
396 */ 395 */
397 rw |= (1 << BIO_RW_SYNCIO); 396 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
398 res = nilfs_submit_seg_bio(wi, rw); 397 res = nilfs_submit_seg_bio(wi, rw);
399 if (unlikely(res)) 398 if (unlikely(res))
400 goto failed_bio; 399 goto failed_bio;
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h
deleted file mode 100644
index d39df9144e99..000000000000
--- a/fs/nilfs2/seglist.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * seglist.h - expediential structure and routines to handle list of segments
3 * (would be removed in a future release)
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * Written by Ryusuke Konishi <ryusuke@osrg.net>
22 *
23 */
24#ifndef _NILFS_SEGLIST_H
25#define _NILFS_SEGLIST_H
26
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "sufile.h"
31
32struct nilfs_segment_entry {
33 __u64 segnum;
34
35#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisionally.
36 It must be cancelled if
37 construction aborted */
38
39 unsigned flags;
40 struct list_head list;
41 struct buffer_head *bh_su;
42 struct nilfs_segment_usage *raw_su;
43};
44
45
46void nilfs_dispose_segment_list(struct list_head *);
47
48static inline struct nilfs_segment_entry *
49nilfs_alloc_segment_entry(__u64 segnum)
50{
51 struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
52
53 if (likely(ent)) {
54 ent->segnum = segnum;
55 ent->flags = 0;
56 ent->bh_su = NULL;
57 ent->raw_su = NULL;
58 INIT_LIST_HEAD(&ent->list);
59 }
60 return ent;
61}
62
63static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
64 struct inode *sufile)
65{
66 return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
67 &ent->raw_su, &ent->bh_su);
68}
69
70static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent,
71 struct inode *sufile)
72{
73 if (!ent->bh_su)
74 return;
75 nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su);
76 ent->bh_su = NULL;
77 ent->raw_su = NULL;
78}
79
80static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
81{
82 kfree(ent);
83}
84
85#endif /* _NILFS_SEGLIST_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 22c7f65c2403..aa977549919e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -39,7 +39,6 @@
39#include "sufile.h" 39#include "sufile.h"
40#include "cpfile.h" 40#include "cpfile.h"
41#include "ifile.h" 41#include "ifile.h"
42#include "seglist.h"
43#include "segbuf.h" 42#include "segbuf.h"
44 43
45 44
@@ -79,7 +78,8 @@ enum {
79/* State flags of collection */ 78/* State flags of collection */
80#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ 79#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
81#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ 80#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
82#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) 81#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */
82#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) 81#define NILFS_CF_SUFREED 0x0004 /* segment usages have been freed */
83 83
84/* Operations depending on the construction mode and file type */ 84/* Operations depending on the construction mode and file type */
85struct nilfs_sc_operations { 85struct nilfs_sc_operations {
@@ -810,7 +810,7 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
810{ 810{
811 return list_empty(&sci->sc_dirty_files) && 811 return list_empty(&sci->sc_dirty_files) &&
812 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && 812 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
813 list_empty(&sci->sc_cleaning_segments) && 813 sci->sc_nfreesegs == 0 &&
814 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); 814 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
815} 815}
816 816
@@ -1005,44 +1005,6 @@ static void nilfs_drop_collected_inodes(struct list_head *head)
1005 } 1005 }
1006} 1006}
1007 1007
1008static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci,
1009 struct inode *sufile)
1010
1011{
1012 struct list_head *head = &sci->sc_cleaning_segments;
1013 struct nilfs_segment_entry *ent;
1014 int err;
1015
1016 list_for_each_entry(ent, head, list) {
1017 if (!(ent->flags & NILFS_SLH_FREED))
1018 break;
1019 err = nilfs_sufile_cancel_free(sufile, ent->segnum);
1020 WARN_ON(err); /* do not happen */
1021 ent->flags &= ~NILFS_SLH_FREED;
1022 }
1023}
1024
1025static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci,
1026 struct inode *sufile)
1027{
1028 struct list_head *head = &sci->sc_cleaning_segments;
1029 struct nilfs_segment_entry *ent;
1030 int err;
1031
1032 list_for_each_entry(ent, head, list) {
1033 err = nilfs_sufile_free(sufile, ent->segnum);
1034 if (unlikely(err))
1035 return err;
1036 ent->flags |= NILFS_SLH_FREED;
1037 }
1038 return 0;
1039}
1040
1041static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
1042{
1043 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
1044}
1045
1046static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, 1008static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
1047 struct inode *inode, 1009 struct inode *inode,
1048 struct list_head *listp, 1010 struct list_head *listp,
@@ -1161,6 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1161 struct the_nilfs *nilfs = sbi->s_nilfs; 1123 struct the_nilfs *nilfs = sbi->s_nilfs;
1162 struct list_head *head; 1124 struct list_head *head;
1163 struct nilfs_inode_info *ii; 1125 struct nilfs_inode_info *ii;
1126 size_t ndone;
1164 int err = 0; 1127 int err = 0;
1165 1128
1166 switch (sci->sc_stage.scnt) { 1129 switch (sci->sc_stage.scnt) {
@@ -1250,10 +1213,16 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1250 break; 1213 break;
1251 sci->sc_stage.scnt++; /* Fall through */ 1214 sci->sc_stage.scnt++; /* Fall through */
1252 case NILFS_ST_SUFILE: 1215 case NILFS_ST_SUFILE:
1253 err = nilfs_segctor_prepare_free_segments(sci, 1216 err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
1254 nilfs->ns_sufile); 1217 sci->sc_nfreesegs, &ndone);
1255 if (unlikely(err)) 1218 if (unlikely(err)) {
1219 nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1220 sci->sc_freesegs, ndone,
1221 NULL);
1256 break; 1222 break;
1223 }
1224 sci->sc_stage.flags |= NILFS_CF_SUFREED;
1225
1257 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, 1226 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
1258 &nilfs_sc_file_ops); 1227 &nilfs_sc_file_ops);
1259 if (unlikely(err)) 1228 if (unlikely(err))
@@ -1486,7 +1455,15 @@ static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
1486{ 1455{
1487 if (unlikely(err)) { 1456 if (unlikely(err)) {
1488 nilfs_segctor_free_incomplete_segments(sci, nilfs); 1457 nilfs_segctor_free_incomplete_segments(sci, nilfs);
1489 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); 1458 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1459 int ret;
1460
1461 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1462 sci->sc_freesegs,
1463 sci->sc_nfreesegs,
1464 NULL);
1465 WARN_ON(ret); /* do not happen */
1466 }
1490 } 1467 }
1491 nilfs_segctor_clear_segment_buffers(sci); 1468 nilfs_segctor_clear_segment_buffers(sci);
1492} 1469}
@@ -1585,7 +1562,13 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1585 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) 1562 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
1586 break; 1563 break;
1587 1564
1588 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); 1565 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1566 err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1567 sci->sc_freesegs,
1568 sci->sc_nfreesegs,
1569 NULL);
1570 WARN_ON(err); /* do not happen */
1571 }
1589 nilfs_segctor_clear_segment_buffers(sci); 1572 nilfs_segctor_clear_segment_buffers(sci);
1590 1573
1591 err = nilfs_segctor_extend_segments(sci, nilfs, nadd); 1574 err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
@@ -2224,10 +2207,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2224 nilfs_segctor_complete_write(sci); 2207 nilfs_segctor_complete_write(sci);
2225 2208
2226 /* Commit segments */ 2209 /* Commit segments */
2227 if (has_sr) { 2210 if (has_sr)
2228 nilfs_segctor_commit_free_segments(sci);
2229 nilfs_segctor_clear_metadata_dirty(sci); 2211 nilfs_segctor_clear_metadata_dirty(sci);
2230 }
2231 2212
2232 nilfs_segctor_end_construction(sci, nilfs, 0); 2213 nilfs_segctor_end_construction(sci, nilfs, 0);
2233 2214
@@ -2301,48 +2282,6 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2301 /* assign bit 0 to data files */ 2282 /* assign bit 0 to data files */
2302} 2283}
2303 2284
2304int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
2305 __u64 *segnum, size_t nsegs)
2306{
2307 struct nilfs_segment_entry *ent;
2308 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
2309 struct inode *sufile = nilfs->ns_sufile;
2310 LIST_HEAD(list);
2311 __u64 *pnum;
2312 size_t i;
2313 int err;
2314
2315 for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
2316 ent = nilfs_alloc_segment_entry(*pnum);
2317 if (unlikely(!ent)) {
2318 err = -ENOMEM;
2319 goto failed;
2320 }
2321 list_add_tail(&ent->list, &list);
2322
2323 err = nilfs_open_segment_entry(ent, sufile);
2324 if (unlikely(err))
2325 goto failed;
2326
2327 if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su)))
2328 printk(KERN_WARNING "NILFS: unused segment is "
2329 "requested to be cleaned (segnum=%llu)\n",
2330 (unsigned long long)ent->segnum);
2331 nilfs_close_segment_entry(ent, sufile);
2332 }
2333 list_splice(&list, sci->sc_cleaning_segments.prev);
2334 return 0;
2335
2336 failed:
2337 nilfs_dispose_segment_list(&list);
2338 return err;
2339}
2340
2341void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
2342{
2343 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
2344}
2345
2346struct nilfs_segctor_wait_request { 2285struct nilfs_segctor_wait_request {
2347 wait_queue_t wq; 2286 wait_queue_t wq;
2348 __u32 seq; 2287 __u32 seq;
@@ -2607,10 +2546,13 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2607 err = nilfs_init_gcdat_inode(nilfs); 2546 err = nilfs_init_gcdat_inode(nilfs);
2608 if (unlikely(err)) 2547 if (unlikely(err))
2609 goto out_unlock; 2548 goto out_unlock;
2549
2610 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); 2550 err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2611 if (unlikely(err)) 2551 if (unlikely(err))
2612 goto out_unlock; 2552 goto out_unlock;
2613 2553
2554 sci->sc_freesegs = kbufs[4];
2555 sci->sc_nfreesegs = argv[4].v_nmembs;
2614 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); 2556 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
2615 2557
2616 for (;;) { 2558 for (;;) {
@@ -2629,6 +2571,8 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2629 } 2571 }
2630 2572
2631 out_unlock: 2573 out_unlock:
2574 sci->sc_freesegs = NULL;
2575 sci->sc_nfreesegs = 0;
2632 nilfs_clear_gcdat_inode(nilfs); 2576 nilfs_clear_gcdat_inode(nilfs);
2633 nilfs_transaction_unlock(sbi); 2577 nilfs_transaction_unlock(sbi);
2634 return err; 2578 return err;
@@ -2835,7 +2779,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2835 INIT_LIST_HEAD(&sci->sc_dirty_files); 2779 INIT_LIST_HEAD(&sci->sc_dirty_files);
2836 INIT_LIST_HEAD(&sci->sc_segbufs); 2780 INIT_LIST_HEAD(&sci->sc_segbufs);
2837 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2781 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2838 INIT_LIST_HEAD(&sci->sc_cleaning_segments);
2839 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2782 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2840 2783
2841 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2784 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2901,9 +2844,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2901 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2844 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
2902 } 2845 }
2903 2846
2904 if (!list_empty(&sci->sc_cleaning_segments))
2905 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
2906
2907 WARN_ON(!list_empty(&sci->sc_segbufs)); 2847 WARN_ON(!list_empty(&sci->sc_segbufs));
2908 2848
2909 down_write(&sbi->s_nilfs->ns_segctor_sem); 2849 down_write(&sbi->s_nilfs->ns_segctor_sem);
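In the segment.c changes above, the cleaning-segment list is replaced by an array (sc_freesegs/sc_nfreesegs) that is freed in one call to nilfs_sufile_freev() and rolled back with nilfs_sufile_cancel_freev() on failure, using the ndone count. Here is a generic userspace sketch of that batch-with-rollback pattern; do_one/undo_one are stand-ins, not kernel functions.

/* Generic sketch (not kernel code) of the freev/cancel_freev error pattern:
 * a batched operation reports how many entries it completed via *ndone so a
 * failure can be rolled back precisely.
 */
#include <stdio.h>
#include <stddef.h>

static int do_one(unsigned long seg) { return seg == 42 ? -1 : 0; }
static void undo_one(unsigned long seg) { printf("undo %lu\n", seg); }

static int batch(unsigned long *v, size_t n, size_t *ndone)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (do_one(v[i]) < 0)
			break;
	*ndone = i;
	return i == n ? 0 : -1;
}

int main(void)
{
	unsigned long segs[] = { 7, 9, 42, 13 };
	size_t done, i;

	if (batch(segs, 4, &done) < 0)
		for (i = 0; i < done; i++)	/* cancel only what succeeded */
			undo_one(segs[i]);
	return 0;
}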
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 476bdd5df5be..0d2a475a741b 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -90,8 +90,9 @@ struct nilfs_segsum_pointer {
90 * @sc_nblk_inc: Block count of current generation 90 * @sc_nblk_inc: Block count of current generation
91 * @sc_dirty_files: List of files to be written 91 * @sc_dirty_files: List of files to be written
92 * @sc_gc_inodes: List of GC inodes having blocks to be written 92 * @sc_gc_inodes: List of GC inodes having blocks to be written
93 * @sc_cleaning_segments: List of segments to be freed through construction
94 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data 93 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
94 * @sc_freesegs: array of segment numbers to be freed
95 * @sc_nfreesegs: number of segments on @sc_freesegs
95 * @sc_dsync_inode: inode whose data pages are written for a sync operation 96 * @sc_dsync_inode: inode whose data pages are written for a sync operation
96 * @sc_dsync_start: start byte offset of data pages 97 * @sc_dsync_start: start byte offset of data pages
97 * @sc_dsync_end: end byte offset of data pages (inclusive) 98 * @sc_dsync_end: end byte offset of data pages (inclusive)
@@ -131,9 +132,11 @@ struct nilfs_sc_info {
131 132
132 struct list_head sc_dirty_files; 133 struct list_head sc_dirty_files;
133 struct list_head sc_gc_inodes; 134 struct list_head sc_gc_inodes;
134 struct list_head sc_cleaning_segments;
135 struct list_head sc_copied_buffers; 135 struct list_head sc_copied_buffers;
136 136
137 __u64 *sc_freesegs;
138 size_t sc_nfreesegs;
139
137 struct nilfs_inode_info *sc_dsync_inode; 140 struct nilfs_inode_info *sc_dsync_inode;
138 loff_t sc_dsync_start; 141 loff_t sc_dsync_start;
139 loff_t sc_dsync_end; 142 loff_t sc_dsync_end;
@@ -225,10 +228,6 @@ extern void nilfs_flush_segment(struct super_block *, ino_t);
225extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, 228extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
226 void **); 229 void **);
227 230
228extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *,
229 __u64 *, size_t);
230extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *);
231
232extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); 231extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *);
233extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); 232extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
234 233
@@ -240,5 +239,6 @@ extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *,
240extern int nilfs_recover_logical_segments(struct the_nilfs *, 239extern int nilfs_recover_logical_segments(struct the_nilfs *,
241 struct nilfs_sb_info *, 240 struct nilfs_sb_info *,
242 struct nilfs_recovery_info *); 241 struct nilfs_recovery_info *);
242extern void nilfs_dispose_segment_list(struct list_head *);
243 243
244#endif /* _NILFS_SEGMENT_H */ 244#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 98e68677f045..37994d4a59cc 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -18,6 +18,7 @@
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * 19 *
20 * Written by Koji Sato <koji@osrg.net>. 20 * Written by Koji Sato <koji@osrg.net>.
21 * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
21 */ 22 */
22 23
23#include <linux/kernel.h> 24#include <linux/kernel.h>
@@ -108,6 +109,102 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 nilfs_mdt_mark_buffer_dirty(header_bh); 109 nilfs_mdt_mark_buffer_dirty(header_bh);
109} 110}
110 111
112/**
113 * nilfs_sufile_updatev - modify multiple segment usages at a time
114 * @sufile: inode of segment usage file
115 * @segnumv: array of segment numbers
116 * @nsegs: size of @segnumv array
117 * @create: creation flag
118 * @ndone: place to store number of modified segments on @segnumv
119 * @dofunc: primitive operation for the update
120 *
121 * Description: nilfs_sufile_updatev() repeatedly calls @dofunc
122 * against the given array of segments. The @dofunc is called with
123 * buffers of a header block and the sufile block in which the target
124 * segment usage entry is contained. If @ndone is given, the number
125 * of successfully modified segments from the head is stored in the
126 * place @ndone points to.
127 *
128 * Return Value: On success, zero is returned. On error, one of the
129 * following negative error codes is returned.
130 *
131 * %-EIO - I/O error.
132 *
133 * %-ENOMEM - Insufficient amount of memory available.
134 *
135 * %-ENOENT - Given segment usage is in hole block (may be returned if
136 * @create is zero)
137 *
138 * %-EINVAL - Invalid segment usage number
139 */
140int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs,
141 int create, size_t *ndone,
142 void (*dofunc)(struct inode *, __u64,
143 struct buffer_head *,
144 struct buffer_head *))
145{
146 struct buffer_head *header_bh, *bh;
147 unsigned long blkoff, prev_blkoff;
148 __u64 *seg;
149 size_t nerr = 0, n = 0;
150 int ret = 0;
151
152 if (unlikely(nsegs == 0))
153 goto out;
154
155 down_write(&NILFS_MDT(sufile)->mi_sem);
156 for (seg = segnumv; seg < segnumv + nsegs; seg++) {
157 if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) {
158 printk(KERN_WARNING
159 "%s: invalid segment number: %llu\n", __func__,
160 (unsigned long long)*seg);
161 nerr++;
162 }
163 }
164 if (nerr > 0) {
165 ret = -EINVAL;
166 goto out_sem;
167 }
168
169 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
170 if (ret < 0)
171 goto out_sem;
172
173 seg = segnumv;
174 blkoff = nilfs_sufile_get_blkoff(sufile, *seg);
175 ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh);
176 if (ret < 0)
177 goto out_header;
178
179 for (;;) {
180 dofunc(sufile, *seg, header_bh, bh);
181
182 if (++seg >= segnumv + nsegs)
183 break;
184 prev_blkoff = blkoff;
185 blkoff = nilfs_sufile_get_blkoff(sufile, *seg);
186 if (blkoff == prev_blkoff)
187 continue;
188
189 /* get different block */
190 brelse(bh);
191 ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh);
192 if (unlikely(ret < 0))
193 goto out_header;
194 }
195 brelse(bh);
196
197 out_header:
198 n = seg - segnumv;
199 brelse(header_bh);
200 out_sem:
201 up_write(&NILFS_MDT(sufile)->mi_sem);
202 out:
203 if (ndone)
204 *ndone = n;
205 return ret;
206}
207
111int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, 208int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
112 void (*dofunc)(struct inode *, __u64, 209 void (*dofunc)(struct inode *, __u64,
113 struct buffer_head *, 210 struct buffer_head *,
@@ -490,7 +587,8 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
490 * nilfs_sufile_get_suinfo - 587 * nilfs_sufile_get_suinfo -
491 * @sufile: inode of segment usage file 588 * @sufile: inode of segment usage file
492 * @segnum: segment number to start looking 589 * @segnum: segment number to start looking
493 * @si: array of suinfo 590 * @buf: array of suinfo
591 * @sisz: byte size of suinfo
494 * @nsi: size of suinfo array 592 * @nsi: size of suinfo array
495 * 593 *
496 * Description: 594 * Description:
@@ -502,11 +600,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
502 * 600 *
503 * %-ENOMEM - Insufficient amount of memory available. 601 * %-ENOMEM - Insufficient amount of memory available.
504 */ 602 */
505ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, 603ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
506 struct nilfs_suinfo *si, size_t nsi) 604 unsigned sisz, size_t nsi)
507{ 605{
508 struct buffer_head *su_bh; 606 struct buffer_head *su_bh;
509 struct nilfs_segment_usage *su; 607 struct nilfs_segment_usage *su;
608 struct nilfs_suinfo *si = buf;
510 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 609 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
511 struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; 610 struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs;
512 void *kaddr; 611 void *kaddr;
@@ -531,20 +630,22 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
531 if (ret != -ENOENT) 630 if (ret != -ENOENT)
532 goto out; 631 goto out;
533 /* hole */ 632 /* hole */
534 memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); 633 memset(si, 0, sisz * n);
634 si = (void *)si + sisz * n;
535 continue; 635 continue;
536 } 636 }
537 637
538 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 638 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
539 su = nilfs_sufile_block_get_segment_usage( 639 su = nilfs_sufile_block_get_segment_usage(
540 sufile, segnum, su_bh, kaddr); 640 sufile, segnum, su_bh, kaddr);
541 for (j = 0; j < n; j++, su = (void *)su + susz) { 641 for (j = 0; j < n;
542 si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); 642 j++, su = (void *)su + susz, si = (void *)si + sisz) {
543 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); 643 si->sui_lastmod = le64_to_cpu(su->su_lastmod);
544 si[i + j].sui_flags = le32_to_cpu(su->su_flags) & 644 si->sui_nblocks = le32_to_cpu(su->su_nblocks);
645 si->sui_flags = le32_to_cpu(su->su_flags) &
545 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); 646 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
546 if (nilfs_segment_is_active(nilfs, segnum + j)) 647 if (nilfs_segment_is_active(nilfs, segnum + j))
547 si[i + j].sui_flags |= 648 si->sui_flags |=
548 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 649 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
549 } 650 }
550 kunmap_atomic(kaddr, KM_USER0); 651 kunmap_atomic(kaddr, KM_USER0);
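nilfs_sufile_updatev(), added above, batches updates so that consecutive segment numbers falling into the same metadata block reuse the block already read instead of fetching it again. A simplified userspace sketch of that reuse loop follows; ENTRIES_PER_BLOCK and get_block are illustrative names, not the kernel's.

/* Userspace sketch (assumption) of the block-reuse loop in
 * nilfs_sufile_updatev(): consecutive segment numbers that land in the same
 * metadata block are served from the block already loaded.
 */
#include <stdio.h>

#define ENTRIES_PER_BLOCK 4

static void get_block(unsigned long blkoff)
{
	printf("load block %lu\n", blkoff);
}

int main(void)
{
	unsigned long segs[] = { 1, 2, 3, 9, 10 };
	unsigned long blkoff, prev = ~0UL;
	int i;

	for (i = 0; i < 5; i++) {
		blkoff = segs[i] / ENTRIES_PER_BLOCK;
		if (blkoff != prev) {		/* only refetch on a new block */
			get_block(blkoff);
			prev = blkoff;
		}
		/* dofunc(segs[i], block) would run here */
	}
	return 0;	/* loads block 0 once and block 2 once */
}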
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a2e2efd4ade1..a2c4d76c3366 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -43,43 +43,27 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64,
43 struct buffer_head *); 43 struct buffer_head *);
44int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); 44int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
45int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); 45int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
46ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, 46ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
47 size_t); 47 size_t);
48 48
49int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *,
50 void (*dofunc)(struct inode *, __u64,
51 struct buffer_head *,
52 struct buffer_head *));
49int nilfs_sufile_update(struct inode *, __u64, int, 53int nilfs_sufile_update(struct inode *, __u64, int,
50 void (*dofunc)(struct inode *, __u64, 54 void (*dofunc)(struct inode *, __u64,
51 struct buffer_head *, 55 struct buffer_head *,
52 struct buffer_head *)); 56 struct buffer_head *));
53void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
54 struct buffer_head *);
55void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, 57void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
56 struct buffer_head *); 58 struct buffer_head *);
57void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, 59void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
58 struct buffer_head *); 60 struct buffer_head *);
61void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *);
59void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 63void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
60 struct buffer_head *); 64 struct buffer_head *);
61 65
62/** 66/**
63 * nilfs_sufile_cancel_free -
64 * @sufile: inode of segment usage file
65 * @segnum: segment number
66 *
67 * Description:
68 *
69 * Return Value: On success, 0 is returned. On error, one of the following
70 * negative error codes is returned.
71 *
72 * %-EIO - I/O error.
73 *
74 * %-ENOMEM - Insufficient amount of memory available.
75 */
76static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
77{
78 return nilfs_sufile_update(sufile, segnum, 0,
79 nilfs_sufile_do_cancel_free);
80}
81
82/**
83 * nilfs_sufile_scrap - make a segment garbage 67 * nilfs_sufile_scrap - make a segment garbage
84 * @sufile: inode of segment usage file 68 * @sufile: inode of segment usage file
85 * @segnum: segment number to be freed 69 * @segnum: segment number to be freed
@@ -100,6 +84,38 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
100} 84}
101 85
102/** 86/**
87 * nilfs_sufile_freev - free segments
88 * @sufile: inode of segment usage file
89 * @segnumv: array of segment numbers
90 * @nsegs: size of @segnumv array
91 * @ndone: place to store the number of freed segments
92 */
93static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
94 size_t nsegs, size_t *ndone)
95{
96 return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
97 nilfs_sufile_do_free);
98}
99
100/**
101 * nilfs_sufile_cancel_freev - reallocate freeing segments
102 * @sufile: inode of segment usage file
103 * @segnumv: array of segment numbers
104 * @nsegs: size of @segnumv array
105 * @ndone: place to store the number of cancelled segments
106 *
107 * Return Value: On success, 0 is returned. On error, a negative error code
108 * is returned.
109 */
110static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
111 __u64 *segnumv, size_t nsegs,
112 size_t *ndone)
113{
114 return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
115 nilfs_sufile_do_cancel_free);
116}
117
118/**
103 * nilfs_sufile_set_error - mark a segment as erroneous 119 * nilfs_sufile_set_error - mark a segment as erroneous
104 * @sufile: inode of segment usage file 120 * @sufile: inode of segment usage file
105 * @segnum: segment number 121 * @segnum: segment number
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1777a3467bd2..8e2ec43b18f4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -133,7 +133,7 @@ void nilfs_warning(struct super_block *sb, const char *function,
133 133
134static struct kmem_cache *nilfs_inode_cachep; 134static struct kmem_cache *nilfs_inode_cachep;
135 135
136struct inode *nilfs_alloc_inode(struct super_block *sb) 136struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs)
137{ 137{
138 struct nilfs_inode_info *ii; 138 struct nilfs_inode_info *ii;
139 139
@@ -143,10 +143,15 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
143 ii->i_bh = NULL; 143 ii->i_bh = NULL;
144 ii->i_state = 0; 144 ii->i_state = 0;
145 ii->vfs_inode.i_version = 1; 145 ii->vfs_inode.i_version = 1;
146 nilfs_btnode_cache_init(&ii->i_btnode_cache); 146 nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi);
147 return &ii->vfs_inode; 147 return &ii->vfs_inode;
148} 148}
149 149
150struct inode *nilfs_alloc_inode(struct super_block *sb)
151{
152 return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs);
153}
154
150void nilfs_destroy_inode(struct inode *inode) 155void nilfs_destroy_inode(struct inode *inode)
151{ 156{
152 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 157 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
@@ -184,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
184{ 189{
185 struct nilfs_inode_info *ii = NILFS_I(inode); 190 struct nilfs_inode_info *ii = NILFS_I(inode);
186 191
187#ifdef CONFIG_NILFS_POSIX_ACL
188 if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
189 posix_acl_release(ii->i_acl);
190 ii->i_acl = NILFS_ACL_NOT_CACHED;
191 }
192 if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
193 posix_acl_release(ii->i_default_acl);
194 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
195 }
196#endif
197 /* 192 /*
198 * Free resources allocated in nilfs_read_inode(), here. 193 * Free resources allocated in nilfs_read_inode(), here.
199 */ 194 */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index e4e5c78bcc93..8b8889825716 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -32,7 +32,6 @@
32#include "cpfile.h" 32#include "cpfile.h"
33#include "sufile.h" 33#include "sufile.h"
34#include "dat.h" 34#include "dat.h"
35#include "seglist.h"
36#include "segbuf.h" 35#include "segbuf.h"
37 36
38 37
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 9b0efdad8910..477d37d83b31 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -15,6 +15,7 @@
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/kmod.h> 16#include <linux/kmod.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <asm/byteorder.h>
18 19
19static struct nls_table default_table; 20static struct nls_table default_table;
20static struct nls_table *tables = &default_table; 21static struct nls_table *tables = &default_table;
@@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] =
43 {0, /* end of table */} 44 {0, /* end of table */}
44}; 45};
45 46
46int 47#define UNICODE_MAX 0x0010ffff
47utf8_mbtowc(wchar_t *p, const __u8 *s, int n) 48#define PLANE_SIZE 0x00010000
49
50#define SURROGATE_MASK 0xfffff800
51#define SURROGATE_PAIR 0x0000d800
52#define SURROGATE_LOW 0x00000400
53#define SURROGATE_BITS 0x000003ff
54
55int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
48{ 56{
49 long l; 57 unsigned long l;
50 int c0, c, nc; 58 int c0, c, nc;
51 const struct utf8_table *t; 59 const struct utf8_table *t;
52 60
@@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
57 nc++; 65 nc++;
58 if ((c0 & t->cmask) == t->cval) { 66 if ((c0 & t->cmask) == t->cval) {
59 l &= t->lmask; 67 l &= t->lmask;
60 if (l < t->lval) 68 if (l < t->lval || l > UNICODE_MAX ||
69 (l & SURROGATE_MASK) == SURROGATE_PAIR)
61 return -1; 70 return -1;
62 *p = l; 71 *pu = (unicode_t) l;
63 return nc; 72 return nc;
64 } 73 }
65 if (n <= nc) 74 if (len <= nc)
66 return -1; 75 return -1;
67 s++; 76 s++;
68 c = (*s ^ 0x80) & 0xFF; 77 c = (*s ^ 0x80) & 0xFF;
@@ -72,90 +81,133 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
72 } 81 }
73 return -1; 82 return -1;
74} 83}
84EXPORT_SYMBOL(utf8_to_utf32);
75 85
76int 86int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
77utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
78{ 87{
79 __u16 *op; 88 unsigned long l;
80 const __u8 *ip;
81 int size;
82
83 op = pwcs;
84 ip = s;
85 while (*ip && n > 0) {
86 if (*ip & 0x80) {
87 size = utf8_mbtowc(op, ip, n);
88 if (size == -1) {
89 /* Ignore character and move on */
90 ip++;
91 n--;
92 } else {
93 op++;
94 ip += size;
95 n -= size;
96 }
97 } else {
98 *op++ = *ip++;
99 n--;
100 }
101 }
102 return (op - pwcs);
103}
104
105int
106utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
107{
108 long l;
109 int c, nc; 89 int c, nc;
110 const struct utf8_table *t; 90 const struct utf8_table *t;
111 91
112 if (!s) 92 if (!s)
113 return 0; 93 return 0;
114 94
115 l = wc; 95 l = u;
96 if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
97 return -1;
98
116 nc = 0; 99 nc = 0;
117 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { 100 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
118 nc++; 101 nc++;
119 if (l <= t->lmask) { 102 if (l <= t->lmask) {
120 c = t->shift; 103 c = t->shift;
121 *s = t->cval | (l >> c); 104 *s = (u8) (t->cval | (l >> c));
122 while (c > 0) { 105 while (c > 0) {
123 c -= 6; 106 c -= 6;
124 s++; 107 s++;
125 *s = 0x80 | ((l >> c) & 0x3F); 108 *s = (u8) (0x80 | ((l >> c) & 0x3F));
126 } 109 }
127 return nc; 110 return nc;
128 } 111 }
129 } 112 }
130 return -1; 113 return -1;
131} 114}
115EXPORT_SYMBOL(utf32_to_utf8);
132 116
133int 117int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
134utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
135{ 118{
136 const __u16 *ip; 119 u16 *op;
137 __u8 *op;
138 int size; 120 int size;
121 unicode_t u;
122
123 op = pwcs;
124 while (*s && len > 0) {
125 if (*s & 0x80) {
126 size = utf8_to_utf32(s, len, &u);
127 if (size < 0) {
128 /* Ignore character and move on */
129 size = 1;
130 } else if (u >= PLANE_SIZE) {
131 u -= PLANE_SIZE;
132 *op++ = (wchar_t) (SURROGATE_PAIR |
133 ((u >> 10) & SURROGATE_BITS));
134 *op++ = (wchar_t) (SURROGATE_PAIR |
135 SURROGATE_LOW |
136 (u & SURROGATE_BITS));
137 } else {
138 *op++ = (wchar_t) u;
139 }
140 s += size;
141 len -= size;
142 } else {
143 *op++ = *s++;
144 len--;
145 }
146 }
147 return op - pwcs;
148}
149EXPORT_SYMBOL(utf8s_to_utf16s);
150
151static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
152{
153 switch (endian) {
154 default:
155 return c;
156 case UTF16_LITTLE_ENDIAN:
157 return __le16_to_cpu(c);
158 case UTF16_BIG_ENDIAN:
159 return __be16_to_cpu(c);
160 }
161}
162
163int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
164 u8 *s, int maxlen)
165{
166 u8 *op;
167 int size;
168 unsigned long u, v;
139 169
140 op = s; 170 op = s;
141 ip = pwcs; 171 while (len > 0 && maxlen > 0) {
142 while (*ip && maxlen > 0) { 172 u = get_utf16(*pwcs, endian);
143 if (*ip > 0x7f) { 173 if (!u)
144 size = utf8_wctomb(op, *ip, maxlen); 174 break;
175 pwcs++;
176 len--;
177 if (u > 0x7f) {
178 if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
179 if (u & SURROGATE_LOW) {
180 /* Ignore character and move on */
181 continue;
182 }
183 if (len <= 0)
184 break;
185 v = get_utf16(*pwcs, endian);
186 if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
187 !(v & SURROGATE_LOW)) {
188 /* Ignore character and move on */
189 continue;
190 }
191 u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
192 + (v & SURROGATE_BITS);
193 pwcs++;
194 len--;
195 }
196 size = utf32_to_utf8(u, op, maxlen);
145 if (size == -1) { 197 if (size == -1) {
146 /* Ignore character and move on */ 198 /* Ignore character and move on */
147 maxlen--;
148 } else { 199 } else {
149 op += size; 200 op += size;
150 maxlen -= size; 201 maxlen -= size;
151 } 202 }
152 } else { 203 } else {
153 *op++ = (__u8) *ip; 204 *op++ = (u8) u;
205 maxlen--;
154 } 206 }
155 ip++;
156 } 207 }
157 return (op - s); 208 return op - s;
158} 209}
210EXPORT_SYMBOL(utf16s_to_utf8s);
159 211
160int register_nls(struct nls_table * nls) 212int register_nls(struct nls_table * nls)
161{ 213{
@@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls);
467EXPORT_SYMBOL(unload_nls); 519EXPORT_SYMBOL(unload_nls);
468EXPORT_SYMBOL(load_nls); 520EXPORT_SYMBOL(load_nls);
469EXPORT_SYMBOL(load_nls_default); 521EXPORT_SYMBOL(load_nls_default);
470EXPORT_SYMBOL(utf8_mbtowc);
471EXPORT_SYMBOL(utf8_mbstowcs);
472EXPORT_SYMBOL(utf8_wctomb);
473EXPORT_SYMBOL(utf8_wcstombs);
474 522
475MODULE_LICENSE("Dual BSD/GPL"); 523MODULE_LICENSE("Dual BSD/GPL");
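
The surrogate-pair constants introduced above (PLANE_SIZE, SURROGATE_PAIR, SURROGATE_LOW, SURROGATE_BITS) drive the new utf8s_to_utf16s()/utf16s_to_utf8s() helpers. As a minimal userspace sketch, not part of the patch, the same arithmetic can be exercised on its own; the program below round-trips U+1D11E, a code point outside the Basic Multilingual Plane, through a UTF-16 surrogate pair using the identical masks.

/* surrogate_demo.c - round-trip a non-BMP code point through the same
 * surrogate-pair arithmetic as the new utf8s_to_utf16s()/utf16s_to_utf8s()
 * helpers, using the constants defined above.  Userspace sketch only.
 * Build: cc -o surrogate_demo surrogate_demo.c
 */
#include <assert.h>
#include <stdio.h>

#define UNICODE_MAX     0x0010ffff
#define PLANE_SIZE      0x00010000
#define SURROGATE_MASK  0xfffff800
#define SURROGATE_PAIR  0x0000d800
#define SURROGATE_LOW   0x00000400
#define SURROGATE_BITS  0x000003ff

int main(void)
{
        unsigned long u = 0x1d11e;              /* U+1D11E, outside the BMP */
        unsigned int hi, lo;
        unsigned long decoded;

        /* Encode: the u >= PLANE_SIZE branch of utf8s_to_utf16s(). */
        assert(u >= PLANE_SIZE && u <= UNICODE_MAX);
        hi = SURROGATE_PAIR | (((u - PLANE_SIZE) >> 10) & SURROGATE_BITS);
        lo = SURROGATE_PAIR | SURROGATE_LOW | ((u - PLANE_SIZE) & SURROGATE_BITS);
        printf("U+%05lX -> %04X %04X\n", u, hi, lo);

        /* Decode: what utf16s_to_utf8s() does when it sees a high
         * surrogate followed by a low surrogate. */
        assert((hi & SURROGATE_MASK) == SURROGATE_PAIR && !(hi & SURROGATE_LOW));
        assert((lo & SURROGATE_MASK) == SURROGATE_PAIR &&  (lo & SURROGATE_LOW));
        decoded = PLANE_SIZE + ((hi & SURROGATE_BITS) << 10) + (lo & SURROGATE_BITS);
        printf("%04X %04X -> U+%05lX\n", hi, lo, decoded);
        assert(decoded == u);
        return 0;
}

It prints U+1D11E -> D834 DD1E and decodes the pair back again, matching the arithmetic in the hunks above.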
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index aa2c42fdd977..0d60a44acacd 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
15{ 15{
16 int n; 16 int n;
17 17
18 if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { 18 if (boundlen <= 0)
19 return -ENAMETOOLONG;
20
21 n = utf32_to_utf8(uni, out, boundlen);
22 if (n < 0) {
19 *out = '?'; 23 *out = '?';
20 return -EINVAL; 24 return -EINVAL;
21 } 25 }
@@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
25static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) 29static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
26{ 30{
27 int n; 31 int n;
32 unicode_t u;
28 33
29 if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { 34 n = utf8_to_utf32(rawstring, boundlen, &u);
35 if (n < 0 || u > MAX_WCHAR_T) {
30 *uni = 0x003f; /* ? */ 36 *uni = 0x003f; /* ? */
31 n = -EINVAL; 37 return -EINVAL;
32 } 38 }
39 *uni = (wchar_t) u;
33 return n; 40 return n;
34} 41}
35 42
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ea2605a58b8a..f234f3a4c8ca 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -15,7 +15,8 @@ struct inotify_inode_mark_entry {
15 int wd; 15 int wd;
16}; 16};
17 17
18extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); 18extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
19 struct fsnotify_group *group);
19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 20extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
20 21
21extern const struct fsnotify_ops inotify_fsnotify_ops; 22extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 7ef75b83247e..47cd258fd24d 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -81,7 +81,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
81 81
82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
83{ 83{
84 inotify_destroy_mark_entry(entry, group); 84 inotify_ignored_and_remove_idr(entry, group);
85} 85}
86 86
87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) 87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 982a412ac5bc..ff231ad23895 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -363,39 +363,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
363} 363}
364 364
365/* 365/*
366 * When, for whatever reason, inotify is done with a mark (or what used to be a 366 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
367 * watch) we need to remove that watch from the idr and we need to send IN_IGNORED 367 * internal reference held on the mark because it is in the idr.
368 * for the given wd.
369 *
370 * There is a bit of recursion here. The loop looks like:
371 * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
372 * inotify_freeing_mark -> inotify_destory_mark_entry -> restart
373 * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets
374 * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
375 * test below will not call back to fsnotify again. But even if that test wasn't
376 * there this would still be safe since fsnotify_destroy_mark_by_entry() is
377 * safe from recursion.
378 */ 368 */
379void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 369void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
370 struct fsnotify_group *group)
380{ 371{
381 struct inotify_inode_mark_entry *ientry; 372 struct inotify_inode_mark_entry *ientry;
382 struct inotify_event_private_data *event_priv; 373 struct inotify_event_private_data *event_priv;
383 struct fsnotify_event_private_data *fsn_event_priv; 374 struct fsnotify_event_private_data *fsn_event_priv;
384 struct fsnotify_group *egroup;
385 struct idr *idr; 375 struct idr *idr;
386 376
387 spin_lock(&entry->lock);
388 egroup = entry->group;
389
390 /* if egroup we aren't really done and something might still send events
391 * for this inode, on the callback we'll send the IN_IGNORED */
392 if (egroup) {
393 spin_unlock(&entry->lock);
394 fsnotify_destroy_mark_by_entry(entry);
395 return;
396 }
397 spin_unlock(&entry->lock);
398
399 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 377 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
400 378
401 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 379 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
@@ -699,7 +677,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
699 fsnotify_get_mark(entry); 677 fsnotify_get_mark(entry);
700 spin_unlock(&group->inotify_data.idr_lock); 678 spin_unlock(&group->inotify_data.idr_lock);
701 679
702 inotify_destroy_mark_entry(entry, group); 680 fsnotify_destroy_mark_by_entry(entry);
703 fsnotify_put_mark(entry); 681 fsnotify_put_mark(entry);
704 682
705out: 683out:
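
What the restructuring preserves is the userspace contract: removing a watch still queues an IN_IGNORED event for its wd, it is just generated from inotify_ignored_and_remove_idr() while the mark teardown itself goes through fsnotify_destroy_mark_by_entry(). A minimal userspace check of that contract (assuming /tmp exists and nothing else generates events on it in the meantime; not part of the patch):

/* ignored_demo.c - verify that removing an inotify watch queues IN_IGNORED.
 * Build: cc -o ignored_demo ignored_demo.c
 */
#include <sys/inotify.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct inotify_event ev;
        int fd, wd;

        fd = inotify_init();
        if (fd < 0) {
                perror("inotify_init");
                return 1;
        }
        wd = inotify_add_watch(fd, "/tmp", IN_CREATE);
        if (wd < 0) {
                perror("inotify_add_watch");
                return 1;
        }

        /* Dropping the watch makes the kernel queue IN_IGNORED for wd. */
        inotify_rm_watch(fd, wd);

        /* IN_IGNORED carries no name, so a bare struct is enough here. */
        if (read(fd, &ev, sizeof(ev)) == sizeof(ev) && (ev.mask & IN_IGNORED))
                printf("wd %d: IN_IGNORED received\n", ev.wd);

        close(fd);
        return 0;
}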
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 82c5085559c6..9938034762cc 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -27,6 +27,7 @@
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/log2.h>
30 31
31#include "aops.h" 32#include "aops.h"
32#include "attrib.h" 33#include "attrib.h"
@@ -1570,7 +1571,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1570 ntfs_debug("Index collation rule is 0x%x.", 1571 ntfs_debug("Index collation rule is 0x%x.",
1571 le32_to_cpu(ir->collation_rule)); 1572 le32_to_cpu(ir->collation_rule));
1572 ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); 1573 ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1573 if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) { 1574 if (!is_power_of_2(ni->itype.index.block_size)) {
1574 ntfs_error(vi->i_sb, "Index block size (%u) is not a power of " 1575 ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
1575 "two.", ni->itype.index.block_size); 1576 "two.", ni->itype.index.block_size);
1576 goto unm_err_out; 1577 goto unm_err_out;
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index d7932e95b1fd..89b02985c054 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -26,6 +26,7 @@
26#include <linux/highmem.h> 26#include <linux/highmem.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/bitops.h> 28#include <linux/bitops.h>
29#include <linux/log2.h>
29 30
30#include "attrib.h" 31#include "attrib.h"
31#include "aops.h" 32#include "aops.h"
@@ -65,7 +66,7 @@ static bool ntfs_check_restart_page_header(struct inode *vi,
65 logfile_log_page_size < NTFS_BLOCK_SIZE || 66 logfile_log_page_size < NTFS_BLOCK_SIZE ||
66 logfile_system_page_size & 67 logfile_system_page_size &
67 (logfile_system_page_size - 1) || 68 (logfile_system_page_size - 1) ||
68 logfile_log_page_size & (logfile_log_page_size - 1)) { 69 !is_power_of_2(logfile_log_page_size)) {
69 ntfs_error(vi->i_sb, "$LogFile uses unsupported page size."); 70 ntfs_error(vi->i_sb, "$LogFile uses unsupported page size.");
70 return false; 71 return false;
71 } 72 }
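
Both NTFS hunks replace the open-coded power-of-two test with is_power_of_2() from <linux/log2.h>. The two forms agree for every nonzero value; the helper additionally rejects zero, which the old size & (size - 1) check lets through. A small standalone comparison, not taken from the patch:

/* poweroftwo_demo.c - compare the open-coded test that the NTFS hunks
 * remove with a userspace copy of is_power_of_2().
 * Build: cc -o poweroftwo_demo poweroftwo_demo.c
 */
#include <stdbool.h>
#include <stdio.h>

/* Same definition as the kernel helper in <linux/log2.h>. */
static bool is_power_of_2(unsigned long n)
{
        return n != 0 && ((n & (n - 1)) == 0);
}

int main(void)
{
        unsigned long vals[] = { 0, 1, 2, 3, 512, 4096, 4097 };
        size_t i;

        for (i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
                unsigned long v = vals[i];
                /* The check being removed: "accepted" when clearing the
                 * lowest set bit leaves nothing.  Note that 0 passes. */
                int old_accepts = !(v & (v - 1));

                printf("%6lu: old test accepts=%d  is_power_of_2=%d\n",
                       v, old_accepts, (int)is_power_of_2(v));
        }
        return 0;
}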
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 678a067d9251..9edcde4974aa 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -475,6 +475,12 @@ struct ocfs2_path {
475#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) 475#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
476#define path_num_items(_path) ((_path)->p_tree_depth + 1) 476#define path_num_items(_path) ((_path)->p_tree_depth + 1)
477 477
478static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
479 u32 cpos);
480static void ocfs2_adjust_rightmost_records(struct inode *inode,
481 handle_t *handle,
482 struct ocfs2_path *path,
483 struct ocfs2_extent_rec *insert_rec);
478/* 484/*
479 * Reset the actual path elements so that we can re-use the structure 485 * Reset the actual path elements so that we can re-use the structure
480 * to build another path. Generally, this involves freeing the buffer 486 * to build another path. Generally, this involves freeing the buffer
@@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
1013} 1019}
1014 1020
1015/* 1021/*
1022 * Change the range of the branches in the rightmost path according to the leaf
1023 * extent block's rightmost record.
1024 */
1025static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1026 struct inode *inode,
1027 struct ocfs2_extent_tree *et)
1028{
1029 int status;
1030 struct ocfs2_path *path = NULL;
1031 struct ocfs2_extent_list *el;
1032 struct ocfs2_extent_rec *rec;
1033
1034 path = ocfs2_new_path_from_et(et);
1035 if (!path) {
1036 status = -ENOMEM;
1037 return status;
1038 }
1039
1040 status = ocfs2_find_path(inode, path, UINT_MAX);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out;
1044 }
1045
1046 status = ocfs2_extend_trans(handle, path_num_items(path) +
1047 handle->h_buffer_credits);
1048 if (status < 0) {
1049 mlog_errno(status);
1050 goto out;
1051 }
1052
1053 status = ocfs2_journal_access_path(inode, handle, path);
1054 if (status < 0) {
1055 mlog_errno(status);
1056 goto out;
1057 }
1058
1059 el = path_leaf_el(path);
1060 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
1061
1062 ocfs2_adjust_rightmost_records(inode, handle, path, rec);
1063
1064out:
1065 ocfs2_free_path(path);
1066 return status;
1067}
1068
1069/*
1016 * Add an entire tree branch to our inode. eb_bh is the extent block 1070 * Add an entire tree branch to our inode. eb_bh is the extent block
1017 * to start at, if we don't want to start the branch at the dinode 1071 * to start at, if we don't want to start the branch at the dinode
1018 * structure. 1072 * structure.
@@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1038 struct ocfs2_extent_block *eb; 1092 struct ocfs2_extent_block *eb;
1039 struct ocfs2_extent_list *eb_el; 1093 struct ocfs2_extent_list *eb_el;
1040 struct ocfs2_extent_list *el; 1094 struct ocfs2_extent_list *el;
1041 u32 new_cpos; 1095 u32 new_cpos, root_end;
1042 1096
1043 mlog_entry_void(); 1097 mlog_entry_void();
1044 1098
@@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1055 1109
1056 new_blocks = le16_to_cpu(el->l_tree_depth); 1110 new_blocks = le16_to_cpu(el->l_tree_depth);
1057 1111
1112 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
1113 new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
1114 root_end = ocfs2_sum_rightmost_rec(et->et_root_el);
1115
1116 /*
1117 * If there is a gap before the root end and the real end
1118 * of the rightmost leaf block, we need to remove the gap
1119 * between new_cpos and root_end first so that the tree
1120 * is consistent after we add a new branch (it will start
1121 * from new_cpos).
1122 */
1123 if (root_end > new_cpos) {
1124 mlog(0, "adjust the cluster end from %u to %u\n",
1125 root_end, new_cpos);
1126 status = ocfs2_adjust_rightmost_branch(handle, inode, et);
1127 if (status) {
1128 mlog_errno(status);
1129 goto bail;
1130 }
1131 }
1132
1058 /* allocate the number of new eb blocks we need */ 1133 /* allocate the number of new eb blocks we need */
1059 new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 1134 new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
1060 GFP_KERNEL); 1135 GFP_KERNEL);
@@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1071 goto bail; 1146 goto bail;
1072 } 1147 }
1073 1148
1074 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
1075 new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
1076
1077 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 1149 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
1078 * linked with the rest of the tree. 1150 * linked with the rest of the tree.
1079 * conversely, new_eb_bhs[0] is the new bottommost leaf. 1151 * conversely, new_eb_bhs[0] is the new bottommost leaf.
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 2a947c44e594..a1163b8b417c 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -22,6 +22,9 @@
22#include <linux/crc32.h> 22#include <linux/crc32.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/bitops.h> 24#include <linux/bitops.h>
25#include <linux/debugfs.h>
26#include <linux/module.h>
27#include <linux/fs.h>
25#include <asm/byteorder.h> 28#include <asm/byteorder.h>
26 29
27#include <cluster/masklog.h> 30#include <cluster/masklog.h>
@@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
222 ocfs2_hamming_fix(data, blocksize * 8, 0, fix); 225 ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
223} 226}
224 227
228
229/*
230 * Debugfs handling.
231 */
232
233#ifdef CONFIG_DEBUG_FS
234
235static int blockcheck_u64_get(void *data, u64 *val)
236{
237 *val = *(u64 *)data;
238 return 0;
239}
240DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
241
242static struct dentry *blockcheck_debugfs_create(const char *name,
243 struct dentry *parent,
244 u64 *value)
245{
246 return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
247 &blockcheck_fops);
248}
249
250static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
251{
252 if (stats) {
253 debugfs_remove(stats->b_debug_check);
254 stats->b_debug_check = NULL;
255 debugfs_remove(stats->b_debug_failure);
256 stats->b_debug_failure = NULL;
257 debugfs_remove(stats->b_debug_recover);
258 stats->b_debug_recover = NULL;
259 debugfs_remove(stats->b_debug_dir);
260 stats->b_debug_dir = NULL;
261 }
262}
263
264static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
265 struct dentry *parent)
266{
267 int rc = -EINVAL;
268
269 if (!stats)
270 goto out;
271
272 stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
273 if (!stats->b_debug_dir)
274 goto out;
275
276 stats->b_debug_check =
277 blockcheck_debugfs_create("blocks_checked",
278 stats->b_debug_dir,
279 &stats->b_check_count);
280
281 stats->b_debug_failure =
282 blockcheck_debugfs_create("checksums_failed",
283 stats->b_debug_dir,
284 &stats->b_failure_count);
285
286 stats->b_debug_recover =
287 blockcheck_debugfs_create("ecc_recoveries",
288 stats->b_debug_dir,
289 &stats->b_recover_count);
290 if (stats->b_debug_check && stats->b_debug_failure &&
291 stats->b_debug_recover)
292 rc = 0;
293
294out:
295 if (rc)
296 ocfs2_blockcheck_debug_remove(stats);
297 return rc;
298}
299#else
300static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
301 struct dentry *parent)
302{
303 return 0;
304}
305
306static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
307{
308}
309#endif /* CONFIG_DEBUG_FS */
310
311/* Always-called wrappers for starting and stopping the debugfs files */
312int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
313 struct dentry *parent)
314{
315 return ocfs2_blockcheck_debug_install(stats, parent);
316}
317
318void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
319{
320 ocfs2_blockcheck_debug_remove(stats);
321}
322
323static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats)
324{
325 u64 new_count;
326
327 if (!stats)
328 return;
329
330 spin_lock(&stats->b_lock);
331 stats->b_check_count++;
332 new_count = stats->b_check_count;
333 spin_unlock(&stats->b_lock);
334
335 if (!new_count)
336 mlog(ML_NOTICE, "Block check count has wrapped\n");
337}
338
339static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats)
340{
341 u64 new_count;
342
343 if (!stats)
344 return;
345
346 spin_lock(&stats->b_lock);
347 stats->b_failure_count++;
348 new_count = stats->b_failure_count;
349 spin_unlock(&stats->b_lock);
350
351 if (!new_count)
352 mlog(ML_NOTICE, "Checksum failure count has wrapped\n");
353}
354
355static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats)
356{
357 u64 new_count;
358
359 if (!stats)
360 return;
361
362 spin_lock(&stats->b_lock);
363 stats->b_recover_count++;
364 new_count = stats->b_recover_count;
365 spin_unlock(&stats->b_lock);
366
367 if (!new_count)
368 mlog(ML_NOTICE, "ECC recovery count has wrapped\n");
369}
370
371
372
373/*
374 * These are the low-level APIs for using the ocfs2_block_check structure.
375 */
376
225/* 377/*
226 * This function generates check information for a block. 378 * This function generates check information for a block.
227 * data is the block to be checked. bc is a pointer to the 379 * data is the block to be checked. bc is a pointer to the
@@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,
266 * Again, the data passed in should be the on-disk endian. 418 * Again, the data passed in should be the on-disk endian.
267 */ 419 */
268int ocfs2_block_check_validate(void *data, size_t blocksize, 420int ocfs2_block_check_validate(void *data, size_t blocksize,
269 struct ocfs2_block_check *bc) 421 struct ocfs2_block_check *bc,
422 struct ocfs2_blockcheck_stats *stats)
270{ 423{
271 int rc = 0; 424 int rc = 0;
272 struct ocfs2_block_check check; 425 struct ocfs2_block_check check;
273 u32 crc, ecc; 426 u32 crc, ecc;
274 427
428 ocfs2_blockcheck_inc_check(stats);
429
275 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); 430 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
276 check.bc_ecc = le16_to_cpu(bc->bc_ecc); 431 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
277 432
@@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
282 if (crc == check.bc_crc32e) 437 if (crc == check.bc_crc32e)
283 goto out; 438 goto out;
284 439
440 ocfs2_blockcheck_inc_failure(stats);
285 mlog(ML_ERROR, 441 mlog(ML_ERROR,
286 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", 442 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
287 (unsigned int)check.bc_crc32e, (unsigned int)crc); 443 (unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
292 448
293 /* And check the crc32 again */ 449 /* And check the crc32 again */
294 crc = crc32_le(~0, data, blocksize); 450 crc = crc32_le(~0, data, blocksize);
295 if (crc == check.bc_crc32e) 451 if (crc == check.bc_crc32e) {
452 ocfs2_blockcheck_inc_recover(stats);
296 goto out; 453 goto out;
454 }
297 455
298 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", 456 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
299 (unsigned int)check.bc_crc32e, (unsigned int)crc); 457 (unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
366 * Again, the data passed in should be the on-disk endian. 524 * Again, the data passed in should be the on-disk endian.
367 */ 525 */
368int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, 526int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
369 struct ocfs2_block_check *bc) 527 struct ocfs2_block_check *bc,
528 struct ocfs2_blockcheck_stats *stats)
370{ 529{
371 int i, rc = 0; 530 int i, rc = 0;
372 struct ocfs2_block_check check; 531 struct ocfs2_block_check check;
@@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
377 if (!nr) 536 if (!nr)
378 return 0; 537 return 0;
379 538
539 ocfs2_blockcheck_inc_check(stats);
540
380 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); 541 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
381 check.bc_ecc = le16_to_cpu(bc->bc_ecc); 542 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
382 543
@@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
388 if (crc == check.bc_crc32e) 549 if (crc == check.bc_crc32e)
389 goto out; 550 goto out;
390 551
552 ocfs2_blockcheck_inc_failure(stats);
391 mlog(ML_ERROR, 553 mlog(ML_ERROR,
392 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", 554 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
393 (unsigned int)check.bc_crc32e, (unsigned int)crc); 555 (unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
416 /* And check the crc32 again */ 578 /* And check the crc32 again */
417 for (i = 0, crc = ~0; i < nr; i++) 579 for (i = 0, crc = ~0; i < nr; i++)
418 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); 580 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
419 if (crc == check.bc_crc32e) 581 if (crc == check.bc_crc32e) {
582 ocfs2_blockcheck_inc_recover(stats);
420 goto out; 583 goto out;
584 }
421 585
422 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", 586 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
423 (unsigned int)check.bc_crc32e, (unsigned int)crc); 587 (unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
448 struct ocfs2_block_check *bc) 612 struct ocfs2_block_check *bc)
449{ 613{
450 int rc = 0; 614 int rc = 0;
615 struct ocfs2_super *osb = OCFS2_SB(sb);
451 616
452 if (ocfs2_meta_ecc(OCFS2_SB(sb))) 617 if (ocfs2_meta_ecc(osb))
453 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); 618 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc,
619 &osb->osb_ecc_stats);
454 620
455 return rc; 621 return rc;
456} 622}
@@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
468 struct ocfs2_block_check *bc) 634 struct ocfs2_block_check *bc)
469{ 635{
470 int rc = 0; 636 int rc = 0;
637 struct ocfs2_super *osb = OCFS2_SB(sb);
471 638
472 if (ocfs2_meta_ecc(OCFS2_SB(sb))) 639 if (ocfs2_meta_ecc(osb))
473 rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); 640 rc = ocfs2_block_check_validate_bhs(bhs, nr, bc,
641 &osb->osb_ecc_stats);
474 642
475 return rc; 643 return rc;
476} 644}
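
The ocfs2_blockcheck_inc_check/_failure/_recover helpers above wrap every validation in locked counter updates that feed the new debugfs files. The shape of that pattern is easier to see outside the kernel; the following userspace sketch mirrors it with a stand-in checksum (the names echo the patch, but nothing here is ocfs2 code):

/* blockcheck_stats_demo.c - userspace sketch of the counting pattern the
 * ocfs2 hunks add around block validation.
 * Build: cc -pthread -o blockcheck_stats_demo blockcheck_stats_demo.c
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct blockcheck_stats {
        pthread_mutex_t lock;
        uint64_t check_count;           /* blocks validated */
        uint64_t failure_count;         /* checksum mismatches */
        uint64_t recover_count;         /* mismatches fixed by ECC */
};

static void inc(struct blockcheck_stats *s, uint64_t *counter)
{
        pthread_mutex_lock(&s->lock);
        (*counter)++;
        pthread_mutex_unlock(&s->lock);
}

static uint32_t toy_sum(const void *data, size_t len)   /* stand-in for crc32_le() */
{
        const uint8_t *p = data;
        uint32_t sum = 0;

        while (len--)
                sum = sum * 31 + *p++;
        return sum;
}

static int validate(const void *data, size_t len, uint32_t stored,
                    struct blockcheck_stats *stats)
{
        inc(stats, &stats->check_count);
        if (toy_sum(data, len) == stored)
                return 0;

        inc(stats, &stats->failure_count);
        /* A real implementation would attempt the ECC fix here and, on
         * success, bump recover_count and return 0 instead. */
        return -1;
}

int main(void)
{
        struct blockcheck_stats stats = { .lock = PTHREAD_MUTEX_INITIALIZER };
        char block[16] = "hello, world";
        uint32_t good = toy_sum(block, sizeof(block));

        validate(block, sizeof(block), good, &stats);      /* passes */
        validate(block, sizeof(block), good ^ 1, &stats);  /* fails  */

        printf("checked=%llu failed=%llu recovered=%llu\n",
               (unsigned long long)stats.check_count,
               (unsigned long long)stats.failure_count,
               (unsigned long long)stats.recover_count);
        return 0;
}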
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
index 70ec3feda32f..d4b69febf70a 100644
--- a/fs/ocfs2/blockcheck.h
+++ b/fs/ocfs2/blockcheck.h
@@ -21,6 +21,24 @@
21#define OCFS2_BLOCKCHECK_H 21#define OCFS2_BLOCKCHECK_H
22 22
23 23
24/* Count errors and error correction from blockcheck.c */
25struct ocfs2_blockcheck_stats {
26 spinlock_t b_lock;
27 u64 b_check_count; /* Number of blocks we've checked */
28 u64 b_failure_count; /* Number of failed checksums */
29 u64 b_recover_count; /* Number of blocks fixed by ecc */
30
31 /*
32 * debugfs entries, used if this is passed to
33 * ocfs2_blockcheck_stats_debugfs_install()
34 */
35 struct dentry *b_debug_dir; /* Parent of the debugfs files */
36 struct dentry *b_debug_check; /* Exposes b_check_count */
37 struct dentry *b_debug_failure; /* Exposes b_failure_count */
38 struct dentry *b_debug_recover; /* Exposes b_recover_count */
39};
40
41
24/* High level block API */ 42/* High level block API */
25void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, 43void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
26 struct ocfs2_block_check *bc); 44 struct ocfs2_block_check *bc);
@@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
37void ocfs2_block_check_compute(void *data, size_t blocksize, 55void ocfs2_block_check_compute(void *data, size_t blocksize,
38 struct ocfs2_block_check *bc); 56 struct ocfs2_block_check *bc);
39int ocfs2_block_check_validate(void *data, size_t blocksize, 57int ocfs2_block_check_validate(void *data, size_t blocksize,
40 struct ocfs2_block_check *bc); 58 struct ocfs2_block_check *bc,
59 struct ocfs2_blockcheck_stats *stats);
41void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, 60void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
42 struct ocfs2_block_check *bc); 61 struct ocfs2_block_check *bc);
43int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, 62int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
44 struct ocfs2_block_check *bc); 63 struct ocfs2_block_check *bc,
64 struct ocfs2_blockcheck_stats *stats);
65
66/* Debug Initialization */
67int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
68 struct dentry *parent);
69void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);
45 70
46/* 71/*
47 * Hamming code functions 72 * Hamming code functions
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 7e72a81bc2d4..696c32e50716 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -48,34 +48,33 @@
48 * only emit the appropriate printk() when the caller passes in a constant 48 * only emit the appropriate printk() when the caller passes in a constant
49 * mask, as is almost always the case. 49 * mask, as is almost always the case.
50 * 50 *
51 * All this bitmask nonsense is hidden from the /proc interface so that Joel 51 * All this bitmask nonsense is managed from the files under
52 * doesn't have an aneurism. Reading the file gives a straight forward 52 * /sys/fs/o2cb/logmask/. Reading the files gives a straightforward
53 * indication of which bits are on or off: 53 * indication of which bits are allowed (allow) or denied (off/deny).
54 * ENTRY off 54 * ENTRY deny
55 * EXIT off 55 * EXIT deny
56 * TCP off 56 * TCP off
57 * MSG off 57 * MSG off
58 * SOCKET off 58 * SOCKET off
59 * ERROR off 59 * ERROR allow
60 * NOTICE on 60 * NOTICE allow
61 * 61 *
62 * Writing changes the state of a given bit and requires a strictly formatted 62 * Writing changes the state of a given bit and requires a strictly formatted
63 * single write() call: 63 * single write() call:
64 * 64 *
65 * write(fd, "ENTRY on", 8); 65 * write(fd, "allow", 5);
66 * 66 *
67 * would turn the entry bit on. "1" is also accepted in the place of "on", and 67 * Echoing allow/deny/off string into the logmask files can flip the bits
68 * "off" and "0" behave as expected. 68 * on or off as expected; here is the bash script for example:
69 * 69 *
70 * Some trivial shell can flip all the bits on or off: 70 * log_mask="/sys/fs/o2cb/log_mask"
71 * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do
72 * echo allow >"$log_mask"/"$node"
73 * done
71 * 74 *
72 * log_mask="/proc/fs/ocfs2_nodemanager/log_mask" 75 * The debugfs.ocfs2 tool can also flip the bits with the -l option:
73 * cat $log_mask | ( 76 *
74 * while read bit status; do 77 * debugfs.ocfs2 -l TCP allow
75 * # $1 is "on" or "off", say
76 * echo "$bit $1" > $log_mask
77 * done
78 * )
79 */ 78 */
80 79
81/* for task_struct */ 80/* for task_struct */
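
The rewritten comment documents the current interface: each mask bit is its own file under sysfs that accepts the strings allow, deny, or off, and the bash loop shows the usual way to flip them. The same write can be issued from C; a minimal sketch, with the /sys/fs/o2cb/log_mask path taken from the comment above and therefore only an assumption:

/* logmask_demo.c - flip one o2cb log mask bit from C, equivalent to
 *   echo allow > /sys/fs/o2cb/log_mask/ERROR
 * The sysfs path may differ between kernel versions; treat it as an
 * assumption.
 * Build: cc -o logmask_demo logmask_demo.c
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int set_mask_bit(const char *bit, const char *state)
{
        char path[128];
        ssize_t n;
        int fd;

        snprintf(path, sizeof(path), "/sys/fs/o2cb/log_mask/%s", bit);
        fd = open(path, O_WRONLY);
        if (fd < 0) {
                perror(path);
                return -1;
        }
        n = write(fd, state, strlen(state));
        close(fd);
        return n < 0 ? -1 : 0;
}

int main(void)
{
        return set_mask_bit("ERROR", "allow") ? 1 : 0;
}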
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9fbe849f6344..334f231a422c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
974int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, 974int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
975 size_t caller_veclen, u8 target_node, int *status) 975 size_t caller_veclen, u8 target_node, int *status)
976{ 976{
977 int ret, error = 0; 977 int ret;
978 struct o2net_msg *msg = NULL; 978 struct o2net_msg *msg = NULL;
979 size_t veclen, caller_bytes = 0; 979 size_t veclen, caller_bytes = 0;
980 struct kvec *vec = NULL; 980 struct kvec *vec = NULL;
@@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
1015 1015
1016 o2net_set_nst_sock_time(&nst); 1016 o2net_set_nst_sock_time(&nst);
1017 1017
1018 ret = wait_event_interruptible(nn->nn_sc_wq, 1018 wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret));
1019 o2net_tx_can_proceed(nn, &sc, &error));
1020 if (!ret && error)
1021 ret = error;
1022 if (ret) 1019 if (ret)
1023 goto out; 1020 goto out;
1024 1021
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c5752305627c..b358f3bf896d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2900 alloc = ocfs2_clusters_for_bytes(sb, bytes); 2900 alloc = ocfs2_clusters_for_bytes(sb, bytes);
2901 dx_alloc = 0; 2901 dx_alloc = 0;
2902 2902
2903 down_write(&oi->ip_alloc_sem);
2904
2903 if (ocfs2_supports_indexed_dirs(osb)) { 2905 if (ocfs2_supports_indexed_dirs(osb)) {
2904 credits += ocfs2_add_dir_index_credits(sb); 2906 credits += ocfs2_add_dir_index_credits(sb);
2905 2907
@@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2940 goto out; 2942 goto out;
2941 } 2943 }
2942 2944
2943 down_write(&oi->ip_alloc_sem);
2944
2945 /* 2945 /*
2946 * Prepare for worst case allocation scenario of two separate 2946 * Prepare for worst case allocation scenario of two separate
2947 * extents in the unindexed tree. 2947 * extents in the unindexed tree.
@@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2953 if (IS_ERR(handle)) { 2953 if (IS_ERR(handle)) {
2954 ret = PTR_ERR(handle); 2954 ret = PTR_ERR(handle);
2955 mlog_errno(ret); 2955 mlog_errno(ret);
2956 goto out_sem; 2956 goto out;
2957 } 2957 }
2958 2958
2959 if (vfs_dq_alloc_space_nodirty(dir, 2959 if (vfs_dq_alloc_space_nodirty(dir,
@@ -3172,10 +3172,8 @@ out_commit:
3172 3172
3173 ocfs2_commit_trans(osb, handle); 3173 ocfs2_commit_trans(osb, handle);
3174 3174
3175out_sem:
3176 up_write(&oi->ip_alloc_sem);
3177
3178out: 3175out:
3176 up_write(&oi->ip_alloc_sem);
3179 if (data_ac) 3177 if (data_ac)
3180 ocfs2_free_alloc_context(data_ac); 3178 ocfs2_free_alloc_context(data_ac);
3181 if (meta_ac) 3179 if (meta_ac)
@@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3322 brelse(new_bh); 3320 brelse(new_bh);
3323 new_bh = NULL; 3321 new_bh = NULL;
3324 3322
3323 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3324 drop_alloc_sem = 1;
3325 dir_i_size = i_size_read(dir); 3325 dir_i_size = i_size_read(dir);
3326 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 3326 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3327 goto do_extend; 3327 goto do_extend;
3328 } 3328 }
3329 3329
3330 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3331 drop_alloc_sem = 1;
3330 dir_i_size = i_size_read(dir); 3332 dir_i_size = i_size_read(dir);
3331 mlog(0, "extending dir %llu (i_size = %lld)\n", 3333 mlog(0, "extending dir %llu (i_size = %lld)\n",
3332 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 3334 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
@@ -3370,9 +3372,6 @@ do_extend:
3370 credits++; /* For attaching the new dirent block to the 3372 credits++; /* For attaching the new dirent block to the
3371 * dx_root */ 3373 * dx_root */
3372 3374
3373 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3374 drop_alloc_sem = 1;
3375
3376 handle = ocfs2_start_trans(osb, credits); 3375 handle = ocfs2_start_trans(osb, credits);
3377 if (IS_ERR(handle)) { 3376 if (IS_ERR(handle)) {
3378 status = PTR_ERR(handle); 3377 status = PTR_ERR(handle);
@@ -3435,10 +3434,10 @@ bail_bh:
3435 *new_de_bh = new_bh; 3434 *new_de_bh = new_bh;
3436 get_bh(*new_de_bh); 3435 get_bh(*new_de_bh);
3437bail: 3436bail:
3438 if (drop_alloc_sem)
3439 up_write(&OCFS2_I(dir)->ip_alloc_sem);
3440 if (handle) 3437 if (handle)
3441 ocfs2_commit_trans(osb, handle); 3438 ocfs2_commit_trans(osb, handle);
3439 if (drop_alloc_sem)
3440 up_write(&OCFS2_I(dir)->ip_alloc_sem);
3442 3441
3443 if (data_ac) 3442 if (data_ac)
3444 ocfs2_free_alloc_context(data_ac); 3443 ocfs2_free_alloc_context(data_ac);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e15fc7d50827..110bb57c46ab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
92 enum ocfs2_unblock_action unblock_action; 92 enum ocfs2_unblock_action unblock_action;
93}; 93};
94 94
95/* Lockdep class keys */
96struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
97
95static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 98static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
96 int new_level); 99 int new_level);
97static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 100static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -248,6 +251,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
248 .flags = 0, 251 .flags = 0,
249}; 252};
250 253
254static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
255 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
256};
257
251static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 258static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
252 .get_osb = ocfs2_get_dentry_osb, 259 .get_osb = ocfs2_get_dentry_osb,
253 .post_unlock = ocfs2_dentry_post_unlock, 260 .post_unlock = ocfs2_dentry_post_unlock,
@@ -313,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
313 u32 dlm_flags); 320 u32 dlm_flags);
314static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 321static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
315 int wanted); 322 int wanted);
316static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 323static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
317 struct ocfs2_lock_res *lockres, 324 struct ocfs2_lock_res *lockres,
318 int level); 325 int level, unsigned long caller_ip);
326static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
327 struct ocfs2_lock_res *lockres,
328 int level)
329{
330 __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
331}
332
319static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 333static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
320static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 334static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
321static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 335static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -485,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
485 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 499 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
486 500
487 ocfs2_init_lock_stats(res); 501 ocfs2_init_lock_stats(res);
502#ifdef CONFIG_DEBUG_LOCK_ALLOC
503 if (type != OCFS2_LOCK_TYPE_OPEN)
504 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
505 &lockdep_keys[type], 0);
506 else
507 res->l_lockdep_map.key = NULL;
508#endif
488} 509}
489 510
490void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 511void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -637,6 +658,15 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
637 &ocfs2_nfs_sync_lops, osb); 658 &ocfs2_nfs_sync_lops, osb);
638} 659}
639 660
661static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
662 struct ocfs2_super *osb)
663{
664 ocfs2_lock_res_init_once(res);
665 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
666 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
667 &ocfs2_orphan_scan_lops, osb);
668}
669
640void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 670void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
641 struct ocfs2_file_private *fp) 671 struct ocfs2_file_private *fp)
642{ 672{
@@ -1239,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1239 return ret; 1269 return ret;
1240} 1270}
1241 1271
1242static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1272static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1243 struct ocfs2_lock_res *lockres, 1273 struct ocfs2_lock_res *lockres,
1244 int level, 1274 int level,
1245 u32 lkm_flags, 1275 u32 lkm_flags,
1246 int arg_flags) 1276 int arg_flags,
1277 int l_subclass,
1278 unsigned long caller_ip)
1247{ 1279{
1248 struct ocfs2_mask_waiter mw; 1280 struct ocfs2_mask_waiter mw;
1249 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1281 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1386,13 +1418,37 @@ out:
1386 } 1418 }
1387 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1419 ocfs2_update_lock_stats(lockres, level, &mw, ret);
1388 1420
1421#ifdef CONFIG_DEBUG_LOCK_ALLOC
1422 if (!ret && lockres->l_lockdep_map.key != NULL) {
1423 if (level == DLM_LOCK_PR)
1424 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1425 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1426 caller_ip);
1427 else
1428 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1429 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1430 caller_ip);
1431 }
1432#endif
1389 mlog_exit(ret); 1433 mlog_exit(ret);
1390 return ret; 1434 return ret;
1391} 1435}
1392 1436
1393static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1437static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1394 struct ocfs2_lock_res *lockres, 1438 struct ocfs2_lock_res *lockres,
1395 int level) 1439 int level,
1440 u32 lkm_flags,
1441 int arg_flags)
1442{
1443 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1444 0, _RET_IP_);
1445}
1446
1447
1448static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1449 struct ocfs2_lock_res *lockres,
1450 int level,
1451 unsigned long caller_ip)
1396{ 1452{
1397 unsigned long flags; 1453 unsigned long flags;
1398 1454
@@ -1401,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1401 ocfs2_dec_holders(lockres, level); 1457 ocfs2_dec_holders(lockres, level);
1402 ocfs2_downconvert_on_unlock(osb, lockres); 1458 ocfs2_downconvert_on_unlock(osb, lockres);
1403 spin_unlock_irqrestore(&lockres->l_lock, flags); 1459 spin_unlock_irqrestore(&lockres->l_lock, flags);
1460#ifdef CONFIG_DEBUG_LOCK_ALLOC
1461 if (lockres->l_lockdep_map.key != NULL)
1462 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1463#endif
1404 mlog_exit_void(); 1464 mlog_exit_void();
1405} 1465}
1406 1466
@@ -1972,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1972{ 2032{
1973 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2033 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1974 2034
1975 if (lvb->lvb_version == OCFS2_LVB_VERSION 2035 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2036 && lvb->lvb_version == OCFS2_LVB_VERSION
1976 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2037 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1977 return 1; 2038 return 1;
1978 return 0; 2039 return 0;
@@ -2145,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
2145 * returns < 0 error if the callback will never be called, otherwise 2206 * returns < 0 error if the callback will never be called, otherwise
2146 * the result of the lock will be communicated via the callback. 2207 * the result of the lock will be communicated via the callback.
2147 */ 2208 */
2148int ocfs2_inode_lock_full(struct inode *inode, 2209int ocfs2_inode_lock_full_nested(struct inode *inode,
2149 struct buffer_head **ret_bh, 2210 struct buffer_head **ret_bh,
2150 int ex, 2211 int ex,
2151 int arg_flags) 2212 int arg_flags,
2213 int subclass)
2152{ 2214{
2153 int status, level, acquired; 2215 int status, level, acquired;
2154 u32 dlm_flags; 2216 u32 dlm_flags;
@@ -2186,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
2186 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2248 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2187 dlm_flags |= DLM_LKF_NOQUEUE; 2249 dlm_flags |= DLM_LKF_NOQUEUE;
2188 2250
2189 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2251 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2252 arg_flags, subclass, _RET_IP_);
2190 if (status < 0) { 2253 if (status < 0) {
2191 if (status != -EAGAIN && status != -EIOCBRETRY) 2254 if (status != -EAGAIN && status != -EIOCBRETRY)
2192 mlog_errno(status); 2255 mlog_errno(status);
@@ -2352,6 +2415,47 @@ void ocfs2_inode_unlock(struct inode *inode,
2352 mlog_exit_void(); 2415 mlog_exit_void();
2353} 2416}
2354 2417
2418int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
2419{
2420 struct ocfs2_lock_res *lockres;
2421 struct ocfs2_orphan_scan_lvb *lvb;
2422 int status = 0;
2423
2424 if (ocfs2_is_hard_readonly(osb))
2425 return -EROFS;
2426
2427 if (ocfs2_mount_local(osb))
2428 return 0;
2429
2430 lockres = &osb->osb_orphan_scan.os_lockres;
2431 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2432 if (status < 0)
2433 return status;
2434
2435 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2436 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2437 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2438 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2439 else
2440 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2441
2442 return status;
2443}
2444
2445void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
2446{
2447 struct ocfs2_lock_res *lockres;
2448 struct ocfs2_orphan_scan_lvb *lvb;
2449
2450 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2451 lockres = &osb->osb_orphan_scan.os_lockres;
2452 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2453 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2454 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2455 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2456 }
2457}
2458
2355int ocfs2_super_lock(struct ocfs2_super *osb, 2459int ocfs2_super_lock(struct ocfs2_super *osb,
2356 int ex) 2460 int ex)
2357{ 2461{
@@ -2842,6 +2946,7 @@ local:
2842 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2946 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2843 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2947 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2844 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 2948 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
2949 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
2845 2950
2846 osb->cconn = conn; 2951 osb->cconn = conn;
2847 2952
@@ -2878,6 +2983,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
2878 ocfs2_lock_res_free(&osb->osb_super_lockres); 2983 ocfs2_lock_res_free(&osb->osb_super_lockres);
2879 ocfs2_lock_res_free(&osb->osb_rename_lockres); 2984 ocfs2_lock_res_free(&osb->osb_rename_lockres);
2880 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 2985 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
2986 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
2881 2987
2882 ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 2988 ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
2883 osb->cconn = NULL; 2989 osb->cconn = NULL;
@@ -3061,6 +3167,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3061 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3167 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3062 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 3168 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
3063 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 3169 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
3170 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3064} 3171}
3065 3172
3066int ocfs2_drop_inode_locks(struct inode *inode) 3173int ocfs2_drop_inode_locks(struct inode *inode)
@@ -3576,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3576 struct ocfs2_global_disk_dqinfo *gdinfo; 3683 struct ocfs2_global_disk_dqinfo *gdinfo;
3577 int status = 0; 3684 int status = 0;
3578 3685
3579 if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 3686 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
3687 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3580 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 3688 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3581 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 3689 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3582 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 3690 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index e1fd5721cd7f..7553836931de 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb {
62 __be32 lvb_free_entry; 62 __be32 lvb_free_entry;
63}; 63};
64 64
65#define OCFS2_ORPHAN_LVB_VERSION 1
66
67struct ocfs2_orphan_scan_lvb {
68 __u8 lvb_version;
69 __u8 lvb_reserved[3];
70 __be32 lvb_os_seqno;
71};
72
65/* ocfs2_inode_lock_full() 'arg_flags' flags */ 73/* ocfs2_inode_lock_full() 'arg_flags' flags */
66/* don't wait on recovery. */ 74/* don't wait on recovery. */
67#define OCFS2_META_LOCK_RECOVERY (0x01) 75#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -70,6 +78,14 @@ struct ocfs2_qinfo_lvb {
70/* don't block waiting for the downconvert thread, instead return -EAGAIN */ 78/* don't block waiting for the downconvert thread, instead return -EAGAIN */
71#define OCFS2_LOCK_NONBLOCK (0x04) 79#define OCFS2_LOCK_NONBLOCK (0x04)
72 80
81/* Locking subclasses of inode cluster lock */
82enum {
83 OI_LS_NORMAL = 0,
84 OI_LS_PARENT,
85 OI_LS_RENAME1,
86 OI_LS_RENAME2,
87};
88
73int ocfs2_dlm_init(struct ocfs2_super *osb); 89int ocfs2_dlm_init(struct ocfs2_super *osb);
74void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); 90void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
75void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 91void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -96,23 +112,32 @@ void ocfs2_open_unlock(struct inode *inode);
96int ocfs2_inode_lock_atime(struct inode *inode, 112int ocfs2_inode_lock_atime(struct inode *inode,
97 struct vfsmount *vfsmnt, 113 struct vfsmount *vfsmnt,
98 int *level); 114 int *level);
99int ocfs2_inode_lock_full(struct inode *inode, 115int ocfs2_inode_lock_full_nested(struct inode *inode,
100 struct buffer_head **ret_bh, 116 struct buffer_head **ret_bh,
101 int ex, 117 int ex,
102 int arg_flags); 118 int arg_flags,
119 int subclass);
103int ocfs2_inode_lock_with_page(struct inode *inode, 120int ocfs2_inode_lock_with_page(struct inode *inode,
104 struct buffer_head **ret_bh, 121 struct buffer_head **ret_bh,
105 int ex, 122 int ex,
106 struct page *page); 123 struct page *page);
124/* Variants without special locking class or flags */
125#define ocfs2_inode_lock_full(i, r, e, f)\
126 ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
127#define ocfs2_inode_lock_nested(i, b, e, s)\
128 ocfs2_inode_lock_full_nested(i, b, e, 0, s)
107/* 99% of the time we don't want to supply any additional flags -- 129/* 99% of the time we don't want to supply any additional flags --
108 * those are for very specific cases only. */ 130 * those are for very specific cases only. */
109#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) 131#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
110void ocfs2_inode_unlock(struct inode *inode, 132void ocfs2_inode_unlock(struct inode *inode,
111 int ex); 133 int ex);
112int ocfs2_super_lock(struct ocfs2_super *osb, 134int ocfs2_super_lock(struct ocfs2_super *osb,
113 int ex); 135 int ex);
114void ocfs2_super_unlock(struct ocfs2_super *osb, 136void ocfs2_super_unlock(struct ocfs2_super *osb,
115 int ex); 137 int ex);
138int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
139void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
140
116int ocfs2_rename_lock(struct ocfs2_super *osb); 141int ocfs2_rename_lock(struct ocfs2_super *osb);
117void ocfs2_rename_unlock(struct ocfs2_super *osb); 142void ocfs2_rename_unlock(struct ocfs2_super *osb);
118int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); 143int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
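
The new ocfs2_orphan_scan_lvb stores the scan sequence number as a __be32, so every node in the cluster decodes the same value regardless of host byte order; the cpu_to_be32()/be32_to_cpu() calls in the orphan scan lock and unlock paths do the conversion. In userspace terms this is the familiar htonl()/ntohl() round trip (a sketch, not ocfs2 code):

/* be32_demo.c - the byte-order round trip used for the orphan scan LVB.
 * Build: cc -o be32_demo be32_demo.c
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t seqno = 42;
        uint32_t wire = htonl(seqno);   /* cpu_to_be32(): what goes into the LVB */
        uint32_t host = ntohl(wire);    /* be32_to_cpu(): what a reader gets back */

        printf("host %u -> wire 0x%08x -> host %u\n",
               (unsigned)seqno, (unsigned)wire, (unsigned)host);
        return 0;
}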
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c2a87c885b73..62442e413a00 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file,
187 if (err) 187 if (err)
188 goto bail; 188 goto bail;
189 189
190 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
191 goto bail;
192
190 journal = osb->journal->j_journal; 193 journal = osb->journal->j_journal;
191 err = jbd2_journal_force_commit(journal); 194 err = jbd2_journal_force_commit(journal);
192 195
@@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
894 struct ocfs2_super *osb = OCFS2_SB(sb); 897 struct ocfs2_super *osb = OCFS2_SB(sb);
895 struct buffer_head *bh = NULL; 898 struct buffer_head *bh = NULL;
896 handle_t *handle = NULL; 899 handle_t *handle = NULL;
897 int locked[MAXQUOTAS] = {0, 0}; 900 int qtype;
898 int credits, qtype; 901 struct dquot *transfer_from[MAXQUOTAS] = { };
899 struct ocfs2_mem_dqinfo *oinfo; 902 struct dquot *transfer_to[MAXQUOTAS] = { };
900 903
901 mlog_entry("(0x%p, '%.*s')\n", dentry, 904 mlog_entry("(0x%p, '%.*s')\n", dentry,
902 dentry->d_name.len, dentry->d_name.name); 905 dentry->d_name.len, dentry->d_name.name);
@@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
969 972
970 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 973 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
971 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 974 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
972 credits = OCFS2_INODE_UPDATE_CREDITS; 975 /*
976 * Gather pointers to quota structures so that allocation /
977 * freeing of quota structures happens here and not inside
978 * vfs_dq_transfer() where we have problems with lock ordering
979 */
973 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 980 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
974 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 981 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
975 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 982 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
976 oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; 983 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
977 status = ocfs2_lock_global_qf(oinfo, 1); 984 USRQUOTA);
978 if (status < 0) 985 transfer_from[USRQUOTA] = dqget(sb, inode->i_uid,
986 USRQUOTA);
987 if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) {
988 status = -ESRCH;
979 goto bail_unlock; 989 goto bail_unlock;
980 credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + 990 }
981 ocfs2_calc_qdel_credits(sb, USRQUOTA);
982 locked[USRQUOTA] = 1;
983 } 991 }
984 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid 992 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
985 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 993 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
986 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 994 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
987 oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; 995 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
988 status = ocfs2_lock_global_qf(oinfo, 1); 996 GRPQUOTA);
989 if (status < 0) 997 transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid,
998 GRPQUOTA);
999 if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) {
1000 status = -ESRCH;
990 goto bail_unlock; 1001 goto bail_unlock;
991 credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + 1002 }
992 ocfs2_calc_qdel_credits(sb, GRPQUOTA);
993 locked[GRPQUOTA] = 1;
994 } 1003 }
995 handle = ocfs2_start_trans(osb, credits); 1004 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
1005 2 * ocfs2_quota_trans_credits(sb));
996 if (IS_ERR(handle)) { 1006 if (IS_ERR(handle)) {
997 status = PTR_ERR(handle); 1007 status = PTR_ERR(handle);
998 mlog_errno(status); 1008 mlog_errno(status);
@@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1030bail_commit: 1040bail_commit:
1031 ocfs2_commit_trans(osb, handle); 1041 ocfs2_commit_trans(osb, handle);
1032bail_unlock: 1042bail_unlock:
1033 for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
1034 if (!locked[qtype])
1035 continue;
1036 oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
1037 ocfs2_unlock_global_qf(oinfo, 1);
1038 }
1039 ocfs2_inode_unlock(inode, 1); 1043 ocfs2_inode_unlock(inode, 1);
1040bail_unlock_rw: 1044bail_unlock_rw:
1041 if (size_change) 1045 if (size_change)
@@ -1043,6 +1047,12 @@ bail_unlock_rw:
1043bail: 1047bail:
1044 brelse(bh); 1048 brelse(bh);
1045 1049
1050 /* Release quota pointers in case we acquired them */
1051 for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
1052 dqput(transfer_to[qtype]);
1053 dqput(transfer_from[qtype]);
1054 }
1055
1046 if (!status && attr->ia_valid & ATTR_MODE) { 1056 if (!status && attr->ia_valid & ATTR_MODE) {
1047 status = ocfs2_acl_chmod(inode); 1057 status = ocfs2_acl_chmod(inode);
1048 if (status < 0) 1058 if (status < 0)
@@ -2016,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2016 size_t len, 2026 size_t len,
2017 unsigned int flags) 2027 unsigned int flags)
2018{ 2028{
2019 int ret = 0; 2029 int ret = 0, lock_level = 0;
2020 struct inode *inode = in->f_path.dentry->d_inode; 2030 struct inode *inode = in->f_path.dentry->d_inode;
2021 2031
2022 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, 2032 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2027,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2027 /* 2037 /*
2028 * See the comment in ocfs2_file_aio_read() 2038 * See the comment in ocfs2_file_aio_read()
2029 */ 2039 */
2030 ret = ocfs2_inode_lock(inode, NULL, 0); 2040 ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
2031 if (ret < 0) { 2041 if (ret < 0) {
2032 mlog_errno(ret); 2042 mlog_errno(ret);
2033 goto bail; 2043 goto bail;
2034 } 2044 }
2035 ocfs2_inode_unlock(inode, 0); 2045 ocfs2_inode_unlock(inode, lock_level);
2036 2046
2037 ret = generic_file_splice_read(in, ppos, pipe, len, flags); 2047 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
2038 2048
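Beyond the datasync short-cut and the atime-aware splice_read locking above, the bulk of this file's change is in ocfs2_setattr(): instead of taking the global quota file locks up front and sizing the transaction per quota type, the new code pins the source and destination dquots with dqget() before the transaction and releases them with dqput() only after every lock is dropped, so quota-structure allocation and freeing never happens inside vfs_dq_transfer() where the lock ordering was problematic. A much-condensed sketch of that shape (user quota only; the error paths shown in the hunk are omitted, and dqput(NULL) is a no-op, which is why the unconditional release loop is safe):

struct dquot *to[MAXQUOTAS] = { }, *from[MAXQUOTAS] = { };
handle_t *handle;
int qtype, status = 0;

/* 1. pin the quota structures outside the transaction */
to[USRQUOTA]   = dqget(sb, attr->ia_uid, USRQUOTA);
from[USRQUOTA] = dqget(sb, inode->i_uid, USRQUOTA);
if (!to[USRQUOTA] || !from[USRQUOTA])
        status = -ESRCH;

/* 2. the transaction is sized for two quota types up front */
if (!status) {
        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
                                        2 * ocfs2_quota_trans_credits(sb));
        if (IS_ERR(handle))
                status = PTR_ERR(handle);
        else {
                status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
                ocfs2_commit_trans(osb, handle);
        }
}

/* 3. drop the references last, after all cluster locks are released */
for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
        dqput(to[qtype]);
        dqput(from[qtype]);
}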
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 10e1fa87396a..4dc8890ba316 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -215,6 +215,8 @@ bail:
215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) 215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
216{ 216{
217 struct ocfs2_find_inode_args *args = opaque; 217 struct ocfs2_find_inode_args *args = opaque;
218 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
219 ocfs2_file_ip_alloc_sem_key;
218 220
219 mlog_entry("inode = %p, opaque = %p\n", inode, opaque); 221 mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
220 222
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
223 if (args->fi_sysfile_type != 0) 225 if (args->fi_sysfile_type != 0)
224 lockdep_set_class(&inode->i_mutex, 226 lockdep_set_class(&inode->i_mutex,
225 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); 227 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
228 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
229 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
230 args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
231 args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
232 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
233 &ocfs2_quota_ip_alloc_sem_key);
234 else
235 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
236 &ocfs2_file_ip_alloc_sem_key);
226 237
227 mlog_exit(0); 238 mlog_exit(0);
228 return 0; 239 return 0;
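The hunk above splits ip_alloc_sem into two lockdep classes, one for the quota system inodes and one for everything else, presumably so the legitimate nesting of a quota file's ip_alloc_sem relative to a regular file's does not look like recursive locking to lockdep. The general technique, reduced to its bones (the struct name here is made up for illustration):

static struct lock_class_key quota_key, file_key;

/* hypothetical object type, for illustration only */
struct my_object {
        struct rw_semaphore alloc_sem;
};

static void init_object_lock(struct my_object *obj, int is_quota_file)
{
        init_rwsem(&obj->alloc_sem);
        /* same lock field, different class depending on the owner's role */
        lockdep_set_class(&obj->alloc_sem,
                          is_quota_file ? &quota_key : &file_key);
}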
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a20a0f1e37fd..f033760ecbea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -28,6 +28,8 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/kthread.h> 30#include <linux/kthread.h>
31#include <linux/time.h>
32#include <linux/random.h>
31 33
32#define MLOG_MASK_PREFIX ML_JOURNAL 34#define MLOG_MASK_PREFIX ML_JOURNAL
33#include <cluster/masklog.h> 35#include <cluster/masklog.h>
@@ -52,6 +54,8 @@
52 54
53DEFINE_SPINLOCK(trans_inc_lock); 55DEFINE_SPINLOCK(trans_inc_lock);
54 56
57#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
58
55static int ocfs2_force_read_journal(struct inode *inode); 59static int ocfs2_force_read_journal(struct inode *inode);
56static int ocfs2_recover_node(struct ocfs2_super *osb, 60static int ocfs2_recover_node(struct ocfs2_super *osb,
57 int node_num, int slot_num); 61 int node_num, int slot_num);
@@ -1841,6 +1845,128 @@ bail:
1841 return status; 1845 return status;
1842} 1846}
1843 1847
1848/*
1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
 1850 * randomness to the timeout to minimize multiple nodes firing the timer at the
1851 * same time.
1852 */
1853static inline unsigned long ocfs2_orphan_scan_timeout(void)
1854{
1855 unsigned long time;
1856
1857 get_random_bytes(&time, sizeof(time));
1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1859 return msecs_to_jiffies(time);
1860}
1861
1862/*
1863 * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
1864 * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
1865 * is done to catch any orphans that are left over in orphan directories.
1866 *
1867 * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
1868 * seconds. It gets an EX lock on os_lockres and checks sequence number
1869 * stored in LVB. If the sequence number has changed, it means some other
1870 * node has done the scan. This node skips the scan and tracks the
1871 * sequence number. If the sequence number didn't change, it means a scan
1872 * hasn't happened. The node queues a scan and increments the
1873 * sequence number in the LVB.
1874 */
1875void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1876{
1877 struct ocfs2_orphan_scan *os;
1878 int status, i;
1879 u32 seqno = 0;
1880
1881 os = &osb->osb_orphan_scan;
1882
1883 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1884 goto out;
1885
1886 status = ocfs2_orphan_scan_lock(osb, &seqno);
1887 if (status < 0) {
1888 if (status != -EAGAIN)
1889 mlog_errno(status);
1890 goto out;
1891 }
1892
 1893 /* Do not queue the tasks if the volume is being umounted */
1894 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1895 goto unlock;
1896
1897 if (os->os_seqno != seqno) {
1898 os->os_seqno = seqno;
1899 goto unlock;
1900 }
1901
1902 for (i = 0; i < osb->max_slots; i++)
1903 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1904 NULL);
1905 /*
1906 * We queued a recovery on orphan slots, increment the sequence
 1907 * number and update the LVB so other nodes will skip the scan for a while
1908 */
1909 seqno++;
1910 os->os_count++;
1911 os->os_scantime = CURRENT_TIME;
1912unlock:
1913 ocfs2_orphan_scan_unlock(osb, seqno);
1914out:
1915 return;
1916}
1917
 1918/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT msecs */
1919void ocfs2_orphan_scan_work(struct work_struct *work)
1920{
1921 struct ocfs2_orphan_scan *os;
1922 struct ocfs2_super *osb;
1923
1924 os = container_of(work, struct ocfs2_orphan_scan,
1925 os_orphan_scan_work.work);
1926 osb = os->os_osb;
1927
1928 mutex_lock(&os->os_lock);
1929 ocfs2_queue_orphan_scan(osb);
1930 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1931 schedule_delayed_work(&os->os_orphan_scan_work,
1932 ocfs2_orphan_scan_timeout());
1933 mutex_unlock(&os->os_lock);
1934}
1935
1936void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1937{
1938 struct ocfs2_orphan_scan *os;
1939
1940 os = &osb->osb_orphan_scan;
1941 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1942 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1943 mutex_lock(&os->os_lock);
1944 cancel_delayed_work(&os->os_orphan_scan_work);
1945 mutex_unlock(&os->os_lock);
1946 }
1947}
1948
1949void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1950{
1951 struct ocfs2_orphan_scan *os;
1952
1953 os = &osb->osb_orphan_scan;
1954 os->os_osb = osb;
1955 os->os_count = 0;
1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1960
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else {
1964 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1965 schedule_delayed_work(&os->os_orphan_scan_work,
1966 ocfs2_orphan_scan_timeout());
1967 }
1968}
1969
1844struct ocfs2_orphan_filldir_priv { 1970struct ocfs2_orphan_filldir_priv {
1845 struct inode *head; 1971 struct inode *head;
1846 struct ocfs2_super *osb; 1972 struct ocfs2_super *osb;
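The period used by the worker above is deliberately not a constant: every firing adds up to five seconds of random slack on top of ORPHAN_SCAN_SCHEDULE_TIMEOUT so the nodes in a cluster drift apart instead of hammering the orphan-scan lock in lockstep. A userspace toy showing just that jitter calculation (rand() stands in for get_random_bytes(); everything else about the worker is kernel-only):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 /* milliseconds */

static unsigned long orphan_scan_timeout_ms(void)
{
        return ORPHAN_SCAN_SCHEDULE_TIMEOUT + (rand() % 5000);
}

int main(void)
{
        srand((unsigned)time(NULL));
        for (int i = 0; i < 3; i++)
                printf("next scan in %lu ms\n", orphan_scan_timeout_ms());
        return 0;
}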
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index eb7b76331eb7..5432c7f79cc6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
144} 144}
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150
147void ocfs2_complete_recovery(struct work_struct *work); 151void ocfs2_complete_recovery(struct work_struct *work);
148void ocfs2_wait_for_recovery(struct ocfs2_super *osb); 152void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
149 153
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 33464c6b60a2..8601f934010b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, 118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); 119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
120 120
121 status = ocfs2_inode_lock(dir, NULL, 0); 121 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
122 if (status < 0) { 122 if (status < 0) {
123 if (status != -ENOENT) 123 if (status != -ENOENT)
124 mlog_errno(status); 124 mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
636 if (S_ISDIR(inode->i_mode)) 636 if (S_ISDIR(inode->i_mode))
637 return -EPERM; 637 return -EPERM;
638 638
639 err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); 639 err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
640 if (err < 0) { 640 if (err < 0) {
641 if (err != -ENOENT) 641 if (err != -ENOENT)
642 mlog_errno(err); 642 mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
800 return -EPERM; 800 return -EPERM;
801 } 801 }
802 802
803 status = ocfs2_inode_lock(dir, &parent_node_bh, 1); 803 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
804 OI_LS_PARENT);
804 if (status < 0) { 805 if (status < 0) {
805 if (status != -ENOENT) 806 if (status != -ENOENT)
806 mlog_errno(status); 807 mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
978 inode1 = tmpinode; 979 inode1 = tmpinode;
979 } 980 }
980 /* lock id2 */ 981 /* lock id2 */
981 status = ocfs2_inode_lock(inode2, bh2, 1); 982 status = ocfs2_inode_lock_nested(inode2, bh2, 1,
983 OI_LS_RENAME1);
982 if (status < 0) { 984 if (status < 0) {
983 if (status != -ENOENT) 985 if (status != -ENOENT)
984 mlog_errno(status); 986 mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
987 } 989 }
988 990
989 /* lock id1 */ 991 /* lock id1 */
990 status = ocfs2_inode_lock(inode1, bh1, 1); 992 status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
991 if (status < 0) { 993 if (status < 0) {
992 /* 994 /*
993 * An error return must mean that no cluster locks 995 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
1103 * won't have to concurrently downconvert the inode and the 1105 * won't have to concurrently downconvert the inode and the
1104 * dentry locks. 1106 * dentry locks.
1105 */ 1107 */
1106 status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); 1108 status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
1109 OI_LS_PARENT);
1107 if (status < 0) { 1110 if (status < 0) {
1108 if (status != -ENOENT) 1111 if (status != -ENOENT)
1109 mlog_errno(status); 1112 mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1386281950db..c9345ebb8493 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/lockdep.h>
37#ifndef CONFIG_OCFS2_COMPAT_JBD 38#ifndef CONFIG_OCFS2_COMPAT_JBD
38# include <linux/jbd2.h> 39# include <linux/jbd2.h>
39#else 40#else
@@ -47,6 +48,9 @@
47#include "ocfs2_fs.h" 48#include "ocfs2_fs.h"
48#include "ocfs2_lockid.h" 49#include "ocfs2_lockid.h"
49 50
51/* For struct ocfs2_blockcheck_stats */
52#include "blockcheck.h"
53
50/* Most user visible OCFS2 inodes will have very few pieces of 54/* Most user visible OCFS2 inodes will have very few pieces of
51 * metadata, but larger files (including bitmaps, etc) must be taken 55 * metadata, but larger files (including bitmaps, etc) must be taken
52 * into account when designing an access scheme. We allow a small 56 * into account when designing an access scheme. We allow a small
@@ -149,6 +153,25 @@ struct ocfs2_lock_res {
149 unsigned int l_lock_max_exmode; /* Max wait for EX */ 153 unsigned int l_lock_max_exmode; /* Max wait for EX */
150 unsigned int l_lock_refresh; /* Disk refreshes */ 154 unsigned int l_lock_refresh; /* Disk refreshes */
151#endif 155#endif
156#ifdef CONFIG_DEBUG_LOCK_ALLOC
157 struct lockdep_map l_lockdep_map;
158#endif
159};
160
161enum ocfs2_orphan_scan_state {
162 ORPHAN_SCAN_ACTIVE,
163 ORPHAN_SCAN_INACTIVE
164};
165
166struct ocfs2_orphan_scan {
167 struct mutex os_lock;
168 struct ocfs2_super *os_osb;
169 struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */
170 struct delayed_work os_orphan_scan_work;
171 struct timespec os_scantime; /* time this node ran the scan */
172 u32 os_count; /* tracks node specific scans */
173 u32 os_seqno; /* tracks cluster wide scans */
174 atomic_t os_state; /* ACTIVE or INACTIVE */
152}; 175};
153 176
154struct ocfs2_dlm_debug { 177struct ocfs2_dlm_debug {
@@ -295,6 +318,7 @@ struct ocfs2_super
295 struct ocfs2_dinode *local_alloc_copy; 318 struct ocfs2_dinode *local_alloc_copy;
296 struct ocfs2_quota_recovery *quota_rec; 319 struct ocfs2_quota_recovery *quota_rec;
297 320
321 struct ocfs2_blockcheck_stats osb_ecc_stats;
298 struct ocfs2_alloc_stats alloc_stats; 322 struct ocfs2_alloc_stats alloc_stats;
299 char dev_str[20]; /* "major,minor" of the device */ 323 char dev_str[20]; /* "major,minor" of the device */
300 324
@@ -341,6 +365,8 @@ struct ocfs2_super
341 unsigned int *osb_orphan_wipes; 365 unsigned int *osb_orphan_wipes;
342 wait_queue_head_t osb_wipe_event; 366 wait_queue_head_t osb_wipe_event;
343 367
368 struct ocfs2_orphan_scan osb_orphan_scan;
369
344 /* used to protect metaecc calculation check of xattr. */ 370 /* used to protect metaecc calculation check of xattr. */
345 spinlock_t osb_xattr_lock; 371 spinlock_t osb_xattr_lock;
346 372
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index a53ce87481bf..fcdba091af3d 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -48,6 +48,7 @@ enum ocfs2_lock_type {
48 OCFS2_LOCK_TYPE_FLOCK, 48 OCFS2_LOCK_TYPE_FLOCK,
49 OCFS2_LOCK_TYPE_QINFO, 49 OCFS2_LOCK_TYPE_QINFO,
50 OCFS2_LOCK_TYPE_NFS_SYNC, 50 OCFS2_LOCK_TYPE_NFS_SYNC,
51 OCFS2_LOCK_TYPE_ORPHAN_SCAN,
51 OCFS2_NUM_LOCK_TYPES 52 OCFS2_NUM_LOCK_TYPES
52}; 53};
53 54
@@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
85 case OCFS2_LOCK_TYPE_NFS_SYNC: 86 case OCFS2_LOCK_TYPE_NFS_SYNC:
86 c = 'Y'; 87 c = 'Y';
87 break; 88 break;
89 case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
90 c = 'P';
91 break;
88 default: 92 default:
89 c = '\0'; 93 c = '\0';
90 } 94 }
@@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {
104 [OCFS2_LOCK_TYPE_OPEN] = "Open", 108 [OCFS2_LOCK_TYPE_OPEN] = "Open",
105 [OCFS2_LOCK_TYPE_FLOCK] = "Flock", 109 [OCFS2_LOCK_TYPE_FLOCK] = "Flock",
106 [OCFS2_LOCK_TYPE_QINFO] = "Quota", 110 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
111 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
107}; 112};
108 113
109static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 114static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 1ed0f7c86869..edfa60cd155c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
422 if (!dquot->dq_off) { /* No real quota entry? */ 422 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */ 423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
424 err = ocfs2_qinfo_lock(info, 1); 425 err = ocfs2_qinfo_lock(info, 1);
425 if (err < 0) 426 if (err < 0)
426 goto out_qlock; 427 goto out_qlock;
@@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
435out_qlock: 436out_qlock:
436 if (ex) 437 if (ex)
437 ocfs2_qinfo_unlock(info, 1); 438 ocfs2_qinfo_unlock(info, 1);
438 ocfs2_qinfo_unlock(info, 0); 439 else
440 ocfs2_qinfo_unlock(info, 0);
439out: 441out:
440 if (err < 0) 442 if (err < 0)
441 mlog_errno(err); 443 mlog_errno(err);
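The quota_global.c fix is a classic lock-upgrade bug: the old code took the qinfo lock shared, then requested it exclusive without dropping it, and on the way out unconditionally released the shared level even when it had already been upgraded. The corrected pattern, drop shared, retake exclusive, revalidate, and release exactly the mode held, is the same one needed with any reader/writer lock that has no in-place upgrade; a small runnable pthread analogy:

#include <pthread.h>

static pthread_rwlock_t lk = PTHREAD_RWLOCK_INITIALIZER;

static void read_then_maybe_write(int need_alloc)
{
        pthread_rwlock_rdlock(&lk);
        if (need_alloc) {
                pthread_rwlock_unlock(&lk);   /* drop the shared lock first */
                pthread_rwlock_wrlock(&lk);   /* then take it exclusive     */
                /* state may have changed while unlocked: recheck it here  */
        }
        pthread_rwlock_unlock(&lk);           /* matches the mode we hold   */
}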
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 07deec5e9721..5a460fa82553 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
444 444
445 mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); 445 mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
446 446
447 status = ocfs2_lock_global_qf(oinfo, 1);
448 if (status < 0)
449 goto out;
450
451 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { 447 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
452 chunk = rchunk->rc_chunk; 448 chunk = rchunk->rc_chunk;
453 hbh = NULL; 449 hbh = NULL;
@@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
480 type); 476 type);
481 goto out_put_bh; 477 goto out_put_bh;
482 } 478 }
479 status = ocfs2_lock_global_qf(oinfo, 1);
480 if (status < 0) {
481 mlog_errno(status);
482 goto out_put_dquot;
483 }
484
483 handle = ocfs2_start_trans(OCFS2_SB(sb), 485 handle = ocfs2_start_trans(OCFS2_SB(sb),
484 OCFS2_QSYNC_CREDITS); 486 OCFS2_QSYNC_CREDITS);
485 if (IS_ERR(handle)) { 487 if (IS_ERR(handle)) {
486 status = PTR_ERR(handle); 488 status = PTR_ERR(handle);
487 mlog_errno(status); 489 mlog_errno(status);
488 goto out_put_dquot; 490 goto out_drop_lock;
489 } 491 }
490 mutex_lock(&sb_dqopt(sb)->dqio_mutex); 492 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
491 spin_lock(&dq_data_lock); 493 spin_lock(&dq_data_lock);
@@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
523out_commit: 525out_commit:
524 mutex_unlock(&sb_dqopt(sb)->dqio_mutex); 526 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
525 ocfs2_commit_trans(OCFS2_SB(sb), handle); 527 ocfs2_commit_trans(OCFS2_SB(sb), handle);
528out_drop_lock:
529 ocfs2_unlock_global_qf(oinfo, 1);
526out_put_dquot: 530out_put_dquot:
527 dqput(dquot); 531 dqput(dquot);
528out_put_bh: 532out_put_bh:
@@ -537,8 +541,6 @@ out_put_bh:
537 if (status < 0) 541 if (status < 0)
538 break; 542 break;
539 } 543 }
540 ocfs2_unlock_global_qf(oinfo, 1);
541out:
542 if (status < 0) 544 if (status < 0)
543 free_recovery_list(&(rec->r_list[type])); 545 free_recovery_list(&(rec->r_list[type]));
544 mlog_exit(status); 546 mlog_exit(status);
@@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
655 struct ocfs2_quota_recovery *rec; 657 struct ocfs2_quota_recovery *rec;
656 int locked = 0; 658 int locked = 0;
657 659
660 /* We don't need the lock and we have to acquire quota file locks
661 * which will later depend on this lock */
662 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
658 info->dqi_maxblimit = 0x7fffffffffffffffLL; 663 info->dqi_maxblimit = 0x7fffffffffffffffLL;
659 info->dqi_maxilimit = 0x7fffffffffffffffLL; 664 info->dqi_maxilimit = 0x7fffffffffffffffLL;
660 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); 665 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
@@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
733 goto out_err; 738 goto out_err;
734 } 739 }
735 740
741 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
736 return 0; 742 return 0;
737out_err: 743out_err:
738 if (oinfo) { 744 if (oinfo) {
@@ -746,6 +752,7 @@ out_err:
746 kfree(oinfo); 752 kfree(oinfo);
747 } 753 }
748 brelse(bh); 754 brelse(bh);
755 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
749 return -1; 756 return -1;
750} 757}
751 758
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index fcd120f1493a..3f661376a2de 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
236 return dlm_status_to_errno(lksb->lksb_o2dlm.status); 236 return dlm_status_to_errno(lksb->lksb_o2dlm.status);
237} 237}
238 238
239/*
 240 * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
241 * contents, it will zero out the LVB. Thus the caller can always trust
242 * the contents.
243 */
244static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
245{
246 return 1;
247}
248
239static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) 249static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
240{ 250{
241 return (void *)(lksb->lksb_o2dlm.lvb); 251 return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
354 .dlm_lock = o2cb_dlm_lock, 364 .dlm_lock = o2cb_dlm_lock,
355 .dlm_unlock = o2cb_dlm_unlock, 365 .dlm_unlock = o2cb_dlm_unlock,
356 .lock_status = o2cb_dlm_lock_status, 366 .lock_status = o2cb_dlm_lock_status,
367 .lvb_valid = o2cb_dlm_lvb_valid,
357 .lock_lvb = o2cb_dlm_lvb, 368 .lock_lvb = o2cb_dlm_lvb,
358 .dump_lksb = o2cb_dump_lksb, 369 .dump_lksb = o2cb_dump_lksb,
359}; 370};
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76d41a8ac6..ff4c798a5635 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
738 return lksb->lksb_fsdlm.sb_status; 738 return lksb->lksb_fsdlm.sb_status;
739} 739}
740 740
741static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
742{
743 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
744
745 return !invalid;
746}
747
741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 748static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
742{ 749{
743 if (!lksb->lksb_fsdlm.sb_lvbptr) 750 if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
873 .dlm_lock = user_dlm_lock, 880 .dlm_lock = user_dlm_lock,
874 .dlm_unlock = user_dlm_unlock, 881 .dlm_unlock = user_dlm_unlock,
875 .lock_status = user_dlm_lock_status, 882 .lock_status = user_dlm_lock_status,
883 .lvb_valid = user_dlm_lvb_valid,
876 .lock_lvb = user_dlm_lvb, 884 .lock_lvb = user_dlm_lvb,
877 .plock = user_plock, 885 .plock = user_plock,
878 .dump_lksb = user_dlm_dump_lksb, 886 .dump_lksb = user_dlm_dump_lksb,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 68b668b0e60a..3f2f1c45b7b6 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -6,7 +6,7 @@
6 * Code which implements an OCFS2 specific interface to underlying 6 * Code which implements an OCFS2 specific interface to underlying
7 * cluster stacks. 7 * cluster stacks.
8 * 8 *
9 * Copyright (C) 2007 Oracle. All rights reserved. 9 * Copyright (C) 2007, 2009 Oracle. All rights reserved.
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public 12 * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
271} 271}
272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
273 273
274/* 274int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
275 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 275{
276 * don't cast at the glue level. The real answer is that the header 276 return active_stack->sp_ops->lvb_valid(lksb);
277 * ordering is nigh impossible. 277}
278 */ 278EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
279
279void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 280void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
280{ 281{
281 return active_stack->sp_ops->lock_lvb(lksb); 282 return active_stack->sp_ops->lock_lvb(lksb);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index c571af375ef8..03a44d60eac9 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -186,6 +186,11 @@ struct ocfs2_stack_operations {
186 int (*lock_status)(union ocfs2_dlm_lksb *lksb); 186 int (*lock_status)(union ocfs2_dlm_lksb *lksb);
187 187
188 /* 188 /*
189 * Return non-zero if the LVB is valid.
190 */
191 int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
192
193 /*
189 * Pull the lvb pointer off of the stack-specific lksb. 194 * Pull the lvb pointer off of the stack-specific lksb.
190 */ 195 */
191 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 196 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
252 struct ocfs2_lock_res *astarg); 257 struct ocfs2_lock_res *astarg);
253 258
254int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); 259int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
260int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
255void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 261void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
256void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 262void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
257 263
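Both cluster stacks now answer the new ->lvb_valid() operation: o2cb always says yes because o2dlm zeroes a lost LVB, while the fs/dlm plugin reports DLM_SBF_VALNOTVALID. A hypothetical caller sketch showing how the glue export is meant to be consumed (the struct and the disk fallback below are invented for illustration; the intended in-tree user is the LVB refresh logic in dlmglue.c):

/* illustration only -- my_cached_state and read_state_from_disk() are made up */
struct my_cached_state {
        u32 generation;         /* placeholder for the cached fields */
};
void read_state_from_disk(struct my_cached_state *state);

static void refresh_from_lvb(struct ocfs2_lock_res *lockres,
                             struct my_cached_state *state)
{
        if (ocfs2_dlm_lvb_valid(&lockres->l_lksb))
                /* the stack vouches for the LVB: use the cached copy */
                memcpy(state, ocfs2_dlm_lvb(&lockres->l_lksb), sizeof(*state));
        else
                /* LVB contents were lost: fall back to reading from disk */
                read_state_from_disk(state);
}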
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8439f6b324b9..73a16d4666dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
923 int nr) 923 int nr)
924{ 924{
925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
926 int ret;
926 927
927 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 928 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
928 return 0; 929 return 0;
929 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 930
931 if (!buffer_jbd(bg_bh))
930 return 1; 932 return 1;
931 933
934 jbd_lock_bh_state(bg_bh);
932 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 935 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
933 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 936 if (bg)
937 ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
938 else
939 ret = 1;
940 jbd_unlock_bh_state(bg_bh);
941
942 return ret;
934} 943}
935 944
936static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 945static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1885 unsigned int tmp; 1894 unsigned int tmp;
1886 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1895 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1887 struct ocfs2_group_desc *undo_bg = NULL; 1896 struct ocfs2_group_desc *undo_bg = NULL;
1897 int cluster_bitmap = 0;
1888 1898
1889 mlog_entry_void(); 1899 mlog_entry_void();
1890 1900
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1905 } 1915 }
1906 1916
1907 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1917 if (ocfs2_is_cluster_bitmap(alloc_inode))
1908 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1918 cluster_bitmap = 1;
1919
1920 if (cluster_bitmap) {
1921 jbd_lock_bh_state(group_bh);
1922 undo_bg = (struct ocfs2_group_desc *)
1923 bh2jh(group_bh)->b_committed_data;
1924 BUG_ON(!undo_bg);
1925 }
1909 1926
1910 tmp = num_bits; 1927 tmp = num_bits;
1911 while(tmp--) { 1928 while(tmp--) {
1912 ocfs2_clear_bit((bit_off + tmp), 1929 ocfs2_clear_bit((bit_off + tmp),
1913 (unsigned long *) bg->bg_bitmap); 1930 (unsigned long *) bg->bg_bitmap);
1914 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1931 if (cluster_bitmap)
1915 ocfs2_set_bit(bit_off + tmp, 1932 ocfs2_set_bit(bit_off + tmp,
1916 (unsigned long *) undo_bg->bg_bitmap); 1933 (unsigned long *) undo_bg->bg_bitmap);
1917 } 1934 }
1918 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1935 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1919 1936
1937 if (cluster_bitmap)
1938 jbd_unlock_bh_state(group_bh);
1939
1920 status = ocfs2_journal_dirty(handle, group_bh); 1940 status = ocfs2_journal_dirty(handle, group_bh);
1921 if (status < 0) 1941 if (status < 0)
1922 mlog_errno(status); 1942 mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 201b40a441fe..7efb349fb9bd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -119,10 +119,12 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
119static int ocfs2_check_volume(struct ocfs2_super *osb); 119static int ocfs2_check_volume(struct ocfs2_super *osb);
120static int ocfs2_verify_volume(struct ocfs2_dinode *di, 120static int ocfs2_verify_volume(struct ocfs2_dinode *di,
121 struct buffer_head *bh, 121 struct buffer_head *bh,
122 u32 sectsize); 122 u32 sectsize,
123 struct ocfs2_blockcheck_stats *stats);
123static int ocfs2_initialize_super(struct super_block *sb, 124static int ocfs2_initialize_super(struct super_block *sb,
124 struct buffer_head *bh, 125 struct buffer_head *bh,
125 int sector_size); 126 int sector_size,
127 struct ocfs2_blockcheck_stats *stats);
126static int ocfs2_get_sector(struct super_block *sb, 128static int ocfs2_get_sector(struct super_block *sb,
127 struct buffer_head **bh, 129 struct buffer_head **bh,
128 int block, 130 int block,
@@ -203,10 +205,10 @@ static const match_table_t tokens = {
203#ifdef CONFIG_DEBUG_FS 205#ifdef CONFIG_DEBUG_FS
204static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) 206static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
205{ 207{
206 int out = 0;
207 int i;
208 struct ocfs2_cluster_connection *cconn = osb->cconn; 208 struct ocfs2_cluster_connection *cconn = osb->cconn;
209 struct ocfs2_recovery_map *rm = osb->recovery_map; 209 struct ocfs2_recovery_map *rm = osb->recovery_map;
210 struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
211 int i, out = 0;
210 212
211 out += snprintf(buf + out, len - out, 213 out += snprintf(buf + out, len - out,
212 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", 214 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
@@ -231,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
231 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", 233 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
232 osb->s_mount_opt, osb->s_atime_quantum); 234 osb->s_mount_opt, osb->s_atime_quantum);
233 235
234 out += snprintf(buf + out, len - out, 236 if (cconn) {
235 "%10s => Stack: %s Name: %*s Version: %d.%d\n", 237 out += snprintf(buf + out, len - out,
236 "Cluster", 238 "%10s => Stack: %s Name: %*s "
237 (*osb->osb_cluster_stack == '\0' ? 239 "Version: %d.%d\n", "Cluster",
238 "o2cb" : osb->osb_cluster_stack), 240 (*osb->osb_cluster_stack == '\0' ?
239 cconn->cc_namelen, cconn->cc_name, 241 "o2cb" : osb->osb_cluster_stack),
240 cconn->cc_version.pv_major, cconn->cc_version.pv_minor); 242 cconn->cc_namelen, cconn->cc_name,
243 cconn->cc_version.pv_major,
244 cconn->cc_version.pv_minor);
245 }
241 246
242 spin_lock(&osb->dc_task_lock); 247 spin_lock(&osb->dc_task_lock);
243 out += snprintf(buf + out, len - out, 248 out += snprintf(buf + out, len - out,
244 "%10s => Pid: %d Count: %lu WakeSeq: %lu " 249 "%10s => Pid: %d Count: %lu WakeSeq: %lu "
245 "WorkSeq: %lu\n", "DownCnvt", 250 "WorkSeq: %lu\n", "DownCnvt",
246 task_pid_nr(osb->dc_task), osb->blocked_lock_count, 251 (osb->dc_task ? task_pid_nr(osb->dc_task) : -1),
247 osb->dc_wake_sequence, osb->dc_work_sequence); 252 osb->blocked_lock_count, osb->dc_wake_sequence,
253 osb->dc_work_sequence);
248 spin_unlock(&osb->dc_task_lock); 254 spin_unlock(&osb->dc_task_lock);
249 255
250 spin_lock(&osb->osb_lock); 256 spin_lock(&osb->osb_lock);
@@ -264,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
264 270
265 out += snprintf(buf + out, len - out, 271 out += snprintf(buf + out, len - out,
266 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", 272 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
267 task_pid_nr(osb->commit_task), osb->osb_commit_interval, 273 (osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
274 osb->osb_commit_interval,
268 atomic_read(&osb->needs_checkpoint)); 275 atomic_read(&osb->needs_checkpoint));
269 276
270 out += snprintf(buf + out, len - out, 277 out += snprintf(buf + out, len - out,
271 "%10s => State: %d NumTxns: %d TxnId: %lu\n", 278 "%10s => State: %d TxnId: %lu NumTxns: %d\n",
272 "Journal", osb->journal->j_state, 279 "Journal", osb->journal->j_state,
273 atomic_read(&osb->journal->j_num_trans), 280 osb->journal->j_trans_id,
274 osb->journal->j_trans_id); 281 atomic_read(&osb->journal->j_num_trans));
275 282
276 out += snprintf(buf + out, len - out, 283 out += snprintf(buf + out, len - out,
277 "%10s => GlobalAllocs: %d LocalAllocs: %d " 284 "%10s => GlobalAllocs: %d LocalAllocs: %d "
@@ -297,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
297 atomic_read(&osb->s_num_inodes_stolen)); 304 atomic_read(&osb->s_num_inodes_stolen));
298 spin_unlock(&osb->osb_lock); 305 spin_unlock(&osb->osb_lock);
299 306
307 out += snprintf(buf + out, len - out, "OrphanScan => ");
308 out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
309 os->os_count, os->os_seqno);
310 out += snprintf(buf + out, len - out, " Last Scan: ");
311 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
312 out += snprintf(buf + out, len - out, "Disabled\n");
313 else
314 out += snprintf(buf + out, len - out, "%lu seconds ago\n",
315 (get_seconds() - os->os_scantime.tv_sec));
316
300 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", 317 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
301 "Slots", "Num", "RecoGen"); 318 "Slots", "Num", "RecoGen");
302
303 for (i = 0; i < osb->max_slots; ++i) { 319 for (i = 0; i < osb->max_slots; ++i) {
304 out += snprintf(buf + out, len - out, 320 out += snprintf(buf + out, len - out,
305 "%10s %c %3d %10d\n", 321 "%10s %c %3d %10d\n",
@@ -542,7 +558,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
542 */ 558 */
543 559
544#if BITS_PER_LONG == 32 560#if BITS_PER_LONG == 32
545# if defined(CONFIG_LBD) 561# if defined(CONFIG_LBDAF)
546 BUILD_BUG_ON(sizeof(sector_t) != 8); 562 BUILD_BUG_ON(sizeof(sector_t) != 8);
547 /* 563 /*
548 * We might be limited by page cache size. 564 * We might be limited by page cache size.
@@ -693,7 +709,8 @@ out:
693 709
694static int ocfs2_sb_probe(struct super_block *sb, 710static int ocfs2_sb_probe(struct super_block *sb,
695 struct buffer_head **bh, 711 struct buffer_head **bh,
696 int *sector_size) 712 int *sector_size,
713 struct ocfs2_blockcheck_stats *stats)
697{ 714{
698 int status, tmpstat; 715 int status, tmpstat;
699 struct ocfs1_vol_disk_hdr *hdr; 716 struct ocfs1_vol_disk_hdr *hdr;
@@ -759,7 +776,8 @@ static int ocfs2_sb_probe(struct super_block *sb,
759 goto bail; 776 goto bail;
760 } 777 }
761 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
762 status = ocfs2_verify_volume(di, *bh, blksize); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 status = ocfs2_verify_volume(di, *bh, blksize, stats);
763 if (status >= 0) 781 if (status >= 0)
764 goto bail; 782 goto bail;
765 brelse(*bh); 783 brelse(*bh);
@@ -965,6 +983,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
965 struct ocfs2_super *osb = NULL; 983 struct ocfs2_super *osb = NULL;
966 struct buffer_head *bh = NULL; 984 struct buffer_head *bh = NULL;
967 char nodestr[8]; 985 char nodestr[8];
986 struct ocfs2_blockcheck_stats stats;
968 987
969 mlog_entry("%p, %p, %i", sb, data, silent); 988 mlog_entry("%p, %p, %i", sb, data, silent);
970 989
@@ -974,13 +993,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
974 } 993 }
975 994
976 /* probe for superblock */ 995 /* probe for superblock */
977 status = ocfs2_sb_probe(sb, &bh, &sector_size); 996 status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
978 if (status < 0) { 997 if (status < 0) {
979 mlog(ML_ERROR, "superblock probe failed!\n"); 998 mlog(ML_ERROR, "superblock probe failed!\n");
980 goto read_super_error; 999 goto read_super_error;
981 } 1000 }
982 1001
983 status = ocfs2_initialize_super(sb, bh, sector_size); 1002 status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
984 osb = OCFS2_SB(sb); 1003 osb = OCFS2_SB(sb);
985 if (status < 0) { 1004 if (status < 0) {
986 mlog_errno(status); 1005 mlog_errno(status);
@@ -1090,6 +1109,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1090 goto read_super_error; 1109 goto read_super_error;
1091 } 1110 }
1092 1111
1112 if (ocfs2_meta_ecc(osb)) {
1113 status = ocfs2_blockcheck_stats_debugfs_install(
1114 &osb->osb_ecc_stats,
1115 osb->osb_debug_root);
1116 if (status) {
1117 mlog(ML_ERROR,
1118 "Unable to create blockcheck statistics "
1119 "files\n");
1120 goto read_super_error;
1121 }
1122 }
1123
1093 status = ocfs2_mount_volume(sb); 1124 status = ocfs2_mount_volume(sb);
1094 if (osb->root_inode) 1125 if (osb->root_inode)
1095 inode = igrab(osb->root_inode); 1126 inode = igrab(osb->root_inode);
@@ -1150,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1150 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); 1181 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
1151 wake_up(&osb->osb_mount_event); 1182 wake_up(&osb->osb_mount_event);
1152 1183
1184 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb);
1186
1153 mlog_exit(status); 1187 mlog_exit(status);
1154 return status; 1188 return status;
1155 1189
@@ -1760,13 +1794,8 @@ static int ocfs2_mount_volume(struct super_block *sb)
1760 } 1794 }
1761 1795
1762 status = ocfs2_truncate_log_init(osb); 1796 status = ocfs2_truncate_log_init(osb);
1763 if (status < 0) { 1797 if (status < 0)
1764 mlog_errno(status); 1798 mlog_errno(status);
1765 goto leave;
1766 }
1767
1768 if (ocfs2_mount_local(osb))
1769 goto leave;
1770 1799
1771leave: 1800leave:
1772 if (unlock_super) 1801 if (unlock_super)
@@ -1790,6 +1819,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1790 1819
1791 debugfs_remove(osb->osb_ctxt); 1820 debugfs_remove(osb->osb_ctxt);
1792 1821
1822 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb);
1824
1793 ocfs2_disable_quotas(osb); 1825 ocfs2_disable_quotas(osb);
1794 1826
1795 ocfs2_shutdown_local_alloc(osb); 1827 ocfs2_shutdown_local_alloc(osb);
@@ -1833,6 +1865,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1833 if (osb->cconn) 1865 if (osb->cconn)
1834 ocfs2_dlm_shutdown(osb, hangup_needed); 1866 ocfs2_dlm_shutdown(osb, hangup_needed);
1835 1867
1868 ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
1836 debugfs_remove(osb->osb_debug_root); 1869 debugfs_remove(osb->osb_debug_root);
1837 1870
1838 if (hangup_needed) 1871 if (hangup_needed)
@@ -1880,7 +1913,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
1880 1913
1881static int ocfs2_initialize_super(struct super_block *sb, 1914static int ocfs2_initialize_super(struct super_block *sb,
1882 struct buffer_head *bh, 1915 struct buffer_head *bh,
1883 int sector_size) 1916 int sector_size,
1917 struct ocfs2_blockcheck_stats *stats)
1884{ 1918{
1885 int status; 1919 int status;
1886 int i, cbits, bbits; 1920 int i, cbits, bbits;
@@ -1939,6 +1973,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
1939 atomic_set(&osb->alloc_stats.bg_allocs, 0); 1973 atomic_set(&osb->alloc_stats.bg_allocs, 0);
1940 atomic_set(&osb->alloc_stats.bg_extends, 0); 1974 atomic_set(&osb->alloc_stats.bg_extends, 0);
1941 1975
1976 /* Copy the blockcheck stats from the superblock probe */
1977 osb->osb_ecc_stats = *stats;
1978
1942 ocfs2_init_node_maps(osb); 1979 ocfs2_init_node_maps(osb);
1943 1980
1944 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
@@ -2169,7 +2206,8 @@ bail:
2169 */ 2206 */
2170static int ocfs2_verify_volume(struct ocfs2_dinode *di, 2207static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2171 struct buffer_head *bh, 2208 struct buffer_head *bh,
2172 u32 blksz) 2209 u32 blksz,
2210 struct ocfs2_blockcheck_stats *stats)
2173{ 2211{
2174 int status = -EAGAIN; 2212 int status = -EAGAIN;
2175 2213
@@ -2182,7 +2220,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2182 OCFS2_FEATURE_INCOMPAT_META_ECC) { 2220 OCFS2_FEATURE_INCOMPAT_META_ECC) {
2183 status = ocfs2_block_check_validate(bh->b_data, 2221 status = ocfs2_block_check_validate(bh->b_data,
2184 bh->b_size, 2222 bh->b_size,
2185 &di->i_check); 2223 &di->i_check,
2224 stats);
2186 if (status) 2225 if (status)
2187 goto out; 2226 goto out;
2188 } 2227 }
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index ab713ebdd546..40e53702948c 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
50 int type, 50 int type,
51 u32 slot); 51 u32 slot);
52 52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
55#endif
56
53static inline int is_global_system_inode(int type) 57static inline int is_global_system_inode(int type)
54{ 58{
55 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && 59 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
118 inode = NULL; 122 inode = NULL;
119 goto bail; 123 goto bail;
120 } 124 }
125#ifdef CONFIG_DEBUG_LOCK_ALLOC
126 if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
127 type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
128 type == JOURNAL_SYSTEM_INODE) {
129 /* Ignore inode lock on these inodes as the lock does not
130 * really belong to any process and lockdep cannot handle
131 * that */
132 OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
133 } else {
134 lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
135 l_lockdep_map,
136 ocfs2_system_inodes[type].si_name,
137 &ocfs2_sysfile_cluster_lock_key[type], 0);
138 }
139#endif
121bail: 140bail:
122 141
123 return inode; 142 return inode;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 15631019dc63..ba320e250747 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3154 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3154 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3155 if (func) { 3155 if (func) {
3156 ret = func(inode, bucket, para); 3156 ret = func(inode, bucket, para);
3157 if (ret) 3157 if (ret && ret != -ERANGE)
3158 mlog_errno(ret); 3158 mlog_errno(ret);
3159 /* Fall through to bucket_relse() */ 3159 /* Fall through to bucket_relse() */
3160 } 3160 }
@@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3261 ocfs2_list_xattr_bucket, 3261 ocfs2_list_xattr_bucket,
3262 &xl); 3262 &xl);
3263 if (ret) { 3263 if (ret) {
3264 mlog_errno(ret); 3264 if (ret != -ERANGE)
3265 mlog_errno(ret);
3265 goto out; 3266 goto out;
3266 } 3267 }
3267 3268
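Both xattr.c hunks stop treating -ERANGE as a loggable error because it is the expected outcome of the standard size-probing sequence: callers of listxattr(2) routinely pass a zero-length buffer first and only then allocate. The userspace side of that contract, as a runnable example:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : ".";
        ssize_t len = listxattr(path, NULL, 0);    /* probe the required size */
        char *buf;

        if (len <= 0)
                return 0;                          /* no xattrs, or error */

        buf = malloc(len);
        if (!buf)
                return 1;
        len = listxattr(path, buf, len);           /* too small => ERANGE */
        for (ssize_t i = 0; i < len; i += strlen(buf + i) + 1)
                printf("%s\n", buf + i);
        free(buf);
        return 0;
}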
diff --git a/fs/open.c b/fs/open.c
index 7200e23d9258..dd98e8076024 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
378#endif 378#endif
379#endif /* BITS_PER_LONG == 32 */ 379#endif /* BITS_PER_LONG == 32 */
380 380
381SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 381
382int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
382{ 383{
383 struct file *file; 384 struct inode *inode = file->f_path.dentry->d_inode;
384 struct inode *inode; 385 long ret;
385 long ret = -EINVAL;
386 386
387 if (offset < 0 || len <= 0) 387 if (offset < 0 || len <= 0)
388 goto out; 388 return -EINVAL;
389 389
390 /* Return error if mode is not supported */ 390 /* Return error if mode is not supported */
391 ret = -EOPNOTSUPP;
392 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 391 if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
393 goto out; 392 return -EOPNOTSUPP;
394 393
395 ret = -EBADF;
396 file = fget(fd);
397 if (!file)
398 goto out;
399 if (!(file->f_mode & FMODE_WRITE)) 394 if (!(file->f_mode & FMODE_WRITE))
400 goto out_fput; 395 return -EBADF;
401 /* 396 /*
402 * Revalidate the write permissions, in case security policy has 397 * Revalidate the write permissions, in case security policy has
403 * changed since the files were opened. 398 * changed since the files were opened.
404 */ 399 */
405 ret = security_file_permission(file, MAY_WRITE); 400 ret = security_file_permission(file, MAY_WRITE);
406 if (ret) 401 if (ret)
407 goto out_fput; 402 return ret;
408 403
409 inode = file->f_path.dentry->d_inode;
410
411 ret = -ESPIPE;
412 if (S_ISFIFO(inode->i_mode)) 404 if (S_ISFIFO(inode->i_mode))
413 goto out_fput; 405 return -ESPIPE;
414 406
415 ret = -ENODEV;
416 /* 407 /*
417 * Let individual file system decide if it supports preallocation 408 * Let individual file system decide if it supports preallocation
418 * for directories or not. 409 * for directories or not.
419 */ 410 */
420 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 411 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
421 goto out_fput; 412 return -ENODEV;
422 413
423 ret = -EFBIG;
424 /* Check for wrap through zero too */ 414 /* Check for wrap through zero too */
425 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 415 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
426 goto out_fput; 416 return -EFBIG;
427 417
428 if (inode->i_op->fallocate) 418 if (!inode->i_op->fallocate)
429 ret = inode->i_op->fallocate(inode, mode, offset, len); 419 return -EOPNOTSUPP;
430 else
431 ret = -EOPNOTSUPP;
432 420
433out_fput: 421 return inode->i_op->fallocate(inode, mode, offset, len);
434 fput(file);
435out:
436 return ret;
437} 422}
423
424SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
425{
426 struct file *file;
427 int error = -EBADF;
428
429 file = fget(fd);
430 if (file) {
431 error = do_fallocate(file, mode, offset, len);
432 fput(file);
433 }
434
435 return error;
436}
437
438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) 439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
440{ 440{
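Splitting sys_fallocate() into a thin descriptor-lookup wrapper plus do_fallocate() keeps every check in one place and lets in-kernel callers preallocate on a struct file they already hold. From userspace the visible contract is unchanged; a small runnable demonstration of the argument checks the helper performs:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef FALLOC_FL_KEEP_SIZE
#define FALLOC_FL_KEEP_SIZE 0x01        /* from <linux/falloc.h> */
#endif

int main(void)
{
        int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0644);

        if (fd < 0)
                return 1;

        /* preallocate 1 MiB without changing i_size */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
                perror("fallocate");    /* EOPNOTSUPP if the fs lacks it */

        /* len <= 0 is rejected before the filesystem is even asked */
        if (fallocate(fd, 0, 0, 0) < 0 && errno == EINVAL)
                printf("zero length rejected with EINVAL, as expected\n");

        close(fd);
        unlink("testfile");
        return 0;
}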
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 63d965193b22..11a7b5c68153 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -18,6 +18,7 @@ proc-y += meminfo.o
18proc-y += stat.o 18proc-y += stat.o
19proc-y += uptime.o 19proc-y += uptime.o
20proc-y += version.o 20proc-y += version.o
21proc-y += softirqs.o
21proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 22proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
22proc-$(CONFIG_NET) += proc_net.o 23proc-$(CONFIG_NET) += proc_net.o
23proc-$(CONFIG_PROC_KCORE) += kcore.o 24proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1539e630c47d..3ce5ae9e3d2d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1006,7 +1006,12 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1006 1006
1007 if (!task) 1007 if (!task)
1008 return -ESRCH; 1008 return -ESRCH;
1009 oom_adjust = task->oomkilladj; 1009 task_lock(task);
1010 if (task->mm)
1011 oom_adjust = task->mm->oom_adj;
1012 else
1013 oom_adjust = OOM_DISABLE;
1014 task_unlock(task);
1010 put_task_struct(task); 1015 put_task_struct(task);
1011 1016
1012 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1017 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1035,11 +1040,19 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1035 task = get_proc_task(file->f_path.dentry->d_inode); 1040 task = get_proc_task(file->f_path.dentry->d_inode);
1036 if (!task) 1041 if (!task)
1037 return -ESRCH; 1042 return -ESRCH;
1038 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1043 task_lock(task);
1044 if (!task->mm) {
1045 task_unlock(task);
1046 put_task_struct(task);
1047 return -EINVAL;
1048 }
1049 if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1050 task_unlock(task);
1039 put_task_struct(task); 1051 put_task_struct(task);
1040 return -EACCES; 1052 return -EACCES;
1041 } 1053 }
1042 task->oomkilladj = oom_adjust; 1054 task->mm->oom_adj = oom_adjust;
1055 task_unlock(task);
1043 put_task_struct(task); 1056 put_task_struct(task);
1044 if (end - buffer == 0) 1057 if (end - buffer == 0)
1045 return -EIO; 1058 return -EIO;
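Moving the value from task_struct::oomkilladj to mm_struct::oom_adj means threads sharing an mm now necessarily share the setting, and a task without an mm (a kernel thread, or one far enough into exit) reads back as OOM_DISABLE and rejects writes with -EINVAL. The procfs interface itself is unchanged; a small example driving it for the current process (lowering the value below its current setting still needs CAP_SYS_RESOURCE, per the -EACCES check above):

#include <stdio.h>

int main(void)
{
        char buf[16];
        FILE *f = fopen("/proc/self/oom_adj", "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("current oom_adj: %s", buf);
        if (f)
                fclose(f);

        /* raising the value is allowed without extra capabilities */
        f = fopen("/proc/self/oom_adj", "w");
        if (f) {
                fprintf(f, "5\n");
                fclose(f);
        }
        return 0;
}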
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index c6b0302af4c4..d5c410d47fae 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -64,10 +64,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
64 "Inactive(anon): %8lu kB\n" 64 "Inactive(anon): %8lu kB\n"
65 "Active(file): %8lu kB\n" 65 "Active(file): %8lu kB\n"
66 "Inactive(file): %8lu kB\n" 66 "Inactive(file): %8lu kB\n"
67#ifdef CONFIG_UNEVICTABLE_LRU
68 "Unevictable: %8lu kB\n" 67 "Unevictable: %8lu kB\n"
69 "Mlocked: %8lu kB\n" 68 "Mlocked: %8lu kB\n"
70#endif
71#ifdef CONFIG_HIGHMEM 69#ifdef CONFIG_HIGHMEM
72 "HighTotal: %8lu kB\n" 70 "HighTotal: %8lu kB\n"
73 "HighFree: %8lu kB\n" 71 "HighFree: %8lu kB\n"
@@ -109,10 +107,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
109 K(pages[LRU_INACTIVE_ANON]), 107 K(pages[LRU_INACTIVE_ANON]),
110 K(pages[LRU_ACTIVE_FILE]), 108 K(pages[LRU_ACTIVE_FILE]),
111 K(pages[LRU_INACTIVE_FILE]), 109 K(pages[LRU_INACTIVE_FILE]),
112#ifdef CONFIG_UNEVICTABLE_LRU
113 K(pages[LRU_UNEVICTABLE]), 110 K(pages[LRU_UNEVICTABLE]),
114 K(global_page_state(NR_MLOCK)), 111 K(global_page_state(NR_MLOCK)),
115#endif
116#ifdef CONFIG_HIGHMEM 112#ifdef CONFIG_HIGHMEM
117 K(i.totalhigh), 113 K(i.totalhigh),
118 K(i.freehigh), 114 K(i.freehigh),
diff --git a/fs/proc/page.c b/fs/proc/page.c
index e9983837d08d..2707c6c7a20f 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -6,11 +6,13 @@
6#include <linux/mmzone.h> 6#include <linux/mmzone.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/seq_file.h> 8#include <linux/seq_file.h>
9#include <linux/hugetlb.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10#include "internal.h" 11#include "internal.h"
11 12
12#define KPMSIZE sizeof(u64) 13#define KPMSIZE sizeof(u64)
13#define KPMMASK (KPMSIZE - 1) 14#define KPMMASK (KPMSIZE - 1)
15
14/* /proc/kpagecount - an array exposing page counts 16/* /proc/kpagecount - an array exposing page counts
15 * 17 *
16 * Each entry is a u64 representing the corresponding 18 * Each entry is a u64 representing the corresponding
@@ -32,20 +34,22 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
32 return -EINVAL; 34 return -EINVAL;
33 35
34 while (count > 0) { 36 while (count > 0) {
35 ppage = NULL;
36 if (pfn_valid(pfn)) 37 if (pfn_valid(pfn))
37 ppage = pfn_to_page(pfn); 38 ppage = pfn_to_page(pfn);
38 pfn++; 39 else
40 ppage = NULL;
39 if (!ppage) 41 if (!ppage)
40 pcount = 0; 42 pcount = 0;
41 else 43 else
42 pcount = page_mapcount(ppage); 44 pcount = page_mapcount(ppage);
43 45
44 if (put_user(pcount, out++)) { 46 if (put_user(pcount, out)) {
45 ret = -EFAULT; 47 ret = -EFAULT;
46 break; 48 break;
47 } 49 }
48 50
51 pfn++;
52 out++;
49 count -= KPMSIZE; 53 count -= KPMSIZE;
50 } 54 }
51 55
@@ -68,19 +72,122 @@ static const struct file_operations proc_kpagecount_operations = {
68 72
69/* These macros are used to decouple internal flags from exported ones */ 73/* These macros are used to decouple internal flags from exported ones */
70 74
71#define KPF_LOCKED 0 75#define KPF_LOCKED 0
72#define KPF_ERROR 1 76#define KPF_ERROR 1
73#define KPF_REFERENCED 2 77#define KPF_REFERENCED 2
74#define KPF_UPTODATE 3 78#define KPF_UPTODATE 3
75#define KPF_DIRTY 4 79#define KPF_DIRTY 4
76#define KPF_LRU 5 80#define KPF_LRU 5
77#define KPF_ACTIVE 6 81#define KPF_ACTIVE 6
78#define KPF_SLAB 7 82#define KPF_SLAB 7
79#define KPF_WRITEBACK 8 83#define KPF_WRITEBACK 8
80#define KPF_RECLAIM 9 84#define KPF_RECLAIM 9
81#define KPF_BUDDY 10 85#define KPF_BUDDY 10
86
87/* 11-20: new additions in 2.6.31 */
88#define KPF_MMAP 11
89#define KPF_ANON 12
90#define KPF_SWAPCACHE 13
91#define KPF_SWAPBACKED 14
92#define KPF_COMPOUND_HEAD 15
93#define KPF_COMPOUND_TAIL 16
94#define KPF_HUGE 17
95#define KPF_UNEVICTABLE 18
96#define KPF_NOPAGE 20
97
 98/* kernel hacking assistance
99 * WARNING: subject to change, never rely on them!
100 */
101#define KPF_RESERVED 32
102#define KPF_MLOCKED 33
103#define KPF_MAPPEDTODISK 34
104#define KPF_PRIVATE 35
105#define KPF_PRIVATE_2 36
106#define KPF_OWNER_PRIVATE 37
107#define KPF_ARCH 38
108#define KPF_UNCACHED 39
109
110static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
111{
112 return ((kflags >> kbit) & 1) << ubit;
113}
82 114
83#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos) 115static u64 get_uflags(struct page *page)
116{
117 u64 k;
118 u64 u;
119
120 /*
121 * pseudo flag: KPF_NOPAGE
122 * it differentiates a memory hole from a page with no flags
123 */
124 if (!page)
125 return 1 << KPF_NOPAGE;
126
127 k = page->flags;
128 u = 0;
129
130 /*
131 * pseudo flags for the well known (anonymous) memory mapped pages
132 *
133 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
134 * simple test in page_mapped() is not enough.
135 */
136 if (!PageSlab(page) && page_mapped(page))
137 u |= 1 << KPF_MMAP;
138 if (PageAnon(page))
139 u |= 1 << KPF_ANON;
140
141 /*
142 * compound pages: export both head/tail info
143 * they together define a compound page's start/end pos and order
144 */
145 if (PageHead(page))
146 u |= 1 << KPF_COMPOUND_HEAD;
147 if (PageTail(page))
148 u |= 1 << KPF_COMPOUND_TAIL;
149 if (PageHuge(page))
150 u |= 1 << KPF_HUGE;
151
152 u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
153
154 /*
155 * Caveats on high order pages:
156 * PG_buddy will only be set on the head page; SLUB/SLQB do the same
157 * for PG_slab; SLOB won't set PG_slab at all on compound pages.
158 */
159 u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
160 u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy);
161
162 u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
163 u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
164 u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
165 u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);
166
167 u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
168 u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
169 u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
170 u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
171
172 u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache);
173 u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
174
175 u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
176 u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);
177
178#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
179 u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
180#endif
181
182 u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
183 u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk);
184 u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
185 u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
186 u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
187 u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
188
189 return u;
190};
84 191
85static ssize_t kpageflags_read(struct file *file, char __user *buf, 192static ssize_t kpageflags_read(struct file *file, char __user *buf,
86 size_t count, loff_t *ppos) 193 size_t count, loff_t *ppos)
@@ -90,7 +197,6 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
90 unsigned long src = *ppos; 197 unsigned long src = *ppos;
91 unsigned long pfn; 198 unsigned long pfn;
92 ssize_t ret = 0; 199 ssize_t ret = 0;
93 u64 kflags, uflags;
94 200
95 pfn = src / KPMSIZE; 201 pfn = src / KPMSIZE;
96 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); 202 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
@@ -98,32 +204,18 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
98 return -EINVAL; 204 return -EINVAL;
99 205
100 while (count > 0) { 206 while (count > 0) {
101 ppage = NULL;
102 if (pfn_valid(pfn)) 207 if (pfn_valid(pfn))
103 ppage = pfn_to_page(pfn); 208 ppage = pfn_to_page(pfn);
104 pfn++;
105 if (!ppage)
106 kflags = 0;
107 else 209 else
108 kflags = ppage->flags; 210 ppage = NULL;
109 211
110 uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) | 212 if (put_user(get_uflags(ppage), out)) {
111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
114 kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
115 kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
116 kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
117 kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
118 kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
119 kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
120 kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
121
122 if (put_user(uflags, out++)) {
123 ret = -EFAULT; 213 ret = -EFAULT;
124 break; 214 break;
125 } 215 }
126 216
217 pfn++;
218 out++;
127 count -= KPMSIZE; 219 count -= KPMSIZE;
128 } 220 }
129 221
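
As an aside (not part of the patch above), a minimal user-space sketch of how /proc/kpagecount and /proc/kpageflags are consumed: each file holds one 64-bit value per page frame (KPMSIZE bytes), indexed by pfn, so a single entry can be fetched with pread(). The pfn below is only an example and is assumed to be valid on the running system.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t flags;
		uint64_t pfn = 0x1000;		/* example pfn, assumed valid */
		int fd = open("/proc/kpageflags", O_RDONLY);

		if (fd < 0)
			return 1;
		/* each entry is 8 bytes, so the offset of a pfn is pfn * 8 */
		if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) ==
		    (ssize_t)sizeof(flags))
			printf("pfn %#llx: flags %#llx\n",
			       (unsigned long long)pfn, (unsigned long long)flags);
		close(fd);
		return 0;
	}
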
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index fc6c3025befd..7ba79a54948c 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -195,20 +195,20 @@ void proc_device_tree_add_node(struct device_node *np,
195 p = fixup_name(np, de, p); 195 p = fixup_name(np, de, p);
196 196
197 ent = proc_mkdir(p, de); 197 ent = proc_mkdir(p, de);
198 if (ent == 0) 198 if (ent == NULL)
199 break; 199 break;
200 proc_device_tree_add_node(child, ent); 200 proc_device_tree_add_node(child, ent);
201 } 201 }
202 of_node_put(child); 202 of_node_put(child);
203 203
204 for (pp = np->properties; pp != 0; pp = pp->next) { 204 for (pp = np->properties; pp != NULL; pp = pp->next) {
205 p = pp->name; 205 p = pp->name;
206 206
207 if (duplicate_name(de, p)) 207 if (duplicate_name(de, p))
208 p = fixup_name(np, de, p); 208 p = fixup_name(np, de, p);
209 209
210 ent = __proc_device_tree_add_prop(de, pp, p); 210 ent = __proc_device_tree_add_prop(de, pp, p);
211 if (ent == 0) 211 if (ent == NULL)
212 break; 212 break;
213 } 213 }
214} 214}
@@ -221,10 +221,10 @@ void __init proc_device_tree_init(void)
221 struct device_node *root; 221 struct device_node *root;
222 222
223 proc_device_tree = proc_mkdir("device-tree", NULL); 223 proc_device_tree = proc_mkdir("device-tree", NULL);
224 if (proc_device_tree == 0) 224 if (proc_device_tree == NULL)
225 return; 225 return;
226 root = of_find_node_by_path("/"); 226 root = of_find_node_by_path("/");
227 if (root == 0) { 227 if (root == NULL) {
228 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 228 printk(KERN_ERR "/proc/device-tree: can't find root\n");
229 return; 229 return;
230 } 230 }
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
new file mode 100644
index 000000000000..1807c2419f17
--- /dev/null
+++ b/fs/proc/softirqs.c
@@ -0,0 +1,44 @@
1#include <linux/init.h>
2#include <linux/kernel_stat.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6/*
7 * /proc/softirqs ... display the number of softirqs
8 */
9static int show_softirqs(struct seq_file *p, void *v)
10{
11 int i, j;
12
13 seq_printf(p, " ");
14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n");
17
18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%8s:", softirq_to_name[i]);
20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n");
23 }
24 return 0;
25}
26
27static int softirqs_open(struct inode *inode, struct file *file)
28{
29 return single_open(file, show_softirqs, NULL);
30}
31
32static const struct file_operations proc_softirqs_operations = {
33 .open = softirqs_open,
34 .read = seq_read,
35 .llseek = seq_lseek,
36 .release = single_release,
37};
38
39static int __init proc_softirqs_init(void)
40{
41 proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
42 return 0;
43}
44module_init(proc_softirqs_init);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81e4eb60972e..7cc726c6d70a 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -29,6 +29,8 @@ static int show_stat(struct seq_file *p, void *v)
29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
30 cputime64_t guest; 30 cputime64_t guest;
31 u64 sum = 0; 31 u64 sum = 0;
32 u64 sum_softirq = 0;
33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
32 struct timespec boottime; 34 struct timespec boottime;
33 unsigned int per_irq_sum; 35 unsigned int per_irq_sum;
34 36
@@ -53,6 +55,13 @@ static int show_stat(struct seq_file *p, void *v)
53 sum += kstat_irqs_cpu(j, i); 55 sum += kstat_irqs_cpu(j, i);
54 } 56 }
55 sum += arch_irq_stat_cpu(i); 57 sum += arch_irq_stat_cpu(i);
58
59 for (j = 0; j < NR_SOFTIRQS; j++) {
60 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
61
62 per_softirq_sums[j] += softirq_stat;
63 sum_softirq += softirq_stat;
64 }
56 } 65 }
57 sum += arch_irq_stat(); 66 sum += arch_irq_stat();
58 67
@@ -115,6 +124,12 @@ static int show_stat(struct seq_file *p, void *v)
115 nr_running(), 124 nr_running(),
116 nr_iowait()); 125 nr_iowait());
117 126
127 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
128
129 for (i = 0; i < NR_SOFTIRQS; i++)
130 seq_printf(p, " %u", per_softirq_sums[i]);
131 seq_printf(p, "\n");
132
118 return 0; 133 return 0;
119} 134}
120 135
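
Illustrative sketch (not part of the patch): the new "softirq" line in /proc/stat starts with the grand total, followed by the per-softirq sums in NR_SOFTIRQS order, so it can be picked up from user space like this:

	#include <stdio.h>

	int main(void)
	{
		char line[512];
		unsigned long long total;
		FILE *f = fopen("/proc/stat", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f)) {
			/* only the leading total is parsed here */
			if (sscanf(line, "softirq %llu", &total) == 1) {
				printf("total softirqs: %llu\n", total);
				break;
			}
		}
		fclose(f);
		return 0;
	}
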
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5edcc3f92ba7..0872afa58d39 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -166,12 +166,7 @@ static const struct file_operations proc_vmcore_operations = {
166 166
167static struct vmcore* __init get_new_element(void) 167static struct vmcore* __init get_new_element(void)
168{ 168{
169 struct vmcore *p; 169 return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
170
171 p = kmalloc(sizeof(*p), GFP_KERNEL);
172 if (p)
173 memset(p, 0, sizeof(*p));
174 return p;
175} 170}
176 171
177static u64 __init get_vmcore_size_elf64(char *elfptr) 172static u64 __init get_vmcore_size_elf64(char *elfptr)
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 3a6b193d8444..0ff7566c767c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -202,9 +202,12 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
202 return -EINVAL; 202 return -EINVAL;
203 opts->mode = option & S_IALLUGO; 203 opts->mode = option & S_IALLUGO;
204 break; 204 break;
205 default: 205 /*
206 printk(KERN_ERR "ramfs: bad mount option: %s\n", p); 206 * We might like to report bad mount options here;
207 return -EINVAL; 207 * but traditionally ramfs has ignored all mount options,
208 * and as it is used as a !CONFIG_SHMEM simple substitute
209 * for tmpfs, better continue to ignore other mount options.
210 */
208 } 211 }
209 } 212 }
210 213
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 4beb964a2a3e..128d3f7c8aa5 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1270,9 +1270,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1270 1270
1271 RFALSE(ih, "PAP-12210: ih must be 0"); 1271 RFALSE(ih, "PAP-12210: ih must be 0");
1272 1272
1273 if (is_direntry_le_ih 1273 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
1274 (aux_ih = 1274 if (is_direntry_le_ih(aux_ih)) {
1275 B_N_PITEM_HEAD(tbS0, item_pos))) {
1276 /* we append to directory item */ 1275 /* we append to directory item */
1277 1276
1278 int entry_count; 1277 int entry_count;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45db..a14d6cd9eeda 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
1131 REISERFS_I(inode)->i_trans_id = 0; 1131 REISERFS_I(inode)->i_trans_id = 0;
1132 REISERFS_I(inode)->i_jl = NULL; 1132 REISERFS_I(inode)->i_jl = NULL;
1133 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1133 mutex_init(&(REISERFS_I(inode)->i_mmap));
1134 reiserfs_init_acl_access(inode);
1135 reiserfs_init_acl_default(inode);
1136 reiserfs_init_xattr_rwsem(inode); 1134 reiserfs_init_xattr_rwsem(inode);
1137 1135
1138 if (stat_data_v1(ih)) { 1136 if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1834 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1832 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1835 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1833 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1836 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1834 mutex_init(&(REISERFS_I(inode)->i_mmap));
1837 reiserfs_init_acl_access(inode);
1838 reiserfs_init_acl_default(inode);
1839 reiserfs_init_xattr_rwsem(inode); 1835 reiserfs_init_xattr_rwsem(inode);
1840 1836
1841 /* key to search for correct place for new stat data */ 1837 /* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 381750a155f6..03d85cbf90bf 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -390,7 +390,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
390 390
391 if (last_first == FIRST_TO_LAST) { 391 if (last_first == FIRST_TO_LAST) {
392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
393 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 393 ih = B_N_PITEM_HEAD(src, item_num);
394 if (is_direntry_le_ih(ih))
394 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 395 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
395 item_num, 0, cpy_bytes); 396 item_num, 0, cpy_bytes);
396 else { 397 else {
@@ -418,7 +419,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
418 } 419 }
419 } else { 420 } else {
420 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 421 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
421 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 422 ih = B_N_PITEM_HEAD(src, item_num);
423 if (is_direntry_le_ih(ih))
422 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 424 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
423 item_num, 425 item_num,
424 I_ENTRY_COUNT(ih) - cpy_bytes, 426 I_ENTRY_COUNT(ih) - cpy_bytes,
@@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
774 leaf_delete_items_entirely(cur_bi, first + 1, 776 leaf_delete_items_entirely(cur_bi, first + 1,
775 del_num - 1); 777 del_num - 1);
776 778
777 if (is_direntry_le_ih 779 ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
778 (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) 780 if (is_direntry_le_ih(ih))
779 /* the last item is directory */ 781 /* the last item is directory */
780 /* len = numbers of directory entries in this item */ 782 /* len = numbers of directory entries in this item */
781 len = ih_entry_count(ih); 783 len = ih_entry_count(ih);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 238e9d9b31e0..18b315d3d104 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { 82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
83 printk 83 printk
84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
85 unlock_super(s);
86 return -ENOMEM; 85 return -ENOMEM;
87 } 86 }
88 /* the new journal bitmaps are zero filled, now we copy in the bitmap 87 /* the new journal bitmaps are zero filled, now we copy in the bitmap
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc22..d3aeb061612b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,10 +529,6 @@ static void init_once(void *foo)
529 529
530 INIT_LIST_HEAD(&ei->i_prealloc_list); 530 INIT_LIST_HEAD(&ei->i_prealloc_list);
531 inode_init_once(&ei->vfs_inode); 531 inode_init_once(&ei->vfs_inode);
532#ifdef CONFIG_REISERFS_FS_POSIX_ACL
533 ei->i_acl_access = NULL;
534 ei->i_acl_default = NULL;
535#endif
536} 532}
537 533
538static int init_inodecache(void) 534static int init_inodecache(void)
@@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
580 reiserfs_write_unlock(inode->i_sb); 576 reiserfs_write_unlock(inode->i_sb);
581} 577}
582 578
583#ifdef CONFIG_REISERFS_FS_POSIX_ACL
584static void reiserfs_clear_inode(struct inode *inode)
585{
586 struct posix_acl *acl;
587
588 acl = REISERFS_I(inode)->i_acl_access;
589 if (acl && !IS_ERR(acl))
590 posix_acl_release(acl);
591 REISERFS_I(inode)->i_acl_access = NULL;
592
593 acl = REISERFS_I(inode)->i_acl_default;
594 if (acl && !IS_ERR(acl))
595 posix_acl_release(acl);
596 REISERFS_I(inode)->i_acl_default = NULL;
597}
598#else
599#define reiserfs_clear_inode NULL
600#endif
601
602#ifdef CONFIG_QUOTA 579#ifdef CONFIG_QUOTA
603static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 580static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
604 size_t, loff_t); 581 size_t, loff_t);
@@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = {
612 .write_inode = reiserfs_write_inode, 589 .write_inode = reiserfs_write_inode,
613 .dirty_inode = reiserfs_dirty_inode, 590 .dirty_inode = reiserfs_dirty_inode,
614 .delete_inode = reiserfs_delete_inode, 591 .delete_inode = reiserfs_delete_inode,
615 .clear_inode = reiserfs_clear_inode,
616 .put_super = reiserfs_put_super, 592 .put_super = reiserfs_put_super,
617 .write_super = reiserfs_write_super, 593 .write_super = reiserfs_write_super,
618 .sync_fs = reiserfs_sync_fs, 594 .sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c303c426fe2b..35d6e672a279 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
188 return ERR_PTR(-EINVAL); 188 return ERR_PTR(-EINVAL);
189} 189}
190 190
191static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
192 struct posix_acl *acl)
193{
194 spin_lock(&inode->i_lock);
195 if (*i_acl != ERR_PTR(-ENODATA))
196 posix_acl_release(*i_acl);
197 *i_acl = posix_acl_dup(acl);
198 spin_unlock(&inode->i_lock);
199}
200
201static inline struct posix_acl *iget_acl(struct inode *inode,
202 struct posix_acl **i_acl)
203{
204 struct posix_acl *acl = ERR_PTR(-ENODATA);
205
206 spin_lock(&inode->i_lock);
207 if (*i_acl != ERR_PTR(-ENODATA))
208 acl = posix_acl_dup(*i_acl);
209 spin_unlock(&inode->i_lock);
210
211 return acl;
212}
213
214/* 191/*
215 * Inode operation get_posix_acl(). 192 * Inode operation get_posix_acl().
216 * 193 *
@@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
220struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) 197struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
221{ 198{
222 char *name, *value; 199 char *name, *value;
223 struct posix_acl *acl, **p_acl; 200 struct posix_acl *acl;
224 int size; 201 int size;
225 int retval; 202 int retval;
226 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 203
204 acl = get_cached_acl(inode, type);
205 if (acl != ACL_NOT_CACHED)
206 return acl;
227 207
228 switch (type) { 208 switch (type) {
229 case ACL_TYPE_ACCESS: 209 case ACL_TYPE_ACCESS:
230 name = POSIX_ACL_XATTR_ACCESS; 210 name = POSIX_ACL_XATTR_ACCESS;
231 p_acl = &reiserfs_i->i_acl_access;
232 break; 211 break;
233 case ACL_TYPE_DEFAULT: 212 case ACL_TYPE_DEFAULT:
234 name = POSIX_ACL_XATTR_DEFAULT; 213 name = POSIX_ACL_XATTR_DEFAULT;
235 p_acl = &reiserfs_i->i_acl_default;
236 break; 214 break;
237 default: 215 default:
238 return ERR_PTR(-EINVAL); 216 BUG();
239 } 217 }
240 218
241 acl = iget_acl(inode, p_acl);
242 if (acl && !IS_ERR(acl))
243 return acl;
244 else if (PTR_ERR(acl) == -ENODATA)
245 return NULL;
246
247 size = reiserfs_xattr_get(inode, name, NULL, 0); 219 size = reiserfs_xattr_get(inode, name, NULL, 0);
248 if (size < 0) { 220 if (size < 0) {
249 if (size == -ENODATA || size == -ENOSYS) { 221 if (size == -ENODATA || size == -ENOSYS) {
250 *p_acl = ERR_PTR(-ENODATA); 222 set_cached_acl(inode, type, NULL);
251 return NULL; 223 return NULL;
252 } 224 }
253 return ERR_PTR(size); 225 return ERR_PTR(size);
@@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
262 /* This shouldn't actually happen as it should have 234 /* This shouldn't actually happen as it should have
263 been caught above.. but just in case */ 235 been caught above.. but just in case */
264 acl = NULL; 236 acl = NULL;
265 *p_acl = ERR_PTR(-ENODATA);
266 } else if (retval < 0) { 237 } else if (retval < 0) {
267 acl = ERR_PTR(retval); 238 acl = ERR_PTR(retval);
268 } else { 239 } else {
269 acl = posix_acl_from_disk(value, retval); 240 acl = posix_acl_from_disk(value, retval);
270 if (!IS_ERR(acl))
271 iset_acl(inode, p_acl, acl);
272 } 241 }
242 if (!IS_ERR(acl))
243 set_cached_acl(inode, type, acl);
273 244
274 kfree(value); 245 kfree(value);
275 return acl; 246 return acl;
@@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
287{ 258{
288 char *name; 259 char *name;
289 void *value = NULL; 260 void *value = NULL;
290 struct posix_acl **p_acl;
291 size_t size = 0; 261 size_t size = 0;
292 int error; 262 int error;
293 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
294 263
295 if (S_ISLNK(inode->i_mode)) 264 if (S_ISLNK(inode->i_mode))
296 return -EOPNOTSUPP; 265 return -EOPNOTSUPP;
@@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
298 switch (type) { 267 switch (type) {
299 case ACL_TYPE_ACCESS: 268 case ACL_TYPE_ACCESS:
300 name = POSIX_ACL_XATTR_ACCESS; 269 name = POSIX_ACL_XATTR_ACCESS;
301 p_acl = &reiserfs_i->i_acl_access;
302 if (acl) { 270 if (acl) {
303 mode_t mode = inode->i_mode; 271 mode_t mode = inode->i_mode;
304 error = posix_acl_equiv_mode(acl, &mode); 272 error = posix_acl_equiv_mode(acl, &mode);
@@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
313 break; 281 break;
314 case ACL_TYPE_DEFAULT: 282 case ACL_TYPE_DEFAULT:
315 name = POSIX_ACL_XATTR_DEFAULT; 283 name = POSIX_ACL_XATTR_DEFAULT;
316 p_acl = &reiserfs_i->i_acl_default;
317 if (!S_ISDIR(inode->i_mode)) 284 if (!S_ISDIR(inode->i_mode))
318 return acl ? -EACCES : 0; 285 return acl ? -EACCES : 0;
319 break; 286 break;
@@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
346 kfree(value); 313 kfree(value);
347 314
348 if (!error) 315 if (!error)
349 iset_acl(inode, p_acl, acl); 316 set_cached_acl(inode, type, acl);
350 317
351 return error; 318 return error;
352} 319}
@@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
379 } 346 }
380 347
381 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); 348 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
382 if (IS_ERR(acl)) { 349 if (IS_ERR(acl))
383 if (PTR_ERR(acl) == -ENODATA)
384 goto apply_umask;
385 return PTR_ERR(acl); 350 return PTR_ERR(acl);
386 }
387 351
388 if (acl) { 352 if (acl) {
389 struct posix_acl *acl_copy; 353 struct posix_acl *acl_copy;
diff --git a/fs/select.c b/fs/select.c
index 0fe0e1469df3..d870237e42c7 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -168,7 +168,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
168 return table->entry++; 168 return table->entry++;
169} 169}
170 170
171static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) 171static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
172{ 172{
173 struct poll_wqueues *pwq = wait->private; 173 struct poll_wqueues *pwq = wait->private;
174 DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); 174 DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -194,6 +194,16 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
194 return default_wake_function(&dummy_wait, mode, sync, key); 194 return default_wake_function(&dummy_wait, mode, sync, key);
195} 195}
196 196
197static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
198{
199 struct poll_table_entry *entry;
200
201 entry = container_of(wait, struct poll_table_entry, wait);
202 if (key && !((unsigned long)key & entry->key))
203 return 0;
204 return __pollwake(wait, mode, sync, key);
205}
206
197/* Add a new entry */ 207/* Add a new entry */
198static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 208static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
199 poll_table *p) 209 poll_table *p)
@@ -205,6 +215,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
205 get_file(filp); 215 get_file(filp);
206 entry->filp = filp; 216 entry->filp = filp;
207 entry->wait_address = wait_address; 217 entry->wait_address = wait_address;
218 entry->key = p->key;
208 init_waitqueue_func_entry(&entry->wait, pollwake); 219 init_waitqueue_func_entry(&entry->wait, pollwake);
209 entry->wait.private = pwq; 220 entry->wait.private = pwq;
210 add_wait_queue(wait_address, &entry->wait); 221 add_wait_queue(wait_address, &entry->wait);
@@ -362,6 +373,18 @@ get_max:
362#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 373#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
363#define POLLEX_SET (POLLPRI) 374#define POLLEX_SET (POLLPRI)
364 375
376static inline void wait_key_set(poll_table *wait, unsigned long in,
377 unsigned long out, unsigned long bit)
378{
379 if (wait) {
380 wait->key = POLLEX_SET;
381 if (in & bit)
382 wait->key |= POLLIN_SET;
383 if (out & bit)
384 wait->key |= POLLOUT_SET;
385 }
386}
387
365int do_select(int n, fd_set_bits *fds, struct timespec *end_time) 388int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
366{ 389{
367 ktime_t expire, *to = NULL; 390 ktime_t expire, *to = NULL;
@@ -418,20 +441,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
418 if (file) { 441 if (file) {
419 f_op = file->f_op; 442 f_op = file->f_op;
420 mask = DEFAULT_POLLMASK; 443 mask = DEFAULT_POLLMASK;
421 if (f_op && f_op->poll) 444 if (f_op && f_op->poll) {
422 mask = (*f_op->poll)(file, retval ? NULL : wait); 445 wait_key_set(wait, in, out, bit);
446 mask = (*f_op->poll)(file, wait);
447 }
423 fput_light(file, fput_needed); 448 fput_light(file, fput_needed);
424 if ((mask & POLLIN_SET) && (in & bit)) { 449 if ((mask & POLLIN_SET) && (in & bit)) {
425 res_in |= bit; 450 res_in |= bit;
426 retval++; 451 retval++;
452 wait = NULL;
427 } 453 }
428 if ((mask & POLLOUT_SET) && (out & bit)) { 454 if ((mask & POLLOUT_SET) && (out & bit)) {
429 res_out |= bit; 455 res_out |= bit;
430 retval++; 456 retval++;
457 wait = NULL;
431 } 458 }
432 if ((mask & POLLEX_SET) && (ex & bit)) { 459 if ((mask & POLLEX_SET) && (ex & bit)) {
433 res_ex |= bit; 460 res_ex |= bit;
434 retval++; 461 retval++;
462 wait = NULL;
435 } 463 }
436 } 464 }
437 } 465 }
@@ -685,8 +713,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
685 mask = POLLNVAL; 713 mask = POLLNVAL;
686 if (file != NULL) { 714 if (file != NULL) {
687 mask = DEFAULT_POLLMASK; 715 mask = DEFAULT_POLLMASK;
688 if (file->f_op && file->f_op->poll) 716 if (file->f_op && file->f_op->poll) {
717 if (pwait)
718 pwait->key = pollfd->events |
719 POLLERR | POLLHUP;
689 mask = file->f_op->poll(file, pwait); 720 mask = file->f_op->poll(file, pwait);
721 }
690 /* Mask out unneeded events. */ 722 /* Mask out unneeded events. */
691 mask &= pollfd->events | POLLERR | POLLHUP; 723 mask &= pollfd->events | POLLERR | POLLHUP;
692 fput_light(file, fput_needed); 724 fput_light(file, fput_needed);
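
The select/poll changes above introduce keyed wakeups: each poll_table_entry records the event mask the waiter asked for, and pollwake() ignores wakeups whose key has no overlap with that mask. A stripped-down sketch of the idea, with illustrative names only (not the kernel code):

	struct waiter {
		unsigned long key;	/* events this waiter asked for */
	};

	static int should_wake(const struct waiter *w, unsigned long wake_key)
	{
		/* a zero key means "wake unconditionally", as in pollwake() above */
		return !wake_key || (wake_key & w->key);
	}
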
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 7f40f30c55c5..6c959275f2d0 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -640,6 +640,26 @@ int seq_puts(struct seq_file *m, const char *s)
640} 640}
641EXPORT_SYMBOL(seq_puts); 641EXPORT_SYMBOL(seq_puts);
642 642
643/**
644 * seq_write - write arbitrary data to buffer
645 * @seq: seq_file identifying the buffer to which data should be written
646 * @data: data address
647 * @len: number of bytes
648 *
649 * Return 0 on success, non-zero otherwise.
650 */
651int seq_write(struct seq_file *seq, const void *data, size_t len)
652{
653 if (seq->count + len < seq->size) {
654 memcpy(seq->buf + seq->count, data, len);
655 seq->count += len;
656 return 0;
657 }
658 seq->count = seq->size;
659 return -1;
660}
661EXPORT_SYMBOL(seq_write);
662
643struct list_head *seq_list_start(struct list_head *head, loff_t pos) 663struct list_head *seq_list_start(struct list_head *head, loff_t pos)
644{ 664{
645 struct list_head *lh; 665 struct list_head *lh;
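
A hypothetical caller of the new seq_write() helper, sketched for illustration (blob_show and its data are made up): it copies raw bytes into the seq_file buffer instead of going through seq_printf() formatting.

	#include <linux/seq_file.h>

	static int blob_show(struct seq_file *m, void *v)
	{
		static const unsigned char blob[] = { 0xde, 0xad, 0xbe, 0xef };

		/* returns 0 on success, -1 if the seq_file buffer is too small */
		return seq_write(m, blob, sizeof(blob));
	}
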
diff --git a/fs/super.c b/fs/super.c
index 83b47416d006..2761d3e22ed9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -545,24 +545,18 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
545 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { 545 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
546 if (force) 546 if (force)
547 mark_files_ro(sb); 547 mark_files_ro(sb);
548 else if (!fs_may_remount_ro(sb)) { 548 else if (!fs_may_remount_ro(sb))
549 unlock_kernel();
550 return -EBUSY; 549 return -EBUSY;
551 }
552 retval = vfs_dq_off(sb, 1); 550 retval = vfs_dq_off(sb, 1);
553 if (retval < 0 && retval != -ENOSYS) { 551 if (retval < 0 && retval != -ENOSYS)
554 unlock_kernel();
555 return -EBUSY; 552 return -EBUSY;
556 }
557 } 553 }
558 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); 554 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
559 555
560 if (sb->s_op->remount_fs) { 556 if (sb->s_op->remount_fs) {
561 retval = sb->s_op->remount_fs(sb, &flags, data); 557 retval = sb->s_op->remount_fs(sb, &flags, data);
562 if (retval) { 558 if (retval)
563 unlock_kernel();
564 return retval; 559 return retval;
565 }
566 } 560 }
567 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 561 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
568 if (remount_rw) 562 if (remount_rw)
@@ -614,6 +608,7 @@ void emergency_remount(void)
614 608
615static DEFINE_IDA(unnamed_dev_ida); 609static DEFINE_IDA(unnamed_dev_ida);
616static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 610static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
611static int unnamed_dev_start = 0; /* don't bother trying below it */
617 612
618int set_anon_super(struct super_block *s, void *data) 613int set_anon_super(struct super_block *s, void *data)
619{ 614{
@@ -624,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data)
624 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 619 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
625 return -ENOMEM; 620 return -ENOMEM;
626 spin_lock(&unnamed_dev_lock); 621 spin_lock(&unnamed_dev_lock);
627 error = ida_get_new(&unnamed_dev_ida, &dev); 622 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
623 if (!error)
624 unnamed_dev_start = dev + 1;
628 spin_unlock(&unnamed_dev_lock); 625 spin_unlock(&unnamed_dev_lock);
629 if (error == -EAGAIN) 626 if (error == -EAGAIN)
630 /* We raced and lost with another CPU. */ 627 /* We raced and lost with another CPU. */
@@ -635,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
635 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 632 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
636 spin_lock(&unnamed_dev_lock); 633 spin_lock(&unnamed_dev_lock);
637 ida_remove(&unnamed_dev_ida, dev); 634 ida_remove(&unnamed_dev_ida, dev);
635 if (unnamed_dev_start > dev)
636 unnamed_dev_start = dev;
638 spin_unlock(&unnamed_dev_lock); 637 spin_unlock(&unnamed_dev_lock);
639 return -EMFILE; 638 return -EMFILE;
640 } 639 }
@@ -651,6 +650,8 @@ void kill_anon_super(struct super_block *sb)
651 generic_shutdown_super(sb); 650 generic_shutdown_super(sb);
652 spin_lock(&unnamed_dev_lock); 651 spin_lock(&unnamed_dev_lock);
653 ida_remove(&unnamed_dev_ida, slot); 652 ida_remove(&unnamed_dev_ida, slot);
653 if (slot < unnamed_dev_start)
654 unnamed_dev_start = slot;
654 spin_unlock(&unnamed_dev_lock); 655 spin_unlock(&unnamed_dev_lock);
655} 656}
656 657
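
The unnamed_dev_start changes keep a "lowest possibly free" hint so that ida_get_new_above() does not rescan from zero on every set_anon_super() call, and the hint is lowered again when an id is released. A generic sketch of the same pattern (not the kernel code, names are illustrative):

	static int next_hint;		/* lowest id that might be free */

	static int alloc_id(unsigned char *used, int nids)
	{
		int i;

		for (i = next_hint; i < nids; i++) {
			if (!used[i]) {
				used[i] = 1;
				next_hint = i + 1;
				return i;
			}
		}
		return -1;
	}

	static void free_id(unsigned char *used, int id)
	{
		used[id] = 0;
		if (id < next_hint)
			next_hint = id;	/* allow reuse of the freed slot */
	}
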
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index a3ba217fbe74..1d897ad808e0 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -192,8 +192,11 @@ static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
192{ 192{
193 int error = -ENOMEM; 193 int error = -ENOMEM;
194 unsigned long page = get_zeroed_page(GFP_KERNEL); 194 unsigned long page = get_zeroed_page(GFP_KERNEL);
195 if (page) 195 if (page) {
196 error = sysfs_getlink(dentry, (char *) page); 196 error = sysfs_getlink(dentry, (char *) page);
197 if (error < 0)
198 free_page((unsigned long)page);
199 }
197 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); 200 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
198 return NULL; 201 return NULL;
199} 202}
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index c7798079e644..4e50286a4cc3 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -15,13 +15,13 @@
15 15
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/smp_lock.h>
19#include <linux/swap.h> 18#include <linux/swap.h>
20#include "sysv.h" 19#include "sysv.h"
21 20
22static int sysv_readdir(struct file *, void *, filldir_t); 21static int sysv_readdir(struct file *, void *, filldir_t);
23 22
24const struct file_operations sysv_dir_operations = { 23const struct file_operations sysv_dir_operations = {
24 .llseek = generic_file_llseek,
25 .read = generic_read_dir, 25 .read = generic_read_dir,
26 .readdir = sysv_readdir, 26 .readdir = sysv_readdir,
27 .fsync = simple_fsync, 27 .fsync = simple_fsync,
@@ -74,8 +74,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
74 unsigned long n = pos >> PAGE_CACHE_SHIFT; 74 unsigned long n = pos >> PAGE_CACHE_SHIFT;
75 unsigned long npages = dir_pages(inode); 75 unsigned long npages = dir_pages(inode);
76 76
77 lock_kernel();
78
79 pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); 77 pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1);
80 if (pos >= inode->i_size) 78 if (pos >= inode->i_size)
81 goto done; 79 goto done;
@@ -113,7 +111,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
113 111
114done: 112done:
115 filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; 113 filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
116 unlock_kernel();
117 return 0; 114 return 0;
118} 115}
119 116
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 479923456a54..9824743832a7 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -21,7 +21,6 @@
21 * the superblock. 21 * the superblock.
22 */ 22 */
23 23
24#include <linux/smp_lock.h>
25#include <linux/highuid.h> 24#include <linux/highuid.h>
26#include <linux/slab.h> 25#include <linux/slab.h>
27#include <linux/init.h> 26#include <linux/init.h>
@@ -37,7 +36,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
37 unsigned long time = get_seconds(), old_time; 36 unsigned long time = get_seconds(), old_time;
38 37
39 lock_super(sb); 38 lock_super(sb);
40 lock_kernel();
41 39
42 /* 40 /*
43 * If we are going to write out the super block, 41 * If we are going to write out the super block,
@@ -52,7 +50,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
52 mark_buffer_dirty(sbi->s_bh2); 50 mark_buffer_dirty(sbi->s_bh2);
53 } 51 }
54 52
55 unlock_kernel();
56 unlock_super(sb); 53 unlock_super(sb);
57 54
58 return 0; 55 return 0;
@@ -82,8 +79,6 @@ static void sysv_put_super(struct super_block *sb)
82{ 79{
83 struct sysv_sb_info *sbi = SYSV_SB(sb); 80 struct sysv_sb_info *sbi = SYSV_SB(sb);
84 81
85 lock_kernel();
86
87 if (sb->s_dirt) 82 if (sb->s_dirt)
88 sysv_write_super(sb); 83 sysv_write_super(sb);
89 84
@@ -99,8 +94,6 @@ static void sysv_put_super(struct super_block *sb)
99 brelse(sbi->s_bh2); 94 brelse(sbi->s_bh2);
100 95
101 kfree(sbi); 96 kfree(sbi);
102
103 unlock_kernel();
104} 97}
105 98
106static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) 99static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -275,7 +268,6 @@ int sysv_write_inode(struct inode *inode, int wait)
275 return -EIO; 268 return -EIO;
276 } 269 }
277 270
278 lock_kernel();
279 raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); 271 raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
280 raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); 272 raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
281 raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); 273 raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
@@ -291,7 +283,6 @@ int sysv_write_inode(struct inode *inode, int wait)
291 for (block = 0; block < 10+1+1+1; block++) 283 for (block = 0; block < 10+1+1+1; block++)
292 write3byte(sbi, (u8 *)&si->i_data[block], 284 write3byte(sbi, (u8 *)&si->i_data[block],
293 &raw_inode->i_data[3*block]); 285 &raw_inode->i_data[3*block]);
294 unlock_kernel();
295 mark_buffer_dirty(bh); 286 mark_buffer_dirty(bh);
296 if (wait) { 287 if (wait) {
297 sync_dirty_buffer(bh); 288 sync_dirty_buffer(bh);
@@ -315,9 +306,7 @@ static void sysv_delete_inode(struct inode *inode)
315 truncate_inode_pages(&inode->i_data, 0); 306 truncate_inode_pages(&inode->i_data, 0);
316 inode->i_size = 0; 307 inode->i_size = 0;
317 sysv_truncate(inode); 308 sysv_truncate(inode);
318 lock_kernel();
319 sysv_free_inode(inode); 309 sysv_free_inode(inode);
320 unlock_kernel();
321} 310}
322 311
323static struct kmem_cache *sysv_inode_cachep; 312static struct kmem_cache *sysv_inode_cachep;
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index af1914462f02..eaf6d891d46f 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs_info *c, int nr_to_write)
91 return nr_written; 91 return nr_written;
92} 92}
93 93
94
95/** 94/**
96 * run_gc - run garbage collector. 95 * run_gc - run garbage collector.
97 * @c: UBIFS file-system description object 96 * @c: UBIFS file-system description object
@@ -628,7 +627,7 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
628 * 627 *
629 * This function releases budget corresponding to a dirty inode. It is usually 628 * This function releases budget corresponding to a dirty inode. It is usually
630 * called when after the inode has been written to the media and marked as 629 * called when after the inode has been written to the media and marked as
631 * clean. 630 * clean. It also causes the "no space" flags to be cleared.
632 */ 631 */
633void ubifs_release_dirty_inode_budget(struct ubifs_info *c, 632void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
634 struct ubifs_inode *ui) 633 struct ubifs_inode *ui)
@@ -636,6 +635,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
636 struct ubifs_budget_req req; 635 struct ubifs_budget_req req;
637 636
638 memset(&req, 0, sizeof(struct ubifs_budget_req)); 637 memset(&req, 0, sizeof(struct ubifs_budget_req));
638 /* The "no space" flags will be cleared because dd_growth is > 0 */
639 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); 639 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
640 ubifs_release_budget(c, &req); 640 ubifs_release_budget(c, &req);
641} 641}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f55d523c52bb..552fb0111fff 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -528,6 +528,25 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
528 inode->i_nlink, dir->i_ino); 528 inode->i_nlink, dir->i_ino);
529 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 529 ubifs_assert(mutex_is_locked(&dir->i_mutex));
530 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 530 ubifs_assert(mutex_is_locked(&inode->i_mutex));
531
532 /*
533 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
534 * otherwise has the potential to corrupt the orphan inode list.
535 *
536 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
537 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
538 * lock 'dirA->i_mutex', so this is possible. Both of the functions
539 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
540 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
541 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
542 * to the list of orphans. After this, 'vfs_link()' will link
543 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
544 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
545 * to the list of orphans.
546 */
547 if (inode->i_nlink == 0)
548 return -ENOENT;
549
531 err = dbg_check_synced_i_size(inode); 550 err = dbg_check_synced_i_size(inode);
532 if (err) 551 if (err)
533 return err; 552 return err;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index e8e632a1dcdf..bc5857199ec2 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -293,13 +293,14 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
293 * 293 *
294 * This function is called when the write-buffer timer expires. 294 * This function is called when the write-buffer timer expires.
295 */ 295 */
296static void wbuf_timer_callback_nolock(unsigned long data) 296static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
297{ 297{
298 struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; 298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
299 299
300 wbuf->need_sync = 1; 300 wbuf->need_sync = 1;
301 wbuf->c->need_wbuf_sync = 1; 301 wbuf->c->need_wbuf_sync = 1;
302 ubifs_wake_up_bgt(wbuf->c); 302 ubifs_wake_up_bgt(wbuf->c);
303 return HRTIMER_NORESTART;
303} 304}
304 305
305/** 306/**
@@ -308,13 +309,12 @@ static void wbuf_timer_callback_nolock(unsigned long data)
308 */ 309 */
309static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) 310static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
310{ 311{
311 ubifs_assert(!timer_pending(&wbuf->timer)); 312 ubifs_assert(!hrtimer_active(&wbuf->timer));
312 313
313 if (!wbuf->timeout) 314 if (!ktime_to_ns(wbuf->softlimit))
314 return; 315 return;
315 316 hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
316 wbuf->timer.expires = jiffies + wbuf->timeout; 317 HRTIMER_MODE_REL);
317 add_timer(&wbuf->timer);
318} 318}
319 319
320/** 320/**
@@ -329,7 +329,7 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
329 * should be canceled. 329 * should be canceled.
330 */ 330 */
331 wbuf->need_sync = 0; 331 wbuf->need_sync = 0;
332 del_timer(&wbuf->timer); 332 hrtimer_cancel(&wbuf->timer);
333} 333}
334 334
335/** 335/**
@@ -825,6 +825,7 @@ out:
825int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 825int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
826{ 826{
827 size_t size; 827 size_t size;
828 ktime_t hardlimit;
828 829
829 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 830 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
830 if (!wbuf->buf) 831 if (!wbuf->buf)
@@ -845,14 +846,21 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
845 wbuf->sync_callback = NULL; 846 wbuf->sync_callback = NULL;
846 mutex_init(&wbuf->io_mutex); 847 mutex_init(&wbuf->io_mutex);
847 spin_lock_init(&wbuf->lock); 848 spin_lock_init(&wbuf->lock);
848
849 wbuf->c = c; 849 wbuf->c = c;
850 init_timer(&wbuf->timer);
851 wbuf->timer.function = wbuf_timer_callback_nolock;
852 wbuf->timer.data = (unsigned long)wbuf;
853 wbuf->timeout = DEFAULT_WBUF_TIMEOUT;
854 wbuf->next_ino = 0; 850 wbuf->next_ino = 0;
855 851
852 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
853 wbuf->timer.function = wbuf_timer_callback_nolock;
854 /*
855 * Make write-buffer soft limit to be 20% of the hard limit. The
856 * write-buffer timer is allowed to expire any time between the soft
857 * and hard limits.
858 */
859 hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
860 wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10;
861 wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
862 hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit,
863 wbuf->delta);
856 return 0; 864 return 0;
857} 865}
858 866
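
Worked out with the values above: DEFAULT_WBUF_TIMEOUT_SECS is 5, so delta = 5 * NSEC_PER_SEC * 2 / 10 = 1 second and softlimit = 5 s - 1 s = 4 s; the write-buffer hrtimer may therefore fire anywhere in the 4 to 5 second window.
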
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 10662975d2ef..805605250f12 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -343,33 +343,15 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
343 * 343 *
344 * This function returns %1 if @offs was in the last write to the LEB whose data 344 * This function returns %1 if @offs was in the last write to the LEB whose data
345 * is in @buf, otherwise %0 is returned. The determination is made by checking 345 * is in @buf, otherwise %0 is returned. The determination is made by checking
346 * for subsequent empty space starting from the next min_io_size boundary (or a 346 * for subsequent empty space starting from the next @c->min_io_size boundary.
347 * bit less than the common header size if min_io_size is one).
348 */ 347 */
349static int is_last_write(const struct ubifs_info *c, void *buf, int offs) 348static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
350{ 349{
351 int empty_offs; 350 int empty_offs, check_len;
352 int check_len;
353 uint8_t *p; 351 uint8_t *p;
354 352
355 if (c->min_io_size == 1) {
356 check_len = c->leb_size - offs;
357 p = buf + check_len;
358 for (; check_len > 0; check_len--)
359 if (*--p != 0xff)
360 break;
361 /*
362 * 'check_len' is the size of the corruption which cannot be
363 * more than the size of 1 node if it was caused by an unclean
364 * unmount.
365 */
366 if (check_len > UBIFS_MAX_NODE_SZ)
367 return 0;
368 return 1;
369 }
370
371 /* 353 /*
372 * Round up to the next c->min_io_size boundary i.e. 'offs' is in the 354 * Round up to the next @c->min_io_size boundary i.e. @offs is in the
373 * last wbuf written. After that should be empty space. 355 * last wbuf written. After that should be empty space.
374 */ 356 */
375 empty_offs = ALIGN(offs + 1, c->min_io_size); 357 empty_offs = ALIGN(offs + 1, c->min_io_size);
@@ -392,7 +374,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
392 * 374 *
393 * This function pads up to the next min_io_size boundary (if there is one) and 375 * This function pads up to the next min_io_size boundary (if there is one) and
394 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next 376 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
395 * min_io_size boundary (if there is one). 377 * @c->min_io_size boundary.
396 */ 378 */
397static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, 379static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
398 int *offs, int *len) 380 int *offs, int *len)
@@ -402,11 +384,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
402 lnum = lnum; 384 lnum = lnum;
403 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); 385 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
404 386
405 if (c->min_io_size == 1) {
406 memset(*buf, 0xff, c->leb_size - *offs);
407 return;
408 }
409
410 ubifs_assert(!(*offs & 7)); 387 ubifs_assert(!(*offs & 7));
411 empty_offs = ALIGN(*offs, c->min_io_size); 388 empty_offs = ALIGN(*offs, c->min_io_size);
412 pad_len = empty_offs - *offs; 389 pad_len = empty_offs - *offs;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3589eab02a2f..79fad43f3c57 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -361,6 +361,11 @@ static void ubifs_delete_inode(struct inode *inode)
361out: 361out:
362 if (ui->dirty) 362 if (ui->dirty)
363 ubifs_release_dirty_inode_budget(c, ui); 363 ubifs_release_dirty_inode_budget(c, ui);
364 else {
365 /* We've deleted something - clean the "no space" flags */
366 c->nospace = c->nospace_rp = 0;
367 smp_wmb();
368 }
364 clear_inode(inode); 369 clear_inode(inode);
365} 370}
366 371
@@ -792,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c)
792 * does not need to be synchronized by timer. 797 * does not need to be synchronized by timer.
793 */ 798 */
794 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 799 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
795 c->jheads[GCHD].wbuf.timeout = 0; 800 c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0);
796 801
797 return 0; 802 return 0;
798} 803}
@@ -933,6 +938,27 @@ static const match_table_t tokens = {
933}; 938};
934 939
935/** 940/**
941 * parse_standard_option - parse a standard mount option.
942 * @option: the option to parse
943 *
944 * Normally, standard mount options like "sync" are passed to file-systems as
945 * flags. However, when a "rootflags=" kernel boot parameter is used, they may
946 * be present in the options string. This function tries to deal with this
947 * situation and parse standard options. Returns 0 if the option was not
948 * recognized, and the corresponding integer flag if it was.
949 *
950 * UBIFS is only interested in the "sync" option, so do not check for anything
951 * else.
952 */
953static int parse_standard_option(const char *option)
954{
955 ubifs_msg("parse %s", option);
956 if (!strcmp(option, "sync"))
957 return MS_SYNCHRONOUS;
958 return 0;
959}
960
961/**
936 * ubifs_parse_options - parse mount parameters. 962 * ubifs_parse_options - parse mount parameters.
937 * @c: UBIFS file-system description object 963 * @c: UBIFS file-system description object
938 * @options: parameters to parse 964 * @options: parameters to parse
@@ -1008,9 +1034,19 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
1008 break; 1034 break;
1009 } 1035 }
1010 default: 1036 default:
1011 ubifs_err("unrecognized mount option \"%s\" " 1037 {
1012 "or missing value", p); 1038 unsigned long flag;
1013 return -EINVAL; 1039 struct super_block *sb = c->vfs_sb;
1040
1041 flag = parse_standard_option(p);
1042 if (!flag) {
1043 ubifs_err("unrecognized mount option \"%s\" "
1044 "or missing value", p);
1045 return -EINVAL;
1046 }
1047 sb->s_flags |= flag;
1048 break;
1049 }
1014 } 1050 }
1015 } 1051 }
1016 1052
@@ -1180,6 +1216,7 @@ static int mount_ubifs(struct ubifs_info *c)
1180 if (!ubifs_compr_present(c->default_compr)) { 1216 if (!ubifs_compr_present(c->default_compr)) {
1181 ubifs_err("'compressor \"%s\" is not compiled in", 1217 ubifs_err("'compressor \"%s\" is not compiled in",
1182 ubifs_compr_name(c->default_compr)); 1218 ubifs_compr_name(c->default_compr));
1219 err = -ENOTSUPP;
1183 goto out_free; 1220 goto out_free;
1184 } 1221 }
1185 1222
@@ -1656,7 +1693,7 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1656 1693
1657 for (i = 0; i < c->jhead_cnt; i++) { 1694 for (i = 0; i < c->jhead_cnt; i++) {
1658 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1695 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1659 del_timer_sync(&c->jheads[i].wbuf.timer); 1696 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1660 } 1697 }
1661 1698
1662 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); 1699 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
@@ -1719,7 +1756,7 @@ static void ubifs_put_super(struct super_block *sb)
1719 if (c->jheads) 1756 if (c->jheads)
1720 for (i = 0; i < c->jhead_cnt; i++) { 1757 for (i = 0; i < c->jhead_cnt; i++) {
1721 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1758 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1722 del_timer_sync(&c->jheads[i].wbuf.timer); 1759 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1723 } 1760 }
1724 1761
1725 /* 1762 /*
@@ -1911,6 +1948,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1911 INIT_LIST_HEAD(&c->orph_list); 1948 INIT_LIST_HEAD(&c->orph_list);
1912 INIT_LIST_HEAD(&c->orph_new); 1949 INIT_LIST_HEAD(&c->orph_new);
1913 1950
1951 c->vfs_sb = sb;
1914 c->highest_inum = UBIFS_FIRST_INO; 1952 c->highest_inum = UBIFS_FIRST_INO;
1915 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; 1953 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1916 1954
@@ -1937,18 +1975,18 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1937 err = bdi_init(&c->bdi); 1975 err = bdi_init(&c->bdi);
1938 if (err) 1976 if (err)
1939 goto out_close; 1977 goto out_close;
1978 err = bdi_register(&c->bdi, NULL, "ubifs");
1979 if (err)
1980 goto out_bdi;
1940 1981
1941 err = ubifs_parse_options(c, data, 0); 1982 err = ubifs_parse_options(c, data, 0);
1942 if (err) 1983 if (err)
1943 goto out_bdi; 1984 goto out_bdi;
1944 1985
1945 c->vfs_sb = sb;
1946
1947 sb->s_fs_info = c; 1986 sb->s_fs_info = c;
1948 sb->s_magic = UBIFS_SUPER_MAGIC; 1987 sb->s_magic = UBIFS_SUPER_MAGIC;
1949 sb->s_blocksize = UBIFS_BLOCK_SIZE; 1988 sb->s_blocksize = UBIFS_BLOCK_SIZE;
1950 sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; 1989 sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
1951 sb->s_dev = c->vi.cdev;
1952 sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); 1990 sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
1953 if (c->max_inode_sz > MAX_LFS_FILESIZE) 1991 if (c->max_inode_sz > MAX_LFS_FILESIZE)
1954 sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; 1992 sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
@@ -1993,16 +2031,9 @@ out_free:
1993static int sb_test(struct super_block *sb, void *data) 2031static int sb_test(struct super_block *sb, void *data)
1994{ 2032{
1995 dev_t *dev = data; 2033 dev_t *dev = data;
2034 struct ubifs_info *c = sb->s_fs_info;
1996 2035
1997 return sb->s_dev == *dev; 2036 return c->vi.cdev == *dev;
1998}
1999
2000static int sb_set(struct super_block *sb, void *data)
2001{
2002 dev_t *dev = data;
2003
2004 sb->s_dev = *dev;
2005 return 0;
2006} 2037}
2007 2038
2008static int ubifs_get_sb(struct file_system_type *fs_type, int flags, 2039static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
@@ -2030,7 +2061,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2030 2061
2031 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); 2062 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
2032 2063
2033 sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); 2064 sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev);
2034 if (IS_ERR(sb)) { 2065 if (IS_ERR(sb)) {
2035 err = PTR_ERR(sb); 2066 err = PTR_ERR(sb);
2036 goto out_close; 2067 goto out_close;
@@ -2070,16 +2101,11 @@ out_close:
2070 return err; 2101 return err;
2071} 2102}
2072 2103
2073static void ubifs_kill_sb(struct super_block *sb)
2074{
2075 generic_shutdown_super(sb);
2076}
2077
2078static struct file_system_type ubifs_fs_type = { 2104static struct file_system_type ubifs_fs_type = {
2079 .name = "ubifs", 2105 .name = "ubifs",
2080 .owner = THIS_MODULE, 2106 .owner = THIS_MODULE,
2081 .get_sb = ubifs_get_sb, 2107 .get_sb = ubifs_get_sb,
2082 .kill_sb = ubifs_kill_sb 2108 .kill_sb = kill_anon_super,
2083}; 2109};
2084 2110
2085/* 2111/*
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 0a8341e14088..1bf01d820066 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -95,8 +95,8 @@
95 */ 95 */
96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" 96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
97 97
98/* Default write-buffer synchronization timeout (5 secs) */ 98/* Default write-buffer synchronization timeout in seconds */
99#define DEFAULT_WBUF_TIMEOUT (5 * HZ) 99#define DEFAULT_WBUF_TIMEOUT_SECS 5
100 100
101/* Maximum possible inode number (only 32-bit inodes are supported now) */ 101/* Maximum possible inode number (only 32-bit inodes are supported now) */
102#define MAX_INUM 0xFFFFFFFF 102#define MAX_INUM 0xFFFFFFFF
@@ -650,8 +650,10 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
650 * @io_mutex: serializes write-buffer I/O 650 * @io_mutex: serializes write-buffer I/O
651 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes 651 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
652 * fields 652 * fields
653 * @softlimit: soft write-buffer timeout interval
654 * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
655 * and @softlimit + @delta)
653 * @timer: write-buffer timer 656 * @timer: write-buffer timer
654 * @timeout: timer expire interval in jiffies
655 * @need_sync: it is set if its timer expired and needs sync 657 * @need_sync: it is set if its timer expired and needs sync
656 * @next_ino: points to the next position of the following inode number 658 * @next_ino: points to the next position of the following inode number
657 * @inodes: stores the inode numbers of the nodes which are in wbuf 659 * @inodes: stores the inode numbers of the nodes which are in wbuf
@@ -678,8 +680,9 @@ struct ubifs_wbuf {
678 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); 680 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
679 struct mutex io_mutex; 681 struct mutex io_mutex;
680 spinlock_t lock; 682 spinlock_t lock;
681 struct timer_list timer; 683 ktime_t softlimit;
682 int timeout; 684 unsigned long long delta;
685 struct hrtimer timer;
683 int need_sync; 686 int need_sync;
684 int next_ino; 687 int next_ino;
685 ino_t *inodes; 688 ino_t *inodes;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c89..adafcf556531 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
55 * ACL support is not implemented. 55 * ACL support is not implemented.
56 */ 56 */
57 57
58#include "ubifs.h"
58#include <linux/xattr.h> 59#include <linux/xattr.h>
59#include <linux/posix_acl_xattr.h> 60#include <linux/posix_acl_xattr.h>
60#include "ubifs.h"
61 61
62/* 62/*
63 * Limit the number of extended attributes per inode so that the total size 63 * Limit the number of extended attributes per inode so that the total size
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index e48e9a3af763..1e068535b58b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
238 238
239 mutex_lock(&sbi->s_alloc_mutex); 239 mutex_lock(&sbi->s_alloc_mutex);
240 part_len = sbi->s_partmaps[partition].s_partition_len; 240 part_len = sbi->s_partmaps[partition].s_partition_len;
241 if (first_block < 0 || first_block >= part_len) 241 if (first_block >= part_len)
242 goto out; 242 goto out;
243 243
244 if (first_block + block_count > part_len) 244 if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
297 mutex_lock(&sbi->s_alloc_mutex); 297 mutex_lock(&sbi->s_alloc_mutex);
298 298
299repeat: 299repeat:
300 if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) 300 if (goal >= sbi->s_partmaps[partition].s_partition_len)
301 goal = 0; 301 goal = 0;
302 302
303 nr_groups = bitmap->s_nr_groups; 303 nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
         int8_t etype = -1;
         struct udf_inode_info *iinfo;

-        if (first_block < 0 ||
-            first_block >= sbi->s_partmaps[partition].s_partition_len)
+        if (first_block >= sbi->s_partmaps[partition].s_partition_len)
                 return 0;

         iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
                 return newblock;

         mutex_lock(&sbi->s_alloc_mutex);
-        if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+        if (goal >= sbi->s_partmaps[partition].s_partition_len)
                 goal = 0;

         /* We search for the closest matching block to goal. If we find
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 703843f30ffd..1b88fd5df05d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
         struct block_device *bdev = sb->s_bdev;
         unsigned long lblock = 0;

-        if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock))
+        /*
+         * ioctl failed or returned obviously bogus value?
+         * Try using the device size...
+         */
+        if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
+            lblock == 0)
                 lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;

         if (lblock)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 3d2512c21f05..7cf33379fd46 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -56,9 +56,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off


         UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks);
-        if (i_block < 0) {
-                ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
-        } else if (i_block < direct_blocks) {
+        if (i_block < direct_blocks) {
                 offsets[n++] = i_block;
         } else if ((i_block -= direct_blocks) < indirect_blocks) {
                 offsets[n++] = UFS_IND_BLOCK;
@@ -440,8 +438,6 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
         lock_kernel();

         UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
-        if (fragment < 0)
-                goto abort_negative;
         if (fragment >
             ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb)
              << uspi->s_fpbshift))
@@ -504,10 +500,6 @@ abort:
         unlock_kernel();
         return err;

-abort_negative:
-        ufs_warning(sb, "ufs_get_block", "block < 0");
-        goto abort;
-
 abort_too_big:
         ufs_warning(sb, "ufs_get_block", "block > big");
         goto abort;
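
The two UFS hunks above, like the UDF hunks earlier, delete "block < 0" style tests: the fragment and block arguments are unsigned, so the comparison can never be true and the warning path was dead code. A small stand-alone illustration of why such a check is unreachable; the demo_sector_t type and the bound of 1000000 are made-up values for the example, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for sector_t / an unsigned block number. */
typedef uint64_t demo_sector_t;

int main(void)
{
        demo_sector_t block = (demo_sector_t)-5;    /* a "negative" input simply wraps around */

        /* An unsigned value is never < 0: this branch is unreachable, and GCC
         * warns about it with -Wtype-limits ("comparison is always false"). */
        if (block < 0)
                puts("negative block");             /* dead code */

        /* The upper-bound check is the one that actually rejects bad input. */
        if (block >= 1000000)
                puts("block out of range");
        return 0;
}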
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 1e9d1246eebc..b23a54506446 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -25,14 +25,10 @@
 #include <linux/posix_acl_xattr.h>


-#define XFS_ACL_NOT_CACHED ((void *)-1)
-
 /*
  * Locking scheme:
  * - all ACL updates are protected by inode->i_mutex, which is taken before
  *   calling into this file.
- * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
- *   protected by inode->i_lock.
  */

 STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
         }
 }

-/*
- * Update the cached ACL pointer in the inode.
- *
- * Because we don't hold any locks while reading/writing the attribute
- * from/to disk another thread could have raced and updated the cached
- * ACL value before us. In that case we release the previous cached value
- * and update it with our new value.
- */
-STATIC void
-xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
-                      struct posix_acl *acl)
-{
-        spin_lock(&inode->i_lock);
-        if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
-                posix_acl_release(*p_acl);
-        *p_acl = posix_acl_dup(acl);
-        spin_unlock(&inode->i_lock);
-}
-
 struct posix_acl *
 xfs_get_acl(struct inode *inode, int type)
 {
         struct xfs_inode *ip = XFS_I(inode);
-        struct posix_acl *acl = NULL, **p_acl;
+        struct posix_acl *acl;
         struct xfs_acl *xfs_acl;
         int len = sizeof(struct xfs_acl);
         char *ea_name;
         int error;

+        acl = get_cached_acl(inode, type);
+        if (acl != ACL_NOT_CACHED)
+                return acl;
+
         switch (type) {
         case ACL_TYPE_ACCESS:
                 ea_name = SGI_ACL_FILE;
-                p_acl = &ip->i_acl;
                 break;
         case ACL_TYPE_DEFAULT:
                 ea_name = SGI_ACL_DEFAULT;
-                p_acl = &ip->i_default_acl;
                 break;
         default:
-                return ERR_PTR(-EINVAL);
+                BUG();
         }

-        spin_lock(&inode->i_lock);
-        if (*p_acl != XFS_ACL_NOT_CACHED)
-                acl = posix_acl_dup(*p_acl);
-        spin_unlock(&inode->i_lock);
-
         /*
          * If we have a cached ACLs value just return it, not need to
          * go out to the disk.
          */
-        if (acl)
-                return acl;

         xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
         if (!xfs_acl)
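
This hunk drops the XFS-private cached ACL pointers in favour of the generic in-inode ACL cache, using get_cached_acl()/set_cached_acl() with the ACL_NOT_CACHED sentinel. A minimal sketch of the check-cache, read-from-disk, populate-cache pattern those helpers support follows; read_acl_from_disk() is a hypothetical placeholder for the filesystem's own on-disk lookup, not a real kernel function.

#include <linux/fs.h>
#include <linux/err.h>
#include <linux/posix_acl.h>

/* Hypothetical per-filesystem disk reader, stands in for e.g. an xattr fetch. */
static struct posix_acl *read_acl_from_disk(struct inode *inode, int type);

static struct posix_acl *demo_get_acl(struct inode *inode, int type)
{
        struct posix_acl *acl;

        /* Fast path: the VFS already caches a (possibly NULL) ACL in the inode. */
        acl = get_cached_acl(inode, type);
        if (acl != ACL_NOT_CACHED)
                return acl;     /* the helper already took a reference for us */

        /* Slow path: read from disk, then publish the result for later lookups. */
        acl = read_acl_from_disk(inode, type);
        if (IS_ERR(acl))
                return acl;

        set_cached_acl(inode, type, acl);       /* takes its own reference */
        return acl;
}

Because the cache now lives in the generic inode, the per-filesystem init/clear helpers and the i_acl/i_default_acl fields removed further down in this patch are no longer needed.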
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
         /*
          * If the attribute doesn't exist make sure we have a negative
          * cache entry, for any other error assume it is transient and
-         * leave the cache entry as XFS_ACL_NOT_CACHED.
+         * leave the cache entry as ACL_NOT_CACHED.
          */
         if (error == -ENOATTR) {
                 acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
                 goto out;

 out_update_cache:
-        xfs_update_cached_acl(inode, p_acl, acl);
+        set_cached_acl(inode, type, acl);
 out:
         kfree(xfs_acl);
         return acl;
@@ -189,7 +161,6 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
         struct xfs_inode *ip = XFS_I(inode);
-        struct posix_acl **p_acl;
         char *ea_name;
         int error;

@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
         switch (type) {
         case ACL_TYPE_ACCESS:
                 ea_name = SGI_ACL_FILE;
-                p_acl = &ip->i_acl;
                 break;
         case ACL_TYPE_DEFAULT:
                 if (!S_ISDIR(inode->i_mode))
                         return acl ? -EACCES : 0;
                 ea_name = SGI_ACL_DEFAULT;
-                p_acl = &ip->i_default_acl;
                 break;
         default:
                 return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
         }

         if (!error)
-                xfs_update_cached_acl(inode, p_acl, acl);
+                set_cached_acl(inode, type, acl);
         return error;
 }

@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
         return error;
 }

-void
-xfs_inode_init_acls(struct xfs_inode *ip)
-{
-        /*
-         * No need for locking, inode is not live yet.
-         */
-        ip->i_acl = XFS_ACL_NOT_CACHED;
-        ip->i_default_acl = XFS_ACL_NOT_CACHED;
-}
-
-void
-xfs_inode_clear_acls(struct xfs_inode *ip)
-{
-        /*
-         * No need for locking here, the inode is not live anymore
-         * and just about to be freed.
-         */
-        if (ip->i_acl != XFS_ACL_NOT_CACHED)
-                posix_acl_release(ip->i_acl);
-        if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
-                posix_acl_release(ip->i_default_acl);
-}
-
-
 /*
  * System xattr handlers.
  *
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index f65a53f8752f..6127e24062d0 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -24,7 +24,7 @@
  * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
  * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
  */
-#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
 # define XFS_BIG_BLKNOS 1
 # define XFS_BIG_INUMS 1
 #else
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2e09efbca8db..a220d36f789b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -616,7 +616,7 @@ xfs_max_file_offset(
          */

 #if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
+# if defined(CONFIG_LBDAF)
         ASSERT(sizeof(sector_t) == 8);
         pagefactor = PAGE_CACHE_SIZE;
         bitshift = BITS_PER_LONG;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 63dc1f2efad5..947b150df8ed 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
 extern int xfs_acl_chmod(struct inode *inode);
-extern void xfs_inode_init_acls(struct xfs_inode *ip);
-extern void xfs_inode_clear_acls(struct xfs_inode *ip);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);

@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
 # define xfs_get_acl(inode, type) NULL
 # define xfs_inherit_acl(inode, default_acl) 0
 # define xfs_acl_chmod(inode) 0
-# define xfs_inode_init_acls(ip)
-# define xfs_inode_clear_acls(ip)
 # define posix_acl_access_exists(inode) 0
 # define posix_acl_default_exists(inode) 0
 #endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..5fcec6f020a7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -83,7 +83,6 @@ xfs_inode_alloc(
         memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
         ip->i_size = 0;
         ip->i_new_size = 0;
-        xfs_inode_init_acls(ip);

         /*
          * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
         ASSERT(atomic_read(&ip->i_pincount) == 0);
         ASSERT(!spin_is_locked(&ip->i_flags_lock));
         ASSERT(completion_done(&ip->i_flush));
-        xfs_inode_clear_acls(ip);
         kmem_zone_free(xfs_inode_zone, ip);
 }

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 77016702938b..1804f866a71d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
         /* VFS inode */
         struct inode i_vnode;   /* embedded VFS inode */

-#ifdef CONFIG_XFS_POSIX_ACL
-        struct posix_acl *i_acl;
-        struct posix_acl *i_default_acl;
-#endif
-
         /* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
         struct ktrace *i_trace; /* general inode trace */