aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c6
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/Kconfig40
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/inode.c1
-rw-r--r--fs/adfs/super.c6
-rw-r--r--fs/affs/affs.h1
-rw-r--r--fs/affs/super.c7
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/afs/proc.c2
-rw-r--r--fs/afs/vlocation.c3
-rw-r--r--fs/afs/volume.c3
-rw-r--r--fs/autofs/autofs_i.h2
-rw-r--r--fs/autofs/inode.c7
-rw-r--r--fs/autofs/symlink.c2
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/expire.c6
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c46
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c10
-rw-r--r--fs/binfmt_aout.c14
-rw-r--r--fs/binfmt_elf.c26
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/binfmt_misc.c15
-rw-r--r--fs/block_dev.c36
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/char_dev.c107
-rw-r--r--fs/cifs/cifsfs.c21
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/coda/inode.c3
-rw-r--r--fs/compat.c5
-rw-r--r--fs/configfs/file.c3
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/cramfs/inode.c15
-rw-r--r--fs/cramfs/uncompress.c3
-rw-r--r--fs/dcache.c164
-rw-r--r--fs/debugfs/file.c60
-rw-r--r--fs/debugfs/inode.c20
-rw-r--r--fs/devpts/inode.c6
-rw-r--r--fs/dquot.c5
-rw-r--r--fs/efs/super.c6
-rw-r--r--fs/eventpoll.c1
-rw-r--r--fs/exec.c56
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext2/super.c38
-rw-r--r--fs/ext2/xattr.c3
-rw-r--r--fs/ext3/acl.c6
-rw-r--r--fs/ext3/balloc.c350
-rw-r--r--fs/ext3/bitmap.c2
-rw-r--r--fs/ext3/dir.c19
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/fsync.c6
-rw-r--r--fs/ext3/hash.c8
-rw-r--r--fs/ext3/ialloc.c55
-rw-r--r--fs/ext3/inode.c77
-rw-r--r--fs/ext3/namei.c50
-rw-r--r--fs/ext3/resize.c42
-rw-r--r--fs/ext3/super.c110
-rw-r--r--fs/ext3/xattr.c16
-rw-r--r--fs/fat/cache.c3
-rw-r--r--fs/fat/file.c13
-rw-r--r--fs/fat/inode.c97
-rw-r--r--fs/file.c84
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/freevxfs/vxfs.h2
-rw-r--r--fs/freevxfs/vxfs_inode.c5
-rw-r--r--fs/freevxfs/vxfs_super.c11
-rw-r--r--fs/fuse/control.c6
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/generic_acl.c197
-rw-r--r--fs/hfs/bnode.c3
-rw-r--r--fs/hfs/btree.c3
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/bnode.c3
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/super.c3
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/buffer.c2
-rw-r--r--fs/hpfs/hpfs_fn.h1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/hpfs/super.c7
-rw-r--r--fs/hppfs/hppfs_kern.c1
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/inode.c7
-rw-r--r--fs/isofs/inode.c58
-rw-r--r--fs/jbd/checkpoint.c33
-rw-r--r--fs/jbd/commit.c182
-rw-r--r--fs/jbd/journal.c95
-rw-r--r--fs/jbd/recovery.c58
-rw-r--r--fs/jbd/revoke.c70
-rw-r--r--fs/jbd/transaction.c134
-rw-r--r--fs/jffs/inode-v23.c44
-rw-r--r--fs/jffs/intrep.c11
-rw-r--r--fs/jffs/jffs_fm.c6
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/super.c3
-rw-r--r--fs/jfs/jfs_extent.c2
-rw-r--r--fs/jfs/jfs_imap.c1
-rw-r--r--fs/jfs/jfs_inode.c1
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c4
-rw-r--r--fs/libfs.c14
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c12
-rw-r--r--fs/lockd/host.c55
-rw-r--r--fs/lockd/mon.c41
-rw-r--r--fs/lockd/svcsubs.c3
-rw-r--r--fs/mbcache.c1
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/inode.c13
-rw-r--r--fs/msdos/namei.c11
-rw-r--r--fs/namei.c142
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/ncpfs/inode.c7
-rw-r--r--fs/ncpfs/symlink.c4
-rw-r--r--fs/nfs/Makefile6
-rw-r--r--fs/nfs/callback.c31
-rw-r--r--fs/nfs/callback.h7
-rw-r--r--fs/nfs/callback_proc.c13
-rw-r--r--fs/nfs/client.c1448
-rw-r--r--fs/nfs/delegation.c42
-rw-r--r--fs/nfs/delegation.h10
-rw-r--r--fs/nfs/dir.c341
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/getroot.c311
-rw-r--r--fs/nfs/idmap.c45
-rw-r--r--fs/nfs/inode.c55
-rw-r--r--fs/nfs/internal.h105
-rw-r--r--fs/nfs/mount_clnt.c30
-rw-r--r--fs/nfs/namespace.c46
-rw-r--r--fs/nfs/nfs2xdr.c21
-rw-r--r--fs/nfs/nfs3proc.c44
-rw-r--r--fs/nfs/nfs3xdr.c7
-rw-r--r--fs/nfs/nfs4_fs.h78
-rw-r--r--fs/nfs/nfs4namespace.c118
-rw-r--r--fs/nfs/nfs4proc.c218
-rw-r--r--fs/nfs/nfs4renewd.c20
-rw-r--r--fs/nfs/nfs4state.c174
-rw-r--r--fs/nfs/nfs4xdr.c50
-rw-r--r--fs/nfs/pagelist.c3
-rw-r--r--fs/nfs/proc.c43
-rw-r--r--fs/nfs/read.c24
-rw-r--r--fs/nfs/super.c1421
-rw-r--r--fs/nfs/write.c14
-rw-r--r--fs/nfsd/nfs4callback.c66
-rw-r--r--fs/nfsd/nfs4idmap.c3
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/ntfs/dir.c5
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ntfs/mft.c9
-rw-r--r--fs/ntfs/super.c28
-rw-r--r--fs/ntfs/unistr.c4
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h8
-rw-r--r--fs/ocfs2/dcache.c359
-rw-r--r--fs/ocfs2/dcache.h27
-rw-r--r--fs/ocfs2/dlm/dlmapi.h1
-rw-r--r--fs/ocfs2/dlm/dlmast.c6
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmfs.c6
-rw-r--r--fs/ocfs2/dlm/dlmlock.c10
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c3
-rw-r--r--fs/ocfs2/dlm/userdlm.c81
-rw-r--r--fs/ocfs2/dlm/userdlm.h1
-rw-r--r--fs/ocfs2/dlmglue.c1096
-rw-r--r--fs/ocfs2/dlmglue.h21
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/inode.c160
-rw-r--r--fs/ocfs2/inode.h8
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/ocfs2/namei.c116
-rw-r--r--fs/ocfs2/ocfs2_lockid.h25
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/sysfile.c6
-rw-r--r--fs/ocfs2/vote.c180
-rw-r--r--fs/ocfs2/vote.h5
-rw-r--r--fs/open.c15
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/partitions/efi.c9
-rw-r--r--fs/partitions/msdos.c31
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/nommu.c20
-rw-r--r--fs/proc/proc_misc.c11
-rw-r--r--fs/proc/task_mmu.c5
-rw-r--r--fs/proc/task_nommu.c74
-rw-r--r--fs/qnx4/inode.c8
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/reiserfs/Makefile2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c18
-rw-r--r--fs/reiserfs/journal.c54
-rw-r--r--fs/reiserfs/super.c31
-rw-r--r--fs/romfs/inode.c3
-rw-r--r--fs/select.c8
-rw-r--r--fs/smbfs/inode.c5
-rw-r--r--fs/smbfs/proc.c1
-rw-r--r--fs/smbfs/request.c3
-rw-r--r--fs/stat.c3
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/bin.c13
-rw-r--r--fs/sysfs/dir.c2
-rw-r--r--fs/sysfs/inode.c12
-rw-r--r--fs/sysfs/symlink.c14
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/sysv/super.c6
-rw-r--r--fs/udf/ialloc.c7
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/ufs/ialloc.c1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/super.c6
-rw-r--r--fs/xfs/Makefile-linux-2.61
-rw-r--r--fs/xfs/linux-2.6/kmem.c29
-rw-r--r--fs/xfs/linux-2.6/kmem.h10
-rw-r--r--fs/xfs/linux-2.6/sema.h2
-rw-r--r--fs/xfs/linux-2.6/sv.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c51
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c29
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h2
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c26
-rw-r--r--fs/xfs/quota/xfs_qm.c14
-rw-r--r--fs/xfs/quota/xfs_qm.h6
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h2
-rw-r--r--fs/xfs/support/ktrace.c2
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_alloc.c10
-rw-r--r--fs/xfs/xfs_alloc_btree.c132
-rw-r--r--fs/xfs/xfs_attr.c181
-rw-r--r--fs/xfs/xfs_attr.h8
-rw-r--r--fs/xfs/xfs_attr_leaf.c351
-rw-r--r--fs/xfs/xfs_attr_leaf.h41
-rw-r--r--fs/xfs/xfs_behavior.c20
-rw-r--r--fs/xfs/xfs_behavior.h2
-rw-r--r--fs/xfs/xfs_bmap.c90
-rw-r--r--fs/xfs/xfs_bmap_btree.c113
-rw-r--r--fs/xfs/xfs_bmap_btree.h11
-rw-r--r--fs/xfs/xfs_btree.c8
-rw-r--r--fs/xfs/xfs_btree.h5
-rw-r--r--fs/xfs/xfs_buf_item.c22
-rw-r--r--fs/xfs/xfs_da_btree.c33
-rw-r--r--fs/xfs/xfs_error.h9
-rw-r--r--fs/xfs/xfs_extfree_item.c69
-rw-r--r--fs/xfs/xfs_extfree_item.h50
-rw-r--r--fs/xfs/xfs_fs.h8
-rw-r--r--fs/xfs/xfs_ialloc.c11
-rw-r--r--fs/xfs/xfs_ialloc_btree.c62
-rw-r--r--fs/xfs/xfs_ialloc_btree.h19
-rw-r--r--fs/xfs/xfs_iget.c44
-rw-r--r--fs/xfs/xfs_inode.c30
-rw-r--r--fs/xfs/xfs_inode.h12
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h66
-rw-r--r--fs/xfs/xfs_iomap.c89
-rw-r--r--fs/xfs/xfs_itable.c184
-rw-r--r--fs/xfs/xfs_itable.h16
-rw-r--r--fs/xfs/xfs_log.c19
-rw-r--r--fs/xfs/xfs_log.h8
-rw-r--r--fs/xfs/xfs_log_priv.h10
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_rtalloc.c38
-rw-r--r--fs/xfs/xfs_sb.h22
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c4
-rw-r--r--fs/xfs/xfs_trans_priv.h12
-rw-r--r--fs/xfs/xfs_vfsops.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c26
295 files changed, 7621 insertions, 4941 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 22f7ccd58d38..0f628041e3f7 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -460,8 +460,10 @@ static int __init init_v9fs(void)
460 460
461 ret = v9fs_mux_global_init(); 461 ret = v9fs_mux_global_init();
462 if (!ret) 462 if (!ret)
463 ret = register_filesystem(&v9fs_fs_type); 463 return ret;
464 464 ret = register_filesystem(&v9fs_fs_type);
465 if (!ret)
466 v9fs_mux_global_exit();
465 return ret; 467 return ret;
466} 468}
467 469
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index eae50c9d6dc4..7a7ec2d1d2f4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -204,7 +204,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
204 inode->i_mode = mode; 204 inode->i_mode = mode;
205 inode->i_uid = current->fsuid; 205 inode->i_uid = current->fsuid;
206 inode->i_gid = current->fsgid; 206 inode->i_gid = current->fsgid;
207 inode->i_blksize = sb->s_blocksize;
208 inode->i_blocks = 0; 207 inode->i_blocks = 0;
209 inode->i_rdev = 0; 208 inode->i_rdev = 0;
210 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 209 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -950,9 +949,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
950 949
951 inode->i_size = stat->length; 950 inode->i_size = stat->length;
952 951
953 inode->i_blksize = sb->s_blocksize;
954 inode->i_blocks = 952 inode->i_blocks =
955 (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits; 953 (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
956} 954}
957 955
958/** 956/**
diff --git a/fs/Kconfig b/fs/Kconfig
index 530581628311..4fd9efac29ab 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -826,6 +826,25 @@ config PROC_VMCORE
826 help 826 help
827 Exports the dump image of crashed kernel in ELF format. 827 Exports the dump image of crashed kernel in ELF format.
828 828
829config PROC_SYSCTL
830 bool "Sysctl support (/proc/sys)" if EMBEDDED
831 depends on PROC_FS
832 select SYSCTL
833 default y
834 ---help---
835 The sysctl interface provides a means of dynamically changing
836 certain kernel parameters and variables on the fly without requiring
837 a recompile of the kernel or reboot of the system. The primary
838 interface is through /proc/sys. If you say Y here a tree of
839 modifiable sysctl entries will be generated beneath the
840 /proc/sys directory. They are explained in the files
841 in <file:Documentation/sysctl/>. Note that enabling this
842 option will enlarge the kernel by at least 8 KB.
843
844 As it is generally a good thing, you should say Y here unless
845 building a kernel for install/rescue disks or your system is very
846 limited in memory.
847
829config SYSFS 848config SYSFS
830 bool "sysfs file system support" if EMBEDDED 849 bool "sysfs file system support" if EMBEDDED
831 default y 850 default y
@@ -862,6 +881,19 @@ config TMPFS
862 881
863 See <file:Documentation/filesystems/tmpfs.txt> for details. 882 See <file:Documentation/filesystems/tmpfs.txt> for details.
864 883
884config TMPFS_POSIX_ACL
885 bool "Tmpfs POSIX Access Control Lists"
886 depends on TMPFS
887 select GENERIC_ACL
888 help
889 POSIX Access Control Lists (ACLs) support permissions for users and
890 groups beyond the owner/group/world scheme.
891
892 To learn more about Access Control Lists, visit the POSIX ACLs for
893 Linux website <http://acl.bestbits.at/>.
894
895 If you don't know what Access Control Lists are, say N.
896
865config HUGETLBFS 897config HUGETLBFS
866 bool "HugeTLB file system support" 898 bool "HugeTLB file system support"
867 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN 899 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
@@ -1471,8 +1503,8 @@ config NFS_V4
1471 If unsure, say N. 1503 If unsure, say N.
1472 1504
1473config NFS_DIRECTIO 1505config NFS_DIRECTIO
1474 bool "Allow direct I/O on NFS files (EXPERIMENTAL)" 1506 bool "Allow direct I/O on NFS files"
1475 depends on NFS_FS && EXPERIMENTAL 1507 depends on NFS_FS
1476 help 1508 help
1477 This option enables applications to perform uncached I/O on files 1509 This option enables applications to perform uncached I/O on files
1478 in NFS file systems using the O_DIRECT open() flag. When O_DIRECT 1510 in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
@@ -1921,6 +1953,10 @@ config 9P_FS
1921 1953
1922 If unsure, say N. 1954 If unsure, say N.
1923 1955
1956config GENERIC_ACL
1957 bool
1958 select FS_POSIX_ACL
1959
1924endmenu 1960endmenu
1925 1961
1926menu "Partition Types" 1962menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 89135428a539..46b8cfe497b2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
35obj-$(CONFIG_FS_MBCACHE) += mbcache.o 35obj-$(CONFIG_FS_MBCACHE) += mbcache.o
36obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o 36obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
37obj-$(CONFIG_NFS_COMMON) += nfs_common/ 37obj-$(CONFIG_NFS_COMMON) += nfs_common/
38obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
38 39
39obj-$(CONFIG_QUOTA) += dquot.o 40obj-$(CONFIG_QUOTA) += dquot.o
40obj-$(CONFIG_QFMT_V1) += quota_v1.o 41obj-$(CONFIG_QFMT_V1) += quota_v1.o
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 534f3eecc985..7e7a04be1278 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -269,7 +269,6 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
269 inode->i_ino = obj->file_id; 269 inode->i_ino = obj->file_id;
270 inode->i_size = obj->size; 270 inode->i_size = obj->size;
271 inode->i_nlink = 2; 271 inode->i_nlink = 2;
272 inode->i_blksize = PAGE_SIZE;
273 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> 272 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
274 sb->s_blocksize_bits; 273 sb->s_blocksize_bits;
275 274
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 82011019494c..9ade139086fc 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -251,8 +251,7 @@ static int init_inodecache(void)
251 251
252static void destroy_inodecache(void) 252static void destroy_inodecache(void)
253{ 253{
254 if (kmem_cache_destroy(adfs_inode_cachep)) 254 kmem_cache_destroy(adfs_inode_cachep);
255 printk(KERN_INFO "adfs_inode_cache: not all structures were freed\n");
256} 255}
257 256
258static struct super_operations adfs_sops = { 257static struct super_operations adfs_sops = {
@@ -339,11 +338,10 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
339 338
340 sb->s_flags |= MS_NODIRATIME; 339 sb->s_flags |= MS_NODIRATIME;
341 340
342 asb = kmalloc(sizeof(*asb), GFP_KERNEL); 341 asb = kzalloc(sizeof(*asb), GFP_KERNEL);
343 if (!asb) 342 if (!asb)
344 return -ENOMEM; 343 return -ENOMEM;
345 sb->s_fs_info = asb; 344 sb->s_fs_info = asb;
346 memset(asb, 0, sizeof(*asb));
347 345
348 /* set default options */ 346 /* set default options */
349 asb->s_uid = 0; 347 asb->s_uid = 0;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0ddd4cc0d1a0..1dc8438ef389 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,7 +1,6 @@
1#include <linux/types.h> 1#include <linux/types.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/buffer_head.h> 3#include <linux/buffer_head.h>
4#include <linux/affs_fs.h>
5#include <linux/amigaffs.h> 4#include <linux/amigaffs.h>
6 5
7/* AmigaOS allows file names with up to 30 characters length. 6/* AmigaOS allows file names with up to 30 characters length.
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5200f4938df0..5ea72c3a16c3 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/statfs.h> 15#include <linux/statfs.h>
16#include <linux/parser.h> 16#include <linux/parser.h>
17#include <linux/magic.h>
17#include "affs.h" 18#include "affs.h"
18 19
19extern struct timezone sys_tz; 20extern struct timezone sys_tz;
@@ -108,8 +109,7 @@ static int init_inodecache(void)
108 109
109static void destroy_inodecache(void) 110static void destroy_inodecache(void)
110{ 111{
111 if (kmem_cache_destroy(affs_inode_cachep)) 112 kmem_cache_destroy(affs_inode_cachep);
112 printk(KERN_INFO "affs_inode_cache: not all structures were freed\n");
113} 113}
114 114
115static struct super_operations affs_sops = { 115static struct super_operations affs_sops = {
@@ -279,11 +279,10 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
279 sb->s_op = &affs_sops; 279 sb->s_op = &affs_sops;
280 sb->s_flags |= MS_NODIRATIME; 280 sb->s_flags |= MS_NODIRATIME;
281 281
282 sbi = kmalloc(sizeof(struct affs_sb_info), GFP_KERNEL); 282 sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL);
283 if (!sbi) 283 if (!sbi)
284 return -ENOMEM; 284 return -ENOMEM;
285 sb->s_fs_info = sbi; 285 sb->s_fs_info = sbi;
286 memset(sbi, 0, sizeof(*sbi));
287 init_MUTEX(&sbi->s_bmlock); 286 init_MUTEX(&sbi->s_bmlock);
288 287
289 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 288 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4ebb30a50ed5..6f37754906c2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -72,7 +72,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
72 inode->i_ctime.tv_sec = vnode->status.mtime_server; 72 inode->i_ctime.tv_sec = vnode->status.mtime_server;
73 inode->i_ctime.tv_nsec = 0; 73 inode->i_ctime.tv_nsec = 0;
74 inode->i_atime = inode->i_mtime = inode->i_ctime; 74 inode->i_atime = inode->i_mtime = inode->i_ctime;
75 inode->i_blksize = PAGE_CACHE_SIZE;
76 inode->i_blocks = 0; 75 inode->i_blocks = 0;
77 inode->i_version = vnode->fid.unique; 76 inode->i_version = vnode->fid.unique;
78 inode->i_mapping->a_ops = &afs_fs_aops; 77 inode->i_mapping->a_ops = &afs_fs_aops;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 101d21b6c037..86463ec9ccb4 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -775,6 +775,7 @@ static int afs_proc_cell_servers_release(struct inode *inode,
775 * first item 775 * first item
776 */ 776 */
777static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) 777static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
778 __acquires(m->private->sv_lock)
778{ 779{
779 struct list_head *_p; 780 struct list_head *_p;
780 struct afs_cell *cell = m->private; 781 struct afs_cell *cell = m->private;
@@ -823,6 +824,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
823 * clean up after reading from the cells list 824 * clean up after reading from the cells list
824 */ 825 */
825static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) 826static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
827 __releases(p->private->sv_lock)
826{ 828{
827 struct afs_cell *cell = p->private; 829 struct afs_cell *cell = p->private;
828 830
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 331f730a1fb3..782ee7c600ca 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -281,11 +281,10 @@ int afs_vlocation_lookup(struct afs_cell *cell,
281 spin_unlock(&cell->vl_gylock); 281 spin_unlock(&cell->vl_gylock);
282 282
283 /* not in the cell's in-memory lists - create a new record */ 283 /* not in the cell's in-memory lists - create a new record */
284 vlocation = kmalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 284 vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
285 if (!vlocation) 285 if (!vlocation)
286 return -ENOMEM; 286 return -ENOMEM;
287 287
288 memset(vlocation, 0, sizeof(struct afs_vlocation));
289 atomic_set(&vlocation->usage, 1); 288 atomic_set(&vlocation->usage, 1);
290 INIT_LIST_HEAD(&vlocation->link); 289 INIT_LIST_HEAD(&vlocation->link);
291 rwlock_init(&vlocation->lock); 290 rwlock_init(&vlocation->lock);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 0ff4b86476e3..768c6dbd323a 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -186,11 +186,10 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
186 _debug("creating new volume record"); 186 _debug("creating new volume record");
187 187
188 ret = -ENOMEM; 188 ret = -ENOMEM;
189 volume = kmalloc(sizeof(struct afs_volume), GFP_KERNEL); 189 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
190 if (!volume) 190 if (!volume)
191 goto error_up; 191 goto error_up;
192 192
193 memset(volume, 0, sizeof(struct afs_volume));
194 atomic_set(&volume->usage, 1); 193 atomic_set(&volume->usage, 1);
195 volume->type = type; 194 volume->type = type;
196 volume->type_force = force; 195 volume->type_force = force;
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index a62327f1bdff..c7700d9b3f96 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -37,8 +37,6 @@
37#define DPRINTK(D) ((void)0) 37#define DPRINTK(D) ((void)0)
38#endif 38#endif
39 39
40#define AUTOFS_SUPER_MAGIC 0x0187
41
42/* 40/*
43 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the 41 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
44 * kernel will keep the negative response cached for up to the time given 42 * kernel will keep the negative response cached for up to the time given
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 65e5ed42190e..2c9759baad61 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -16,6 +16,7 @@
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/parser.h> 17#include <linux/parser.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/magic.h>
19#include "autofs_i.h" 20#include "autofs_i.h"
20#include <linux/module.h> 21#include <linux/module.h>
21 22
@@ -128,10 +129,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
128 struct autofs_sb_info *sbi; 129 struct autofs_sb_info *sbi;
129 int minproto, maxproto; 130 int minproto, maxproto;
130 131
131 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); 132 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
132 if ( !sbi ) 133 if ( !sbi )
133 goto fail_unlock; 134 goto fail_unlock;
134 memset(sbi, 0, sizeof(*sbi));
135 DPRINTK(("autofs: starting up, sbi = %p\n",sbi)); 135 DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
136 136
137 s->s_fs_info = sbi; 137 s->s_fs_info = sbi;
@@ -216,7 +216,6 @@ static void autofs_read_inode(struct inode *inode)
216 inode->i_nlink = 2; 216 inode->i_nlink = 2;
217 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 217 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
218 inode->i_blocks = 0; 218 inode->i_blocks = 0;
219 inode->i_blksize = 1024;
220 219
221 if ( ino == AUTOFS_ROOT_INO ) { 220 if ( ino == AUTOFS_ROOT_INO ) {
222 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 221 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
@@ -241,7 +240,7 @@ static void autofs_read_inode(struct inode *inode)
241 240
242 inode->i_op = &autofs_symlink_inode_operations; 241 inode->i_op = &autofs_symlink_inode_operations;
243 sl = &sbi->symlink[n]; 242 sl = &sbi->symlink[n];
244 inode->u.generic_ip = sl; 243 inode->i_private = sl;
245 inode->i_mode = S_IFLNK | S_IRWXUGO; 244 inode->i_mode = S_IFLNK | S_IRWXUGO;
246 inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime; 245 inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
247 inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; 246 inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index 52e8772b066e..c74f2eb65775 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -15,7 +15,7 @@
15/* Nothing to release.. */ 15/* Nothing to release.. */
16static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd) 16static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
17{ 17{
18 char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; 18 char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
19 nd_set_link(nd, s); 19 nd_set_link(nd, s);
20 return NULL; 20 return NULL;
21} 21}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d6603d02304c..480ab178cba5 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -40,8 +40,6 @@
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
42 42
43#define AUTOFS_SUPER_MAGIC 0x0187
44
45/* Unified info structure. This is pointed to by both the dentry and 43/* Unified info structure. This is pointed to by both the dentry and
46 inode structures. Each file in the filesystem has an instance of this 44 inode structures. Each file in the filesystem has an instance of this
47 structure. It holds a reference to the dentry, so dentries are never 45 structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8dbd44f10e9d..d96e5c14a9ca 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
32 32
33 if (!do_now) { 33 if (!do_now) {
34 /* Too young to die */ 34 /* Too young to die */
35 if (time_after(ino->last_used + timeout, now)) 35 if (!timeout || time_after(ino->last_used + timeout, now))
36 return 0; 36 return 0;
37 37
38 /* update last_used here :- 38 /* update last_used here :-
@@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
253 struct dentry *root = dget(sb->s_root); 253 struct dentry *root = dget(sb->s_root);
254 int do_now = how & AUTOFS_EXP_IMMEDIATE; 254 int do_now = how & AUTOFS_EXP_IMMEDIATE;
255 255
256 if (!sbi->exp_timeout || !root) 256 if (!root)
257 return NULL; 257 return NULL;
258 258
259 now = jiffies; 259 now = jiffies;
@@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 293 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 294 int exp_leaves = how & AUTOFS_EXP_LEAVES;
295 295
296 if ( !sbi->exp_timeout || !root ) 296 if (!root)
297 return NULL; 297 return NULL;
298 298
299 now = jiffies; 299 now = jiffies;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index fde78b110ddd..800ce876caec 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
19#include <linux/parser.h> 19#include <linux/parser.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/smp_lock.h> 21#include <linux/smp_lock.h>
22#include <linux/magic.h>
22#include "autofs_i.h" 23#include "autofs_i.h"
23#include <linux/module.h> 24#include <linux/module.h>
24 25
@@ -446,7 +447,6 @@ struct inode *autofs4_get_inode(struct super_block *sb,
446 inode->i_uid = 0; 447 inode->i_uid = 0;
447 inode->i_gid = 0; 448 inode->i_gid = 0;
448 } 449 }
449 inode->i_blksize = PAGE_CACHE_SIZE;
450 inode->i_blocks = 0; 450 inode->i_blocks = 0;
451 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 451 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
452 452
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 5100f984783f..563ef9d7da9f 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -137,7 +137,9 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
137 nd.flags = LOOKUP_DIRECTORY; 137 nd.flags = LOOKUP_DIRECTORY;
138 ret = (dentry->d_op->d_revalidate)(dentry, &nd); 138 ret = (dentry->d_op->d_revalidate)(dentry, &nd);
139 139
140 if (!ret) { 140 if (ret <= 0) {
141 if (ret < 0)
142 status = ret;
141 dcache_dir_close(inode, file); 143 dcache_dir_close(inode, file);
142 goto out; 144 goto out;
143 } 145 }
@@ -279,9 +281,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
279 281
280 DPRINTK("mount done status=%d", status); 282 DPRINTK("mount done status=%d", status);
281 283
282 if (status && dentry->d_inode)
283 return status; /* Try to get the kernel to invalidate this dentry */
284
285 /* Turn this into a real negative dentry? */ 284 /* Turn this into a real negative dentry? */
286 if (status == -ENOENT) { 285 if (status == -ENOENT) {
287 spin_lock(&dentry->d_lock); 286 spin_lock(&dentry->d_lock);
@@ -357,7 +356,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
357 * don't try to mount it again. 356 * don't try to mount it again.
358 */ 357 */
359 spin_lock(&dcache_lock); 358 spin_lock(&dcache_lock);
360 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 359 if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
361 spin_unlock(&dcache_lock); 360 spin_unlock(&dcache_lock);
362 361
363 status = try_to_fill_dentry(dentry, 0); 362 status = try_to_fill_dentry(dentry, 0);
@@ -400,13 +399,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
400 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 399 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
401 int oz_mode = autofs4_oz_mode(sbi); 400 int oz_mode = autofs4_oz_mode(sbi);
402 int flags = nd ? nd->flags : 0; 401 int flags = nd ? nd->flags : 0;
403 int status = 0; 402 int status = 1;
404 403
405 /* Pending dentry */ 404 /* Pending dentry */
406 if (autofs4_ispending(dentry)) { 405 if (autofs4_ispending(dentry)) {
407 if (!oz_mode) 406 /* The daemon never causes a mount to trigger */
408 status = try_to_fill_dentry(dentry, flags); 407 if (oz_mode)
409 return !status; 408 return 1;
409
410 /*
411 * A zero status is success otherwise we have a
412 * negative error code.
413 */
414 status = try_to_fill_dentry(dentry, flags);
415 if (status == 0)
416 return 1;
417
418 return status;
410 } 419 }
411 420
412 /* Negative dentry.. invalidate if "old" */ 421 /* Negative dentry.. invalidate if "old" */
@@ -421,9 +430,19 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
421 DPRINTK("dentry=%p %.*s, emptydir", 430 DPRINTK("dentry=%p %.*s, emptydir",
422 dentry, dentry->d_name.len, dentry->d_name.name); 431 dentry, dentry->d_name.len, dentry->d_name.name);
423 spin_unlock(&dcache_lock); 432 spin_unlock(&dcache_lock);
424 if (!oz_mode) 433 /* The daemon never causes a mount to trigger */
425 status = try_to_fill_dentry(dentry, flags); 434 if (oz_mode)
426 return !status; 435 return 1;
436
437 /*
438 * A zero status is success otherwise we have a
439 * negative error code.
440 */
441 status = try_to_fill_dentry(dentry, flags);
442 if (status == 0)
443 return 1;
444
445 return status;
427 } 446 }
428 spin_unlock(&dcache_lock); 447 spin_unlock(&dcache_lock);
429 448
@@ -518,6 +537,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
518 return ERR_PTR(-ERESTARTNOINTR); 537 return ERR_PTR(-ERESTARTNOINTR);
519 } 538 }
520 } 539 }
540 spin_lock(&dentry->d_lock);
541 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
542 spin_unlock(&dentry->d_lock);
521 } 543 }
522 544
523 /* 545 /*
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 50cfca5c7efd..57020c7a7e65 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -365,7 +365,6 @@ befs_read_inode(struct inode *inode)
365 inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */ 365 inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */
366 inode->i_ctime = inode->i_mtime; 366 inode->i_ctime = inode->i_mtime;
367 inode->i_atime = inode->i_mtime; 367 inode->i_atime = inode->i_mtime;
368 inode->i_blksize = befs_sb->block_size;
369 368
370 befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); 369 befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);
371 befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent); 370 befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent);
@@ -446,9 +445,7 @@ befs_init_inodecache(void)
446static void 445static void
447befs_destroy_inodecache(void) 446befs_destroy_inodecache(void)
448{ 447{
449 if (kmem_cache_destroy(befs_inode_cachep)) 448 kmem_cache_destroy(befs_inode_cachep);
450 printk(KERN_ERR "befs_destroy_inodecache: "
451 "not all structures were freed\n");
452} 449}
453 450
454/* 451/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 26fad9621738..dcf04cb13283 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -102,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
102 inode->i_uid = current->fsuid; 102 inode->i_uid = current->fsuid;
103 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; 103 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
104 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 104 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
105 inode->i_blocks = inode->i_blksize = 0; 105 inode->i_blocks = 0;
106 inode->i_op = &bfs_file_inops; 106 inode->i_op = &bfs_file_inops;
107 inode->i_fop = &bfs_file_operations; 107 inode->i_fop = &bfs_file_operations;
108 inode->i_mapping->a_ops = &bfs_aops; 108 inode->i_mapping->a_ops = &bfs_aops;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cf74f3d4d966..ed27ffb3459e 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -76,7 +76,6 @@ static void bfs_read_inode(struct inode * inode)
76 inode->i_size = BFS_FILESIZE(di); 76 inode->i_size = BFS_FILESIZE(di);
77 inode->i_blocks = BFS_FILEBLOCKS(di); 77 inode->i_blocks = BFS_FILEBLOCKS(di);
78 if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); 78 if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
79 inode->i_blksize = PAGE_SIZE;
80 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); 79 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
81 inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); 80 inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
82 inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); 81 inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
@@ -268,8 +267,7 @@ static int init_inodecache(void)
268 267
269static void destroy_inodecache(void) 268static void destroy_inodecache(void)
270{ 269{
271 if (kmem_cache_destroy(bfs_inode_cachep)) 270 kmem_cache_destroy(bfs_inode_cachep);
272 printk(KERN_INFO "bfs_inode_cache: not all structures were freed\n");
273} 271}
274 272
275static struct super_operations bfs_sops = { 273static struct super_operations bfs_sops = {
@@ -311,11 +309,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
311 unsigned i, imap_len; 309 unsigned i, imap_len;
312 struct bfs_sb_info * info; 310 struct bfs_sb_info * info;
313 311
314 info = kmalloc(sizeof(*info), GFP_KERNEL); 312 info = kzalloc(sizeof(*info), GFP_KERNEL);
315 if (!info) 313 if (!info)
316 return -ENOMEM; 314 return -ENOMEM;
317 s->s_fs_info = info; 315 s->s_fs_info = info;
318 memset(info, 0, sizeof(*info));
319 316
320 sb_set_blocksize(s, BFS_BSIZE); 317 sb_set_blocksize(s, BFS_BSIZE);
321 318
@@ -338,10 +335,9 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
338 + BFS_ROOT_INO - 1; 335 + BFS_ROOT_INO - 1;
339 336
340 imap_len = info->si_lasti/8 + 1; 337 imap_len = info->si_lasti/8 + 1;
341 info->si_imap = kmalloc(imap_len, GFP_KERNEL); 338 info->si_imap = kzalloc(imap_len, GFP_KERNEL);
342 if (!info->si_imap) 339 if (!info->si_imap)
343 goto out; 340 goto out;
344 memset(info->si_imap, 0, imap_len);
345 for (i=0; i<BFS_ROOT_INO; i++) 341 for (i=0; i<BFS_ROOT_INO; i++)
346 set_bit(i, info->si_imap); 342 set_bit(i, info->si_imap);
347 343
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index f312103434d4..517e111bb7ef 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -278,6 +278,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
278 return -ENOEXEC; 278 return -ENOEXEC;
279 } 279 }
280 280
281 /*
282 * Requires a mmap handler. This prevents people from using a.out
283 * as part of an exploit attack against /proc-related vulnerabilities.
284 */
285 if (!bprm->file->f_op || !bprm->file->f_op->mmap)
286 return -ENOEXEC;
287
281 fd_offset = N_TXTOFF(ex); 288 fd_offset = N_TXTOFF(ex);
282 289
283 /* Check initial limits. This avoids letting people circumvent 290 /* Check initial limits. This avoids letting people circumvent
@@ -476,6 +483,13 @@ static int load_aout_library(struct file *file)
476 goto out; 483 goto out;
477 } 484 }
478 485
486 /*
487 * Requires a mmap handler. This prevents people from using a.out
488 * as part of an exploit attack against /proc-related vulnerabilities.
489 */
490 if (!file->f_op || !file->f_op->mmap)
491 goto out;
492
479 if (N_FLAGS(ex)) 493 if (N_FLAGS(ex))
480 goto out; 494 goto out;
481 495
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 672a3b90bc55..6eb48e1446ec 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -515,7 +515,8 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
515{ 515{
516 unsigned int random_variable = 0; 516 unsigned int random_variable = 0;
517 517
518 if (current->flags & PF_RANDOMIZE) { 518 if ((current->flags & PF_RANDOMIZE) &&
519 !(current->personality & ADDR_NO_RANDOMIZE)) {
519 random_variable = get_random_int() & STACK_RND_MASK; 520 random_variable = get_random_int() & STACK_RND_MASK;
520 random_variable <<= PAGE_SHIFT; 521 random_variable <<= PAGE_SHIFT;
521 } 522 }
@@ -1037,10 +1038,8 @@ out_free_interp:
1037out_free_file: 1038out_free_file:
1038 sys_close(elf_exec_fileno); 1039 sys_close(elf_exec_fileno);
1039out_free_fh: 1040out_free_fh:
1040 if (files) { 1041 if (files)
1041 put_files_struct(current->files); 1042 reset_files_struct(current, files);
1042 current->files = files;
1043 }
1044out_free_ph: 1043out_free_ph:
1045 kfree(elf_phdata); 1044 kfree(elf_phdata);
1046 goto out; 1045 goto out;
@@ -1262,7 +1261,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
1262 return; 1261 return;
1263} 1262}
1264 1263
1265static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) 1264static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1266{ 1265{
1267 phdr->p_type = PT_NOTE; 1266 phdr->p_type = PT_NOTE;
1268 phdr->p_offset = offset; 1267 phdr->p_offset = offset;
@@ -1428,7 +1427,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1428 int i; 1427 int i;
1429 struct vm_area_struct *vma; 1428 struct vm_area_struct *vma;
1430 struct elfhdr *elf = NULL; 1429 struct elfhdr *elf = NULL;
1431 off_t offset = 0, dataoff; 1430 loff_t offset = 0, dataoff;
1432 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; 1431 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1433 int numnote; 1432 int numnote;
1434 struct memelfnote *notes = NULL; 1433 struct memelfnote *notes = NULL;
@@ -1480,20 +1479,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1480 1479
1481 if (signr) { 1480 if (signr) {
1482 struct elf_thread_status *tmp; 1481 struct elf_thread_status *tmp;
1483 read_lock(&tasklist_lock); 1482 rcu_read_lock();
1484 do_each_thread(g,p) 1483 do_each_thread(g,p)
1485 if (current->mm == p->mm && current != p) { 1484 if (current->mm == p->mm && current != p) {
1486 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1485 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1487 if (!tmp) { 1486 if (!tmp) {
1488 read_unlock(&tasklist_lock); 1487 rcu_read_unlock();
1489 goto cleanup; 1488 goto cleanup;
1490 } 1489 }
1491 INIT_LIST_HEAD(&tmp->list);
1492 tmp->thread = p; 1490 tmp->thread = p;
1493 list_add(&tmp->list, &thread_list); 1491 list_add(&tmp->list, &thread_list);
1494 } 1492 }
1495 while_each_thread(g,p); 1493 while_each_thread(g,p);
1496 read_unlock(&tasklist_lock); 1494 rcu_read_unlock();
1497 list_for_each(t, &thread_list) { 1495 list_for_each(t, &thread_list) {
1498 struct elf_thread_status *tmp; 1496 struct elf_thread_status *tmp;
1499 int sz; 1497 int sz;
@@ -1661,11 +1659,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1661 ELF_CORE_WRITE_EXTRA_DATA; 1659 ELF_CORE_WRITE_EXTRA_DATA;
1662#endif 1660#endif
1663 1661
1664 if ((off_t)file->f_pos != offset) { 1662 if (file->f_pos != offset) {
1665 /* Sanity check */ 1663 /* Sanity check */
1666 printk(KERN_WARNING 1664 printk(KERN_WARNING
1667 "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", 1665 "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n",
1668 (off_t)file->f_pos, offset); 1666 file->f_pos, offset);
1669 } 1667 }
1670 1668
1671end_coredump: 1669end_coredump:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2f3365829229..f86d5c9ce5eb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1597,20 +1597,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1597 1597
1598 if (signr) { 1598 if (signr) {
1599 struct elf_thread_status *tmp; 1599 struct elf_thread_status *tmp;
1600 read_lock(&tasklist_lock); 1600 rcu_read_lock();
1601 do_each_thread(g,p) 1601 do_each_thread(g,p)
1602 if (current->mm == p->mm && current != p) { 1602 if (current->mm == p->mm && current != p) {
1603 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1603 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1604 if (!tmp) { 1604 if (!tmp) {
1605 read_unlock(&tasklist_lock); 1605 rcu_read_unlock();
1606 goto cleanup; 1606 goto cleanup;
1607 } 1607 }
1608 INIT_LIST_HEAD(&tmp->list);
1609 tmp->thread = p; 1608 tmp->thread = p;
1610 list_add(&tmp->list, &thread_list); 1609 list_add(&tmp->list, &thread_list);
1611 } 1610 }
1612 while_each_thread(g,p); 1611 while_each_thread(g,p);
1613 read_unlock(&tasklist_lock); 1612 rcu_read_unlock();
1614 list_for_each(t, &thread_list) { 1613 list_for_each(t, &thread_list) {
1615 struct elf_thread_status *tmp; 1614 struct elf_thread_status *tmp;
1616 int sz; 1615 int sz;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 34ebbc191e46..1713c48fef54 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -215,10 +215,8 @@ _error:
215 bprm->interp_flags = 0; 215 bprm->interp_flags = 0;
216 bprm->interp_data = 0; 216 bprm->interp_data = 0;
217_unshare: 217_unshare:
218 if (files) { 218 if (files)
219 put_files_struct(current->files); 219 reset_files_struct(current, files);
220 current->files = files;
221 }
222 goto _ret; 220 goto _ret;
223} 221}
224 222
@@ -507,7 +505,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
507 inode->i_mode = mode; 505 inode->i_mode = mode;
508 inode->i_uid = 0; 506 inode->i_uid = 0;
509 inode->i_gid = 0; 507 inode->i_gid = 0;
510 inode->i_blksize = PAGE_CACHE_SIZE;
511 inode->i_blocks = 0; 508 inode->i_blocks = 0;
512 inode->i_atime = inode->i_mtime = inode->i_ctime = 509 inode->i_atime = inode->i_mtime = inode->i_ctime =
513 current_fs_time(inode->i_sb); 510 current_fs_time(inode->i_sb);
@@ -517,7 +514,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
517 514
518static void bm_clear_inode(struct inode *inode) 515static void bm_clear_inode(struct inode *inode)
519{ 516{
520 kfree(inode->u.generic_ip); 517 kfree(inode->i_private);
521} 518}
522 519
523static void kill_node(Node *e) 520static void kill_node(Node *e)
@@ -545,7 +542,7 @@ static void kill_node(Node *e)
545static ssize_t 542static ssize_t
546bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 543bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
547{ 544{
548 Node *e = file->f_dentry->d_inode->u.generic_ip; 545 Node *e = file->f_dentry->d_inode->i_private;
549 loff_t pos = *ppos; 546 loff_t pos = *ppos;
550 ssize_t res; 547 ssize_t res;
551 char *page; 548 char *page;
@@ -579,7 +576,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
579 size_t count, loff_t *ppos) 576 size_t count, loff_t *ppos)
580{ 577{
581 struct dentry *root; 578 struct dentry *root;
582 Node *e = file->f_dentry->d_inode->u.generic_ip; 579 Node *e = file->f_dentry->d_inode->i_private;
583 int res = parse_command(buffer, count); 580 int res = parse_command(buffer, count);
584 581
585 switch (res) { 582 switch (res) {
@@ -646,7 +643,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
646 } 643 }
647 644
648 e->dentry = dget(dentry); 645 e->dentry = dget(dentry);
649 inode->u.generic_ip = e; 646 inode->i_private = e;
650 inode->i_fop = &bm_entry_operations; 647 inode->i_fop = &bm_entry_operations;
651 648
652 d_instantiate(dentry, inode); 649 d_instantiate(dentry, inode);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 045f98854f14..4346468139e8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -543,11 +543,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev)
543 return kobject_get(bdev->bd_disk->holder_dir); 543 return kobject_get(bdev->bd_disk->holder_dir);
544} 544}
545 545
546static void add_symlink(struct kobject *from, struct kobject *to) 546static int add_symlink(struct kobject *from, struct kobject *to)
547{ 547{
548 if (!from || !to) 548 if (!from || !to)
549 return; 549 return 0;
550 sysfs_create_link(from, to, kobject_name(to)); 550 return sysfs_create_link(from, to, kobject_name(to));
551} 551}
552 552
553static void del_symlink(struct kobject *from, struct kobject *to) 553static void del_symlink(struct kobject *from, struct kobject *to)
@@ -648,30 +648,38 @@ static void free_bd_holder(struct bd_holder *bo)
648 * If there is no matching entry with @bo in @bdev->bd_holder_list, 648 * If there is no matching entry with @bo in @bdev->bd_holder_list,
649 * add @bo to the list, create symlinks. 649 * add @bo to the list, create symlinks.
650 * 650 *
651 * Returns 1 if @bo was added to the list. 651 * Returns 0 if symlinks are created or already there.
652 * Returns 0 if @bo wasn't used by any reason and should be freed. 652 * Returns -ve if something fails and @bo can be freed.
653 */ 653 */
654static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) 654static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
655{ 655{
656 struct bd_holder *tmp; 656 struct bd_holder *tmp;
657 int ret;
657 658
658 if (!bo) 659 if (!bo)
659 return 0; 660 return -EINVAL;
660 661
661 list_for_each_entry(tmp, &bdev->bd_holder_list, list) { 662 list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
662 if (tmp->sdir == bo->sdir) { 663 if (tmp->sdir == bo->sdir) {
663 tmp->count++; 664 tmp->count++;
665 /* We've already done what we need to do here. */
666 free_bd_holder(bo);
664 return 0; 667 return 0;
665 } 668 }
666 } 669 }
667 670
668 if (!bd_holder_grab_dirs(bdev, bo)) 671 if (!bd_holder_grab_dirs(bdev, bo))
669 return 0; 672 return -EBUSY;
670 673
671 add_symlink(bo->sdir, bo->sdev); 674 ret = add_symlink(bo->sdir, bo->sdev);
672 add_symlink(bo->hdir, bo->hdev); 675 if (ret == 0) {
673 list_add_tail(&bo->list, &bdev->bd_holder_list); 676 ret = add_symlink(bo->hdir, bo->hdev);
674 return 1; 677 if (ret)
678 del_symlink(bo->sdir, bo->sdev);
679 }
680 if (ret == 0)
681 list_add_tail(&bo->list, &bdev->bd_holder_list);
682 return ret;
675} 683}
676 684
677/** 685/**
@@ -741,7 +749,9 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
741 749
742 mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); 750 mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
743 res = bd_claim(bdev, holder); 751 res = bd_claim(bdev, holder);
744 if (res || !add_bd_holder(bdev, bo)) 752 if (res == 0)
753 res = add_bd_holder(bdev, bo);
754 if (res)
745 free_bd_holder(bo); 755 free_bd_holder(bo);
746 mutex_unlock(&bdev->bd_mutex); 756 mutex_unlock(&bdev->bd_mutex);
747 757
@@ -1021,7 +1031,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
1021 rescan_partitions(bdev->bd_disk, bdev); 1031 rescan_partitions(bdev->bd_disk, bdev);
1022 } else { 1032 } else {
1023 mutex_lock_nested(&bdev->bd_contains->bd_mutex, 1033 mutex_lock_nested(&bdev->bd_contains->bd_mutex,
1024 BD_MUTEX_PARTITION); 1034 BD_MUTEX_WHOLE);
1025 bdev->bd_contains->bd_part_count++; 1035 bdev->bd_contains->bd_part_count++;
1026 mutex_unlock(&bdev->bd_contains->bd_mutex); 1036 mutex_unlock(&bdev->bd_contains->bd_mutex);
1027 } 1037 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
2987 2987
2988 spin_lock(&mapping->private_lock); 2988 spin_lock(&mapping->private_lock);
2989 ret = drop_buffers(page, &buffers_to_free); 2989 ret = drop_buffers(page, &buffers_to_free);
2990 spin_unlock(&mapping->private_lock);
2990 if (ret) { 2991 if (ret) {
2991 /* 2992 /*
2992 * If the filesystem writes its buffers by hand (eg ext3) 2993 * If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
2998 */ 2999 */
2999 clear_page_dirty(page); 3000 clear_page_dirty(page);
3000 } 3001 }
3001 spin_unlock(&mapping->private_lock);
3002out: 3002out:
3003 if (buffers_to_free) { 3003 if (buffers_to_free) {
3004 struct buffer_head *bh = buffers_to_free; 3004 struct buffer_head *bh = buffers_to_free;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3483d3cf8087..1f3285affa39 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -19,11 +19,30 @@
19#include <linux/kobj_map.h> 19#include <linux/kobj_map.h>
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/mutex.h> 21#include <linux/mutex.h>
22#include <linux/backing-dev.h>
22 23
23#ifdef CONFIG_KMOD 24#ifdef CONFIG_KMOD
24#include <linux/kmod.h> 25#include <linux/kmod.h>
25#endif 26#endif
26 27
28/*
29 * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
30 * devices
31 * - permits shared-mmap for read, write and/or exec
32 * - does not permit private mmap in NOMMU mode (can't do COW)
33 * - no readahead or I/O queue unplugging required
34 */
35struct backing_dev_info directly_mappable_cdev_bdi = {
36 .capabilities = (
37#ifdef CONFIG_MMU
38 /* permit private copies of the data to be taken */
39 BDI_CAP_MAP_COPY |
40#endif
41 /* permit direct mmap, for read, write or exec */
42 BDI_CAP_MAP_DIRECT |
43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
44};
45
27static struct kobj_map *cdev_map; 46static struct kobj_map *cdev_map;
28 47
29static DEFINE_MUTEX(chrdevs_lock); 48static DEFINE_MUTEX(chrdevs_lock);
@@ -109,13 +128,31 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
109 128
110 for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) 129 for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
111 if ((*cp)->major > major || 130 if ((*cp)->major > major ||
112 ((*cp)->major == major && (*cp)->baseminor >= baseminor)) 131 ((*cp)->major == major &&
132 (((*cp)->baseminor >= baseminor) ||
133 ((*cp)->baseminor + (*cp)->minorct > baseminor))))
113 break; 134 break;
114 if (*cp && (*cp)->major == major && 135
115 (*cp)->baseminor < baseminor + minorct) { 136 /* Check for overlapping minor ranges. */
116 ret = -EBUSY; 137 if (*cp && (*cp)->major == major) {
117 goto out; 138 int old_min = (*cp)->baseminor;
139 int old_max = (*cp)->baseminor + (*cp)->minorct - 1;
140 int new_min = baseminor;
141 int new_max = baseminor + minorct - 1;
142
143 /* New driver overlaps from the left. */
144 if (new_max >= old_min && new_max <= old_max) {
145 ret = -EBUSY;
146 goto out;
147 }
148
149 /* New driver overlaps from the right. */
150 if (new_min <= old_max && new_min >= old_min) {
151 ret = -EBUSY;
152 goto out;
153 }
118 } 154 }
155
119 cd->next = *cp; 156 cd->next = *cp;
120 *cp = cd; 157 *cp = cd;
121 mutex_unlock(&chrdevs_lock); 158 mutex_unlock(&chrdevs_lock);
@@ -146,6 +183,15 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
146 return cd; 183 return cd;
147} 184}
148 185
186/**
187 * register_chrdev_region() - register a range of device numbers
188 * @from: the first in the desired range of device numbers; must include
189 * the major number.
190 * @count: the number of consecutive device numbers required
191 * @name: the name of the device or driver.
192 *
193 * Return value is zero on success, a negative error code on failure.
194 */
149int register_chrdev_region(dev_t from, unsigned count, const char *name) 195int register_chrdev_region(dev_t from, unsigned count, const char *name)
150{ 196{
151 struct char_device_struct *cd; 197 struct char_device_struct *cd;
@@ -171,6 +217,17 @@ fail:
171 return PTR_ERR(cd); 217 return PTR_ERR(cd);
172} 218}
173 219
220/**
221 * alloc_chrdev_region() - register a range of char device numbers
222 * @dev: output parameter for first assigned number
223 * @baseminor: first of the requested range of minor numbers
224 * @count: the number of minor numbers required
225 * @name: the name of the associated device or driver
226 *
227 * Allocates a range of char device numbers. The major number will be
228 * chosen dynamically, and returned (along with the first minor number)
229 * in @dev. Returns zero or a negative error code.
230 */
174int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, 231int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
175 const char *name) 232 const char *name)
176{ 233{
@@ -240,6 +297,15 @@ out2:
240 return err; 297 return err;
241} 298}
242 299
300/**
301 * unregister_chrdev_region() - return a range of device numbers
302 * @from: the first in the range of numbers to unregister
303 * @count: the number of device numbers to unregister
304 *
305 * This function will unregister a range of @count device numbers,
306 * starting with @from. The caller should normally be the one who
307 * allocated those numbers in the first place...
308 */
243void unregister_chrdev_region(dev_t from, unsigned count) 309void unregister_chrdev_region(dev_t from, unsigned count)
244{ 310{
245 dev_t to = from + count; 311 dev_t to = from + count;
@@ -377,6 +443,16 @@ static int exact_lock(dev_t dev, void *data)
377 return cdev_get(p) ? 0 : -1; 443 return cdev_get(p) ? 0 : -1;
378} 444}
379 445
446/**
447 * cdev_add() - add a char device to the system
448 * @p: the cdev structure for the device
449 * @dev: the first device number for which this device is responsible
450 * @count: the number of consecutive minor numbers corresponding to this
451 * device
452 *
453 * cdev_add() adds the device represented by @p to the system, making it
454 * live immediately. A negative error code is returned on failure.
455 */
380int cdev_add(struct cdev *p, dev_t dev, unsigned count) 456int cdev_add(struct cdev *p, dev_t dev, unsigned count)
381{ 457{
382 p->dev = dev; 458 p->dev = dev;
@@ -389,6 +465,13 @@ static void cdev_unmap(dev_t dev, unsigned count)
389 kobj_unmap(cdev_map, dev, count); 465 kobj_unmap(cdev_map, dev, count);
390} 466}
391 467
468/**
469 * cdev_del() - remove a cdev from the system
470 * @p: the cdev structure to be removed
471 *
472 * cdev_del() removes @p from the system, possibly freeing the structure
473 * itself.
474 */
392void cdev_del(struct cdev *p) 475void cdev_del(struct cdev *p)
393{ 476{
394 cdev_unmap(p->dev, p->count); 477 cdev_unmap(p->dev, p->count);
@@ -417,6 +500,11 @@ static struct kobj_type ktype_cdev_dynamic = {
417 .release = cdev_dynamic_release, 500 .release = cdev_dynamic_release,
418}; 501};
419 502
503/**
504 * cdev_alloc() - allocate a cdev structure
505 *
506 * Allocates and returns a cdev structure, or NULL on failure.
507 */
420struct cdev *cdev_alloc(void) 508struct cdev *cdev_alloc(void)
421{ 509{
422 struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); 510 struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
@@ -428,6 +516,14 @@ struct cdev *cdev_alloc(void)
428 return p; 516 return p;
429} 517}
430 518
519/**
520 * cdev_init() - initialize a cdev structure
521 * @cdev: the structure to initialize
522 * @fops: the file_operations for this device
523 *
524 * Initializes @cdev, remembering @fops, making it ready to add to the
525 * system with cdev_add().
526 */
431void cdev_init(struct cdev *cdev, const struct file_operations *fops) 527void cdev_init(struct cdev *cdev, const struct file_operations *fops)
432{ 528{
433 memset(cdev, 0, sizeof *cdev); 529 memset(cdev, 0, sizeof *cdev);
@@ -461,3 +557,4 @@ EXPORT_SYMBOL(cdev_del);
461EXPORT_SYMBOL(cdev_add); 557EXPORT_SYMBOL(cdev_add);
462EXPORT_SYMBOL(register_chrdev); 558EXPORT_SYMBOL(register_chrdev);
463EXPORT_SYMBOL(unregister_chrdev); 559EXPORT_SYMBOL(unregister_chrdev);
560EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c3ef1c0d0e68..22bcf4d7e7ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,7 +253,6 @@ cifs_alloc_inode(struct super_block *sb)
253 file data or metadata */ 253 file data or metadata */
254 cifs_inode->clientCanCacheRead = FALSE; 254 cifs_inode->clientCanCacheRead = FALSE;
255 cifs_inode->clientCanCacheAll = FALSE; 255 cifs_inode->clientCanCacheAll = FALSE;
256 cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE;
257 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 256 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
258 cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; 257 cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;
259 INIT_LIST_HEAD(&cifs_inode->openFileList); 258 INIT_LIST_HEAD(&cifs_inode->openFileList);
@@ -699,8 +698,7 @@ cifs_init_inodecache(void)
699static void 698static void
700cifs_destroy_inodecache(void) 699cifs_destroy_inodecache(void)
701{ 700{
702 if (kmem_cache_destroy(cifs_inode_cachep)) 701 kmem_cache_destroy(cifs_inode_cachep);
703 printk(KERN_WARNING "cifs_inode_cache: error freeing\n");
704} 702}
705 703
706static int 704static int
@@ -778,13 +776,9 @@ static void
778cifs_destroy_request_bufs(void) 776cifs_destroy_request_bufs(void)
779{ 777{
780 mempool_destroy(cifs_req_poolp); 778 mempool_destroy(cifs_req_poolp);
781 if (kmem_cache_destroy(cifs_req_cachep)) 779 kmem_cache_destroy(cifs_req_cachep);
782 printk(KERN_WARNING
783 "cifs_destroy_request_cache: error not all structures were freed\n");
784 mempool_destroy(cifs_sm_req_poolp); 780 mempool_destroy(cifs_sm_req_poolp);
785 if (kmem_cache_destroy(cifs_sm_req_cachep)) 781 kmem_cache_destroy(cifs_sm_req_cachep);
786 printk(KERN_WARNING
787 "cifs_destroy_request_cache: cifs_small_rq free error\n");
788} 782}
789 783
790static int 784static int
@@ -819,13 +813,8 @@ static void
819cifs_destroy_mids(void) 813cifs_destroy_mids(void)
820{ 814{
821 mempool_destroy(cifs_mid_poolp); 815 mempool_destroy(cifs_mid_poolp);
822 if (kmem_cache_destroy(cifs_mid_cachep)) 816 kmem_cache_destroy(cifs_mid_cachep);
823 printk(KERN_WARNING 817 kmem_cache_destroy(cifs_oplock_cachep);
824 "cifs_destroy_mids: error not all structures were freed\n");
825
826 if (kmem_cache_destroy(cifs_oplock_cachep))
827 printk(KERN_WARNING
828 "error not all oplock structures were freed\n");
829} 818}
830 819
831static int cifs_oplock_thread(void * dummyarg) 820static int cifs_oplock_thread(void * dummyarg)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9aeb58a7d369..b27b34537bf2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -216,10 +216,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
216 216
217 if (allocation_size < end_of_file) 217 if (allocation_size < end_of_file)
218 cFYI(1, ("May be sparse file, allocation less than file size")); 218 cFYI(1, ("May be sparse file, allocation less than file size"));
219 cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld", 219 cFYI(1, ("File Size %ld and blocks %llu",
220 (unsigned long)tmp_inode->i_size, 220 (unsigned long)tmp_inode->i_size,
221 (unsigned long long)tmp_inode->i_blocks, 221 (unsigned long long)tmp_inode->i_blocks));
222 tmp_inode->i_blksize));
223 if (S_ISREG(tmp_inode->i_mode)) { 222 if (S_ISREG(tmp_inode->i_mode)) {
224 cFYI(1, ("File inode")); 223 cFYI(1, ("File inode"));
225 tmp_inode->i_op = &cifs_file_inode_ops; 224 tmp_inode->i_op = &cifs_file_inode_ops;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 5597080cb811..95a54253c047 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -110,8 +110,6 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
110 inode->i_nlink = attr->va_nlink; 110 inode->i_nlink = attr->va_nlink;
111 if (attr->va_size != -1) 111 if (attr->va_size != -1)
112 inode->i_size = attr->va_size; 112 inode->i_size = attr->va_size;
113 if (attr->va_blocksize != -1)
114 inode->i_blksize = attr->va_blocksize;
115 if (attr->va_size != -1) 113 if (attr->va_size != -1)
116 inode->i_blocks = (attr->va_size + 511) >> 9; 114 inode->i_blocks = (attr->va_size + 511) >> 9;
117 if (attr->va_atime.tv_sec != -1) 115 if (attr->va_atime.tv_sec != -1)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 71f2ea632e53..8651ea6a23b7 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -513,7 +513,7 @@ static int coda_venus_readdir(struct file *filp, filldir_t filldir,
513 ino_t ino; 513 ino_t ino;
514 int ret, i; 514 int ret, i;
515 515
516 vdir = (struct venus_dirent *)kmalloc(sizeof(*vdir), GFP_KERNEL); 516 vdir = kmalloc(sizeof(*vdir), GFP_KERNEL);
517 if (!vdir) return -ENOMEM; 517 if (!vdir) return -ENOMEM;
518 518
519 i = filp->f_pos; 519 i = filp->f_pos;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 87f1dc8aa24b..88d123321164 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -80,8 +80,7 @@ int coda_init_inodecache(void)
80 80
81void coda_destroy_inodecache(void) 81void coda_destroy_inodecache(void)
82{ 82{
83 if (kmem_cache_destroy(coda_inode_cachep)) 83 kmem_cache_destroy(coda_inode_cachep);
84 printk(KERN_INFO "coda_inode_cache: not all structures were freed\n");
85} 84}
86 85
87static int coda_remount(struct super_block *sb, int *flags, char *data) 86static int coda_remount(struct super_block *sb, int *flags, char *data)
diff --git a/fs/compat.c b/fs/compat.c
index e31e9cf96647..ce982f6e8c80 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1855,7 +1855,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1855 1855
1856 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); 1856 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1857 1857
1858 if (tsp && !(current->personality & STICKY_TIMEOUTS)) { 1858 if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
1859 struct compat_timespec rts; 1859 struct compat_timespec rts;
1860 1860
1861 rts.tv_sec = timeout / HZ; 1861 rts.tv_sec = timeout / HZ;
@@ -1866,7 +1866,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1866 } 1866 }
1867 if (compat_timespec_compare(&rts, &ts) >= 0) 1867 if (compat_timespec_compare(&rts, &ts) >= 0)
1868 rts = ts; 1868 rts = ts;
1869 copy_to_user(tsp, &rts, sizeof(rts)); 1869 if (copy_to_user(tsp, &rts, sizeof(rts)))
1870 ret = -EFAULT;
1870 } 1871 }
1871 1872
1872 if (ret == -ERESTARTNOHAND) { 1873 if (ret == -ERESTARTNOHAND) {
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index f499803743e0..85105e50f7db 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -274,9 +274,8 @@ static int check_perm(struct inode * inode, struct file * file)
274 /* No error? Great, allocate a buffer for the file, and store it 274 /* No error? Great, allocate a buffer for the file, and store it
275 * it in file->private_data for easy access. 275 * it in file->private_data for easy access.
276 */ 276 */
277 buffer = kmalloc(sizeof(struct configfs_buffer),GFP_KERNEL); 277 buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
278 if (buffer) { 278 if (buffer) {
279 memset(buffer,0,sizeof(struct configfs_buffer));
280 init_MUTEX(&buffer->sem); 279 init_MUTEX(&buffer->sem);
281 buffer->needs_read_fill = 1; 280 buffer->needs_read_fill = 1;
282 buffer->ops = ops; 281 buffer->ops = ops;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index e14488ca6411..fb18917954a9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -76,11 +76,10 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
76 76
77 if (!sd_iattr) { 77 if (!sd_iattr) {
78 /* setting attributes for the first time, allocate now */ 78 /* setting attributes for the first time, allocate now */
79 sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL); 79 sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
80 if (!sd_iattr) 80 if (!sd_iattr)
81 return -ENOMEM; 81 return -ENOMEM;
82 /* assign default attributes */ 82 /* assign default attributes */
83 memset(sd_iattr, 0, sizeof(struct iattr));
84 sd_iattr->ia_mode = sd->s_mode; 83 sd_iattr->ia_mode = sd->s_mode;
85 sd_iattr->ia_uid = 0; 84 sd_iattr->ia_uid = 0;
86 sd_iattr->ia_gid = 0; 85 sd_iattr->ia_gid = 0;
@@ -136,7 +135,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
136{ 135{
137 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(configfs_sb);
138 if (inode) { 137 if (inode) {
139 inode->i_blksize = PAGE_CACHE_SIZE;
140 inode->i_blocks = 0; 138 inode->i_blocks = 0;
141 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
142 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; 140 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 223c0431042d..a624c3ec8189 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -73,7 +73,6 @@ static int cramfs_iget5_set(struct inode *inode, void *opaque)
73 inode->i_uid = cramfs_inode->uid; 73 inode->i_uid = cramfs_inode->uid;
74 inode->i_size = cramfs_inode->size; 74 inode->i_size = cramfs_inode->size;
75 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; 75 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
76 inode->i_blksize = PAGE_CACHE_SIZE;
77 inode->i_gid = cramfs_inode->gid; 76 inode->i_gid = cramfs_inode->gid;
78 /* Struct copy intentional */ 77 /* Struct copy intentional */
79 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; 78 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
@@ -242,11 +241,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
242 241
243 sb->s_flags |= MS_RDONLY; 242 sb->s_flags |= MS_RDONLY;
244 243
245 sbi = kmalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); 244 sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
246 if (!sbi) 245 if (!sbi)
247 return -ENOMEM; 246 return -ENOMEM;
248 sb->s_fs_info = sbi; 247 sb->s_fs_info = sbi;
249 memset(sbi, 0, sizeof(struct cramfs_sb_info));
250 248
251 /* Invalidate the read buffers on mount: think disk change.. */ 249 /* Invalidate the read buffers on mount: think disk change.. */
252 mutex_lock(&read_mutex); 250 mutex_lock(&read_mutex);
@@ -545,8 +543,15 @@ static struct file_system_type cramfs_fs_type = {
545 543
546static int __init init_cramfs_fs(void) 544static int __init init_cramfs_fs(void)
547{ 545{
548 cramfs_uncompress_init(); 546 int rv;
549 return register_filesystem(&cramfs_fs_type); 547
548 rv = cramfs_uncompress_init();
549 if (rv < 0)
550 return rv;
551 rv = register_filesystem(&cramfs_fs_type);
552 if (rv < 0)
553 cramfs_uncompress_exit();
554 return rv;
550} 555}
551 556
552static void __exit exit_cramfs_fs(void) 557static void __exit exit_cramfs_fs(void)
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 8def89f2c438..fc3ccb74626f 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -68,11 +68,10 @@ int cramfs_uncompress_init(void)
68 return 0; 68 return 0;
69} 69}
70 70
71int cramfs_uncompress_exit(void) 71void cramfs_uncompress_exit(void)
72{ 72{
73 if (!--initialized) { 73 if (!--initialized) {
74 zlib_inflateEnd(&stream); 74 zlib_inflateEnd(&stream);
75 vfree(stream.workspace); 75 vfree(stream.workspace);
76 } 76 }
77 return 0;
78} 77}
diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a34ec57..17b392a2049e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
828 * (or otherwise set) by the caller to indicate that it is now 828 * (or otherwise set) by the caller to indicate that it is now
829 * in use by the dcache. 829 * in use by the dcache.
830 */ 830 */
831struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) 831static struct dentry *__d_instantiate_unique(struct dentry *entry,
832 struct inode *inode)
832{ 833{
833 struct dentry *alias; 834 struct dentry *alias;
834 int len = entry->d_name.len; 835 int len = entry->d_name.len;
835 const char *name = entry->d_name.name; 836 const char *name = entry->d_name.name;
836 unsigned int hash = entry->d_name.hash; 837 unsigned int hash = entry->d_name.hash;
837 838
838 BUG_ON(!list_empty(&entry->d_alias)); 839 if (!inode) {
839 spin_lock(&dcache_lock); 840 entry->d_inode = NULL;
840 if (!inode) 841 return NULL;
841 goto do_negative; 842 }
843
842 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 844 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
843 struct qstr *qstr = &alias->d_name; 845 struct qstr *qstr = &alias->d_name;
844 846
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
851 if (memcmp(qstr->name, name, len)) 853 if (memcmp(qstr->name, name, len))
852 continue; 854 continue;
853 dget_locked(alias); 855 dget_locked(alias);
854 spin_unlock(&dcache_lock);
855 BUG_ON(!d_unhashed(alias));
856 iput(inode);
857 return alias; 856 return alias;
858 } 857 }
858
859 list_add(&entry->d_alias, &inode->i_dentry); 859 list_add(&entry->d_alias, &inode->i_dentry);
860do_negative:
861 entry->d_inode = inode; 860 entry->d_inode = inode;
862 fsnotify_d_instantiate(entry, inode); 861 fsnotify_d_instantiate(entry, inode);
863 spin_unlock(&dcache_lock);
864 security_d_instantiate(entry, inode);
865 return NULL; 862 return NULL;
866} 863}
864
865struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
866{
867 struct dentry *result;
868
869 BUG_ON(!list_empty(&entry->d_alias));
870
871 spin_lock(&dcache_lock);
872 result = __d_instantiate_unique(entry, inode);
873 spin_unlock(&dcache_lock);
874
875 if (!result) {
876 security_d_instantiate(entry, inode);
877 return NULL;
878 }
879
880 BUG_ON(!d_unhashed(result));
881 iput(inode);
882 return result;
883}
884
867EXPORT_SYMBOL(d_instantiate_unique); 885EXPORT_SYMBOL(d_instantiate_unique);
868 886
869/** 887/**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1235 hlist_add_head_rcu(&entry->d_hash, list); 1253 hlist_add_head_rcu(&entry->d_hash, list);
1236} 1254}
1237 1255
1256static void _d_rehash(struct dentry * entry)
1257{
1258 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1259}
1260
1238/** 1261/**
1239 * d_rehash - add an entry back to the hash 1262 * d_rehash - add an entry back to the hash
1240 * @entry: dentry to add to the hash 1263 * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1244 1267
1245void d_rehash(struct dentry * entry) 1268void d_rehash(struct dentry * entry)
1246{ 1269{
1247 struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
1248
1249 spin_lock(&dcache_lock); 1270 spin_lock(&dcache_lock);
1250 spin_lock(&entry->d_lock); 1271 spin_lock(&entry->d_lock);
1251 __d_rehash(entry, list); 1272 _d_rehash(entry);
1252 spin_unlock(&entry->d_lock); 1273 spin_unlock(&entry->d_lock);
1253 spin_unlock(&dcache_lock); 1274 spin_unlock(&dcache_lock);
1254} 1275}
@@ -1386,6 +1407,120 @@ already_unhashed:
1386 spin_unlock(&dcache_lock); 1407 spin_unlock(&dcache_lock);
1387} 1408}
1388 1409
1410/*
1411 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1412 * named dentry in place of the dentry to be replaced.
1413 */
1414static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1415{
1416 struct dentry *dparent, *aparent;
1417
1418 switch_names(dentry, anon);
1419 do_switch(dentry->d_name.len, anon->d_name.len);
1420 do_switch(dentry->d_name.hash, anon->d_name.hash);
1421
1422 dparent = dentry->d_parent;
1423 aparent = anon->d_parent;
1424
1425 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1426 list_del(&dentry->d_u.d_child);
1427 if (!IS_ROOT(dentry))
1428 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1429 else
1430 INIT_LIST_HEAD(&dentry->d_u.d_child);
1431
1432 anon->d_parent = (dparent == dentry) ? anon : dparent;
1433 list_del(&anon->d_u.d_child);
1434 if (!IS_ROOT(anon))
1435 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
1436 else
1437 INIT_LIST_HEAD(&anon->d_u.d_child);
1438
1439 anon->d_flags &= ~DCACHE_DISCONNECTED;
1440}
1441
1442/**
1443 * d_materialise_unique - introduce an inode into the tree
1444 * @dentry: candidate dentry
1445 * @inode: inode to bind to the dentry, to which aliases may be attached
1446 *
1447 * Introduces an dentry into the tree, substituting an extant disconnected
1448 * root directory alias in its place if there is one
1449 */
1450struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1451{
1452 struct dentry *alias, *actual;
1453
1454 BUG_ON(!d_unhashed(dentry));
1455
1456 spin_lock(&dcache_lock);
1457
1458 if (!inode) {
1459 actual = dentry;
1460 dentry->d_inode = NULL;
1461 goto found_lock;
1462 }
1463
1464 /* See if a disconnected directory already exists as an anonymous root
1465 * that we should splice into the tree instead */
1466 if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
1467 spin_lock(&alias->d_lock);
1468
1469 /* Is this a mountpoint that we could splice into our tree? */
1470 if (IS_ROOT(alias))
1471 goto connect_mountpoint;
1472
1473 if (alias->d_name.len == dentry->d_name.len &&
1474 alias->d_parent == dentry->d_parent &&
1475 memcmp(alias->d_name.name,
1476 dentry->d_name.name,
1477 dentry->d_name.len) == 0)
1478 goto replace_with_alias;
1479
1480 spin_unlock(&alias->d_lock);
1481
1482 /* Doh! Seem to be aliasing directories for some reason... */
1483 dput(alias);
1484 }
1485
1486 /* Add a unique reference */
1487 actual = __d_instantiate_unique(dentry, inode);
1488 if (!actual)
1489 actual = dentry;
1490 else if (unlikely(!d_unhashed(actual)))
1491 goto shouldnt_be_hashed;
1492
1493found_lock:
1494 spin_lock(&actual->d_lock);
1495found:
1496 _d_rehash(actual);
1497 spin_unlock(&actual->d_lock);
1498 spin_unlock(&dcache_lock);
1499
1500 if (actual == dentry) {
1501 security_d_instantiate(dentry, inode);
1502 return NULL;
1503 }
1504
1505 iput(inode);
1506 return actual;
1507
1508 /* Convert the anonymous/root alias into an ordinary dentry */
1509connect_mountpoint:
1510 __d_materialise_dentry(dentry, alias);
1511
1512 /* Replace the candidate dentry with the alias in the tree */
1513replace_with_alias:
1514 __d_drop(alias);
1515 actual = alias;
1516 goto found;
1517
1518shouldnt_be_hashed:
1519 spin_unlock(&dcache_lock);
1520 BUG();
1521 goto shouldnt_be_hashed;
1522}
1523
1389/** 1524/**
1390 * d_path - return the path of a dentry 1525 * d_path - return the path of a dentry
1391 * @dentry: dentry to report 1526 * @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
1784EXPORT_SYMBOL(d_invalidate); 1919EXPORT_SYMBOL(d_invalidate);
1785EXPORT_SYMBOL(d_lookup); 1920EXPORT_SYMBOL(d_lookup);
1786EXPORT_SYMBOL(d_move); 1921EXPORT_SYMBOL(d_move);
1922EXPORT_SYMBOL_GPL(d_materialise_unique);
1787EXPORT_SYMBOL(d_path); 1923EXPORT_SYMBOL(d_path);
1788EXPORT_SYMBOL(d_prune_aliases); 1924EXPORT_SYMBOL(d_prune_aliases);
1789EXPORT_SYMBOL(d_rehash); 1925EXPORT_SYMBOL(d_rehash);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 39640fd03458..bf3901ab1744 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -32,8 +32,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
32 32
33static int default_open(struct inode *inode, struct file *file) 33static int default_open(struct inode *inode, struct file *file)
34{ 34{
35 if (inode->u.generic_ip) 35 if (inode->i_private)
36 file->private_data = inode->u.generic_ip; 36 file->private_data = inode->i_private;
37 37
38 return 0; 38 return 0;
39} 39}
@@ -55,12 +55,11 @@ static u64 debugfs_u8_get(void *data)
55DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); 55DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
56 56
57/** 57/**
58 * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write an unsigned 8 bit value. 58 * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
59 *
60 * @name: a pointer to a string containing the name of the file to create. 59 * @name: a pointer to a string containing the name of the file to create.
61 * @mode: the permission that the file should have 60 * @mode: the permission that the file should have
62 * @parent: a pointer to the parent dentry for this file. This should be a 61 * @parent: a pointer to the parent dentry for this file. This should be a
63 * directory dentry if set. If this paramater is NULL, then the 62 * directory dentry if set. If this parameter is %NULL, then the
64 * file will be created in the root of the debugfs filesystem. 63 * file will be created in the root of the debugfs filesystem.
65 * @value: a pointer to the variable that the file should read to and write 64 * @value: a pointer to the variable that the file should read to and write
66 * from. 65 * from.
@@ -72,11 +71,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
72 * This function will return a pointer to a dentry if it succeeds. This 71 * This function will return a pointer to a dentry if it succeeds. This
73 * pointer must be passed to the debugfs_remove() function when the file is 72 * pointer must be passed to the debugfs_remove() function when the file is
74 * to be removed (no automatic cleanup happens if your module is unloaded, 73 * to be removed (no automatic cleanup happens if your module is unloaded,
75 * you are responsible here.) If an error occurs, NULL will be returned. 74 * you are responsible here.) If an error occurs, %NULL will be returned.
76 * 75 *
77 * If debugfs is not enabled in the kernel, the value -ENODEV will be 76 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
78 * returned. It is not wise to check for this value, but rather, check for 77 * returned. It is not wise to check for this value, but rather, check for
79 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 78 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
80 * code. 79 * code.
81 */ 80 */
82struct dentry *debugfs_create_u8(const char *name, mode_t mode, 81struct dentry *debugfs_create_u8(const char *name, mode_t mode,
@@ -97,12 +96,11 @@ static u64 debugfs_u16_get(void *data)
97DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); 96DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
98 97
99/** 98/**
100 * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write an unsigned 16 bit value. 99 * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value
101 *
102 * @name: a pointer to a string containing the name of the file to create. 100 * @name: a pointer to a string containing the name of the file to create.
103 * @mode: the permission that the file should have 101 * @mode: the permission that the file should have
104 * @parent: a pointer to the parent dentry for this file. This should be a 102 * @parent: a pointer to the parent dentry for this file. This should be a
105 * directory dentry if set. If this paramater is NULL, then the 103 * directory dentry if set. If this parameter is %NULL, then the
106 * file will be created in the root of the debugfs filesystem. 104 * file will be created in the root of the debugfs filesystem.
107 * @value: a pointer to the variable that the file should read to and write 105 * @value: a pointer to the variable that the file should read to and write
108 * from. 106 * from.
@@ -114,11 +112,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
114 * This function will return a pointer to a dentry if it succeeds. This 112 * This function will return a pointer to a dentry if it succeeds. This
115 * pointer must be passed to the debugfs_remove() function when the file is 113 * pointer must be passed to the debugfs_remove() function when the file is
116 * to be removed (no automatic cleanup happens if your module is unloaded, 114 * to be removed (no automatic cleanup happens if your module is unloaded,
117 * you are responsible here.) If an error occurs, NULL will be returned. 115 * you are responsible here.) If an error occurs, %NULL will be returned.
118 * 116 *
119 * If debugfs is not enabled in the kernel, the value -ENODEV will be 117 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
120 * returned. It is not wise to check for this value, but rather, check for 118 * returned. It is not wise to check for this value, but rather, check for
121 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 119 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
122 * code. 120 * code.
123 */ 121 */
124struct dentry *debugfs_create_u16(const char *name, mode_t mode, 122struct dentry *debugfs_create_u16(const char *name, mode_t mode,
@@ -139,12 +137,11 @@ static u64 debugfs_u32_get(void *data)
139DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); 137DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
140 138
141/** 139/**
142 * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write an unsigned 32 bit value. 140 * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value
143 *
144 * @name: a pointer to a string containing the name of the file to create. 141 * @name: a pointer to a string containing the name of the file to create.
145 * @mode: the permission that the file should have 142 * @mode: the permission that the file should have
146 * @parent: a pointer to the parent dentry for this file. This should be a 143 * @parent: a pointer to the parent dentry for this file. This should be a
147 * directory dentry if set. If this paramater is NULL, then the 144 * directory dentry if set. If this parameter is %NULL, then the
148 * file will be created in the root of the debugfs filesystem. 145 * file will be created in the root of the debugfs filesystem.
149 * @value: a pointer to the variable that the file should read to and write 146 * @value: a pointer to the variable that the file should read to and write
150 * from. 147 * from.
@@ -156,11 +153,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
156 * This function will return a pointer to a dentry if it succeeds. This 153 * This function will return a pointer to a dentry if it succeeds. This
157 * pointer must be passed to the debugfs_remove() function when the file is 154 * pointer must be passed to the debugfs_remove() function when the file is
158 * to be removed (no automatic cleanup happens if your module is unloaded, 155 * to be removed (no automatic cleanup happens if your module is unloaded,
159 * you are responsible here.) If an error occurs, NULL will be returned. 156 * you are responsible here.) If an error occurs, %NULL will be returned.
160 * 157 *
161 * If debugfs is not enabled in the kernel, the value -ENODEV will be 158 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
162 * returned. It is not wise to check for this value, but rather, check for 159 * returned. It is not wise to check for this value, but rather, check for
163 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 160 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
164 * code. 161 * code.
165 */ 162 */
166struct dentry *debugfs_create_u32(const char *name, mode_t mode, 163struct dentry *debugfs_create_u32(const char *name, mode_t mode,
@@ -219,12 +216,11 @@ static const struct file_operations fops_bool = {
219}; 216};
220 217
221/** 218/**
222 * debugfs_create_bool - create a file in the debugfs filesystem that is used to read and write a boolean value. 219 * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value
223 *
224 * @name: a pointer to a string containing the name of the file to create. 220 * @name: a pointer to a string containing the name of the file to create.
225 * @mode: the permission that the file should have 221 * @mode: the permission that the file should have
226 * @parent: a pointer to the parent dentry for this file. This should be a 222 * @parent: a pointer to the parent dentry for this file. This should be a
227 * directory dentry if set. If this paramater is NULL, then the 223 * directory dentry if set. If this parameter is %NULL, then the
228 * file will be created in the root of the debugfs filesystem. 224 * file will be created in the root of the debugfs filesystem.
229 * @value: a pointer to the variable that the file should read to and write 225 * @value: a pointer to the variable that the file should read to and write
230 * from. 226 * from.
@@ -236,11 +232,11 @@ static const struct file_operations fops_bool = {
236 * This function will return a pointer to a dentry if it succeeds. This 232 * This function will return a pointer to a dentry if it succeeds. This
237 * pointer must be passed to the debugfs_remove() function when the file is 233 * pointer must be passed to the debugfs_remove() function when the file is
238 * to be removed (no automatic cleanup happens if your module is unloaded, 234 * to be removed (no automatic cleanup happens if your module is unloaded,
239 * you are responsible here.) If an error occurs, NULL will be returned. 235 * you are responsible here.) If an error occurs, %NULL will be returned.
240 * 236 *
241 * If debugfs is not enabled in the kernel, the value -ENODEV will be 237 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
242 * returned. It is not wise to check for this value, but rather, check for 238 * returned. It is not wise to check for this value, but rather, check for
243 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 239 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
244 * code. 240 * code.
245 */ 241 */
246struct dentry *debugfs_create_bool(const char *name, mode_t mode, 242struct dentry *debugfs_create_bool(const char *name, mode_t mode,
@@ -264,13 +260,11 @@ static struct file_operations fops_blob = {
264}; 260};
265 261
266/** 262/**
267 * debugfs_create_blob - create a file in the debugfs filesystem that is 263 * debugfs_create_blob - create a debugfs file that is used to read and write a binary blob
268 * used to read and write a binary blob.
269 *
270 * @name: a pointer to a string containing the name of the file to create. 264 * @name: a pointer to a string containing the name of the file to create.
271 * @mode: the permission that the file should have 265 * @mode: the permission that the file should have
272 * @parent: a pointer to the parent dentry for this file. This should be a 266 * @parent: a pointer to the parent dentry for this file. This should be a
273 * directory dentry if set. If this paramater is NULL, then the 267 * directory dentry if set. If this parameter is %NULL, then the
274 * file will be created in the root of the debugfs filesystem. 268 * file will be created in the root of the debugfs filesystem.
275 * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer 269 * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer
276 * to the blob data and the size of the data. 270 * to the blob data and the size of the data.
@@ -282,11 +276,11 @@ static struct file_operations fops_blob = {
282 * This function will return a pointer to a dentry if it succeeds. This 276 * This function will return a pointer to a dentry if it succeeds. This
283 * pointer must be passed to the debugfs_remove() function when the file is 277 * pointer must be passed to the debugfs_remove() function when the file is
284 * to be removed (no automatic cleanup happens if your module is unloaded, 278 * to be removed (no automatic cleanup happens if your module is unloaded,
285 * you are responsible here.) If an error occurs, NULL will be returned. 279 * you are responsible here.) If an error occurs, %NULL will be returned.
286 * 280 *
287 * If debugfs is not enabled in the kernel, the value -ENODEV will be 281 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
288 * returned. It is not wise to check for this value, but rather, check for 282 * returned. It is not wise to check for this value, but rather, check for
289 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 283 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
290 * code. 284 * code.
291 */ 285 */
292struct dentry *debugfs_create_blob(const char *name, mode_t mode, 286struct dentry *debugfs_create_blob(const char *name, mode_t mode,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e8ae3042b806..269e649e6dc6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,7 +40,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
40 inode->i_mode = mode; 40 inode->i_mode = mode;
41 inode->i_uid = 0; 41 inode->i_uid = 0;
42 inode->i_gid = 0; 42 inode->i_gid = 0;
43 inode->i_blksize = PAGE_CACHE_SIZE;
44 inode->i_blocks = 0; 43 inode->i_blocks = 0;
45 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 44 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
46 switch (mode & S_IFMT) { 45 switch (mode & S_IFMT) {
@@ -162,14 +161,13 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
162 161
163/** 162/**
164 * debugfs_create_file - create a file in the debugfs filesystem 163 * debugfs_create_file - create a file in the debugfs filesystem
165 *
166 * @name: a pointer to a string containing the name of the file to create. 164 * @name: a pointer to a string containing the name of the file to create.
167 * @mode: the permission that the file should have 165 * @mode: the permission that the file should have
168 * @parent: a pointer to the parent dentry for this file. This should be a 166 * @parent: a pointer to the parent dentry for this file. This should be a
169 * directory dentry if set. If this paramater is NULL, then the 167 * directory dentry if set. If this paramater is NULL, then the
170 * file will be created in the root of the debugfs filesystem. 168 * file will be created in the root of the debugfs filesystem.
171 * @data: a pointer to something that the caller will want to get to later 169 * @data: a pointer to something that the caller will want to get to later
172 * on. The inode.u.generic_ip pointer will point to this value on 170 * on. The inode.i_private pointer will point to this value on
173 * the open() call. 171 * the open() call.
174 * @fops: a pointer to a struct file_operations that should be used for 172 * @fops: a pointer to a struct file_operations that should be used for
175 * this file. 173 * this file.
@@ -182,11 +180,11 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
182 * This function will return a pointer to a dentry if it succeeds. This 180 * This function will return a pointer to a dentry if it succeeds. This
183 * pointer must be passed to the debugfs_remove() function when the file is 181 * pointer must be passed to the debugfs_remove() function when the file is
184 * to be removed (no automatic cleanup happens if your module is unloaded, 182 * to be removed (no automatic cleanup happens if your module is unloaded,
185 * you are responsible here.) If an error occurs, NULL will be returned. 183 * you are responsible here.) If an error occurs, %NULL will be returned.
186 * 184 *
187 * If debugfs is not enabled in the kernel, the value -ENODEV will be 185 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
188 * returned. It is not wise to check for this value, but rather, check for 186 * returned. It is not wise to check for this value, but rather, check for
189 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 187 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
190 * code. 188 * code.
191 */ 189 */
192struct dentry *debugfs_create_file(const char *name, mode_t mode, 190struct dentry *debugfs_create_file(const char *name, mode_t mode,
@@ -210,7 +208,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
210 208
211 if (dentry->d_inode) { 209 if (dentry->d_inode) {
212 if (data) 210 if (data)
213 dentry->d_inode->u.generic_ip = data; 211 dentry->d_inode->i_private = data;
214 if (fops) 212 if (fops)
215 dentry->d_inode->i_fop = fops; 213 dentry->d_inode->i_fop = fops;
216 } 214 }
@@ -221,7 +219,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
221 219
222/** 220/**
223 * debugfs_create_dir - create a directory in the debugfs filesystem 221 * debugfs_create_dir - create a directory in the debugfs filesystem
224 *
225 * @name: a pointer to a string containing the name of the directory to 222 * @name: a pointer to a string containing the name of the directory to
226 * create. 223 * create.
227 * @parent: a pointer to the parent dentry for this file. This should be a 224 * @parent: a pointer to the parent dentry for this file. This should be a
@@ -233,11 +230,11 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
233 * This function will return a pointer to a dentry if it succeeds. This 230 * This function will return a pointer to a dentry if it succeeds. This
234 * pointer must be passed to the debugfs_remove() function when the file is 231 * pointer must be passed to the debugfs_remove() function when the file is
235 * to be removed (no automatic cleanup happens if your module is unloaded, 232 * to be removed (no automatic cleanup happens if your module is unloaded,
236 * you are responsible here.) If an error occurs, NULL will be returned. 233 * you are responsible here.) If an error occurs, %NULL will be returned.
237 * 234 *
238 * If debugfs is not enabled in the kernel, the value -ENODEV will be 235 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
239 * returned. It is not wise to check for this value, but rather, check for 236 * returned. It is not wise to check for this value, but rather, check for
240 * NULL or !NULL instead as to eliminate the need for #ifdef in the calling 237 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
241 * code. 238 * code.
242 */ 239 */
243struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) 240struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
@@ -250,7 +247,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
250 247
251/** 248/**
252 * debugfs_remove - removes a file or directory from the debugfs filesystem 249 * debugfs_remove - removes a file or directory from the debugfs filesystem
253 *
254 * @dentry: a pointer to a the dentry of the file or directory to be 250 * @dentry: a pointer to a the dentry of the file or directory to be
255 * removed. 251 * removed.
256 * 252 *
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f7aef5bb584a..5f7b5a6025bf 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -113,7 +113,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
113 inode->i_ino = 1; 113 inode->i_ino = 1;
114 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 114 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
115 inode->i_blocks = 0; 115 inode->i_blocks = 0;
116 inode->i_blksize = 1024;
117 inode->i_uid = inode->i_gid = 0; 116 inode->i_uid = inode->i_gid = 0;
118 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 117 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
119 inode->i_op = &simple_dir_inode_operations; 118 inode->i_op = &simple_dir_inode_operations;
@@ -172,12 +171,11 @@ int devpts_pty_new(struct tty_struct *tty)
172 return -ENOMEM; 171 return -ENOMEM;
173 172
174 inode->i_ino = number+2; 173 inode->i_ino = number+2;
175 inode->i_blksize = 1024;
176 inode->i_uid = config.setuid ? config.uid : current->fsuid; 174 inode->i_uid = config.setuid ? config.uid : current->fsuid;
177 inode->i_gid = config.setgid ? config.gid : current->fsgid; 175 inode->i_gid = config.setgid ? config.gid : current->fsgid;
178 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 176 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
179 init_special_inode(inode, S_IFCHR|config.mode, device); 177 init_special_inode(inode, S_IFCHR|config.mode, device);
180 inode->u.generic_ip = tty; 178 inode->i_private = tty;
181 179
182 dentry = get_node(number); 180 dentry = get_node(number);
183 if (!IS_ERR(dentry) && !dentry->d_inode) 181 if (!IS_ERR(dentry) && !dentry->d_inode)
@@ -196,7 +194,7 @@ struct tty_struct *devpts_get_tty(int number)
196 tty = NULL; 194 tty = NULL;
197 if (!IS_ERR(dentry)) { 195 if (!IS_ERR(dentry)) {
198 if (dentry->d_inode) 196 if (dentry->d_inode)
199 tty = dentry->d_inode->u.generic_ip; 197 tty = dentry->d_inode->i_private;
200 dput(dentry); 198 dput(dentry);
201 } 199 }
202 200
diff --git a/fs/dquot.c b/fs/dquot.c
index 0122a279106a..9af789567e51 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -834,6 +834,9 @@ static void print_warning(struct dquot *dquot, const char warntype)
834 if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) 834 if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
835 return; 835 return;
836 836
837 mutex_lock(&tty_mutex);
838 if (!current->signal->tty)
839 goto out_lock;
837 tty_write_message(current->signal->tty, dquot->dq_sb->s_id); 840 tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
838 if (warntype == ISOFTWARN || warntype == BSOFTWARN) 841 if (warntype == ISOFTWARN || warntype == BSOFTWARN)
839 tty_write_message(current->signal->tty, ": warning, "); 842 tty_write_message(current->signal->tty, ": warning, ");
@@ -861,6 +864,8 @@ static void print_warning(struct dquot *dquot, const char warntype)
861 break; 864 break;
862 } 865 }
863 tty_write_message(current->signal->tty, msg); 866 tty_write_message(current->signal->tty, msg);
867out_lock:
868 mutex_unlock(&tty_mutex);
864} 869}
865 870
866static inline void flush_warnings(struct dquot **dquots, char *warntype) 871static inline void flush_warnings(struct dquot **dquots, char *warntype)
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 8ac2462ae5dd..b3f50651eb6b 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -90,8 +90,7 @@ static int init_inodecache(void)
90 90
91static void destroy_inodecache(void) 91static void destroy_inodecache(void)
92{ 92{
93 if (kmem_cache_destroy(efs_inode_cachep)) 93 kmem_cache_destroy(efs_inode_cachep);
94 printk(KERN_INFO "efs_inode_cache: not all structures were freed\n");
95} 94}
96 95
97static void efs_put_super(struct super_block *s) 96static void efs_put_super(struct super_block *s)
@@ -248,11 +247,10 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
248 struct buffer_head *bh; 247 struct buffer_head *bh;
249 struct inode *root; 248 struct inode *root;
250 249
251 sb = kmalloc(sizeof(struct efs_sb_info), GFP_KERNEL); 250 sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
252 if (!sb) 251 if (!sb)
253 return -ENOMEM; 252 return -ENOMEM;
254 s->s_fs_info = sb; 253 s->s_fs_info = sb;
255 memset(sb, 0, sizeof(struct efs_sb_info));
256 254
257 s->s_magic = EFS_SUPER_MAGIC; 255 s->s_magic = EFS_SUPER_MAGIC;
258 if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) { 256 if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3a3567433b92..8d544334bcd2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1590,7 +1590,6 @@ static struct inode *ep_eventpoll_inode(void)
1590 inode->i_uid = current->fsuid; 1590 inode->i_uid = current->fsuid;
1591 inode->i_gid = current->fsgid; 1591 inode->i_gid = current->fsgid;
1592 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1592 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1593 inode->i_blksize = PAGE_SIZE;
1594 return inode; 1593 return inode;
1595 1594
1596eexit_1: 1595eexit_1:
diff --git a/fs/exec.c b/fs/exec.c
index 54135df2a966..a8efe35176b0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -595,7 +595,7 @@ static int de_thread(struct task_struct *tsk)
595 if (!newsighand) 595 if (!newsighand)
596 return -ENOMEM; 596 return -ENOMEM;
597 597
598 if (thread_group_empty(current)) 598 if (thread_group_empty(tsk))
599 goto no_thread_group; 599 goto no_thread_group;
600 600
601 /* 601 /*
@@ -620,17 +620,17 @@ static int de_thread(struct task_struct *tsk)
620 * Reparenting needs write_lock on tasklist_lock, 620 * Reparenting needs write_lock on tasklist_lock,
621 * so it is safe to do it under read_lock. 621 * so it is safe to do it under read_lock.
622 */ 622 */
623 if (unlikely(current->group_leader == child_reaper)) 623 if (unlikely(tsk->group_leader == child_reaper))
624 child_reaper = current; 624 child_reaper = tsk;
625 625
626 zap_other_threads(current); 626 zap_other_threads(tsk);
627 read_unlock(&tasklist_lock); 627 read_unlock(&tasklist_lock);
628 628
629 /* 629 /*
630 * Account for the thread group leader hanging around: 630 * Account for the thread group leader hanging around:
631 */ 631 */
632 count = 1; 632 count = 1;
633 if (!thread_group_leader(current)) { 633 if (!thread_group_leader(tsk)) {
634 count = 2; 634 count = 2;
635 /* 635 /*
636 * The SIGALRM timer survives the exec, but needs to point 636 * The SIGALRM timer survives the exec, but needs to point
@@ -639,14 +639,14 @@ static int de_thread(struct task_struct *tsk)
639 * synchronize with any firing (by calling del_timer_sync) 639 * synchronize with any firing (by calling del_timer_sync)
640 * before we can safely let the old group leader die. 640 * before we can safely let the old group leader die.
641 */ 641 */
642 sig->tsk = current; 642 sig->tsk = tsk;
643 spin_unlock_irq(lock); 643 spin_unlock_irq(lock);
644 if (hrtimer_cancel(&sig->real_timer)) 644 if (hrtimer_cancel(&sig->real_timer))
645 hrtimer_restart(&sig->real_timer); 645 hrtimer_restart(&sig->real_timer);
646 spin_lock_irq(lock); 646 spin_lock_irq(lock);
647 } 647 }
648 while (atomic_read(&sig->count) > count) { 648 while (atomic_read(&sig->count) > count) {
649 sig->group_exit_task = current; 649 sig->group_exit_task = tsk;
650 sig->notify_count = count; 650 sig->notify_count = count;
651 __set_current_state(TASK_UNINTERRUPTIBLE); 651 __set_current_state(TASK_UNINTERRUPTIBLE);
652 spin_unlock_irq(lock); 652 spin_unlock_irq(lock);
@@ -662,13 +662,13 @@ static int de_thread(struct task_struct *tsk)
662 * do is to wait for the thread group leader to become inactive, 662 * do is to wait for the thread group leader to become inactive,
663 * and to assume its PID: 663 * and to assume its PID:
664 */ 664 */
665 if (!thread_group_leader(current)) { 665 if (!thread_group_leader(tsk)) {
666 /* 666 /*
667 * Wait for the thread group leader to be a zombie. 667 * Wait for the thread group leader to be a zombie.
668 * It should already be zombie at this point, most 668 * It should already be zombie at this point, most
669 * of the time. 669 * of the time.
670 */ 670 */
671 leader = current->group_leader; 671 leader = tsk->group_leader;
672 while (leader->exit_state != EXIT_ZOMBIE) 672 while (leader->exit_state != EXIT_ZOMBIE)
673 yield(); 673 yield();
674 674
@@ -682,12 +682,12 @@ static int de_thread(struct task_struct *tsk)
682 * When we take on its identity by switching to its PID, we 682 * When we take on its identity by switching to its PID, we
683 * also take its birthdate (always earlier than our own). 683 * also take its birthdate (always earlier than our own).
684 */ 684 */
685 current->start_time = leader->start_time; 685 tsk->start_time = leader->start_time;
686 686
687 write_lock_irq(&tasklist_lock); 687 write_lock_irq(&tasklist_lock);
688 688
689 BUG_ON(leader->tgid != current->tgid); 689 BUG_ON(leader->tgid != tsk->tgid);
690 BUG_ON(current->pid == current->tgid); 690 BUG_ON(tsk->pid == tsk->tgid);
691 /* 691 /*
692 * An exec() starts a new thread group with the 692 * An exec() starts a new thread group with the
693 * TGID of the previous thread group. Rehash the 693 * TGID of the previous thread group. Rehash the
@@ -696,24 +696,21 @@ static int de_thread(struct task_struct *tsk)
696 */ 696 */
697 697
698 /* Become a process group leader with the old leader's pid. 698 /* Become a process group leader with the old leader's pid.
699 * Note: The old leader also uses thispid until release_task 699 * The old leader becomes a thread of the this thread group.
700 * Note: The old leader also uses this pid until release_task
700 * is called. Odd but simple and correct. 701 * is called. Odd but simple and correct.
701 */ 702 */
702 detach_pid(current, PIDTYPE_PID); 703 detach_pid(tsk, PIDTYPE_PID);
703 current->pid = leader->pid; 704 tsk->pid = leader->pid;
704 attach_pid(current, PIDTYPE_PID, current->pid); 705 attach_pid(tsk, PIDTYPE_PID, tsk->pid);
705 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 706 transfer_pid(leader, tsk, PIDTYPE_PGID);
706 attach_pid(current, PIDTYPE_SID, current->signal->session); 707 transfer_pid(leader, tsk, PIDTYPE_SID);
707 list_replace_rcu(&leader->tasks, &current->tasks); 708 list_replace_rcu(&leader->tasks, &tsk->tasks);
708 709
709 current->group_leader = current; 710 tsk->group_leader = tsk;
710 leader->group_leader = current; 711 leader->group_leader = tsk;
711 712
712 /* Reduce leader to a thread */ 713 tsk->exit_signal = SIGCHLD;
713 detach_pid(leader, PIDTYPE_PGID);
714 detach_pid(leader, PIDTYPE_SID);
715
716 current->exit_signal = SIGCHLD;
717 714
718 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 715 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
719 leader->exit_state = EXIT_DEAD; 716 leader->exit_state = EXIT_DEAD;
@@ -753,7 +750,7 @@ no_thread_group:
753 spin_lock(&oldsighand->siglock); 750 spin_lock(&oldsighand->siglock);
754 spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); 751 spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
755 752
756 rcu_assign_pointer(current->sighand, newsighand); 753 rcu_assign_pointer(tsk->sighand, newsighand);
757 recalc_sigpending(); 754 recalc_sigpending();
758 755
759 spin_unlock(&newsighand->siglock); 756 spin_unlock(&newsighand->siglock);
@@ -764,7 +761,7 @@ no_thread_group:
764 kmem_cache_free(sighand_cachep, oldsighand); 761 kmem_cache_free(sighand_cachep, oldsighand);
765 } 762 }
766 763
767 BUG_ON(!thread_group_leader(current)); 764 BUG_ON(!thread_group_leader(tsk));
768 return 0; 765 return 0;
769} 766}
770 767
@@ -901,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm)
901 return 0; 898 return 0;
902 899
903mmap_failed: 900mmap_failed:
904 put_files_struct(current->files); 901 reset_files_struct(current, files);
905 current->files = files;
906out: 902out:
907 return retval; 903 return retval;
908} 904}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index da52b4a5db64..7c420b800c34 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -89,8 +89,8 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
89 size_t n; 89 size_t n;
90 90
91 *size = ext2_acl_size(acl->a_count); 91 *size = ext2_acl_size(acl->a_count);
92 ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) + 92 ext_acl = kmalloc(sizeof(ext2_acl_header) + acl->a_count *
93 acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL); 93 sizeof(ext2_acl_entry), GFP_KERNEL);
94 if (!ext_acl) 94 if (!ext_acl)
95 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); 96 ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 695f69ccf908..2cb545bf0f3c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -574,7 +574,6 @@ got:
574 inode->i_mode = mode; 574 inode->i_mode = mode;
575 575
576 inode->i_ino = ino; 576 inode->i_ino = ino;
577 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
578 inode->i_blocks = 0; 577 inode->i_blocks = 0;
579 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 578 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
580 memset(ei->i_data, 0, sizeof(ei->i_data)); 579 memset(ei->i_data, 0, sizeof(ei->i_data));
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fb4d3220eb8d..dd4e14c221e0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1094,7 +1094,6 @@ void ext2_read_inode (struct inode * inode)
1094 brelse (bh); 1094 brelse (bh);
1095 goto bad_inode; 1095 goto bad_inode;
1096 } 1096 }
1097 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
1098 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); 1097 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
1099 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 1098 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
1100 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); 1099 ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 4286ff6330b6..513cd421ac0b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -184,8 +184,7 @@ static int init_inodecache(void)
184 184
185static void destroy_inodecache(void) 185static void destroy_inodecache(void)
186{ 186{
187 if (kmem_cache_destroy(ext2_inode_cachep)) 187 kmem_cache_destroy(ext2_inode_cachep);
188 printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n");
189} 188}
190 189
191static void ext2_clear_inode(struct inode *inode) 190static void ext2_clear_inode(struct inode *inode)
@@ -544,17 +543,24 @@ static int ext2_check_descriptors (struct super_block * sb)
544 int i; 543 int i;
545 int desc_block = 0; 544 int desc_block = 0;
546 struct ext2_sb_info *sbi = EXT2_SB(sb); 545 struct ext2_sb_info *sbi = EXT2_SB(sb);
547 unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); 546 unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
547 unsigned long last_block;
548 struct ext2_group_desc * gdp = NULL; 548 struct ext2_group_desc * gdp = NULL;
549 549
550 ext2_debug ("Checking group descriptors"); 550 ext2_debug ("Checking group descriptors");
551 551
552 for (i = 0; i < sbi->s_groups_count; i++) 552 for (i = 0; i < sbi->s_groups_count; i++)
553 { 553 {
554 if (i == sbi->s_groups_count - 1)
555 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
556 else
557 last_block = first_block +
558 (EXT2_BLOCKS_PER_GROUP(sb) - 1);
559
554 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 560 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
555 gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; 561 gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data;
556 if (le32_to_cpu(gdp->bg_block_bitmap) < block || 562 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
557 le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) 563 le32_to_cpu(gdp->bg_block_bitmap) > last_block)
558 { 564 {
559 ext2_error (sb, "ext2_check_descriptors", 565 ext2_error (sb, "ext2_check_descriptors",
560 "Block bitmap for group %d" 566 "Block bitmap for group %d"
@@ -562,8 +568,8 @@ static int ext2_check_descriptors (struct super_block * sb)
562 i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap)); 568 i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
563 return 0; 569 return 0;
564 } 570 }
565 if (le32_to_cpu(gdp->bg_inode_bitmap) < block || 571 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
566 le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb)) 572 le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
567 { 573 {
568 ext2_error (sb, "ext2_check_descriptors", 574 ext2_error (sb, "ext2_check_descriptors",
569 "Inode bitmap for group %d" 575 "Inode bitmap for group %d"
@@ -571,9 +577,9 @@ static int ext2_check_descriptors (struct super_block * sb)
571 i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap)); 577 i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
572 return 0; 578 return 0;
573 } 579 }
574 if (le32_to_cpu(gdp->bg_inode_table) < block || 580 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
575 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= 581 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
576 block + EXT2_BLOCKS_PER_GROUP(sb)) 582 last_block)
577 { 583 {
578 ext2_error (sb, "ext2_check_descriptors", 584 ext2_error (sb, "ext2_check_descriptors",
579 "Inode table for group %d" 585 "Inode table for group %d"
@@ -581,7 +587,7 @@ static int ext2_check_descriptors (struct super_block * sb)
581 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); 587 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
582 return 0; 588 return 0;
583 } 589 }
584 block += EXT2_BLOCKS_PER_GROUP(sb); 590 first_block += EXT2_BLOCKS_PER_GROUP(sb);
585 gdp++; 591 gdp++;
586 } 592 }
587 return 1; 593 return 1;
@@ -648,11 +654,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
648 int i, j; 654 int i, j;
649 __le32 features; 655 __le32 features;
650 656
651 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); 657 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
652 if (!sbi) 658 if (!sbi)
653 return -ENOMEM; 659 return -ENOMEM;
654 sb->s_fs_info = sbi; 660 sb->s_fs_info = sbi;
655 memset(sbi, 0, sizeof(*sbi));
656 661
657 /* 662 /*
658 * See what the current blocksize for the device is, and 663 * See what the current blocksize for the device is, and
@@ -861,10 +866,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
861 866
862 if (EXT2_BLOCKS_PER_GROUP(sb) == 0) 867 if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
863 goto cantfind_ext2; 868 goto cantfind_ext2;
864 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - 869 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
865 le32_to_cpu(es->s_first_data_block) + 870 le32_to_cpu(es->s_first_data_block) - 1)
866 EXT2_BLOCKS_PER_GROUP(sb) - 1) / 871 / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
867 EXT2_BLOCKS_PER_GROUP(sb);
868 db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / 872 db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
869 EXT2_DESC_PER_BLOCK(sb); 873 EXT2_DESC_PER_BLOCK(sb);
870 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); 874 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 86ae8e93adb9..af52a7f8b291 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -521,11 +521,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
521 } 521 }
522 } else { 522 } else {
523 /* Allocate a buffer where we construct the new block. */ 523 /* Allocate a buffer where we construct the new block. */
524 header = kmalloc(sb->s_blocksize, GFP_KERNEL); 524 header = kzalloc(sb->s_blocksize, GFP_KERNEL);
525 error = -ENOMEM; 525 error = -ENOMEM;
526 if (header == NULL) 526 if (header == NULL)
527 goto cleanup; 527 goto cleanup;
528 memset(header, 0, sb->s_blocksize);
529 end = (char *)header + sb->s_blocksize; 528 end = (char *)header + sb->s_blocksize;
530 header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); 529 header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
531 header->h_blocks = header->h_refcount = cpu_to_le32(1); 530 header->h_blocks = header->h_refcount = cpu_to_le32(1);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 0d21d558b87a..1e5038d9a01b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -90,8 +90,8 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
90 size_t n; 90 size_t n;
91 91
92 *size = ext3_acl_size(acl->a_count); 92 *size = ext3_acl_size(acl->a_count);
93 ext_acl = (ext3_acl_header *)kmalloc(sizeof(ext3_acl_header) + 93 ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
94 acl->a_count * sizeof(ext3_acl_entry), GFP_KERNEL); 94 sizeof(ext3_acl_entry), GFP_KERNEL);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
@@ -258,7 +258,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
258 default: 258 default:
259 return -EINVAL; 259 return -EINVAL;
260 } 260 }
261 if (acl) { 261 if (acl) {
262 value = ext3_acl_to_disk(acl, &size); 262 value = ext3_acl_to_disk(acl, &size);
263 if (IS_ERR(value)) 263 if (IS_ERR(value))
264 return (int)PTR_ERR(value); 264 return (int)PTR_ERR(value);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 063d994bda0b..b41a7d7e20f0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -38,6 +38,13 @@
38 38
39#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 39#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
40 40
41/**
42 * ext3_get_group_desc() -- load group descriptor from disk
43 * @sb: super block
44 * @block_group: given block group
45 * @bh: pointer to the buffer head to store the block
46 * group descriptor
47 */
41struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, 48struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42 unsigned int block_group, 49 unsigned int block_group,
43 struct buffer_head ** bh) 50 struct buffer_head ** bh)
@@ -73,8 +80,12 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
73 return desc + offset; 80 return desc + offset;
74} 81}
75 82
76/* 83/**
77 * Read the bitmap for a given block_group, reading into the specified 84 * read_block_bitmap()
85 * @sb: super block
86 * @block_group: given block group
87 *
88 * Read the bitmap for a given block_group, reading into the specified
78 * slot in the superblock's bitmap cache. 89 * slot in the superblock's bitmap cache.
79 * 90 *
80 * Return buffer_head on success or NULL in case of failure. 91 * Return buffer_head on success or NULL in case of failure.
@@ -103,15 +114,22 @@ error_out:
103 * Operations include: 114 * Operations include:
104 * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 115 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
105 * 116 *
106 * We use sorted double linked list for the per-filesystem reservation 117 * We use a red-black tree to represent per-filesystem reservation
107 * window list. (like in vm_region). 118 * windows.
119 *
120 */
121
122/**
123 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
124 * @rb_root: root of per-filesystem reservation rb tree
125 * @verbose: verbose mode
126 * @fn: function which wishes to dump the reservation map
108 * 127 *
109 * Initially, we keep those small operations in the abstract functions, 128 * If verbose is turned on, it will print the whole block reservation
110 * so later if we need a better searching tree than double linked-list, 129 * windows(start, end). Otherwise, it will only print out the "bad" windows,
111 * we could easily switch to that without changing too much 130 * those windows that overlap with their immediate neighbors.
112 * code.
113 */ 131 */
114#if 0 132#if 1
115static void __rsv_window_dump(struct rb_root *root, int verbose, 133static void __rsv_window_dump(struct rb_root *root, int verbose,
116 const char *fn) 134 const char *fn)
117{ 135{
@@ -129,7 +147,7 @@ restart:
129 rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); 147 rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
130 if (verbose) 148 if (verbose)
131 printk("reservation window 0x%p " 149 printk("reservation window 0x%p "
132 "start: %d, end: %d\n", 150 "start: %lu, end: %lu\n",
133 rsv, rsv->rsv_start, rsv->rsv_end); 151 rsv, rsv->rsv_start, rsv->rsv_end);
134 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { 152 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
135 printk("Bad reservation %p (start >= end)\n", 153 printk("Bad reservation %p (start >= end)\n",
@@ -161,6 +179,22 @@ restart:
161#define rsv_window_dump(root, verbose) do {} while (0) 179#define rsv_window_dump(root, verbose) do {} while (0)
162#endif 180#endif
163 181
182/**
183 * goal_in_my_reservation()
184 * @rsv: inode's reservation window
185 * @grp_goal: given goal block relative to the allocation block group
186 * @group: the current allocation block group
187 * @sb: filesystem super block
188 *
189 * Test if the given goal block (group relative) is within the file's
190 * own block reservation window range.
191 *
192 * If the reservation window is outside the goal allocation group, return 0;
193 * grp_goal (given goal block) could be -1, which means no specific
194 * goal block. In this case, always return 1.
195 * If the goal block is within the reservation window, return 1;
196 * otherwise, return 0;
197 */
164static int 198static int
165goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, 199goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
166 unsigned int group, struct super_block * sb) 200 unsigned int group, struct super_block * sb)
@@ -168,7 +202,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
168 ext3_fsblk_t group_first_block, group_last_block; 202 ext3_fsblk_t group_first_block, group_last_block;
169 203
170 group_first_block = ext3_group_first_block_no(sb, group); 204 group_first_block = ext3_group_first_block_no(sb, group);
171 group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 205 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
172 206
173 if ((rsv->_rsv_start > group_last_block) || 207 if ((rsv->_rsv_start > group_last_block) ||
174 (rsv->_rsv_end < group_first_block)) 208 (rsv->_rsv_end < group_first_block))
@@ -179,7 +213,11 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
179 return 1; 213 return 1;
180} 214}
181 215
182/* 216/**
217 * search_reserve_window()
218 * @rb_root: root of reservation tree
219 * @goal: target allocation block
220 *
183 * Find the reserved window which includes the goal, or the previous one 221 * Find the reserved window which includes the goal, or the previous one
184 * if the goal is not in any window. 222 * if the goal is not in any window.
185 * Returns NULL if there are no windows or if all windows start after the goal. 223 * Returns NULL if there are no windows or if all windows start after the goal.
@@ -216,6 +254,13 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
216 return rsv; 254 return rsv;
217} 255}
218 256
257/**
258 * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
259 * @sb: super block
260 * @rsv: reservation window to add
261 *
262 * Must be called with rsv_lock hold.
263 */
219void ext3_rsv_window_add(struct super_block *sb, 264void ext3_rsv_window_add(struct super_block *sb,
220 struct ext3_reserve_window_node *rsv) 265 struct ext3_reserve_window_node *rsv)
221{ 266{
@@ -236,14 +281,25 @@ void ext3_rsv_window_add(struct super_block *sb,
236 p = &(*p)->rb_left; 281 p = &(*p)->rb_left;
237 else if (start > this->rsv_end) 282 else if (start > this->rsv_end)
238 p = &(*p)->rb_right; 283 p = &(*p)->rb_right;
239 else 284 else {
285 rsv_window_dump(root, 1);
240 BUG(); 286 BUG();
287 }
241 } 288 }
242 289
243 rb_link_node(node, parent, p); 290 rb_link_node(node, parent, p);
244 rb_insert_color(node, root); 291 rb_insert_color(node, root);
245} 292}
246 293
294/**
295 * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
296 * @sb: super block
297 * @rsv: reservation window to remove
298 *
299 * Mark the block reservation window as not allocated, and unlink it
300 * from the filesystem reservation window rb tree. Must be called with
301 * rsv_lock hold.
302 */
247static void rsv_window_remove(struct super_block *sb, 303static void rsv_window_remove(struct super_block *sb,
248 struct ext3_reserve_window_node *rsv) 304 struct ext3_reserve_window_node *rsv)
249{ 305{
@@ -253,11 +309,39 @@ static void rsv_window_remove(struct super_block *sb,
253 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); 309 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
254} 310}
255 311
312/*
313 * rsv_is_empty() -- Check if the reservation window is allocated.
314 * @rsv: given reservation window to check
315 *
316 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
317 */
256static inline int rsv_is_empty(struct ext3_reserve_window *rsv) 318static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
257{ 319{
258 /* a valid reservation end block could not be 0 */ 320 /* a valid reservation end block could not be 0 */
259 return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); 321 return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
260} 322}
323
324/**
325 * ext3_init_block_alloc_info()
326 * @inode: file inode structure
327 *
328 * Allocate and initialize the reservation window structure, and
329 * link the window to the ext3 inode structure at last
330 *
331 * The reservation window structure is only dynamically allocated
332 * and linked to ext3 inode the first time the open file
333 * needs a new block. So, before every ext3_new_block(s) call, for
334 * regular files, we should check whether the reservation window
335 * structure exists or not. In the latter case, this function is called.
336 * Fail to do so will result in block reservation being turned off for that
337 * open file.
338 *
339 * This function is called from ext3_get_blocks_handle(), also called
340 * when setting the reservation window size through ioctl before the file
341 * is open for write (needs block allocation).
342 *
343 * Needs truncate_mutex protection prior to call this function.
344 */
261void ext3_init_block_alloc_info(struct inode *inode) 345void ext3_init_block_alloc_info(struct inode *inode)
262{ 346{
263 struct ext3_inode_info *ei = EXT3_I(inode); 347 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -271,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
271 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 355 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
272 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 356 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
273 357
274 /* 358 /*
275 * if filesystem is mounted with NORESERVATION, the goal 359 * if filesystem is mounted with NORESERVATION, the goal
276 * reservation window size is set to zero to indicate 360 * reservation window size is set to zero to indicate
277 * block reservation is off 361 * block reservation is off
@@ -287,6 +371,19 @@ void ext3_init_block_alloc_info(struct inode *inode)
287 ei->i_block_alloc_info = block_i; 371 ei->i_block_alloc_info = block_i;
288} 372}
289 373
374/**
375 * ext3_discard_reservation()
376 * @inode: inode
377 *
378 * Discard(free) block reservation window on last file close, or truncate
379 * or at last iput().
380 *
381 * It is being called in three cases:
382 * ext3_release_file(): last writer close the file
383 * ext3_clear_inode(): last iput(), when nobody link to this file.
384 * ext3_truncate(): when the block indirect map is about to change.
385 *
386 */
290void ext3_discard_reservation(struct inode *inode) 387void ext3_discard_reservation(struct inode *inode)
291{ 388{
292 struct ext3_inode_info *ei = EXT3_I(inode); 389 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -306,7 +403,14 @@ void ext3_discard_reservation(struct inode *inode)
306 } 403 }
307} 404}
308 405
309/* Free given blocks, update quota and i_blocks field */ 406/**
407 * ext3_free_blocks_sb() -- Free given blocks and update quota
408 * @handle: handle to this transaction
409 * @sb: super block
410 * @block: start physcial block to free
411 * @count: number of blocks to free
412 * @pdquot_freed_blocks: pointer to quota
413 */
310void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, 414void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
311 ext3_fsblk_t block, unsigned long count, 415 ext3_fsblk_t block, unsigned long count,
312 unsigned long *pdquot_freed_blocks) 416 unsigned long *pdquot_freed_blocks)
@@ -419,8 +523,8 @@ do_more:
419 } 523 }
420 /* @@@ This prevents newly-allocated data from being 524 /* @@@ This prevents newly-allocated data from being
421 * freed and then reallocated within the same 525 * freed and then reallocated within the same
422 * transaction. 526 * transaction.
423 * 527 *
424 * Ideally we would want to allow that to happen, but to 528 * Ideally we would want to allow that to happen, but to
425 * do so requires making journal_forget() capable of 529 * do so requires making journal_forget() capable of
426 * revoking the queued write of a data block, which 530 * revoking the queued write of a data block, which
@@ -433,7 +537,7 @@ do_more:
433 * safe not to set the allocation bit in the committed 537 * safe not to set the allocation bit in the committed
434 * bitmap, because we know that there is no outstanding 538 * bitmap, because we know that there is no outstanding
435 * activity on the buffer any more and so it is safe to 539 * activity on the buffer any more and so it is safe to
436 * reallocate it. 540 * reallocate it.
437 */ 541 */
438 BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); 542 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
439 J_ASSERT_BH(bitmap_bh, 543 J_ASSERT_BH(bitmap_bh,
@@ -490,7 +594,13 @@ error_return:
490 return; 594 return;
491} 595}
492 596
493/* Free given blocks, update quota and i_blocks field */ 597/**
598 * ext3_free_blocks() -- Free given blocks and update quota
599 * @handle: handle for this transaction
600 * @inode: inode
601 * @block: start physical block to free
602 * @count: number of blocks to count
603 */
494void ext3_free_blocks(handle_t *handle, struct inode *inode, 604void ext3_free_blocks(handle_t *handle, struct inode *inode,
495 ext3_fsblk_t block, unsigned long count) 605 ext3_fsblk_t block, unsigned long count)
496{ 606{
@@ -508,7 +618,11 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
508 return; 618 return;
509} 619}
510 620
511/* 621/**
622 * ext3_test_allocatable()
623 * @nr: given allocation block group
624 * @bh: bufferhead contains the bitmap of the given block group
625 *
512 * For ext3 allocations, we must not reuse any blocks which are 626 * For ext3 allocations, we must not reuse any blocks which are
513 * allocated in the bitmap buffer's "last committed data" copy. This 627 * allocated in the bitmap buffer's "last committed data" copy. This
514 * prevents deletes from freeing up the page for reuse until we have 628 * prevents deletes from freeing up the page for reuse until we have
@@ -518,7 +632,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
518 * data would allow the old block to be overwritten before the 632 * data would allow the old block to be overwritten before the
519 * transaction committed (because we force data to disk before commit). 633 * transaction committed (because we force data to disk before commit).
520 * This would lead to corruption if we crashed between overwriting the 634 * This would lead to corruption if we crashed between overwriting the
521 * data and committing the delete. 635 * data and committing the delete.
522 * 636 *
523 * @@@ We may want to make this allocation behaviour conditional on 637 * @@@ We may want to make this allocation behaviour conditional on
524 * data-writes at some point, and disable it for metadata allocations or 638 * data-writes at some point, and disable it for metadata allocations or
@@ -541,6 +655,16 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
541 return ret; 655 return ret;
542} 656}
543 657
658/**
659 * bitmap_search_next_usable_block()
660 * @start: the starting block (group relative) of the search
661 * @bh: bufferhead contains the block group bitmap
662 * @maxblocks: the ending block (group relative) of the reservation
663 *
664 * The bitmap search --- search forward alternately through the actual
665 * bitmap on disk and the last-committed copy in journal, until we find a
666 * bit free in both bitmaps.
667 */
544static ext3_grpblk_t 668static ext3_grpblk_t
545bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 669bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
546 ext3_grpblk_t maxblocks) 670 ext3_grpblk_t maxblocks)
@@ -548,11 +672,6 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
548 ext3_grpblk_t next; 672 ext3_grpblk_t next;
549 struct journal_head *jh = bh2jh(bh); 673 struct journal_head *jh = bh2jh(bh);
550 674
551 /*
552 * The bitmap search --- search forward alternately through the actual
553 * bitmap and the last-committed copy until we find a bit free in
554 * both
555 */
556 while (start < maxblocks) { 675 while (start < maxblocks) {
557 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); 676 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
558 if (next >= maxblocks) 677 if (next >= maxblocks)
@@ -562,14 +681,20 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
562 jbd_lock_bh_state(bh); 681 jbd_lock_bh_state(bh);
563 if (jh->b_committed_data) 682 if (jh->b_committed_data)
564 start = ext3_find_next_zero_bit(jh->b_committed_data, 683 start = ext3_find_next_zero_bit(jh->b_committed_data,
565 maxblocks, next); 684 maxblocks, next);
566 jbd_unlock_bh_state(bh); 685 jbd_unlock_bh_state(bh);
567 } 686 }
568 return -1; 687 return -1;
569} 688}
570 689
571/* 690/**
572 * Find an allocatable block in a bitmap. We honour both the bitmap and 691 * find_next_usable_block()
692 * @start: the starting block (group relative) to find next
693 * allocatable block in bitmap.
694 * @bh: bufferhead contains the block group bitmap
695 * @maxblocks: the ending block (group relative) for the search
696 *
697 * Find an allocatable block in a bitmap. We honor both the bitmap and
573 * its last-committed copy (if that exists), and perform the "most 698 * its last-committed copy (if that exists), and perform the "most
574 * appropriate allocation" algorithm of looking for a free block near 699 * appropriate allocation" algorithm of looking for a free block near
575 * the initial goal; then for a free byte somewhere in the bitmap; then 700 * the initial goal; then for a free byte somewhere in the bitmap; then
@@ -584,7 +709,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
584 709
585 if (start > 0) { 710 if (start > 0) {
586 /* 711 /*
587 * The goal was occupied; search forward for a free 712 * The goal was occupied; search forward for a free
588 * block within the next XX blocks. 713 * block within the next XX blocks.
589 * 714 *
590 * end_goal is more or less random, but it has to be 715 * end_goal is more or less random, but it has to be
@@ -620,7 +745,11 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
620 return here; 745 return here;
621} 746}
622 747
623/* 748/**
749 * claim_block()
750 * @block: the free block (group relative) to allocate
751 * @bh: the bufferhead containts the block group bitmap
752 *
624 * We think we can allocate this block in this bitmap. Try to set the bit. 753 * We think we can allocate this block in this bitmap. Try to set the bit.
625 * If that succeeds then check that nobody has allocated and then freed the 754 * If that succeeds then check that nobody has allocated and then freed the
626 * block since we saw that is was not marked in b_committed_data. If it _was_ 755 * block since we saw that is was not marked in b_committed_data. If it _was_
@@ -646,7 +775,26 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
646 return ret; 775 return ret;
647} 776}
648 777
649/* 778/**
779 * ext3_try_to_allocate()
780 * @sb: superblock
781 * @handle: handle to this transaction
782 * @group: given allocation block group
783 * @bitmap_bh: bufferhead holds the block bitmap
784 * @grp_goal: given target block within the group
785 * @count: target number of blocks to allocate
786 * @my_rsv: reservation window
787 *
788 * Attempt to allocate blocks within a give range. Set the range of allocation
789 * first, then find the first free bit(s) from the bitmap (within the range),
790 * and at last, allocate the blocks by claiming the found free bit as allocated.
791 *
792 * To set the range of this allocation:
793 * if there is a reservation window, only try to allocate block(s) from the
794 * file's own reservation window;
795 * Otherwise, the allocation range starts from the give goal block, ends at
796 * the block group's last block.
797 *
650 * If we failed to allocate the desired block then we may end up crossing to a 798 * If we failed to allocate the desired block then we may end up crossing to a
651 * new bitmap. In that case we must release write access to the old one via 799 * new bitmap. In that case we must release write access to the old one via
652 * ext3_journal_release_buffer(), else we'll run out of credits. 800 * ext3_journal_release_buffer(), else we'll run out of credits.
@@ -703,7 +851,8 @@ repeat:
703 } 851 }
704 start = grp_goal; 852 start = grp_goal;
705 853
706 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { 854 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
855 grp_goal, bitmap_bh)) {
707 /* 856 /*
708 * The block was allocated by another thread, or it was 857 * The block was allocated by another thread, or it was
709 * allocated and then freed by another thread 858 * allocated and then freed by another thread
@@ -718,7 +867,8 @@ repeat:
718 grp_goal++; 867 grp_goal++;
719 while (num < *count && grp_goal < end 868 while (num < *count && grp_goal < end
720 && ext3_test_allocatable(grp_goal, bitmap_bh) 869 && ext3_test_allocatable(grp_goal, bitmap_bh)
721 && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { 870 && claim_block(sb_bgl_lock(EXT3_SB(sb), group),
871 grp_goal, bitmap_bh)) {
722 num++; 872 num++;
723 grp_goal++; 873 grp_goal++;
724 } 874 }
@@ -730,12 +880,12 @@ fail_access:
730} 880}
731 881
732/** 882/**
733 * find_next_reservable_window(): 883 * find_next_reservable_window():
734 * find a reservable space within the given range. 884 * find a reservable space within the given range.
735 * It does not allocate the reservation window for now: 885 * It does not allocate the reservation window for now:
736 * alloc_new_reservation() will do the work later. 886 * alloc_new_reservation() will do the work later.
737 * 887 *
738 * @search_head: the head of the searching list; 888 * @search_head: the head of the searching list;
739 * This is not necessarily the list head of the whole filesystem 889 * This is not necessarily the list head of the whole filesystem
740 * 890 *
741 * We have both head and start_block to assist the search 891 * We have both head and start_block to assist the search
@@ -743,12 +893,12 @@ fail_access:
743 * but we will shift to the place where start_block is, 893 * but we will shift to the place where start_block is,
744 * then start from there, when looking for a reservable space. 894 * then start from there, when looking for a reservable space.
745 * 895 *
746 * @size: the target new reservation window size 896 * @size: the target new reservation window size
747 * 897 *
748 * @group_first_block: the first block we consider to start 898 * @group_first_block: the first block we consider to start
749 * the real search from 899 * the real search from
750 * 900 *
751 * @last_block: 901 * @last_block:
752 * the maximum block number that our goal reservable space 902 * the maximum block number that our goal reservable space
753 * could start from. This is normally the last block in this 903 * could start from. This is normally the last block in this
754 * group. The search will end when we found the start of next 904 * group. The search will end when we found the start of next
@@ -756,10 +906,10 @@ fail_access:
756 * This could handle the cross boundary reservation window 906 * This could handle the cross boundary reservation window
757 * request. 907 * request.
758 * 908 *
759 * basically we search from the given range, rather than the whole 909 * basically we search from the given range, rather than the whole
760 * reservation double linked list, (start_block, last_block) 910 * reservation double linked list, (start_block, last_block)
761 * to find a free region that is of my size and has not 911 * to find a free region that is of my size and has not
762 * been reserved. 912 * been reserved.
763 * 913 *
764 */ 914 */
765static int find_next_reservable_window( 915static int find_next_reservable_window(
@@ -812,7 +962,7 @@ static int find_next_reservable_window(
812 /* 962 /*
813 * Found a reserveable space big enough. We could 963 * Found a reserveable space big enough. We could
814 * have a reservation across the group boundary here 964 * have a reservation across the group boundary here
815 */ 965 */
816 break; 966 break;
817 } 967 }
818 } 968 }
@@ -848,7 +998,7 @@ static int find_next_reservable_window(
848} 998}
849 999
850/** 1000/**
851 * alloc_new_reservation()--allocate a new reservation window 1001 * alloc_new_reservation()--allocate a new reservation window
852 * 1002 *
853 * To make a new reservation, we search part of the filesystem 1003 * To make a new reservation, we search part of the filesystem
854 * reservation list (the list that inside the group). We try to 1004 * reservation list (the list that inside the group). We try to
@@ -897,7 +1047,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
897 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 1047 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
898 1048
899 group_first_block = ext3_group_first_block_no(sb, group); 1049 group_first_block = ext3_group_first_block_no(sb, group);
900 group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 1050 group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
901 1051
902 if (grp_goal < 0) 1052 if (grp_goal < 0)
903 start_block = group_first_block; 1053 start_block = group_first_block;
@@ -929,9 +1079,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
929 if ((my_rsv->rsv_alloc_hit > 1079 if ((my_rsv->rsv_alloc_hit >
930 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { 1080 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
931 /* 1081 /*
932 * if we previously allocation hit ration is greater than half 1082 * if the previously allocation hit ratio is
933 * we double the size of reservation window next time 1083 * greater than 1/2, then we double the size of
934 * otherwise keep the same 1084 * the reservation window the next time,
1085 * otherwise we keep the same size window
935 */ 1086 */
936 size = size * 2; 1087 size = size * 2;
937 if (size > EXT3_MAX_RESERVE_BLOCKS) 1088 if (size > EXT3_MAX_RESERVE_BLOCKS)
@@ -1010,6 +1161,23 @@ retry:
1010 goto retry; 1161 goto retry;
1011} 1162}
1012 1163
1164/**
1165 * try_to_extend_reservation()
1166 * @my_rsv: given reservation window
1167 * @sb: super block
1168 * @size: the delta to extend
1169 *
1170 * Attempt to expand the reservation window large enough to have
1171 * required number of free blocks
1172 *
1173 * Since ext3_try_to_allocate() will always allocate blocks within
1174 * the reservation window range, if the window size is too small,
1175 * multiple blocks allocation has to stop at the end of the reservation
1176 * window. To make this more efficient, given the total number of
1177 * blocks needed and the current size of the window, we try to
1178 * expand the reservation window size if necessary on a best-effort
1179 * basis before ext3_new_blocks() tries to allocate blocks,
1180 */
1013static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, 1181static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1014 struct super_block *sb, int size) 1182 struct super_block *sb, int size)
1015{ 1183{
@@ -1035,7 +1203,17 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1035 spin_unlock(rsv_lock); 1203 spin_unlock(rsv_lock);
1036} 1204}
1037 1205
1038/* 1206/**
1207 * ext3_try_to_allocate_with_rsv()
1208 * @sb: superblock
1209 * @handle: handle to this transaction
1210 * @group: given allocation block group
1211 * @bitmap_bh: bufferhead holds the block bitmap
1212 * @grp_goal: given target block within the group
1213 * @count: target number of blocks to allocate
1214 * @my_rsv: reservation window
1215 * @errp: pointer to store the error code
1216 *
1039 * This is the main function used to allocate a new block and its reservation 1217 * This is the main function used to allocate a new block and its reservation
1040 * window. 1218 * window.
1041 * 1219 *
@@ -1051,9 +1229,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1051 * reservation), and there are lots of free blocks, but they are all 1229 * reservation), and there are lots of free blocks, but they are all
1052 * being reserved. 1230 * being reserved.
1053 * 1231 *
1054 * We use a sorted double linked list for the per-filesystem reservation list. 1232 * We use a red-black tree for the per-filesystem reservation list.
1055 * The insert, remove and find a free space(non-reserved) operations for the
1056 * sorted double linked list should be fast.
1057 * 1233 *
1058 */ 1234 */
1059static ext3_grpblk_t 1235static ext3_grpblk_t
@@ -1063,7 +1239,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1063 struct ext3_reserve_window_node * my_rsv, 1239 struct ext3_reserve_window_node * my_rsv,
1064 unsigned long *count, int *errp) 1240 unsigned long *count, int *errp)
1065{ 1241{
1066 ext3_fsblk_t group_first_block; 1242 ext3_fsblk_t group_first_block, group_last_block;
1067 ext3_grpblk_t ret = 0; 1243 ext3_grpblk_t ret = 0;
1068 int fatal; 1244 int fatal;
1069 unsigned long num = *count; 1245 unsigned long num = *count;
@@ -1100,6 +1276,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1100 * first block is the block number of the first block in this group 1276 * first block is the block number of the first block in this group
1101 */ 1277 */
1102 group_first_block = ext3_group_first_block_no(sb, group); 1278 group_first_block = ext3_group_first_block_no(sb, group);
1279 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1103 1280
1104 /* 1281 /*
1105 * Basically we will allocate a new block from inode's reservation 1282 * Basically we will allocate a new block from inode's reservation
@@ -1118,7 +1295,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1118 */ 1295 */
1119 while (1) { 1296 while (1) {
1120 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || 1297 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1121 !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) { 1298 !goal_in_my_reservation(&my_rsv->rsv_window,
1299 grp_goal, group, sb)) {
1122 if (my_rsv->rsv_goal_size < *count) 1300 if (my_rsv->rsv_goal_size < *count)
1123 my_rsv->rsv_goal_size = *count; 1301 my_rsv->rsv_goal_size = *count;
1124 ret = alloc_new_reservation(my_rsv, grp_goal, sb, 1302 ret = alloc_new_reservation(my_rsv, grp_goal, sb,
@@ -1126,17 +1304,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1126 if (ret < 0) 1304 if (ret < 0)
1127 break; /* failed */ 1305 break; /* failed */
1128 1306
1129 if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) 1307 if (!goal_in_my_reservation(&my_rsv->rsv_window,
1308 grp_goal, group, sb))
1130 grp_goal = -1; 1309 grp_goal = -1;
1131 } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count) 1310 } else if (grp_goal > 0 &&
1311 (my_rsv->rsv_end-grp_goal+1) < *count)
1132 try_to_extend_reservation(my_rsv, sb, 1312 try_to_extend_reservation(my_rsv, sb,
1133 *count-my_rsv->rsv_end + grp_goal - 1); 1313 *count-my_rsv->rsv_end + grp_goal - 1);
1134 1314
1135 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) 1315 if ((my_rsv->rsv_start > group_last_block) ||
1136 || (my_rsv->rsv_end < group_first_block)) 1316 (my_rsv->rsv_end < group_first_block)) {
1317 rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
1137 BUG(); 1318 BUG();
1138 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, 1319 }
1139 &num, &my_rsv->rsv_window); 1320 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
1321 grp_goal, &num, &my_rsv->rsv_window);
1140 if (ret >= 0) { 1322 if (ret >= 0) {
1141 my_rsv->rsv_alloc_hit += num; 1323 my_rsv->rsv_alloc_hit += num;
1142 *count = num; 1324 *count = num;
@@ -1161,6 +1343,12 @@ out:
1161 return ret; 1343 return ret;
1162} 1344}
1163 1345
1346/**
1347 * ext3_has_free_blocks()
1348 * @sbi: in-core super block structure.
1349 *
1350 * Check if filesystem has at least 1 free block available for allocation.
1351 */
1164static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1352static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1165{ 1353{
1166 ext3_fsblk_t free_blocks, root_blocks; 1354 ext3_fsblk_t free_blocks, root_blocks;
@@ -1175,11 +1363,17 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1175 return 1; 1363 return 1;
1176} 1364}
1177 1365
1178/* 1366/**
1367 * ext3_should_retry_alloc()
1368 * @sb: super block
1369 * @retries number of attemps has been made
1370 *
1179 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1371 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1180 * it is profitable to retry the operation, this function will wait 1372 * it is profitable to retry the operation, this function will wait
1181 * for the current or commiting transaction to complete, and then 1373 * for the current or commiting transaction to complete, and then
1182 * return TRUE. 1374 * return TRUE.
1375 *
1376 * if the total number of retries exceed three times, return FALSE.
1183 */ 1377 */
1184int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1378int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1185{ 1379{
@@ -1191,13 +1385,19 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1191 return journal_force_commit_nested(EXT3_SB(sb)->s_journal); 1385 return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
1192} 1386}
1193 1387
1194/* 1388/**
1195 * ext3_new_block uses a goal block to assist allocation. If the goal is 1389 * ext3_new_blocks() -- core block(s) allocation function
1196 * free, or there is a free block within 32 blocks of the goal, that block 1390 * @handle: handle to this transaction
1197 * is allocated. Otherwise a forward search is made for a free block; within 1391 * @inode: file inode
1198 * each block group the search first looks for an entire free byte in the block 1392 * @goal: given target block(filesystem wide)
1199 * bitmap, and then for any free bit if that fails. 1393 * @count: target number of blocks to allocate
1200 * This function also updates quota and i_blocks field. 1394 * @errp: error code
1395 *
1396 * ext3_new_blocks uses a goal block to assist allocation. It tries to
1397 * allocate block(s) from the block group contains the goal block first. If that
1398 * fails, it will try to allocate block(s) from other block groups without
1399 * any specific goal block.
1400 *
1201 */ 1401 */
1202ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, 1402ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1203 ext3_fsblk_t goal, unsigned long *count, int *errp) 1403 ext3_fsblk_t goal, unsigned long *count, int *errp)
@@ -1303,7 +1503,7 @@ retry_alloc:
1303 smp_rmb(); 1503 smp_rmb();
1304 1504
1305 /* 1505 /*
1306 * Now search the rest of the groups. We assume that 1506 * Now search the rest of the groups. We assume that
1307 * i and gdp correctly point to the last group visited. 1507 * i and gdp correctly point to the last group visited.
1308 */ 1508 */
1309 for (bgi = 0; bgi < ngroups; bgi++) { 1509 for (bgi = 0; bgi < ngroups; bgi++) {
@@ -1428,7 +1628,7 @@ allocated:
1428 1628
1429 spin_lock(sb_bgl_lock(sbi, group_no)); 1629 spin_lock(sb_bgl_lock(sbi, group_no));
1430 gdp->bg_free_blocks_count = 1630 gdp->bg_free_blocks_count =
1431 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num); 1631 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1432 spin_unlock(sb_bgl_lock(sbi, group_no)); 1632 spin_unlock(sb_bgl_lock(sbi, group_no));
1433 percpu_counter_mod(&sbi->s_freeblocks_counter, -num); 1633 percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
1434 1634
@@ -1471,6 +1671,12 @@ ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
1471 return ext3_new_blocks(handle, inode, goal, &count, errp); 1671 return ext3_new_blocks(handle, inode, goal, &count, errp);
1472} 1672}
1473 1673
1674/**
1675 * ext3_count_free_blocks() -- count filesystem free blocks
1676 * @sb: superblock
1677 *
1678 * Adds up the number of free blocks from each block group.
1679 */
1474ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) 1680ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1475{ 1681{
1476 ext3_fsblk_t desc_count; 1682 ext3_fsblk_t desc_count;
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index ce4f82b9e528..b9176eed98d1 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -20,7 +20,7 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
20 unsigned int i; 20 unsigned int i;
21 unsigned long sum = 0; 21 unsigned long sum = 0;
22 22
23 if (!map) 23 if (!map)
24 return (0); 24 return (0);
25 for (i = 0; i < numchars; i++) 25 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] + 26 sum += nibblemap[map->b_data[i] & 0xf] +
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index fbb0d4ed07d4..429acbb4e064 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -59,7 +59,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
59 59
60 return (ext3_filetype_table[filetype]); 60 return (ext3_filetype_table[filetype]);
61} 61}
62 62
63 63
64int ext3_check_dir_entry (const char * function, struct inode * dir, 64int ext3_check_dir_entry (const char * function, struct inode * dir,
65 struct ext3_dir_entry_2 * de, 65 struct ext3_dir_entry_2 * de,
@@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
67 unsigned long offset) 67 unsigned long offset)
68{ 68{
69 const char * error_msg = NULL; 69 const char * error_msg = NULL;
70 const int rlen = le16_to_cpu(de->rec_len); 70 const int rlen = le16_to_cpu(de->rec_len);
71 71
72 if (rlen < EXT3_DIR_REC_LEN(1)) 72 if (rlen < EXT3_DIR_REC_LEN(1))
73 error_msg = "rec_len is smaller than minimal"; 73 error_msg = "rec_len is smaller than minimal";
@@ -162,7 +162,7 @@ revalidate:
162 * to make sure. */ 162 * to make sure. */
163 if (filp->f_version != inode->i_version) { 163 if (filp->f_version != inode->i_version) {
164 for (i = 0; i < sb->s_blocksize && i < offset; ) { 164 for (i = 0; i < sb->s_blocksize && i < offset; ) {
165 de = (struct ext3_dir_entry_2 *) 165 de = (struct ext3_dir_entry_2 *)
166 (bh->b_data + i); 166 (bh->b_data + i);
167 /* It's too expensive to do a full 167 /* It's too expensive to do a full
168 * dirent test each time round this 168 * dirent test each time round this
@@ -181,7 +181,7 @@ revalidate:
181 filp->f_version = inode->i_version; 181 filp->f_version = inode->i_version;
182 } 182 }
183 183
184 while (!error && filp->f_pos < inode->i_size 184 while (!error && filp->f_pos < inode->i_size
185 && offset < sb->s_blocksize) { 185 && offset < sb->s_blocksize) {
186 de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); 186 de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
187 if (!ext3_check_dir_entry ("ext3_readdir", inode, de, 187 if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
@@ -229,7 +229,7 @@ out:
229/* 229/*
230 * These functions convert from the major/minor hash to an f_pos 230 * These functions convert from the major/minor hash to an f_pos
231 * value. 231 * value.
232 * 232 *
233 * Currently we only use major hash numer. This is unfortunate, but 233 * Currently we only use major hash numer. This is unfortunate, but
234 * on 32-bit machines, the same VFS interface is used for lseek and 234 * on 32-bit machines, the same VFS interface is used for lseek and
235 * llseek, so if we use the 64 bit offset, then the 32-bit versions of 235 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
@@ -250,7 +250,7 @@ out:
250struct fname { 250struct fname {
251 __u32 hash; 251 __u32 hash;
252 __u32 minor_hash; 252 __u32 minor_hash;
253 struct rb_node rb_hash; 253 struct rb_node rb_hash;
254 struct fname *next; 254 struct fname *next;
255 __u32 inode; 255 __u32 inode;
256 __u8 name_len; 256 __u8 name_len;
@@ -343,10 +343,9 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
343 343
344 /* Create and allocate the fname structure */ 344 /* Create and allocate the fname structure */
345 len = sizeof(struct fname) + dirent->name_len + 1; 345 len = sizeof(struct fname) + dirent->name_len + 1;
346 new_fn = kmalloc(len, GFP_KERNEL); 346 new_fn = kzalloc(len, GFP_KERNEL);
347 if (!new_fn) 347 if (!new_fn)
348 return -ENOMEM; 348 return -ENOMEM;
349 memset(new_fn, 0, len);
350 new_fn->hash = hash; 349 new_fn->hash = hash;
351 new_fn->minor_hash = minor_hash; 350 new_fn->minor_hash = minor_hash;
352 new_fn->inode = le32_to_cpu(dirent->inode); 351 new_fn->inode = le32_to_cpu(dirent->inode);
@@ -410,7 +409,7 @@ static int call_filldir(struct file * filp, void * dirent,
410 curr_pos = hash2pos(fname->hash, fname->minor_hash); 409 curr_pos = hash2pos(fname->hash, fname->minor_hash);
411 while (fname) { 410 while (fname) {
412 error = filldir(dirent, fname->name, 411 error = filldir(dirent, fname->name,
413 fname->name_len, curr_pos, 412 fname->name_len, curr_pos,
414 fname->inode, 413 fname->inode,
415 get_dtype(sb, fname->file_type)); 414 get_dtype(sb, fname->file_type));
416 if (error) { 415 if (error) {
@@ -465,7 +464,7 @@ static int ext3_dx_readdir(struct file * filp,
465 /* 464 /*
466 * Fill the rbtree if we have no more entries, 465 * Fill the rbtree if we have no more entries,
467 * or the inode has changed since we last read in the 466 * or the inode has changed since we last read in the
468 * cached entries. 467 * cached entries.
469 */ 468 */
470 if ((!info->curr_node) || 469 if ((!info->curr_node) ||
471 (filp->f_version != inode->i_version)) { 470 (filp->f_version != inode->i_version)) {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1efefb630ea9..994efd189f4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -100,7 +100,7 @@ ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
100 100
101force_commit: 101force_commit:
102 err = ext3_force_commit(inode->i_sb); 102 err = ext3_force_commit(inode->i_sb);
103 if (err) 103 if (err)
104 return err; 104 return err;
105 return ret; 105 return ret;
106} 106}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 49382a208e05..dd1fd3c0fc05 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -8,14 +8,14 @@
8 * Universite Pierre et Marie Curie (Paris VI) 8 * Universite Pierre et Marie Curie (Paris VI)
9 * from 9 * from
10 * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds 10 * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
11 * 11 *
12 * ext3fs fsync primitive 12 * ext3fs fsync primitive
13 * 13 *
14 * Big-endian to little-endian byte-swapping/bitmaps by 14 * Big-endian to little-endian byte-swapping/bitmaps by
15 * David S. Miller (davem@caip.rutgers.edu), 1995 15 * David S. Miller (davem@caip.rutgers.edu), 1995
16 * 16 *
17 * Removed unnecessary code duplication for little endian machines 17 * Removed unnecessary code duplication for little endian machines
18 * and excessive __inline__s. 18 * and excessive __inline__s.
19 * Andi Kleen, 1997 19 * Andi Kleen, 1997
20 * 20 *
21 * Major simplications and cleanup - we only need to do the metadata, because 21 * Major simplications and cleanup - we only need to do the metadata, because
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 5a2d1235ead0..deeb27b5ba83 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -4,7 +4,7 @@
4 * Copyright (C) 2002 by Theodore Ts'o 4 * Copyright (C) 2002 by Theodore Ts'o
5 * 5 *
6 * This file is released under the GPL v2. 6 * This file is released under the GPL v2.
7 * 7 *
8 * This file may be redistributed under the terms of the GNU Public 8 * This file may be redistributed under the terms of the GNU Public
9 * License. 9 * License.
10 */ 10 */
@@ -80,11 +80,11 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
80 * Returns the hash of a filename. If len is 0 and name is NULL, then 80 * Returns the hash of a filename. If len is 0 and name is NULL, then
81 * this function can be used to test whether or not a hash version is 81 * this function can be used to test whether or not a hash version is
82 * supported. 82 * supported.
83 * 83 *
84 * The seed is an 4 longword (32 bits) "secret" which can be used to 84 * The seed is an 4 longword (32 bits) "secret" which can be used to
85 * uniquify a hash. If the seed is all zero's, then some default seed 85 * uniquify a hash. If the seed is all zero's, then some default seed
86 * may be used. 86 * may be used.
87 * 87 *
88 * A particular hash version specifies whether or not the seed is 88 * A particular hash version specifies whether or not the seed is
89 * represented, and whether or not the returned hash is 32 bits or 64 89 * represented, and whether or not the returned hash is 32 bits or 64
90 * bits. 32 bit hashes will return 0 for the minor hash. 90 * bits. 32 bit hashes will return 0 for the minor hash.
@@ -95,7 +95,7 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
95 __u32 minor_hash = 0; 95 __u32 minor_hash = 0;
96 const char *p; 96 const char *p;
97 int i; 97 int i;
98 __u32 in[8], buf[4]; 98 __u32 in[8], buf[4];
99 99
100 /* Initialize the default seed for the hash checksum functions */ 100 /* Initialize the default seed for the hash checksum functions */
101 buf[0] = 0x67452301; 101 buf[0] = 0x67452301;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 36546ed36a14..e45dbd651736 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -202,7 +202,7 @@ error_return:
202static int find_group_dir(struct super_block *sb, struct inode *parent) 202static int find_group_dir(struct super_block *sb, struct inode *parent)
203{ 203{
204 int ngroups = EXT3_SB(sb)->s_groups_count; 204 int ngroups = EXT3_SB(sb)->s_groups_count;
205 int freei, avefreei; 205 unsigned int freei, avefreei;
206 struct ext3_group_desc *desc, *best_desc = NULL; 206 struct ext3_group_desc *desc, *best_desc = NULL;
207 struct buffer_head *bh; 207 struct buffer_head *bh;
208 int group, best_group = -1; 208 int group, best_group = -1;
@@ -216,7 +216,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
216 continue; 216 continue;
217 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) 217 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
218 continue; 218 continue;
219 if (!best_desc || 219 if (!best_desc ||
220 (le16_to_cpu(desc->bg_free_blocks_count) > 220 (le16_to_cpu(desc->bg_free_blocks_count) >
221 le16_to_cpu(best_desc->bg_free_blocks_count))) { 221 le16_to_cpu(best_desc->bg_free_blocks_count))) {
222 best_group = group; 222 best_group = group;
@@ -226,30 +226,30 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
226 return best_group; 226 return best_group;
227} 227}
228 228
229/* 229/*
230 * Orlov's allocator for directories. 230 * Orlov's allocator for directories.
231 * 231 *
232 * We always try to spread first-level directories. 232 * We always try to spread first-level directories.
233 * 233 *
234 * If there are blockgroups with both free inodes and free blocks counts 234 * If there are blockgroups with both free inodes and free blocks counts
235 * not worse than average we return one with smallest directory count. 235 * not worse than average we return one with smallest directory count.
236 * Otherwise we simply return a random group. 236 * Otherwise we simply return a random group.
237 * 237 *
238 * For the rest rules look so: 238 * For the rest rules look so:
239 * 239 *
240 * It's OK to put directory into a group unless 240 * It's OK to put directory into a group unless
241 * it has too many directories already (max_dirs) or 241 * it has too many directories already (max_dirs) or
242 * it has too few free inodes left (min_inodes) or 242 * it has too few free inodes left (min_inodes) or
243 * it has too few free blocks left (min_blocks) or 243 * it has too few free blocks left (min_blocks) or
244 * it's already running too large debt (max_debt). 244 * it's already running too large debt (max_debt).
245 * Parent's group is prefered, if it doesn't satisfy these 245 * Parent's group is prefered, if it doesn't satisfy these
246 * conditions we search cyclically through the rest. If none 246 * conditions we search cyclically through the rest. If none
247 * of the groups look good we just look for a group with more 247 * of the groups look good we just look for a group with more
248 * free inodes than average (starting at parent's group). 248 * free inodes than average (starting at parent's group).
249 * 249 *
250 * Debt is incremented each time we allocate a directory and decremented 250 * Debt is incremented each time we allocate a directory and decremented
251 * when we allocate an inode, within 0--255. 251 * when we allocate an inode, within 0--255.
252 */ 252 */
253 253
254#define INODE_COST 64 254#define INODE_COST 64
255#define BLOCK_COST 256 255#define BLOCK_COST 256
@@ -261,10 +261,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
261 struct ext3_super_block *es = sbi->s_es; 261 struct ext3_super_block *es = sbi->s_es;
262 int ngroups = sbi->s_groups_count; 262 int ngroups = sbi->s_groups_count;
263 int inodes_per_group = EXT3_INODES_PER_GROUP(sb); 263 int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
264 int freei, avefreei; 264 unsigned int freei, avefreei;
265 ext3_fsblk_t freeb, avefreeb; 265 ext3_fsblk_t freeb, avefreeb;
266 ext3_fsblk_t blocks_per_dir; 266 ext3_fsblk_t blocks_per_dir;
267 int ndirs; 267 unsigned int ndirs;
268 int max_debt, max_dirs, min_inodes; 268 int max_debt, max_dirs, min_inodes;
269 ext3_grpblk_t min_blocks; 269 ext3_grpblk_t min_blocks;
270 int group = -1, i; 270 int group = -1, i;
@@ -454,7 +454,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
454 group = find_group_dir(sb, dir); 454 group = find_group_dir(sb, dir);
455 else 455 else
456 group = find_group_orlov(sb, dir); 456 group = find_group_orlov(sb, dir);
457 } else 457 } else
458 group = find_group_other(sb, dir); 458 group = find_group_other(sb, dir);
459 459
460 err = -ENOSPC; 460 err = -ENOSPC;
@@ -559,7 +559,6 @@ got:
559 559
560 inode->i_ino = ino; 560 inode->i_ino = ino;
561 /* This is the optimal IO size (for stat), not the fs block size */ 561 /* This is the optimal IO size (for stat), not the fs block size */
562 inode->i_blksize = PAGE_SIZE;
563 inode->i_blocks = 0; 562 inode->i_blocks = 0;
564 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 563 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
565 564
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 84be02e93652..dcf4f1dd108b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -13,11 +13,11 @@
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 * 14 *
15 * Goal-directed block allocation by Stephen Tweedie 15 * Goal-directed block allocation by Stephen Tweedie
16 * (sct@redhat.com), 1993, 1998 16 * (sct@redhat.com), 1993, 1998
17 * Big-endian to little-endian byte-swapping/bitmaps by 17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995 18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 * 64-bit file support on 64-bit platforms by Jakub Jelinek 19 * 64-bit file support on 64-bit platforms by Jakub Jelinek
20 * (jj@sunsite.ms.mff.cuni.cz) 20 * (jj@sunsite.ms.mff.cuni.cz)
21 * 21 *
22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
23 */ 23 */
@@ -55,7 +55,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
55/* 55/*
56 * The ext3 forget function must perform a revoke if we are freeing data 56 * The ext3 forget function must perform a revoke if we are freeing data
57 * which has been journaled. Metadata (eg. indirect blocks) must be 57 * which has been journaled. Metadata (eg. indirect blocks) must be
58 * revoked in all cases. 58 * revoked in all cases.
59 * 59 *
60 * "bh" may be NULL: a metadata block may have been freed from memory 60 * "bh" may be NULL: a metadata block may have been freed from memory
61 * but there may still be a record of it in the journal, and that record 61 * but there may still be a record of it in the journal, and that record
@@ -105,7 +105,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
105 * Work out how many blocks we need to proceed with the next chunk of a 105 * Work out how many blocks we need to proceed with the next chunk of a
106 * truncate transaction. 106 * truncate transaction.
107 */ 107 */
108static unsigned long blocks_for_truncate(struct inode *inode) 108static unsigned long blocks_for_truncate(struct inode *inode)
109{ 109{
110 unsigned long needed; 110 unsigned long needed;
111 111
@@ -122,13 +122,13 @@ static unsigned long blocks_for_truncate(struct inode *inode)
122 122
123 /* But we need to bound the transaction so we don't overflow the 123 /* But we need to bound the transaction so we don't overflow the
124 * journal. */ 124 * journal. */
125 if (needed > EXT3_MAX_TRANS_DATA) 125 if (needed > EXT3_MAX_TRANS_DATA)
126 needed = EXT3_MAX_TRANS_DATA; 126 needed = EXT3_MAX_TRANS_DATA;
127 127
128 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; 128 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
129} 129}
130 130
131/* 131/*
132 * Truncate transactions can be complex and absolutely huge. So we need to 132 * Truncate transactions can be complex and absolutely huge. So we need to
133 * be able to restart the transaction at a conventient checkpoint to make 133 * be able to restart the transaction at a conventient checkpoint to make
134 * sure we don't overflow the journal. 134 * sure we don't overflow the journal.
@@ -136,9 +136,9 @@ static unsigned long blocks_for_truncate(struct inode *inode)
136 * start_transaction gets us a new handle for a truncate transaction, 136 * start_transaction gets us a new handle for a truncate transaction,
137 * and extend_transaction tries to extend the existing one a bit. If 137 * and extend_transaction tries to extend the existing one a bit. If
138 * extend fails, we need to propagate the failure up and restart the 138 * extend fails, we need to propagate the failure up and restart the
139 * transaction in the top-level truncate loop. --sct 139 * transaction in the top-level truncate loop. --sct
140 */ 140 */
141static handle_t *start_transaction(struct inode *inode) 141static handle_t *start_transaction(struct inode *inode)
142{ 142{
143 handle_t *result; 143 handle_t *result;
144 144
@@ -215,12 +215,12 @@ void ext3_delete_inode (struct inode * inode)
215 ext3_orphan_del(handle, inode); 215 ext3_orphan_del(handle, inode);
216 EXT3_I(inode)->i_dtime = get_seconds(); 216 EXT3_I(inode)->i_dtime = get_seconds();
217 217
218 /* 218 /*
219 * One subtle ordering requirement: if anything has gone wrong 219 * One subtle ordering requirement: if anything has gone wrong
220 * (transaction abort, IO errors, whatever), then we can still 220 * (transaction abort, IO errors, whatever), then we can still
221 * do these next steps (the fs will already have been marked as 221 * do these next steps (the fs will already have been marked as
222 * having errors), but we can't free the inode if the mark_dirty 222 * having errors), but we can't free the inode if the mark_dirty
223 * fails. 223 * fails.
224 */ 224 */
225 if (ext3_mark_inode_dirty(handle, inode)) 225 if (ext3_mark_inode_dirty(handle, inode))
226 /* If that failed, just do the required in-core inode clear. */ 226 /* If that failed, just do the required in-core inode clear. */
@@ -398,7 +398,7 @@ no_block:
398 * + if there is a block to the left of our position - allocate near it. 398 * + if there is a block to the left of our position - allocate near it.
399 * + if pointer will live in indirect block - allocate near that block. 399 * + if pointer will live in indirect block - allocate near that block.
400 * + if pointer will live in inode - allocate in the same 400 * + if pointer will live in inode - allocate in the same
401 * cylinder group. 401 * cylinder group.
402 * 402 *
403 * In the latter case we colour the starting block by the callers PID to 403 * In the latter case we colour the starting block by the callers PID to
404 * prevent it from clashing with concurrent allocations for a different inode 404 * prevent it from clashing with concurrent allocations for a different inode
@@ -470,7 +470,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
470 * ext3_blks_to_allocate: Look up the block map and count the number 470 * ext3_blks_to_allocate: Look up the block map and count the number
471 * of direct blocks need to be allocated for the given branch. 471 * of direct blocks need to be allocated for the given branch.
472 * 472 *
473 * @branch: chain of indirect blocks 473 * @branch: chain of indirect blocks
474 * @k: number of blocks need for indirect blocks 474 * @k: number of blocks need for indirect blocks
475 * @blks: number of data blocks to be mapped. 475 * @blks: number of data blocks to be mapped.
476 * @blocks_to_boundary: the offset in the indirect block 476 * @blocks_to_boundary: the offset in the indirect block
@@ -744,7 +744,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
744 jbd_debug(5, "splicing indirect only\n"); 744 jbd_debug(5, "splicing indirect only\n");
745 BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); 745 BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
746 err = ext3_journal_dirty_metadata(handle, where->bh); 746 err = ext3_journal_dirty_metadata(handle, where->bh);
747 if (err) 747 if (err)
748 goto err_out; 748 goto err_out;
749 } else { 749 } else {
750 /* 750 /*
@@ -1098,7 +1098,7 @@ static int walk_page_buffers( handle_t *handle,
1098 1098
1099 for ( bh = head, block_start = 0; 1099 for ( bh = head, block_start = 0;
1100 ret == 0 && (bh != head || !block_start); 1100 ret == 0 && (bh != head || !block_start);
1101 block_start = block_end, bh = next) 1101 block_start = block_end, bh = next)
1102 { 1102 {
1103 next = bh->b_this_page; 1103 next = bh->b_this_page;
1104 block_end = block_start + blocksize; 1104 block_end = block_start + blocksize;
@@ -1137,7 +1137,7 @@ static int walk_page_buffers( handle_t *handle,
1137 * So what we do is to rely on the fact that journal_stop/journal_start 1137 * So what we do is to rely on the fact that journal_stop/journal_start
1138 * will _not_ run commit under these circumstances because handle->h_ref 1138 * will _not_ run commit under these circumstances because handle->h_ref
1139 * is elevated. We'll still have enough credits for the tiny quotafile 1139 * is elevated. We'll still have enough credits for the tiny quotafile
1140 * write. 1140 * write.
1141 */ 1141 */
1142static int do_journal_get_write_access(handle_t *handle, 1142static int do_journal_get_write_access(handle_t *handle,
1143 struct buffer_head *bh) 1143 struct buffer_head *bh)
@@ -1282,7 +1282,7 @@ static int ext3_journalled_commit_write(struct file *file,
1282 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1282 if (inode->i_size > EXT3_I(inode)->i_disksize) {
1283 EXT3_I(inode)->i_disksize = inode->i_size; 1283 EXT3_I(inode)->i_disksize = inode->i_size;
1284 ret2 = ext3_mark_inode_dirty(handle, inode); 1284 ret2 = ext3_mark_inode_dirty(handle, inode);
1285 if (!ret) 1285 if (!ret)
1286 ret = ret2; 1286 ret = ret2;
1287 } 1287 }
1288 ret2 = ext3_journal_stop(handle); 1288 ret2 = ext3_journal_stop(handle);
@@ -1291,7 +1291,7 @@ static int ext3_journalled_commit_write(struct file *file,
1291 return ret; 1291 return ret;
1292} 1292}
1293 1293
1294/* 1294/*
1295 * bmap() is special. It gets used by applications such as lilo and by 1295 * bmap() is special. It gets used by applications such as lilo and by
1296 * the swapper to find the on-disk block of a specific piece of data. 1296 * the swapper to find the on-disk block of a specific piece of data.
1297 * 1297 *
@@ -1300,10 +1300,10 @@ static int ext3_journalled_commit_write(struct file *file,
1300 * filesystem and enables swap, then they may get a nasty shock when the 1300 * filesystem and enables swap, then they may get a nasty shock when the
1301 * data getting swapped to that swapfile suddenly gets overwritten by 1301 * data getting swapped to that swapfile suddenly gets overwritten by
1302 * the original zero's written out previously to the journal and 1302 * the original zero's written out previously to the journal and
1303 * awaiting writeback in the kernel's buffer cache. 1303 * awaiting writeback in the kernel's buffer cache.
1304 * 1304 *
1305 * So, if we see any bmap calls here on a modified, data-journaled file, 1305 * So, if we see any bmap calls here on a modified, data-journaled file,
1306 * take extra steps to flush any blocks which might be in the cache. 1306 * take extra steps to flush any blocks which might be in the cache.
1307 */ 1307 */
1308static sector_t ext3_bmap(struct address_space *mapping, sector_t block) 1308static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
1309{ 1309{
@@ -1312,16 +1312,16 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
1312 int err; 1312 int err;
1313 1313
1314 if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { 1314 if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
1315 /* 1315 /*
1316 * This is a REALLY heavyweight approach, but the use of 1316 * This is a REALLY heavyweight approach, but the use of
1317 * bmap on dirty files is expected to be extremely rare: 1317 * bmap on dirty files is expected to be extremely rare:
1318 * only if we run lilo or swapon on a freshly made file 1318 * only if we run lilo or swapon on a freshly made file
1319 * do we expect this to happen. 1319 * do we expect this to happen.
1320 * 1320 *
1321 * (bmap requires CAP_SYS_RAWIO so this does not 1321 * (bmap requires CAP_SYS_RAWIO so this does not
1322 * represent an unprivileged user DOS attack --- we'd be 1322 * represent an unprivileged user DOS attack --- we'd be
1323 * in trouble if mortal users could trigger this path at 1323 * in trouble if mortal users could trigger this path at
1324 * will.) 1324 * will.)
1325 * 1325 *
1326 * NB. EXT3_STATE_JDATA is not set on files other than 1326 * NB. EXT3_STATE_JDATA is not set on files other than
1327 * regular files. If somebody wants to bmap a directory 1327 * regular files. If somebody wants to bmap a directory
@@ -1457,7 +1457,7 @@ static int ext3_ordered_writepage(struct page *page,
1457 */ 1457 */
1458 1458
1459 /* 1459 /*
1460 * And attach them to the current transaction. But only if 1460 * And attach them to the current transaction. But only if
1461 * block_write_full_page() succeeded. Otherwise they are unmapped, 1461 * block_write_full_page() succeeded. Otherwise they are unmapped,
1462 * and generally junk. 1462 * and generally junk.
1463 */ 1463 */
@@ -1644,7 +1644,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1644 } 1644 }
1645 } 1645 }
1646 1646
1647 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1647 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1648 offset, nr_segs, 1648 offset, nr_segs,
1649 ext3_get_block, NULL); 1649 ext3_get_block, NULL);
1650 1650
@@ -2025,7 +2025,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2025 __le32 *first, __le32 *last) 2025 __le32 *first, __le32 *last)
2026{ 2026{
2027 ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ 2027 ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */
2028 unsigned long count = 0; /* Number of blocks in the run */ 2028 unsigned long count = 0; /* Number of blocks in the run */
2029 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind 2029 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
2030 corresponding to 2030 corresponding to
2031 block_to_free */ 2031 block_to_free */
@@ -2054,7 +2054,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2054 } else if (nr == block_to_free + count) { 2054 } else if (nr == block_to_free + count) {
2055 count++; 2055 count++;
2056 } else { 2056 } else {
2057 ext3_clear_blocks(handle, inode, this_bh, 2057 ext3_clear_blocks(handle, inode, this_bh,
2058 block_to_free, 2058 block_to_free,
2059 count, block_to_free_p, p); 2059 count, block_to_free_p, p);
2060 block_to_free = nr; 2060 block_to_free = nr;
@@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2115 */ 2115 */
2116 if (!bh) { 2116 if (!bh) {
2117 ext3_error(inode->i_sb, "ext3_free_branches", 2117 ext3_error(inode->i_sb, "ext3_free_branches",
2118 "Read failure, inode=%ld, block="E3FSBLK, 2118 "Read failure, inode=%lu, block="E3FSBLK,
2119 inode->i_ino, nr); 2119 inode->i_ino, nr);
2120 continue; 2120 continue;
2121 } 2121 }
@@ -2184,7 +2184,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2184 *p = 0; 2184 *p = 0;
2185 BUFFER_TRACE(parent_bh, 2185 BUFFER_TRACE(parent_bh,
2186 "call ext3_journal_dirty_metadata"); 2186 "call ext3_journal_dirty_metadata");
2187 ext3_journal_dirty_metadata(handle, 2187 ext3_journal_dirty_metadata(handle,
2188 parent_bh); 2188 parent_bh);
2189 } 2189 }
2190 } 2190 }
@@ -2632,9 +2632,6 @@ void ext3_read_inode(struct inode * inode)
2632 * recovery code: that's fine, we're about to complete 2632 * recovery code: that's fine, we're about to complete
2633 * the process of deleting those. */ 2633 * the process of deleting those. */
2634 } 2634 }
2635 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
2636 * (for stat), not the fs block
2637 * size */
2638 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); 2635 inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
2639 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 2636 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
2640#ifdef EXT3_FRAGMENTS 2637#ifdef EXT3_FRAGMENTS
@@ -2704,7 +2701,7 @@ void ext3_read_inode(struct inode * inode)
2704 if (raw_inode->i_block[0]) 2701 if (raw_inode->i_block[0])
2705 init_special_inode(inode, inode->i_mode, 2702 init_special_inode(inode, inode->i_mode,
2706 old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); 2703 old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
2707 else 2704 else
2708 init_special_inode(inode, inode->i_mode, 2705 init_special_inode(inode, inode->i_mode,
2709 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 2706 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
2710 } 2707 }
@@ -2724,8 +2721,8 @@ bad_inode:
2724 * 2721 *
2725 * The caller must have write access to iloc->bh. 2722 * The caller must have write access to iloc->bh.
2726 */ 2723 */
2727static int ext3_do_update_inode(handle_t *handle, 2724static int ext3_do_update_inode(handle_t *handle,
2728 struct inode *inode, 2725 struct inode *inode,
2729 struct ext3_iloc *iloc) 2726 struct ext3_iloc *iloc)
2730{ 2727{
2731 struct ext3_inode *raw_inode = ext3_raw_inode(iloc); 2728 struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
@@ -2900,7 +2897,7 @@ int ext3_write_inode(struct inode *inode, int wait)
2900 * commit will leave the blocks being flushed in an unused state on 2897 * commit will leave the blocks being flushed in an unused state on
2901 * disk. (On recovery, the inode will get truncated and the blocks will 2898 * disk. (On recovery, the inode will get truncated and the blocks will
2902 * be freed, so we have a strong guarantee that no future commit will 2899 * be freed, so we have a strong guarantee that no future commit will
2903 * leave these blocks visible to the user.) 2900 * leave these blocks visible to the user.)
2904 * 2901 *
2905 * Called with inode->sem down. 2902 * Called with inode->sem down.
2906 */ 2903 */
@@ -3043,13 +3040,13 @@ int ext3_mark_iloc_dirty(handle_t *handle,
3043 return err; 3040 return err;
3044} 3041}
3045 3042
3046/* 3043/*
3047 * On success, We end up with an outstanding reference count against 3044 * On success, We end up with an outstanding reference count against
3048 * iloc->bh. This _must_ be cleaned up later. 3045 * iloc->bh. This _must_ be cleaned up later.
3049 */ 3046 */
3050 3047
3051int 3048int
3052ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 3049ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
3053 struct ext3_iloc *iloc) 3050 struct ext3_iloc *iloc)
3054{ 3051{
3055 int err = 0; 3052 int err = 0;
@@ -3139,7 +3136,7 @@ out:
3139} 3136}
3140 3137
3141#if 0 3138#if 0
3142/* 3139/*
3143 * Bind an inode's backing buffer_head into this transaction, to prevent 3140 * Bind an inode's backing buffer_head into this transaction, to prevent
3144 * it from being flushed to disk early. Unlike 3141 * it from being flushed to disk early. Unlike
3145 * ext3_reserve_inode_write, this leaves behind no bh reference and 3142 * ext3_reserve_inode_write, this leaves behind no bh reference and
@@ -3157,7 +3154,7 @@ static int ext3_pin_inode(handle_t *handle, struct inode *inode)
3157 BUFFER_TRACE(iloc.bh, "get_write_access"); 3154 BUFFER_TRACE(iloc.bh, "get_write_access");
3158 err = journal_get_write_access(handle, iloc.bh); 3155 err = journal_get_write_access(handle, iloc.bh);
3159 if (!err) 3156 if (!err)
3160 err = ext3_journal_dirty_metadata(handle, 3157 err = ext3_journal_dirty_metadata(handle,
3161 iloc.bh); 3158 iloc.bh);
3162 brelse(iloc.bh); 3159 brelse(iloc.bh);
3163 } 3160 }
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2aa7101b27cd..85d132c37ee0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -15,13 +15,13 @@
15 * Big-endian to little-endian byte-swapping/bitmaps by 15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995 16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 * Directory entry file type support and forward compatibility hooks 17 * Directory entry file type support and forward compatibility hooks
18 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 18 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19 * Hash Tree Directory indexing (c) 19 * Hash Tree Directory indexing (c)
20 * Daniel Phillips, 2001 20 * Daniel Phillips, 2001
21 * Hash Tree Directory indexing porting 21 * Hash Tree Directory indexing porting
22 * Christopher Li, 2002 22 * Christopher Li, 2002
23 * Hash Tree Directory indexing cleanup 23 * Hash Tree Directory indexing cleanup
24 * Theodore Ts'o, 2002 24 * Theodore Ts'o, 2002
25 */ 25 */
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
@@ -76,7 +76,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
76#ifdef DX_DEBUG 76#ifdef DX_DEBUG
77#define dxtrace(command) command 77#define dxtrace(command) command
78#else 78#else
79#define dxtrace(command) 79#define dxtrace(command)
80#endif 80#endif
81 81
82struct fake_dirent 82struct fake_dirent
@@ -169,7 +169,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
169static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); 169static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
170static int ext3_htree_next_block(struct inode *dir, __u32 hash, 170static int ext3_htree_next_block(struct inode *dir, __u32 hash,
171 struct dx_frame *frame, 171 struct dx_frame *frame,
172 struct dx_frame *frames, 172 struct dx_frame *frames,
173 __u32 *start_hash); 173 __u32 *start_hash);
174static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, 174static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
175 struct ext3_dir_entry_2 **res_dir, int *err); 175 struct ext3_dir_entry_2 **res_dir, int *err);
@@ -250,7 +250,7 @@ static void dx_show_index (char * label, struct dx_entry *entries)
250} 250}
251 251
252struct stats 252struct stats
253{ 253{
254 unsigned names; 254 unsigned names;
255 unsigned space; 255 unsigned space;
256 unsigned bcount; 256 unsigned bcount;
@@ -278,7 +278,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
278 ((char *) de - base)); 278 ((char *) de - base));
279 } 279 }
280 space += EXT3_DIR_REC_LEN(de->name_len); 280 space += EXT3_DIR_REC_LEN(de->name_len);
281 names++; 281 names++;
282 } 282 }
283 de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); 283 de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
284 } 284 }
@@ -464,7 +464,7 @@ static void dx_release (struct dx_frame *frames)
464 */ 464 */
465static int ext3_htree_next_block(struct inode *dir, __u32 hash, 465static int ext3_htree_next_block(struct inode *dir, __u32 hash,
466 struct dx_frame *frame, 466 struct dx_frame *frame,
467 struct dx_frame *frames, 467 struct dx_frame *frames,
468 __u32 *start_hash) 468 __u32 *start_hash)
469{ 469{
470 struct dx_frame *p; 470 struct dx_frame *p;
@@ -632,7 +632,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
632 } 632 }
633 count += ret; 633 count += ret;
634 hashval = ~0; 634 hashval = ~0;
635 ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, 635 ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
636 frame, frames, &hashval); 636 frame, frames, &hashval);
637 *next_hash = hashval; 637 *next_hash = hashval;
638 if (ret < 0) { 638 if (ret < 0) {
@@ -649,7 +649,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
649 break; 649 break;
650 } 650 }
651 dx_release(frames); 651 dx_release(frames);
652 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", 652 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
653 count, *next_hash)); 653 count, *next_hash));
654 return count; 654 return count;
655errout: 655errout:
@@ -1050,7 +1050,7 @@ struct dentry *ext3_get_parent(struct dentry *child)
1050 parent = ERR_PTR(-ENOMEM); 1050 parent = ERR_PTR(-ENOMEM);
1051 } 1051 }
1052 return parent; 1052 return parent;
1053} 1053}
1054 1054
1055#define S_SHIFT 12 1055#define S_SHIFT 12
1056static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { 1056static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -1198,7 +1198,7 @@ errout:
1198 * add_dirent_to_buf will attempt search the directory block for 1198 * add_dirent_to_buf will attempt search the directory block for
1199 * space. It will return -ENOSPC if no space is available, and -EIO 1199 * space. It will return -ENOSPC if no space is available, and -EIO
1200 * and -EEXIST if directory entry already exists. 1200 * and -EEXIST if directory entry already exists.
1201 * 1201 *
1202 * NOTE! bh is NOT released in the case where ENOSPC is returned. In 1202 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1203 * all other cases bh is released. 1203 * all other cases bh is released.
1204 */ 1204 */
@@ -1572,7 +1572,7 @@ cleanup:
1572 * ext3_delete_entry deletes a directory entry by merging it with the 1572 * ext3_delete_entry deletes a directory entry by merging it with the
1573 * previous entry 1573 * previous entry
1574 */ 1574 */
1575static int ext3_delete_entry (handle_t *handle, 1575static int ext3_delete_entry (handle_t *handle,
1576 struct inode * dir, 1576 struct inode * dir,
1577 struct ext3_dir_entry_2 * de_del, 1577 struct ext3_dir_entry_2 * de_del,
1578 struct buffer_head * bh) 1578 struct buffer_head * bh)
@@ -1643,12 +1643,12 @@ static int ext3_add_nondir(handle_t *handle,
1643 * is so far negative - it has no inode. 1643 * is so far negative - it has no inode.
1644 * 1644 *
1645 * If the create succeeds, we fill in the inode information 1645 * If the create succeeds, we fill in the inode information
1646 * with d_instantiate(). 1646 * with d_instantiate().
1647 */ 1647 */
1648static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, 1648static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1649 struct nameidata *nd) 1649 struct nameidata *nd)
1650{ 1650{
1651 handle_t *handle; 1651 handle_t *handle;
1652 struct inode * inode; 1652 struct inode * inode;
1653 int err, retries = 0; 1653 int err, retries = 0;
1654 1654
@@ -1688,7 +1688,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1688 1688
1689retry: 1689retry:
1690 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1690 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1691 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1691 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1692 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1692 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1693 if (IS_ERR(handle)) 1693 if (IS_ERR(handle))
1694 return PTR_ERR(handle); 1694 return PTR_ERR(handle);
@@ -1813,10 +1813,10 @@ static int empty_dir (struct inode * inode)
1813 de1 = (struct ext3_dir_entry_2 *) 1813 de1 = (struct ext3_dir_entry_2 *)
1814 ((char *) de + le16_to_cpu(de->rec_len)); 1814 ((char *) de + le16_to_cpu(de->rec_len));
1815 if (le32_to_cpu(de->inode) != inode->i_ino || 1815 if (le32_to_cpu(de->inode) != inode->i_ino ||
1816 !le32_to_cpu(de1->inode) || 1816 !le32_to_cpu(de1->inode) ||
1817 strcmp (".", de->name) || 1817 strcmp (".", de->name) ||
1818 strcmp ("..", de1->name)) { 1818 strcmp ("..", de1->name)) {
1819 ext3_warning (inode->i_sb, "empty_dir", 1819 ext3_warning (inode->i_sb, "empty_dir",
1820 "bad directory (dir #%lu) - no `.' or `..'", 1820 "bad directory (dir #%lu) - no `.' or `..'",
1821 inode->i_ino); 1821 inode->i_ino);
1822 brelse (bh); 1822 brelse (bh);
@@ -1883,7 +1883,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1883 * being truncated, or files being unlinked. */ 1883 * being truncated, or files being unlinked. */
1884 1884
1885 /* @@@ FIXME: Observation from aviro: 1885 /* @@@ FIXME: Observation from aviro:
1886 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block 1886 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
1887 * here (on lock_super()), so race with ext3_link() which might bump 1887 * here (on lock_super()), so race with ext3_link() which might bump
1888 * ->i_nlink. For, say it, character device. Not a regular file, 1888 * ->i_nlink. For, say it, character device. Not a regular file,
1889 * not a directory, not a symlink and ->i_nlink > 0. 1889 * not a directory, not a symlink and ->i_nlink > 0.
@@ -1919,8 +1919,8 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1919 if (!err) 1919 if (!err)
1920 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1920 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1921 1921
1922 jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); 1922 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
1923 jbd_debug(4, "orphan inode %ld will point to %d\n", 1923 jbd_debug(4, "orphan inode %lu will point to %d\n",
1924 inode->i_ino, NEXT_ORPHAN(inode)); 1924 inode->i_ino, NEXT_ORPHAN(inode));
1925out_unlock: 1925out_unlock:
1926 unlock_super(sb); 1926 unlock_super(sb);
@@ -2129,7 +2129,7 @@ static int ext3_symlink (struct inode * dir,
2129 2129
2130retry: 2130retry:
2131 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2131 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2132 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2132 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2133 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 2133 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
2134 if (IS_ERR(handle)) 2134 if (IS_ERR(handle))
2135 return PTR_ERR(handle); 2135 return PTR_ERR(handle);
@@ -2227,7 +2227,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2227 DQUOT_INIT(new_dentry->d_inode); 2227 DQUOT_INIT(new_dentry->d_inode);
2228 handle = ext3_journal_start(old_dir, 2 * 2228 handle = ext3_journal_start(old_dir, 2 *
2229 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2229 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2230 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); 2230 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
2231 if (IS_ERR(handle)) 2231 if (IS_ERR(handle))
2232 return PTR_ERR(handle); 2232 return PTR_ERR(handle);
2233 2233
@@ -2393,4 +2393,4 @@ struct inode_operations ext3_special_inode_operations = {
2393 .removexattr = generic_removexattr, 2393 .removexattr = generic_removexattr,
2394#endif 2394#endif
2395 .permission = ext3_permission, 2395 .permission = ext3_permission,
2396}; 2396};
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5e1337fd878a..b73cba12f79c 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -336,7 +336,7 @@ static int verify_reserved_gdb(struct super_block *sb,
336 unsigned five = 5; 336 unsigned five = 5;
337 unsigned seven = 7; 337 unsigned seven = 7;
338 unsigned grp; 338 unsigned grp;
339 __u32 *p = (__u32 *)primary->b_data; 339 __le32 *p = (__le32 *)primary->b_data;
340 int gdbackups = 0; 340 int gdbackups = 0;
341 341
342 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 342 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
@@ -380,7 +380,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
380 struct buffer_head *dind; 380 struct buffer_head *dind;
381 int gdbackups; 381 int gdbackups;
382 struct ext3_iloc iloc; 382 struct ext3_iloc iloc;
383 __u32 *data; 383 __le32 *data;
384 int err; 384 int err;
385 385
386 if (test_opt(sb, DEBUG)) 386 if (test_opt(sb, DEBUG))
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
417 goto exit_bh; 417 goto exit_bh;
418 } 418 }
419 419
420 data = (__u32 *)dind->b_data; 420 data = (__le32 *)dind->b_data;
421 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 421 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
422 ext3_warning(sb, __FUNCTION__, 422 ext3_warning(sb, __FUNCTION__,
423 "new group %u GDT block "E3FSBLK" not reserved", 423 "new group %u GDT block "E3FSBLK" not reserved",
@@ -439,8 +439,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
439 if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) 439 if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
440 goto exit_dindj; 440 goto exit_dindj;
441 441
442 n_group_desc = (struct buffer_head **)kmalloc((gdb_num + 1) * 442 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
443 sizeof(struct buffer_head *), GFP_KERNEL); 443 GFP_KERNEL);
444 if (!n_group_desc) { 444 if (!n_group_desc) {
445 err = -ENOMEM; 445 err = -ENOMEM;
446 ext3_warning (sb, __FUNCTION__, 446 ext3_warning (sb, __FUNCTION__,
@@ -519,7 +519,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
519 struct buffer_head *dind; 519 struct buffer_head *dind;
520 struct ext3_iloc iloc; 520 struct ext3_iloc iloc;
521 ext3_fsblk_t blk; 521 ext3_fsblk_t blk;
522 __u32 *data, *end; 522 __le32 *data, *end;
523 int gdbackups = 0; 523 int gdbackups = 0;
524 int res, i; 524 int res, i;
525 int err; 525 int err;
@@ -536,8 +536,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
536 } 536 }
537 537
538 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; 538 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
539 data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; 539 data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
540 end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); 540 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
541 541
542 /* Get each reserved primary GDT block and verify it holds backups */ 542 /* Get each reserved primary GDT block and verify it holds backups */
543 for (res = 0; res < reserved_gdb; res++, blk++) { 543 for (res = 0; res < reserved_gdb; res++, blk++) {
@@ -545,7 +545,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
545 ext3_warning(sb, __FUNCTION__, 545 ext3_warning(sb, __FUNCTION__,
546 "reserved block "E3FSBLK 546 "reserved block "E3FSBLK
547 " not at offset %ld", 547 " not at offset %ld",
548 blk, (long)(data - (__u32 *)dind->b_data)); 548 blk,
549 (long)(data - (__le32 *)dind->b_data));
549 err = -EINVAL; 550 err = -EINVAL;
550 goto exit_bh; 551 goto exit_bh;
551 } 552 }
@@ -560,7 +561,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
560 goto exit_bh; 561 goto exit_bh;
561 } 562 }
562 if (++data >= end) 563 if (++data >= end)
563 data = (__u32 *)dind->b_data; 564 data = (__le32 *)dind->b_data;
564 } 565 }
565 566
566 for (i = 0; i < reserved_gdb; i++) { 567 for (i = 0; i < reserved_gdb; i++) {
@@ -584,7 +585,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
584 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); 585 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
585 for (i = 0; i < reserved_gdb; i++) { 586 for (i = 0; i < reserved_gdb; i++) {
586 int err2; 587 int err2;
587 data = (__u32 *)primary[i]->b_data; 588 data = (__le32 *)primary[i]->b_data;
588 /* printk("reserving backup %lu[%u] = %lu\n", 589 /* printk("reserving backup %lu[%u] = %lu\n",
589 primary[i]->b_blocknr, gdbackups, 590 primary[i]->b_blocknr, gdbackups,
590 blk + primary[i]->b_blocknr); */ 591 blk + primary[i]->b_blocknr); */
@@ -689,7 +690,7 @@ exit_err:
689 "can't update backup for group %d (err %d), " 690 "can't update backup for group %d (err %d), "
690 "forcing fsck on next reboot", group, err); 691 "forcing fsck on next reboot", group, err);
691 sbi->s_mount_state &= ~EXT3_VALID_FS; 692 sbi->s_mount_state &= ~EXT3_VALID_FS;
692 sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); 693 sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
693 mark_buffer_dirty(sbi->s_sbh); 694 mark_buffer_dirty(sbi->s_sbh);
694 } 695 }
695} 696}
@@ -730,6 +731,18 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
730 return -EPERM; 731 return -EPERM;
731 } 732 }
732 733
734 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
735 le32_to_cpu(es->s_blocks_count)) {
736 ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n");
737 return -EINVAL;
738 }
739
740 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
741 le32_to_cpu(es->s_inodes_count)) {
742 ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n");
743 return -EINVAL;
744 }
745
733 if (reserved_gdb || gdb_off == 0) { 746 if (reserved_gdb || gdb_off == 0) {
734 if (!EXT3_HAS_COMPAT_FEATURE(sb, 747 if (!EXT3_HAS_COMPAT_FEATURE(sb,
735 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 748 EXT3_FEATURE_COMPAT_RESIZE_INODE)){
@@ -958,6 +971,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
958 971
959 add = EXT3_BLOCKS_PER_GROUP(sb) - last; 972 add = EXT3_BLOCKS_PER_GROUP(sb) - last;
960 973
974 if (o_blocks_count + add < o_blocks_count) {
975 ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
976 return -EINVAL;
977 }
978
961 if (o_blocks_count + add > n_blocks_count) 979 if (o_blocks_count + add > n_blocks_count)
962 add = n_blocks_count - o_blocks_count; 980 add = n_blocks_count - o_blocks_count;
963 981
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3559086eee5f..8bfd56ef18ca 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -45,7 +45,7 @@
45static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 45static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
46 unsigned long journal_devnum); 46 unsigned long journal_devnum);
47static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 47static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
48 int); 48 unsigned int);
49static void ext3_commit_super (struct super_block * sb, 49static void ext3_commit_super (struct super_block * sb,
50 struct ext3_super_block * es, 50 struct ext3_super_block * es,
51 int sync); 51 int sync);
@@ -62,13 +62,13 @@ static void ext3_unlockfs(struct super_block *sb);
62static void ext3_write_super (struct super_block * sb); 62static void ext3_write_super (struct super_block * sb);
63static void ext3_write_super_lockfs(struct super_block *sb); 63static void ext3_write_super_lockfs(struct super_block *sb);
64 64
65/* 65/*
66 * Wrappers for journal_start/end. 66 * Wrappers for journal_start/end.
67 * 67 *
68 * The only special thing we need to do here is to make sure that all 68 * The only special thing we need to do here is to make sure that all
69 * journal_end calls result in the superblock being marked dirty, so 69 * journal_end calls result in the superblock being marked dirty, so
70 * that sync() will call the filesystem's write_super callback if 70 * that sync() will call the filesystem's write_super callback if
71 * appropriate. 71 * appropriate.
72 */ 72 */
73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
74{ 74{
@@ -90,11 +90,11 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
90 return journal_start(journal, nblocks); 90 return journal_start(journal, nblocks);
91} 91}
92 92
93/* 93/*
94 * The only special thing we need to do here is to make sure that all 94 * The only special thing we need to do here is to make sure that all
95 * journal_stop calls result in the superblock being marked dirty, so 95 * journal_stop calls result in the superblock being marked dirty, so
96 * that sync() will call the filesystem's write_super callback if 96 * that sync() will call the filesystem's write_super callback if
97 * appropriate. 97 * appropriate.
98 */ 98 */
99int __ext3_journal_stop(const char *where, handle_t *handle) 99int __ext3_journal_stop(const char *where, handle_t *handle)
100{ 100{
@@ -159,20 +159,21 @@ static void ext3_handle_error(struct super_block *sb)
159 if (sb->s_flags & MS_RDONLY) 159 if (sb->s_flags & MS_RDONLY)
160 return; 160 return;
161 161
162 if (test_opt (sb, ERRORS_RO)) { 162 if (!test_opt (sb, ERRORS_CONT)) {
163 printk (KERN_CRIT "Remounting filesystem read-only\n");
164 sb->s_flags |= MS_RDONLY;
165 } else {
166 journal_t *journal = EXT3_SB(sb)->s_journal; 163 journal_t *journal = EXT3_SB(sb)->s_journal;
167 164
168 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 165 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
169 if (journal) 166 if (journal)
170 journal_abort(journal, -EIO); 167 journal_abort(journal, -EIO);
171 } 168 }
169 if (test_opt (sb, ERRORS_RO)) {
170 printk (KERN_CRIT "Remounting filesystem read-only\n");
171 sb->s_flags |= MS_RDONLY;
172 }
173 ext3_commit_super(sb, es, 1);
172 if (test_opt(sb, ERRORS_PANIC)) 174 if (test_opt(sb, ERRORS_PANIC))
173 panic("EXT3-fs (device %s): panic forced after error\n", 175 panic("EXT3-fs (device %s): panic forced after error\n",
174 sb->s_id); 176 sb->s_id);
175 ext3_commit_super(sb, es, 1);
176} 177}
177 178
178void ext3_error (struct super_block * sb, const char * function, 179void ext3_error (struct super_block * sb, const char * function,
@@ -369,16 +370,16 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
369{ 370{
370 struct list_head *l; 371 struct list_head *l;
371 372
372 printk(KERN_ERR "sb orphan head is %d\n", 373 printk(KERN_ERR "sb orphan head is %d\n",
373 le32_to_cpu(sbi->s_es->s_last_orphan)); 374 le32_to_cpu(sbi->s_es->s_last_orphan));
374 375
375 printk(KERN_ERR "sb_info orphan list:\n"); 376 printk(KERN_ERR "sb_info orphan list:\n");
376 list_for_each(l, &sbi->s_orphan) { 377 list_for_each(l, &sbi->s_orphan) {
377 struct inode *inode = orphan_list_entry(l); 378 struct inode *inode = orphan_list_entry(l);
378 printk(KERN_ERR " " 379 printk(KERN_ERR " "
379 "inode %s:%ld at %p: mode %o, nlink %d, next %d\n", 380 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
380 inode->i_sb->s_id, inode->i_ino, inode, 381 inode->i_sb->s_id, inode->i_ino, inode,
381 inode->i_mode, inode->i_nlink, 382 inode->i_mode, inode->i_nlink,
382 NEXT_ORPHAN(inode)); 383 NEXT_ORPHAN(inode));
383 } 384 }
384} 385}
@@ -475,7 +476,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
475 inode_init_once(&ei->vfs_inode); 476 inode_init_once(&ei->vfs_inode);
476 } 477 }
477} 478}
478 479
479static int init_inodecache(void) 480static int init_inodecache(void)
480{ 481{
481 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 482 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
@@ -490,8 +491,7 @@ static int init_inodecache(void)
490 491
491static void destroy_inodecache(void) 492static void destroy_inodecache(void)
492{ 493{
493 if (kmem_cache_destroy(ext3_inode_cachep)) 494 kmem_cache_destroy(ext3_inode_cachep);
494 printk(KERN_INFO "ext3_inode_cache: not all structures were freed\n");
495} 495}
496 496
497static void ext3_clear_inode(struct inode *inode) 497static void ext3_clear_inode(struct inode *inode)
@@ -733,8 +733,8 @@ static match_table_t tokens = {
733 733
734static ext3_fsblk_t get_sb_block(void **data) 734static ext3_fsblk_t get_sb_block(void **data)
735{ 735{
736 ext3_fsblk_t sb_block; 736 ext3_fsblk_t sb_block;
737 char *options = (char *) *data; 737 char *options = (char *) *data;
738 738
739 if (!options || strncmp(options, "sb=", 3) != 0) 739 if (!options || strncmp(options, "sb=", 3) != 0)
740 return 1; /* Default location */ 740 return 1; /* Default location */
@@ -753,7 +753,7 @@ static ext3_fsblk_t get_sb_block(void **data)
753} 753}
754 754
755static int parse_options (char *options, struct super_block *sb, 755static int parse_options (char *options, struct super_block *sb,
756 unsigned long *inum, unsigned long *journal_devnum, 756 unsigned int *inum, unsigned long *journal_devnum,
757 ext3_fsblk_t *n_blocks_count, int is_remount) 757 ext3_fsblk_t *n_blocks_count, int is_remount)
758{ 758{
759 struct ext3_sb_info *sbi = EXT3_SB(sb); 759 struct ext3_sb_info *sbi = EXT3_SB(sb);
@@ -1174,7 +1174,8 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1174static int ext3_check_descriptors (struct super_block * sb) 1174static int ext3_check_descriptors (struct super_block * sb)
1175{ 1175{
1176 struct ext3_sb_info *sbi = EXT3_SB(sb); 1176 struct ext3_sb_info *sbi = EXT3_SB(sb);
1177 ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block); 1177 ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1178 ext3_fsblk_t last_block;
1178 struct ext3_group_desc * gdp = NULL; 1179 struct ext3_group_desc * gdp = NULL;
1179 int desc_block = 0; 1180 int desc_block = 0;
1180 int i; 1181 int i;
@@ -1183,12 +1184,17 @@ static int ext3_check_descriptors (struct super_block * sb)
1183 1184
1184 for (i = 0; i < sbi->s_groups_count; i++) 1185 for (i = 0; i < sbi->s_groups_count; i++)
1185 { 1186 {
1187 if (i == sbi->s_groups_count - 1)
1188 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1189 else
1190 last_block = first_block +
1191 (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1192
1186 if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) 1193 if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
1187 gdp = (struct ext3_group_desc *) 1194 gdp = (struct ext3_group_desc *)
1188 sbi->s_group_desc[desc_block++]->b_data; 1195 sbi->s_group_desc[desc_block++]->b_data;
1189 if (le32_to_cpu(gdp->bg_block_bitmap) < block || 1196 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1190 le32_to_cpu(gdp->bg_block_bitmap) >= 1197 le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1191 block + EXT3_BLOCKS_PER_GROUP(sb))
1192 { 1198 {
1193 ext3_error (sb, "ext3_check_descriptors", 1199 ext3_error (sb, "ext3_check_descriptors",
1194 "Block bitmap for group %d" 1200 "Block bitmap for group %d"
@@ -1197,9 +1203,8 @@ static int ext3_check_descriptors (struct super_block * sb)
1197 le32_to_cpu(gdp->bg_block_bitmap)); 1203 le32_to_cpu(gdp->bg_block_bitmap));
1198 return 0; 1204 return 0;
1199 } 1205 }
1200 if (le32_to_cpu(gdp->bg_inode_bitmap) < block || 1206 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1201 le32_to_cpu(gdp->bg_inode_bitmap) >= 1207 le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1202 block + EXT3_BLOCKS_PER_GROUP(sb))
1203 { 1208 {
1204 ext3_error (sb, "ext3_check_descriptors", 1209 ext3_error (sb, "ext3_check_descriptors",
1205 "Inode bitmap for group %d" 1210 "Inode bitmap for group %d"
@@ -1208,9 +1213,9 @@ static int ext3_check_descriptors (struct super_block * sb)
1208 le32_to_cpu(gdp->bg_inode_bitmap)); 1213 le32_to_cpu(gdp->bg_inode_bitmap));
1209 return 0; 1214 return 0;
1210 } 1215 }
1211 if (le32_to_cpu(gdp->bg_inode_table) < block || 1216 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1212 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= 1217 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
1213 block + EXT3_BLOCKS_PER_GROUP(sb)) 1218 last_block)
1214 { 1219 {
1215 ext3_error (sb, "ext3_check_descriptors", 1220 ext3_error (sb, "ext3_check_descriptors",
1216 "Inode table for group %d" 1221 "Inode table for group %d"
@@ -1219,7 +1224,7 @@ static int ext3_check_descriptors (struct super_block * sb)
1219 le32_to_cpu(gdp->bg_inode_table)); 1224 le32_to_cpu(gdp->bg_inode_table));
1220 return 0; 1225 return 0;
1221 } 1226 }
1222 block += EXT3_BLOCKS_PER_GROUP(sb); 1227 first_block += EXT3_BLOCKS_PER_GROUP(sb);
1223 gdp++; 1228 gdp++;
1224 } 1229 }
1225 1230
@@ -1301,17 +1306,17 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1301 DQUOT_INIT(inode); 1306 DQUOT_INIT(inode);
1302 if (inode->i_nlink) { 1307 if (inode->i_nlink) {
1303 printk(KERN_DEBUG 1308 printk(KERN_DEBUG
1304 "%s: truncating inode %ld to %Ld bytes\n", 1309 "%s: truncating inode %lu to %Ld bytes\n",
1305 __FUNCTION__, inode->i_ino, inode->i_size); 1310 __FUNCTION__, inode->i_ino, inode->i_size);
1306 jbd_debug(2, "truncating inode %ld to %Ld bytes\n", 1311 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1307 inode->i_ino, inode->i_size); 1312 inode->i_ino, inode->i_size);
1308 ext3_truncate(inode); 1313 ext3_truncate(inode);
1309 nr_truncates++; 1314 nr_truncates++;
1310 } else { 1315 } else {
1311 printk(KERN_DEBUG 1316 printk(KERN_DEBUG
1312 "%s: deleting unreferenced inode %ld\n", 1317 "%s: deleting unreferenced inode %lu\n",
1313 __FUNCTION__, inode->i_ino); 1318 __FUNCTION__, inode->i_ino);
1314 jbd_debug(2, "deleting unreferenced inode %ld\n", 1319 jbd_debug(2, "deleting unreferenced inode %lu\n",
1315 inode->i_ino); 1320 inode->i_ino);
1316 nr_orphans++; 1321 nr_orphans++;
1317 } 1322 }
@@ -1390,7 +1395,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1390 ext3_fsblk_t sb_block = get_sb_block(&data); 1395 ext3_fsblk_t sb_block = get_sb_block(&data);
1391 ext3_fsblk_t logic_sb_block; 1396 ext3_fsblk_t logic_sb_block;
1392 unsigned long offset = 0; 1397 unsigned long offset = 0;
1393 unsigned long journal_inum = 0; 1398 unsigned int journal_inum = 0;
1394 unsigned long journal_devnum = 0; 1399 unsigned long journal_devnum = 0;
1395 unsigned long def_mount_opts; 1400 unsigned long def_mount_opts;
1396 struct inode *root; 1401 struct inode *root;
@@ -1401,11 +1406,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1401 int needs_recovery; 1406 int needs_recovery;
1402 __le32 features; 1407 __le32 features;
1403 1408
1404 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); 1409 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1405 if (!sbi) 1410 if (!sbi)
1406 return -ENOMEM; 1411 return -ENOMEM;
1407 sb->s_fs_info = sbi; 1412 sb->s_fs_info = sbi;
1408 memset(sbi, 0, sizeof(*sbi));
1409 sbi->s_mount_opt = 0; 1413 sbi->s_mount_opt = 0;
1410 sbi->s_resuid = EXT3_DEF_RESUID; 1414 sbi->s_resuid = EXT3_DEF_RESUID;
1411 sbi->s_resgid = EXT3_DEF_RESGID; 1415 sbi->s_resgid = EXT3_DEF_RESGID;
@@ -1483,7 +1487,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1483 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1487 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1484 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1488 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1485 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1489 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1486 printk(KERN_WARNING 1490 printk(KERN_WARNING
1487 "EXT3-fs warning: feature flags set on rev 0 fs, " 1491 "EXT3-fs warning: feature flags set on rev 0 fs, "
1488 "running e2fsck is recommended\n"); 1492 "running e2fsck is recommended\n");
1489 /* 1493 /*
@@ -1509,7 +1513,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1509 1513
1510 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1514 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1511 blocksize > EXT3_MAX_BLOCK_SIZE) { 1515 blocksize > EXT3_MAX_BLOCK_SIZE) {
1512 printk(KERN_ERR 1516 printk(KERN_ERR
1513 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", 1517 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
1514 blocksize, sb->s_id); 1518 blocksize, sb->s_id);
1515 goto failed_mount; 1519 goto failed_mount;
@@ -1533,14 +1537,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1533 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1537 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1534 bh = sb_bread(sb, logic_sb_block); 1538 bh = sb_bread(sb, logic_sb_block);
1535 if (!bh) { 1539 if (!bh) {
1536 printk(KERN_ERR 1540 printk(KERN_ERR
1537 "EXT3-fs: Can't read superblock on 2nd try.\n"); 1541 "EXT3-fs: Can't read superblock on 2nd try.\n");
1538 goto failed_mount; 1542 goto failed_mount;
1539 } 1543 }
1540 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1544 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
1541 sbi->s_es = es; 1545 sbi->s_es = es;
1542 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1546 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1543 printk (KERN_ERR 1547 printk (KERN_ERR
1544 "EXT3-fs: Magic mismatch, very weird !\n"); 1548 "EXT3-fs: Magic mismatch, very weird !\n");
1545 goto failed_mount; 1549 goto failed_mount;
1546 } 1550 }
@@ -1622,10 +1626,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1622 1626
1623 if (EXT3_BLOCKS_PER_GROUP(sb) == 0) 1627 if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1624 goto cantfind_ext3; 1628 goto cantfind_ext3;
1625 sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - 1629 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1626 le32_to_cpu(es->s_first_data_block) + 1630 le32_to_cpu(es->s_first_data_block) - 1)
1627 EXT3_BLOCKS_PER_GROUP(sb) - 1) / 1631 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1628 EXT3_BLOCKS_PER_GROUP(sb);
1629 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / 1632 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
1630 EXT3_DESC_PER_BLOCK(sb); 1633 EXT3_DESC_PER_BLOCK(sb);
1631 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1634 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
@@ -1820,7 +1823,7 @@ out_fail:
1820/* 1823/*
1821 * Setup any per-fs journal parameters now. We'll do this both on 1824 * Setup any per-fs journal parameters now. We'll do this both on
1822 * initial mount, once the journal has been initialised but before we've 1825 * initial mount, once the journal has been initialised but before we've
1823 * done any recovery; and again on any subsequent remount. 1826 * done any recovery; and again on any subsequent remount.
1824 */ 1827 */
1825static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) 1828static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
1826{ 1829{
@@ -1840,7 +1843,8 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
1840 spin_unlock(&journal->j_state_lock); 1843 spin_unlock(&journal->j_state_lock);
1841} 1844}
1842 1845
1843static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum) 1846static journal_t *ext3_get_journal(struct super_block *sb,
1847 unsigned int journal_inum)
1844{ 1848{
1845 struct inode *journal_inode; 1849 struct inode *journal_inode;
1846 journal_t *journal; 1850 journal_t *journal;
@@ -1975,7 +1979,7 @@ static int ext3_load_journal(struct super_block *sb,
1975 unsigned long journal_devnum) 1979 unsigned long journal_devnum)
1976{ 1980{
1977 journal_t *journal; 1981 journal_t *journal;
1978 int journal_inum = le32_to_cpu(es->s_journal_inum); 1982 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
1979 dev_t journal_dev; 1983 dev_t journal_dev;
1980 int err = 0; 1984 int err = 0;
1981 int really_read_only; 1985 int really_read_only;
@@ -2061,7 +2065,7 @@ static int ext3_load_journal(struct super_block *sb,
2061 2065
2062static int ext3_create_journal(struct super_block * sb, 2066static int ext3_create_journal(struct super_block * sb,
2063 struct ext3_super_block * es, 2067 struct ext3_super_block * es,
2064 int journal_inum) 2068 unsigned int journal_inum)
2065{ 2069{
2066 journal_t *journal; 2070 journal_t *journal;
2067 2071
@@ -2074,7 +2078,7 @@ static int ext3_create_journal(struct super_block * sb,
2074 if (!(journal = ext3_get_journal(sb, journal_inum))) 2078 if (!(journal = ext3_get_journal(sb, journal_inum)))
2075 return -EINVAL; 2079 return -EINVAL;
2076 2080
2077 printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n", 2081 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
2078 journal_inum); 2082 journal_inum);
2079 2083
2080 if (journal_create(journal)) { 2084 if (journal_create(journal)) {
@@ -2342,10 +2346,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2342 */ 2346 */
2343 ext3_clear_journal_err(sb, es); 2347 ext3_clear_journal_err(sb, es);
2344 sbi->s_mount_state = le16_to_cpu(es->s_state); 2348 sbi->s_mount_state = le16_to_cpu(es->s_state);
2345 if ((ret = ext3_group_extend(sb, es, n_blocks_count))) { 2349 if ((err = ext3_group_extend(sb, es, n_blocks_count)))
2346 err = ret;
2347 goto restore_opts; 2350 goto restore_opts;
2348 }
2349 if (!ext3_setup_super (sb, es, 0)) 2351 if (!ext3_setup_super (sb, es, 0))
2350 sb->s_flags &= ~MS_RDONLY; 2352 sb->s_flags &= ~MS_RDONLY;
2351 } 2353 }
@@ -2734,7 +2736,7 @@ static int __init init_ext3_fs(void)
2734out: 2736out:
2735 destroy_inodecache(); 2737 destroy_inodecache();
2736out1: 2738out1:
2737 exit_ext3_xattr(); 2739 exit_ext3_xattr();
2738 return err; 2740 return err;
2739} 2741}
2740 2742
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index a44a0562203a..f86f2482f01d 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -75,7 +75,7 @@
75 75
76#ifdef EXT3_XATTR_DEBUG 76#ifdef EXT3_XATTR_DEBUG
77# define ea_idebug(inode, f...) do { \ 77# define ea_idebug(inode, f...) do { \
78 printk(KERN_DEBUG "inode %s:%ld: ", \ 78 printk(KERN_DEBUG "inode %s:%lu: ", \
79 inode->i_sb->s_id, inode->i_ino); \ 79 inode->i_sb->s_id, inode->i_ino); \
80 printk(f); \ 80 printk(f); \
81 printk("\n"); \ 81 printk("\n"); \
@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
234 if (ext3_xattr_check_block(bh)) { 234 if (ext3_xattr_check_block(bh)) {
235bad_block: ext3_error(inode->i_sb, __FUNCTION__, 235bad_block: ext3_error(inode->i_sb, __FUNCTION__,
236 "inode %ld: bad block "E3FSBLK, inode->i_ino, 236 "inode %lu: bad block "E3FSBLK, inode->i_ino,
237 EXT3_I(inode)->i_file_acl); 237 EXT3_I(inode)->i_file_acl);
238 error = -EIO; 238 error = -EIO;
239 goto cleanup; 239 goto cleanup;
@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
376 if (ext3_xattr_check_block(bh)) { 376 if (ext3_xattr_check_block(bh)) {
377 ext3_error(inode->i_sb, __FUNCTION__, 377 ext3_error(inode->i_sb, __FUNCTION__,
378 "inode %ld: bad block "E3FSBLK, inode->i_ino, 378 "inode %lu: bad block "E3FSBLK, inode->i_ino,
379 EXT3_I(inode)->i_file_acl); 379 EXT3_I(inode)->i_file_acl);
380 error = -EIO; 380 error = -EIO;
381 goto cleanup; 381 goto cleanup;
@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
647 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 647 le32_to_cpu(BHDR(bs->bh)->h_refcount));
648 if (ext3_xattr_check_block(bs->bh)) { 648 if (ext3_xattr_check_block(bs->bh)) {
649 ext3_error(sb, __FUNCTION__, 649 ext3_error(sb, __FUNCTION__,
650 "inode %ld: bad block "E3FSBLK, inode->i_ino, 650 "inode %lu: bad block "E3FSBLK, inode->i_ino,
651 EXT3_I(inode)->i_file_acl); 651 EXT3_I(inode)->i_file_acl);
652 error = -EIO; 652 error = -EIO;
653 goto cleanup; 653 goto cleanup;
@@ -848,7 +848,7 @@ cleanup_dquot:
848 848
849bad_block: 849bad_block:
850 ext3_error(inode->i_sb, __FUNCTION__, 850 ext3_error(inode->i_sb, __FUNCTION__,
851 "inode %ld: bad block "E3FSBLK, inode->i_ino, 851 "inode %lu: bad block "E3FSBLK, inode->i_ino,
852 EXT3_I(inode)->i_file_acl); 852 EXT3_I(inode)->i_file_acl);
853 goto cleanup; 853 goto cleanup;
854 854
@@ -1077,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1077 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 1077 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
1078 if (!bh) { 1078 if (!bh) {
1079 ext3_error(inode->i_sb, __FUNCTION__, 1079 ext3_error(inode->i_sb, __FUNCTION__,
1080 "inode %ld: block "E3FSBLK" read error", inode->i_ino, 1080 "inode %lu: block "E3FSBLK" read error", inode->i_ino,
1081 EXT3_I(inode)->i_file_acl); 1081 EXT3_I(inode)->i_file_acl);
1082 goto cleanup; 1082 goto cleanup;
1083 } 1083 }
1084 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || 1084 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1085 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1085 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1086 ext3_error(inode->i_sb, __FUNCTION__, 1086 ext3_error(inode->i_sb, __FUNCTION__,
1087 "inode %ld: bad block "E3FSBLK, inode->i_ino, 1087 "inode %lu: bad block "E3FSBLK, inode->i_ino,
1088 EXT3_I(inode)->i_file_acl); 1088 EXT3_I(inode)->i_file_acl);
1089 goto cleanup; 1089 goto cleanup;
1090 } 1090 }
@@ -1211,7 +1211,7 @@ again:
1211 bh = sb_bread(inode->i_sb, ce->e_block); 1211 bh = sb_bread(inode->i_sb, ce->e_block);
1212 if (!bh) { 1212 if (!bh) {
1213 ext3_error(inode->i_sb, __FUNCTION__, 1213 ext3_error(inode->i_sb, __FUNCTION__,
1214 "inode %ld: block %lu read error", 1214 "inode %lu: block %lu read error",
1215 inode->i_ino, (unsigned long) ce->e_block); 1215 inode->i_ino, (unsigned long) ce->e_block);
1216 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1216 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
1217 EXT3_XATTR_REFCOUNT_MAX) { 1217 EXT3_XATTR_REFCOUNT_MAX) {
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 97b967b84fc6..82cc4f59e3ba 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -58,8 +58,7 @@ int __init fat_cache_init(void)
58 58
59void fat_cache_destroy(void) 59void fat_cache_destroy(void)
60{ 60{
61 if (kmem_cache_destroy(fat_cache_cachep)) 61 kmem_cache_destroy(fat_cache_cachep);
62 printk(KERN_INFO "fat_cache: not all structures were freed\n");
63} 62}
64 63
65static inline struct fat_cache *fat_cache_alloc(struct inode *inode) 64static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 1ee25232e6af..d50fc47169c1 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -13,6 +13,7 @@
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/blkdev.h>
16 17
17int fat_generic_ioctl(struct inode *inode, struct file *filp, 18int fat_generic_ioctl(struct inode *inode, struct file *filp,
18 unsigned int cmd, unsigned long arg) 19 unsigned int cmd, unsigned long arg)
@@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
112 } 113 }
113} 114}
114 115
116static int fat_file_release(struct inode *inode, struct file *filp)
117{
118 if ((filp->f_mode & FMODE_WRITE) &&
119 MSDOS_SB(inode->i_sb)->options.flush) {
120 fat_flush_inodes(inode->i_sb, inode, NULL);
121 blk_congestion_wait(WRITE, HZ/10);
122 }
123 return 0;
124}
125
115const struct file_operations fat_file_operations = { 126const struct file_operations fat_file_operations = {
116 .llseek = generic_file_llseek, 127 .llseek = generic_file_llseek,
117 .read = do_sync_read, 128 .read = do_sync_read,
@@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = {
121 .aio_read = generic_file_aio_read, 132 .aio_read = generic_file_aio_read,
122 .aio_write = generic_file_aio_write, 133 .aio_write = generic_file_aio_write,
123 .mmap = generic_file_mmap, 134 .mmap = generic_file_mmap,
135 .release = fat_file_release,
124 .ioctl = fat_generic_ioctl, 136 .ioctl = fat_generic_ioctl,
125 .fsync = file_fsync, 137 .fsync = file_fsync,
126 .sendfile = generic_file_sendfile, 138 .sendfile = generic_file_sendfile,
@@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode)
289 lock_kernel(); 301 lock_kernel();
290 fat_free(inode, nr_clusters); 302 fat_free(inode, nr_clusters);
291 unlock_kernel(); 303 unlock_kernel();
304 fat_flush_inodes(inode->i_sb, inode, NULL);
292} 305}
293 306
294struct inode_operations fat_file_inode_operations = { 307struct inode_operations fat_file_inode_operations = {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 31b7174176ba..045738032a83 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -24,6 +24,7 @@
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/parser.h> 25#include <linux/parser.h>
26#include <linux/uio.h> 26#include <linux/uio.h>
27#include <linux/writeback.h>
27#include <asm/unaligned.h> 28#include <asm/unaligned.h>
28 29
29#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 30#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -50,14 +51,14 @@ static int fat_add_cluster(struct inode *inode)
50 return err; 51 return err;
51} 52}
52 53
53static int __fat_get_blocks(struct inode *inode, sector_t iblock, 54static inline int __fat_get_block(struct inode *inode, sector_t iblock,
54 unsigned long *max_blocks, 55 unsigned long *max_blocks,
55 struct buffer_head *bh_result, int create) 56 struct buffer_head *bh_result, int create)
56{ 57{
57 struct super_block *sb = inode->i_sb; 58 struct super_block *sb = inode->i_sb;
58 struct msdos_sb_info *sbi = MSDOS_SB(sb); 59 struct msdos_sb_info *sbi = MSDOS_SB(sb);
59 sector_t phys;
60 unsigned long mapped_blocks; 60 unsigned long mapped_blocks;
61 sector_t phys;
61 int err, offset; 62 int err, offset;
62 63
63 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 64 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
@@ -73,7 +74,7 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock,
73 74
74 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { 75 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
75 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", 76 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
76 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); 77 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
77 return -EIO; 78 return -EIO;
78 } 79 }
79 80
@@ -93,34 +94,29 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock,
93 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 94 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
94 if (err) 95 if (err)
95 return err; 96 return err;
97
96 BUG_ON(!phys); 98 BUG_ON(!phys);
97 BUG_ON(*max_blocks != mapped_blocks); 99 BUG_ON(*max_blocks != mapped_blocks);
98 set_buffer_new(bh_result); 100 set_buffer_new(bh_result);
99 map_bh(bh_result, sb, phys); 101 map_bh(bh_result, sb, phys);
102
100 return 0; 103 return 0;
101} 104}
102 105
103static int fat_get_blocks(struct inode *inode, sector_t iblock, 106static int fat_get_block(struct inode *inode, sector_t iblock,
104 struct buffer_head *bh_result, int create) 107 struct buffer_head *bh_result, int create)
105{ 108{
106 struct super_block *sb = inode->i_sb; 109 struct super_block *sb = inode->i_sb;
107 int err;
108 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; 110 unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
111 int err;
109 112
110 err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); 113 err = __fat_get_block(inode, iblock, &max_blocks, bh_result, create);
111 if (err) 114 if (err)
112 return err; 115 return err;
113 bh_result->b_size = max_blocks << sb->s_blocksize_bits; 116 bh_result->b_size = max_blocks << sb->s_blocksize_bits;
114 return 0; 117 return 0;
115} 118}
116 119
117static int fat_get_block(struct inode *inode, sector_t iblock,
118 struct buffer_head *bh_result, int create)
119{
120 unsigned long max_blocks = 1;
121 return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
122}
123
124static int fat_writepage(struct page *page, struct writeback_control *wbc) 120static int fat_writepage(struct page *page, struct writeback_control *wbc)
125{ 121{
126 return block_write_full_page(page, fat_get_block, wbc); 122 return block_write_full_page(page, fat_get_block, wbc);
@@ -188,7 +184,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
188 * condition of fat_get_block() and ->truncate(). 184 * condition of fat_get_block() and ->truncate().
189 */ 185 */
190 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 186 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
191 offset, nr_segs, fat_get_blocks, NULL); 187 offset, nr_segs, fat_get_block, NULL);
192} 188}
193 189
194static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 190static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
@@ -375,8 +371,6 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
375 inode->i_flags |= S_IMMUTABLE; 371 inode->i_flags |= S_IMMUTABLE;
376 } 372 }
377 MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; 373 MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
378 /* this is as close to the truth as we can get ... */
379 inode->i_blksize = sbi->cluster_size;
380 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 374 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
381 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 375 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
382 inode->i_mtime.tv_sec = 376 inode->i_mtime.tv_sec =
@@ -528,8 +522,7 @@ static int __init fat_init_inodecache(void)
528 522
529static void __exit fat_destroy_inodecache(void) 523static void __exit fat_destroy_inodecache(void)
530{ 524{
531 if (kmem_cache_destroy(fat_inode_cachep)) 525 kmem_cache_destroy(fat_inode_cachep);
532 printk(KERN_INFO "fat_inode_cache: not all structures were freed\n");
533} 526}
534 527
535static int fat_remount(struct super_block *sb, int *flags, char *data) 528static int fat_remount(struct super_block *sb, int *flags, char *data)
@@ -861,7 +854,7 @@ enum {
861 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 854 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
862 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 855 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
863 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 856 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
864 Opt_obsolate, Opt_err, 857 Opt_obsolate, Opt_flush, Opt_err,
865}; 858};
866 859
867static match_table_t fat_tokens = { 860static match_table_t fat_tokens = {
@@ -893,7 +886,8 @@ static match_table_t fat_tokens = {
893 {Opt_obsolate, "cvf_format=%20s"}, 886 {Opt_obsolate, "cvf_format=%20s"},
894 {Opt_obsolate, "cvf_options=%100s"}, 887 {Opt_obsolate, "cvf_options=%100s"},
895 {Opt_obsolate, "posix"}, 888 {Opt_obsolate, "posix"},
896 {Opt_err, NULL} 889 {Opt_flush, "flush"},
890 {Opt_err, NULL},
897}; 891};
898static match_table_t msdos_tokens = { 892static match_table_t msdos_tokens = {
899 {Opt_nodots, "nodots"}, 893 {Opt_nodots, "nodots"},
@@ -1034,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1034 return 0; 1028 return 0;
1035 opts->codepage = option; 1029 opts->codepage = option;
1036 break; 1030 break;
1031 case Opt_flush:
1032 opts->flush = 1;
1033 break;
1037 1034
1038 /* msdos specific */ 1035 /* msdos specific */
1039 case Opt_dots: 1036 case Opt_dots:
@@ -1137,7 +1134,6 @@ static int fat_read_root(struct inode *inode)
1137 MSDOS_I(inode)->i_start = 0; 1134 MSDOS_I(inode)->i_start = 0;
1138 inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry); 1135 inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
1139 } 1136 }
1140 inode->i_blksize = sbi->cluster_size;
1141 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 1137 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
1142 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 1138 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
1143 MSDOS_I(inode)->i_logstart = 0; 1139 MSDOS_I(inode)->i_logstart = 0;
@@ -1168,11 +1164,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1168 long error; 1164 long error;
1169 char buf[50]; 1165 char buf[50];
1170 1166
1171 sbi = kmalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); 1167 sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
1172 if (!sbi) 1168 if (!sbi)
1173 return -ENOMEM; 1169 return -ENOMEM;
1174 sb->s_fs_info = sbi; 1170 sb->s_fs_info = sbi;
1175 memset(sbi, 0, sizeof(struct msdos_sb_info));
1176 1171
1177 sb->s_flags |= MS_NODIRATIME; 1172 sb->s_flags |= MS_NODIRATIME;
1178 sb->s_magic = MSDOS_SUPER_MAGIC; 1173 sb->s_magic = MSDOS_SUPER_MAGIC;
@@ -1435,6 +1430,56 @@ out_fail:
1435 1430
1436EXPORT_SYMBOL_GPL(fat_fill_super); 1431EXPORT_SYMBOL_GPL(fat_fill_super);
1437 1432
1433/*
1434 * helper function for fat_flush_inodes. This writes both the inode
1435 * and the file data blocks, waiting for in flight data blocks before
1436 * the start of the call. It does not wait for any io started
1437 * during the call
1438 */
1439static int writeback_inode(struct inode *inode)
1440{
1441
1442 int ret;
1443 struct address_space *mapping = inode->i_mapping;
1444 struct writeback_control wbc = {
1445 .sync_mode = WB_SYNC_NONE,
1446 .nr_to_write = 0,
1447 };
1448 /* if we used WB_SYNC_ALL, sync_inode waits for the io for the
1449 * inode to finish. So WB_SYNC_NONE is sent down to sync_inode
1450 * and filemap_fdatawrite is used for the data blocks
1451 */
1452 ret = sync_inode(inode, &wbc);
1453 if (!ret)
1454 ret = filemap_fdatawrite(mapping);
1455 return ret;
1456}
1457
1458/*
1459 * write data and metadata corresponding to i1 and i2. The io is
1460 * started but we do not wait for any of it to finish.
1461 *
1462 * filemap_flush is used for the block device, so if there is a dirty
1463 * page for a block already in flight, we will not wait and start the
1464 * io over again
1465 */
1466int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
1467{
1468 int ret = 0;
1469 if (!MSDOS_SB(sb)->options.flush)
1470 return 0;
1471 if (i1)
1472 ret = writeback_inode(i1);
1473 if (!ret && i2)
1474 ret = writeback_inode(i2);
1475 if (!ret && sb) {
1476 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
1477 ret = filemap_flush(mapping);
1478 }
1479 return ret;
1480}
1481EXPORT_SYMBOL_GPL(fat_flush_inodes);
1482
1438static int __init init_fat_fs(void) 1483static int __init init_fat_fs(void)
1439{ 1484{
1440 int err; 1485 int err;
diff --git a/fs/file.c b/fs/file.c
index b3c6b82e6a9d..8e81775c5dc8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -281,80 +281,70 @@ static struct fdtable *alloc_fdtable(int nr)
281out2: 281out2:
282 nfds = fdt->max_fdset; 282 nfds = fdt->max_fdset;
283out: 283out:
284 if (new_openset) 284 free_fdset(new_openset, nfds);
285 free_fdset(new_openset, nfds); 285 free_fdset(new_execset, nfds);
286 if (new_execset)
287 free_fdset(new_execset, nfds);
288 kfree(fdt); 286 kfree(fdt);
289 return NULL; 287 return NULL;
290} 288}
291 289
292/* 290/*
293 * Expands the file descriptor table - it will allocate a new fdtable and 291 * Expand the file descriptor table.
294 * both fd array and fdset. It is expected to be called with the 292 * This function will allocate a new fdtable and both fd array and fdset, of
295 * files_lock held. 293 * the given size.
294 * Return <0 error code on error; 1 on successful completion.
295 * The files->file_lock should be held on entry, and will be held on exit.
296 */ 296 */
297static int expand_fdtable(struct files_struct *files, int nr) 297static int expand_fdtable(struct files_struct *files, int nr)
298 __releases(files->file_lock) 298 __releases(files->file_lock)
299 __acquires(files->file_lock) 299 __acquires(files->file_lock)
300{ 300{
301 int error = 0; 301 struct fdtable *new_fdt, *cur_fdt;
302 struct fdtable *fdt;
303 struct fdtable *nfdt = NULL;
304 302
305 spin_unlock(&files->file_lock); 303 spin_unlock(&files->file_lock);
306 nfdt = alloc_fdtable(nr); 304 new_fdt = alloc_fdtable(nr);
307 if (!nfdt) {
308 error = -ENOMEM;
309 spin_lock(&files->file_lock);
310 goto out;
311 }
312
313 spin_lock(&files->file_lock); 305 spin_lock(&files->file_lock);
314 fdt = files_fdtable(files); 306 if (!new_fdt)
307 return -ENOMEM;
315 /* 308 /*
316 * Check again since another task may have expanded the 309 * Check again since another task may have expanded the fd table while
317 * fd table while we dropped the lock 310 * we dropped the lock
318 */ 311 */
319 if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { 312 cur_fdt = files_fdtable(files);
320 copy_fdtable(nfdt, fdt); 313 if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
314 /* Continue as planned */
315 copy_fdtable(new_fdt, cur_fdt);
316 rcu_assign_pointer(files->fdt, new_fdt);
317 free_fdtable(cur_fdt);
321 } else { 318 } else {
322 /* Somebody expanded while we dropped file_lock */ 319 /* Somebody else expanded, so undo our attempt */
323 spin_unlock(&files->file_lock); 320 __free_fdtable(new_fdt);
324 __free_fdtable(nfdt);
325 spin_lock(&files->file_lock);
326 goto out;
327 } 321 }
328 rcu_assign_pointer(files->fdt, nfdt); 322 return 1;
329 free_fdtable(fdt);
330out:
331 return error;
332} 323}
333 324
334/* 325/*
335 * Expand files. 326 * Expand files.
336 * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked. 327 * This function will expand the file structures, if the requested size exceeds
337 * Should be called with the files->file_lock spinlock held for write. 328 * the current capacity and there is room for expansion.
329 * Return <0 error code on error; 0 when nothing done; 1 when files were
330 * expanded and execution may have blocked.
331 * The files->file_lock should be held on entry, and will be held on exit.
338 */ 332 */
339int expand_files(struct files_struct *files, int nr) 333int expand_files(struct files_struct *files, int nr)
340{ 334{
341 int err, expand = 0;
342 struct fdtable *fdt; 335 struct fdtable *fdt;
343 336
344 fdt = files_fdtable(files); 337 fdt = files_fdtable(files);
345 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { 338 /* Do we need to expand? */
346 if (fdt->max_fdset >= NR_OPEN || 339 if (nr < fdt->max_fdset && nr < fdt->max_fds)
347 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { 340 return 0;
348 err = -EMFILE; 341 /* Can we expand? */
349 goto out; 342 if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
350 } 343 nr >= NR_OPEN)
351 expand = 1; 344 return -EMFILE;
352 if ((err = expand_fdtable(files, nr))) 345
353 goto out; 346 /* All good, so we try */
354 } 347 return expand_fdtable(files, nr);
355 err = expand;
356out:
357 return err;
358} 348}
359 349
360static void __devinit fdtable_defer_list_init(int cpu) 350static void __devinit fdtable_defer_list_init(int cpu)
diff --git a/fs/file_table.c b/fs/file_table.c
index 0131ba06e1ee..bc35a40417d7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -169,7 +169,7 @@ void fastcall __fput(struct file *file)
169 if (file->f_op && file->f_op->release) 169 if (file->f_op && file->f_op->release)
170 file->f_op->release(inode, file); 170 file->f_op->release(inode, file);
171 security_file_free(file); 171 security_file_free(file);
172 if (unlikely(inode->i_cdev != NULL)) 172 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
173 cdev_put(inode->i_cdev); 173 cdev_put(inode->i_cdev);
174 fops_put(file->f_op); 174 fops_put(file->f_op);
175 if (file->f_mode & FMODE_WRITE) 175 if (file->f_mode & FMODE_WRITE)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 9f1072836c8e..e3fa77c6ed56 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -69,8 +69,6 @@ int register_filesystem(struct file_system_type * fs)
69 int res = 0; 69 int res = 0;
70 struct file_system_type ** p; 70 struct file_system_type ** p;
71 71
72 if (!fs)
73 return -EINVAL;
74 if (fs->next) 72 if (fs->next)
75 return -EBUSY; 73 return -EBUSY;
76 INIT_LIST_HEAD(&fs->fs_supers); 74 INIT_LIST_HEAD(&fs->fs_supers);
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index d35979a58743..c8a92652612a 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -252,7 +252,7 @@ enum {
252 * Get filesystem private data from VFS inode. 252 * Get filesystem private data from VFS inode.
253 */ 253 */
254#define VXFS_INO(ip) \ 254#define VXFS_INO(ip) \
255 ((struct vxfs_inode_info *)(ip)->u.generic_ip) 255 ((struct vxfs_inode_info *)(ip)->i_private)
256 256
257/* 257/*
258 * Get filesystem private data from VFS superblock. 258 * Get filesystem private data from VFS superblock.
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ca6a39714771..4786d51ad3bd 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -239,11 +239,10 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
239 ip->i_ctime.tv_nsec = 0; 239 ip->i_ctime.tv_nsec = 0;
240 ip->i_mtime.tv_nsec = 0; 240 ip->i_mtime.tv_nsec = 0;
241 241
242 ip->i_blksize = PAGE_SIZE;
243 ip->i_blocks = vip->vii_blocks; 242 ip->i_blocks = vip->vii_blocks;
244 ip->i_generation = vip->vii_gen; 243 ip->i_generation = vip->vii_gen;
245 244
246 ip->u.generic_ip = (void *)vip; 245 ip->i_private = vip;
247 246
248} 247}
249 248
@@ -338,5 +337,5 @@ vxfs_read_inode(struct inode *ip)
338void 337void
339vxfs_clear_inode(struct inode *ip) 338vxfs_clear_inode(struct inode *ip)
340{ 339{
341 kmem_cache_free(vxfs_inode_cachep, ip->u.generic_ip); 340 kmem_cache_free(vxfs_inode_cachep, ip->i_private);
342} 341}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index b74b791fc23b..ac28b0835ffc 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -260,12 +260,17 @@ static struct file_system_type vxfs_fs_type = {
260static int __init 260static int __init
261vxfs_init(void) 261vxfs_init(void)
262{ 262{
263 int rv;
264
263 vxfs_inode_cachep = kmem_cache_create("vxfs_inode", 265 vxfs_inode_cachep = kmem_cache_create("vxfs_inode",
264 sizeof(struct vxfs_inode_info), 0, 266 sizeof(struct vxfs_inode_info), 0,
265 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); 267 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
266 if (vxfs_inode_cachep) 268 if (!vxfs_inode_cachep)
267 return register_filesystem(&vxfs_fs_type); 269 return -ENOMEM;
268 return -ENOMEM; 270 rv = register_filesystem(&vxfs_fs_type);
271 if (rv < 0)
272 kmem_cache_destroy(vxfs_inode_cachep);
273 return rv;
269} 274}
270 275
271static void __exit 276static void __exit
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 46fe60b2da23..79ec1f23d4d2 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
23{ 23{
24 struct fuse_conn *fc; 24 struct fuse_conn *fc;
25 mutex_lock(&fuse_mutex); 25 mutex_lock(&fuse_mutex);
26 fc = file->f_dentry->d_inode->u.generic_ip; 26 fc = file->f_dentry->d_inode->i_private;
27 if (fc) 27 if (fc)
28 fc = fuse_conn_get(fc); 28 fc = fuse_conn_get(fc);
29 mutex_unlock(&fuse_mutex); 29 mutex_unlock(&fuse_mutex);
@@ -98,7 +98,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
98 inode->i_op = iop; 98 inode->i_op = iop;
99 inode->i_fop = fop; 99 inode->i_fop = fop;
100 inode->i_nlink = nlink; 100 inode->i_nlink = nlink;
101 inode->u.generic_ip = fc; 101 inode->i_private = fc;
102 d_add(dentry, inode); 102 d_add(dentry, inode);
103 return dentry; 103 return dentry;
104} 104}
@@ -150,7 +150,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
150 150
151 for (i = fc->ctl_ndents - 1; i >= 0; i--) { 151 for (i = fc->ctl_ndents - 1; i >= 0; i--) {
152 struct dentry *dentry = fc->ctl_dentry[i]; 152 struct dentry *dentry = fc->ctl_dentry[i];
153 dentry->d_inode->u.generic_ip = NULL; 153 dentry->d_inode->i_private = NULL;
154 d_drop(dentry); 154 d_drop(dentry);
155 dput(dentry); 155 dput(dentry);
156 } 156 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1e2006caf158..4fc557c40cc0 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -212,6 +212,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
212 * Called with fc->lock, unlocks it 212 * Called with fc->lock, unlocks it
213 */ 213 */
214static void request_end(struct fuse_conn *fc, struct fuse_req *req) 214static void request_end(struct fuse_conn *fc, struct fuse_req *req)
215 __releases(fc->lock)
215{ 216{
216 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 217 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
217 req->end = NULL; 218 req->end = NULL;
@@ -640,6 +641,7 @@ static void request_wait(struct fuse_conn *fc)
640 */ 641 */
641static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, 642static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
642 const struct iovec *iov, unsigned long nr_segs) 643 const struct iovec *iov, unsigned long nr_segs)
644 __releases(fc->lock)
643{ 645{
644 struct fuse_copy_state cs; 646 struct fuse_copy_state cs;
645 struct fuse_in_header ih; 647 struct fuse_in_header ih;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 409ce6a7cca4..f85b2a282f13 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
776 if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) 776 if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
777 return -EACCES; 777 return -EACCES;
778 778
779 if (nd && (nd->flags & LOOKUP_ACCESS)) 779 if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR)))
780 return fuse_access(inode, mask); 780 return fuse_access(inode, mask);
781 return 0; 781 return 0;
782 } 782 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7d25092262ae..7d0a9aee01f2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -118,7 +118,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
118 inode->i_uid = attr->uid; 118 inode->i_uid = attr->uid;
119 inode->i_gid = attr->gid; 119 inode->i_gid = attr->gid;
120 i_size_write(inode, attr->size); 120 i_size_write(inode, attr->size);
121 inode->i_blksize = PAGE_CACHE_SIZE;
122 inode->i_blocks = attr->blocks; 121 inode->i_blocks = attr->blocks;
123 inode->i_atime.tv_sec = attr->atime; 122 inode->i_atime.tv_sec = attr->atime;
124 inode->i_atime.tv_nsec = attr->atimensec; 123 inode->i_atime.tv_nsec = attr->atimensec;
@@ -252,6 +251,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
252 memset(&outarg, 0, sizeof(outarg)); 251 memset(&outarg, 0, sizeof(outarg));
253 req->in.numargs = 0; 252 req->in.numargs = 0;
254 req->in.h.opcode = FUSE_STATFS; 253 req->in.h.opcode = FUSE_STATFS;
254 req->in.h.nodeid = get_node_id(dentry->d_inode);
255 req->out.numargs = 1; 255 req->out.numargs = 1;
256 req->out.args[0].size = 256 req->out.args[0].size =
257 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); 257 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
new file mode 100644
index 000000000000..9ccb78947171
--- /dev/null
+++ b/fs/generic_acl.c
@@ -0,0 +1,197 @@
1/*
2 * fs/generic_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/sched.h>
10#include <linux/fs.h>
11#include <linux/generic_acl.h>
12
13/**
14 * generic_acl_list - Generic xattr_handler->list() operation
15 * @ops: Filesystem specific getacl and setacl callbacks
16 */
17size_t
18generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
19 int type, char *list, size_t list_size)
20{
21 struct posix_acl *acl;
22 const char *name;
23 size_t size;
24
25 acl = ops->getacl(inode, type);
26 if (!acl)
27 return 0;
28 posix_acl_release(acl);
29
30 switch(type) {
31 case ACL_TYPE_ACCESS:
32 name = POSIX_ACL_XATTR_ACCESS;
33 break;
34
35 case ACL_TYPE_DEFAULT:
36 name = POSIX_ACL_XATTR_DEFAULT;
37 break;
38
39 default:
40 return 0;
41 }
42 size = strlen(name) + 1;
43 if (list && size <= list_size)
44 memcpy(list, name, size);
45 return size;
46}
47
48/**
49 * generic_acl_get - Generic xattr_handler->get() operation
50 * @ops: Filesystem specific getacl and setacl callbacks
51 */
52int
53generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
54 int type, void *buffer, size_t size)
55{
56 struct posix_acl *acl;
57 int error;
58
59 acl = ops->getacl(inode, type);
60 if (!acl)
61 return -ENODATA;
62 error = posix_acl_to_xattr(acl, buffer, size);
63 posix_acl_release(acl);
64
65 return error;
66}
67
68/**
69 * generic_acl_set - Generic xattr_handler->set() operation
70 * @ops: Filesystem specific getacl and setacl callbacks
71 */
72int
73generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
74 int type, const void *value, size_t size)
75{
76 struct posix_acl *acl = NULL;
77 int error;
78
79 if (S_ISLNK(inode->i_mode))
80 return -EOPNOTSUPP;
81 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
82 return -EPERM;
83 if (value) {
84 acl = posix_acl_from_xattr(value, size);
85 if (IS_ERR(acl))
86 return PTR_ERR(acl);
87 }
88 if (acl) {
89 mode_t mode;
90
91 error = posix_acl_valid(acl);
92 if (error)
93 goto failed;
94 switch(type) {
95 case ACL_TYPE_ACCESS:
96 mode = inode->i_mode;
97 error = posix_acl_equiv_mode(acl, &mode);
98 if (error < 0)
99 goto failed;
100 inode->i_mode = mode;
101 if (error == 0) {
102 posix_acl_release(acl);
103 acl = NULL;
104 }
105 break;
106
107 case ACL_TYPE_DEFAULT:
108 if (!S_ISDIR(inode->i_mode)) {
109 error = -EINVAL;
110 goto failed;
111 }
112 break;
113 }
114 }
115 ops->setacl(inode, type, acl);
116 error = 0;
117failed:
118 posix_acl_release(acl);
119 return error;
120}
121
122/**
123 * generic_acl_init - Take care of acl inheritance at @inode create time
124 * @ops: Filesystem specific getacl and setacl callbacks
125 *
126 * Files created inside a directory with a default ACL inherit the
127 * directory's default ACL.
128 */
129int
130generic_acl_init(struct inode *inode, struct inode *dir,
131 struct generic_acl_operations *ops)
132{
133 struct posix_acl *acl = NULL;
134 mode_t mode = inode->i_mode;
135 int error;
136
137 inode->i_mode = mode & ~current->fs->umask;
138 if (!S_ISLNK(inode->i_mode))
139 acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
140 if (acl) {
141 struct posix_acl *clone;
142
143 if (S_ISDIR(inode->i_mode)) {
144 clone = posix_acl_clone(acl, GFP_KERNEL);
145 error = -ENOMEM;
146 if (!clone)
147 goto cleanup;
148 ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
149 posix_acl_release(clone);
150 }
151 clone = posix_acl_clone(acl, GFP_KERNEL);
152 error = -ENOMEM;
153 if (!clone)
154 goto cleanup;
155 error = posix_acl_create_masq(clone, &mode);
156 if (error >= 0) {
157 inode->i_mode = mode;
158 if (error > 0)
159 ops->setacl(inode, ACL_TYPE_ACCESS, clone);
160 }
161 posix_acl_release(clone);
162 }
163 error = 0;
164
165cleanup:
166 posix_acl_release(acl);
167 return error;
168}
169
170/**
171 * generic_acl_chmod - change the access acl of @inode upon chmod()
 172 * @ops: Filesystem specific getacl and setacl callbacks
173 *
174 * A chmod also changes the permissions of the owner, group/mask, and
175 * other ACL entries.
176 */
177int
178generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
179{
180 struct posix_acl *acl, *clone;
181 int error = 0;
182
183 if (S_ISLNK(inode->i_mode))
184 return -EOPNOTSUPP;
185 acl = ops->getacl(inode, ACL_TYPE_ACCESS);
186 if (acl) {
187 clone = posix_acl_clone(acl, GFP_KERNEL);
188 posix_acl_release(acl);
189 if (!clone)
190 return -ENOMEM;
191 error = posix_acl_chmod_masq(clone, inode->i_mode);
192 if (!error)
193 ops->setacl(inode, ACL_TYPE_ACCESS, clone);
194 posix_acl_release(clone);
195 }
196 return error;
197}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 13231dd5ce66..0d200068d0af 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -249,10 +249,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
249 sb = tree->inode->i_sb; 249 sb = tree->inode->i_sb;
250 size = sizeof(struct hfs_bnode) + tree->pages_per_bnode * 250 size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
251 sizeof(struct page *); 251 sizeof(struct page *);
252 node = kmalloc(size, GFP_KERNEL); 252 node = kzalloc(size, GFP_KERNEL);
253 if (!node) 253 if (!node)
254 return NULL; 254 return NULL;
255 memset(node, 0, size);
256 node->tree = tree; 255 node->tree = tree;
257 node->this = cnid; 256 node->this = cnid;
258 set_bit(HFS_BNODE_NEW, &node->flags); 257 set_bit(HFS_BNODE_NEW, &node->flags);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 400357994319..5fd0ed71f923 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -21,10 +21,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
21 struct page *page; 21 struct page *page;
22 unsigned int size; 22 unsigned int size;
23 23
24 tree = kmalloc(sizeof(*tree), GFP_KERNEL); 24 tree = kzalloc(sizeof(*tree), GFP_KERNEL);
25 if (!tree) 25 if (!tree)
26 return NULL; 26 return NULL;
27 memset(tree, 0, sizeof(*tree));
28 27
29 init_MUTEX(&tree->tree_lock); 28 init_MUTEX(&tree->tree_lock);
30 spin_lock_init(&tree->hash_lock); 29 spin_lock_init(&tree->hash_lock);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 315cf44a90b2..d05641c35fc9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -154,7 +154,6 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
154 inode->i_gid = current->fsgid; 154 inode->i_gid = current->fsgid;
155 inode->i_nlink = 1; 155 inode->i_nlink = 1;
156 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 156 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
157 inode->i_blksize = HFS_SB(sb)->alloc_blksz;
158 HFS_I(inode)->flags = 0; 157 HFS_I(inode)->flags = 0;
159 HFS_I(inode)->rsrc_inode = NULL; 158 HFS_I(inode)->rsrc_inode = NULL;
160 HFS_I(inode)->fs_blocks = 0; 159 HFS_I(inode)->fs_blocks = 0;
@@ -284,7 +283,6 @@ static int hfs_read_inode(struct inode *inode, void *data)
284 inode->i_uid = hsb->s_uid; 283 inode->i_uid = hsb->s_uid;
285 inode->i_gid = hsb->s_gid; 284 inode->i_gid = hsb->s_gid;
286 inode->i_nlink = 1; 285 inode->i_nlink = 1;
287 inode->i_blksize = HFS_SB(inode->i_sb)->alloc_blksz;
288 286
289 if (idata->key) 287 if (idata->key)
290 HFS_I(inode)->cat_key = *idata->key; 288 HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 34937ee83ab1..d43b4fcc8ad3 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -356,11 +356,10 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
356 struct inode *root_inode; 356 struct inode *root_inode;
357 int res; 357 int res;
358 358
359 sbi = kmalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); 359 sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL);
360 if (!sbi) 360 if (!sbi)
361 return -ENOMEM; 361 return -ENOMEM;
362 sb->s_fs_info = sbi; 362 sb->s_fs_info = sbi;
363 memset(sbi, 0, sizeof(struct hfs_sb_info));
364 INIT_HLIST_HEAD(&sbi->rsrc_inodes); 363 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
365 364
366 res = -EINVAL; 365 res = -EINVAL;
@@ -455,8 +454,7 @@ static int __init init_hfs_fs(void)
455static void __exit exit_hfs_fs(void) 454static void __exit exit_hfs_fs(void)
456{ 455{
457 unregister_filesystem(&hfs_fs_type); 456 unregister_filesystem(&hfs_fs_type);
458 if (kmem_cache_destroy(hfs_inode_cachep)) 457 kmem_cache_destroy(hfs_inode_cachep);
459 printk(KERN_ERR "hfs_inode_cache: not all structures were freed\n");
460} 458}
461 459
462module_init(init_hfs_fs) 460module_init(init_hfs_fs)
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 77bf434da679..29da6574ba77 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -409,10 +409,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
409 sb = tree->inode->i_sb; 409 sb = tree->inode->i_sb;
410 size = sizeof(struct hfs_bnode) + tree->pages_per_bnode * 410 size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
411 sizeof(struct page *); 411 sizeof(struct page *);
412 node = kmalloc(size, GFP_KERNEL); 412 node = kzalloc(size, GFP_KERNEL);
413 if (!node) 413 if (!node)
414 return NULL; 414 return NULL;
415 memset(node, 0, size);
416 node->tree = tree; 415 node->tree = tree;
417 node->this = cnid; 416 node->this = cnid;
418 set_bit(HFS_BNODE_NEW, &node->flags); 417 set_bit(HFS_BNODE_NEW, &node->flags);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index cfc852fdd1b5..a9b9e872e29a 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -24,10 +24,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
24 struct page *page; 24 struct page *page;
25 unsigned int size; 25 unsigned int size;
26 26
27 tree = kmalloc(sizeof(*tree), GFP_KERNEL); 27 tree = kzalloc(sizeof(*tree), GFP_KERNEL);
28 if (!tree) 28 if (!tree)
29 return NULL; 29 return NULL;
30 memset(tree, 0, sizeof(*tree));
31 30
32 init_MUTEX(&tree->tree_lock); 31 init_MUTEX(&tree->tree_lock);
33 spin_lock_init(&tree->hash_lock); 32 spin_lock_init(&tree->hash_lock);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 924ecdef8091..0eb1a6092668 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -304,7 +304,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
304 inode->i_gid = current->fsgid; 304 inode->i_gid = current->fsgid;
305 inode->i_nlink = 1; 305 inode->i_nlink = 1;
306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
307 inode->i_blksize = HFSPLUS_SB(sb).alloc_blksz;
308 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 307 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
309 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 308 init_MUTEX(&HFSPLUS_I(inode).extents_lock);
310 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 309 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -407,7 +406,6 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
407 type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); 406 type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset);
408 407
409 HFSPLUS_I(inode).dev = 0; 408 HFSPLUS_I(inode).dev = 0;
410 inode->i_blksize = HFSPLUS_SB(inode->i_sb).alloc_blksz;
411 if (type == HFSPLUS_FOLDER) { 409 if (type == HFSPLUS_FOLDER) {
412 struct hfsplus_cat_folder *folder = &entry.folder; 410 struct hfsplus_cat_folder *folder = &entry.folder;
413 411
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d279d5924f28..194eede52fa4 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -493,8 +493,7 @@ static int __init init_hfsplus_fs(void)
493static void __exit exit_hfsplus_fs(void) 493static void __exit exit_hfsplus_fs(void)
494{ 494{
495 unregister_filesystem(&hfsplus_fs_type); 495 unregister_filesystem(&hfsplus_fs_type);
496 if (kmem_cache_destroy(hfsplus_inode_cachep)) 496 kmem_cache_destroy(hfsplus_inode_cachep);
497 printk(KERN_ERR "hfsplus_inode_cache: not all structures were freed\n");
498} 497}
499 498
500module_init(init_hfsplus_fs) 499module_init(init_hfsplus_fs)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b82e3d9c8790..322e876c35ed 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -156,7 +156,6 @@ static int read_name(struct inode *ino, char *name)
156 ino->i_mode = i_mode; 156 ino->i_mode = i_mode;
157 ino->i_nlink = i_nlink; 157 ino->i_nlink = i_nlink;
158 ino->i_size = i_size; 158 ino->i_size = i_size;
159 ino->i_blksize = i_blksize;
160 ino->i_blocks = i_blocks; 159 ino->i_blocks = i_blocks;
161 return(0); 160 return(0);
162} 161}
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 2807aa833e62..b52b7381d10f 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -76,7 +76,7 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
76 return NULL; 76 return NULL;
77 } 77 }
78 78
79 qbh->data = data = (char *)kmalloc(2048, GFP_NOFS); 79 qbh->data = data = kmalloc(2048, GFP_NOFS);
80 if (!data) { 80 if (!data) {
81 printk("HPFS: hpfs_map_4sectors: out of memory\n"); 81 printk("HPFS: hpfs_map_4sectors: out of memory\n");
82 goto bail; 82 goto bail;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index f687d54ed442..32ab51e42b96 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -12,7 +12,6 @@
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/hpfs_fs.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
18 17
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f2c338c4d9..bcf6ee36e065 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
17 i->i_gid = hpfs_sb(sb)->sb_gid; 17 i->i_gid = hpfs_sb(sb)->sb_gid;
18 i->i_mode = hpfs_sb(sb)->sb_mode; 18 i->i_mode = hpfs_sb(sb)->sb_mode;
19 hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv; 19 hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
20 i->i_blksize = 512;
21 i->i_size = -1; 20 i->i_size = -1;
22 i->i_blocks = -1; 21 i->i_blocks = -1;
23 22
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f798480a363f..450b5e0b4785 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -11,6 +11,7 @@
11#include <linux/parser.h> 11#include <linux/parser.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/statfs.h> 13#include <linux/statfs.h>
14#include <linux/magic.h>
14 15
15/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 16/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
16 17
@@ -202,8 +203,7 @@ static int init_inodecache(void)
202 203
203static void destroy_inodecache(void) 204static void destroy_inodecache(void)
204{ 205{
205 if (kmem_cache_destroy(hpfs_inode_cachep)) 206 kmem_cache_destroy(hpfs_inode_cachep);
206 printk(KERN_INFO "hpfs_inode_cache: not all structures were freed\n");
207} 207}
208 208
209/* 209/*
@@ -461,11 +461,10 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
461 461
462 int o; 462 int o;
463 463
464 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); 464 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
465 if (!sbi) 465 if (!sbi)
466 return -ENOMEM; 466 return -ENOMEM;
467 s->s_fs_info = sbi; 467 s->s_fs_info = sbi;
468 memset(sbi, 0, sizeof(*sbi));
469 468
470 sbi->sb_bmp_dir = NULL; 469 sbi->sb_bmp_dir = NULL;
471 sbi->sb_cp_table = NULL; 470 sbi->sb_cp_table = NULL;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 3a9bdf58166f..dcb6d2e988b8 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -152,7 +152,6 @@ static void hppfs_read_inode(struct inode *ino)
152 ino->i_mode = proc_ino->i_mode; 152 ino->i_mode = proc_ino->i_mode;
153 ino->i_nlink = proc_ino->i_nlink; 153 ino->i_nlink = proc_ino->i_nlink;
154 ino->i_size = proc_ino->i_size; 154 ino->i_size = proc_ino->i_size;
155 ino->i_blksize = proc_ino->i_blksize;
156 ino->i_blocks = proc_ino->i_blocks; 155 ino->i_blocks = proc_ino->i_blocks;
157} 156}
158 157
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c3920c96dadf..f5b8f329aca6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -229,7 +229,7 @@ static void hugetlbfs_delete_inode(struct inode *inode)
229 clear_inode(inode); 229 clear_inode(inode);
230} 230}
231 231
232static void hugetlbfs_forget_inode(struct inode *inode) 232static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
233{ 233{
234 struct super_block *sb = inode->i_sb; 234 struct super_block *sb = inode->i_sb;
235 235
@@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
357 inode->i_mode = mode; 357 inode->i_mode = mode;
358 inode->i_uid = uid; 358 inode->i_uid = uid;
359 inode->i_gid = gid; 359 inode->i_gid = gid;
360 inode->i_blksize = HPAGE_SIZE;
361 inode->i_blocks = 0; 360 inode->i_blocks = 0;
362 inode->i_mapping->a_ops = &hugetlbfs_aops; 361 inode->i_mapping->a_ops = &hugetlbfs_aops;
363 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 362 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
diff --git a/fs/inode.c b/fs/inode.c
index 0bf9f0444a96..abf77471e6c4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb)
133 inode->i_bdev = NULL; 133 inode->i_bdev = NULL;
134 inode->i_cdev = NULL; 134 inode->i_cdev = NULL;
135 inode->i_rdev = 0; 135 inode->i_rdev = 0;
136 inode->i_security = NULL;
137 inode->dirtied_when = 0; 136 inode->dirtied_when = 0;
138 if (security_inode_alloc(inode)) { 137 if (security_inode_alloc(inode)) {
139 if (inode->i_sb->s_op->destroy_inode) 138 if (inode->i_sb->s_op->destroy_inode)
@@ -163,7 +162,7 @@ static struct inode *alloc_inode(struct super_block *sb)
163 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 162 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
164 mapping->backing_dev_info = bdi; 163 mapping->backing_dev_info = bdi;
165 } 164 }
166 memset(&inode->u, 0, sizeof(inode->u)); 165 inode->i_private = 0;
167 inode->i_mapping = mapping; 166 inode->i_mapping = mapping;
168 } 167 }
169 return inode; 168 return inode;
@@ -254,9 +253,9 @@ void clear_inode(struct inode *inode)
254 DQUOT_DROP(inode); 253 DQUOT_DROP(inode);
255 if (inode->i_sb && inode->i_sb->s_op->clear_inode) 254 if (inode->i_sb && inode->i_sb->s_op->clear_inode)
256 inode->i_sb->s_op->clear_inode(inode); 255 inode->i_sb->s_op->clear_inode(inode);
257 if (inode->i_bdev) 256 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
258 bd_forget(inode); 257 bd_forget(inode);
259 if (inode->i_cdev) 258 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
260 cd_forget(inode); 259 cd_forget(inode);
261 inode->i_state = I_CLEAR; 260 inode->i_state = I_CLEAR;
262} 261}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 14391361c886..c34b862cdbf2 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -96,9 +96,7 @@ static int init_inodecache(void)
96 96
97static void destroy_inodecache(void) 97static void destroy_inodecache(void)
98{ 98{
99 if (kmem_cache_destroy(isofs_inode_cachep)) 99 kmem_cache_destroy(isofs_inode_cachep);
100 printk(KERN_INFO "iso_inode_cache: not all structures were "
101 "freed\n");
102} 100}
103 101
104static int isofs_remount(struct super_block *sb, int *flags, char *data) 102static int isofs_remount(struct super_block *sb, int *flags, char *data)
@@ -557,11 +555,10 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
557 struct iso9660_options opt; 555 struct iso9660_options opt;
558 struct isofs_sb_info * sbi; 556 struct isofs_sb_info * sbi;
559 557
560 sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); 558 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
561 if (!sbi) 559 if (!sbi)
562 return -ENOMEM; 560 return -ENOMEM;
563 s->s_fs_info = sbi; 561 s->s_fs_info = sbi;
564 memset(sbi, 0, sizeof(*sbi));
565 562
566 if (!parse_options((char *)data, &opt)) 563 if (!parse_options((char *)data, &opt))
567 goto out_freesbi; 564 goto out_freesbi;
@@ -963,30 +960,30 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
963 goto abort; 960 goto abort;
964 } 961 }
965 962
966 if (nextblk) { 963 /* On the last section, nextblk == 0, section size is likely to
967 while (b_off >= (offset + sect_size)) { 964 * exceed sect_size by a partial block, and access beyond the
968 struct inode *ninode; 965 * end of the file will reach beyond the section size, too.
969 966 */
970 offset += sect_size; 967 while (nextblk && (b_off >= (offset + sect_size))) {
971 if (nextblk == 0) 968 struct inode *ninode;
972 goto abort; 969
973 ninode = isofs_iget(inode->i_sb, nextblk, nextoff); 970 offset += sect_size;
974 if (!ninode) 971 ninode = isofs_iget(inode->i_sb, nextblk, nextoff);
975 goto abort; 972 if (!ninode)
976 firstext = ISOFS_I(ninode)->i_first_extent; 973 goto abort;
977 sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); 974 firstext = ISOFS_I(ninode)->i_first_extent;
978 nextblk = ISOFS_I(ninode)->i_next_section_block; 975 sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode);
979 nextoff = ISOFS_I(ninode)->i_next_section_offset; 976 nextblk = ISOFS_I(ninode)->i_next_section_block;
980 iput(ninode); 977 nextoff = ISOFS_I(ninode)->i_next_section_offset;
981 978 iput(ninode);
982 if (++section > 100) { 979
983 printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); 980 if (++section > 100) {
984 printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " 981 printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n");
985 "nextblk=%lu nextoff=%lu\n", 982 printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u "
986 iblock, firstext, (unsigned) sect_size, 983 "nextblk=%lu nextoff=%lu\n",
987 nextblk, nextoff); 984 iblock, firstext, (unsigned) sect_size,
988 goto abort; 985 nextblk, nextoff);
989 } 986 goto abort;
990 } 987 }
991 } 988 }
992 989
@@ -1238,7 +1235,7 @@ static void isofs_read_inode(struct inode *inode)
1238 } 1235 }
1239 inode->i_uid = sbi->s_uid; 1236 inode->i_uid = sbi->s_uid;
1240 inode->i_gid = sbi->s_gid; 1237 inode->i_gid = sbi->s_gid;
1241 inode->i_blocks = inode->i_blksize = 0; 1238 inode->i_blocks = 0;
1242 1239
1243 ei->i_format_parm[0] = 0; 1240 ei->i_format_parm[0] = 0;
1244 ei->i_format_parm[1] = 0; 1241 ei->i_format_parm[1] = 0;
@@ -1294,7 +1291,6 @@ static void isofs_read_inode(struct inode *inode)
1294 isonum_711 (de->ext_attr_length)); 1291 isonum_711 (de->ext_attr_length));
1295 1292
1296 /* Set the number of blocks for stat() - should be done before RR */ 1293 /* Set the number of blocks for stat() - should be done before RR */
1297 inode->i_blksize = PAGE_CACHE_SIZE; /* For stat() only */
1298 inode->i_blocks = (inode->i_size + 511) >> 9; 1294 inode->i_blocks = (inode->i_size + 511) >> 9;
1299 1295
1300 /* 1296 /*
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 47678a26c13b..0208cc7ac5d0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/checkpoint.c 2 * linux/fs/checkpoint.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999 Red Hat Software --- All Rights Reserved 6 * Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
9 * the terms of the GNU General Public License, version 2, or at your 9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Checkpoint routines for the generic filesystem journaling code. 12 * Checkpoint routines for the generic filesystem journaling code.
13 * Part of the ext2fs journaling system. 13 * Part of the ext2fs journaling system.
14 * 14 *
15 * Checkpointing is the process of ensuring that a section of the log is 15 * Checkpointing is the process of ensuring that a section of the log is
16 * committed fully to disk, so that that portion of the log can be 16 * committed fully to disk, so that that portion of the log can be
@@ -145,6 +145,7 @@ void __log_wait_for_space(journal_t *journal)
145 * jbd_unlock_bh_state(). 145 * jbd_unlock_bh_state().
146 */ 146 */
147static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) 147static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
148 __releases(journal->j_list_lock)
148{ 149{
149 get_bh(bh); 150 get_bh(bh);
150 spin_unlock(&journal->j_list_lock); 151 spin_unlock(&journal->j_list_lock);
@@ -225,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
225 * Try to flush one buffer from the checkpoint list to disk. 226 * Try to flush one buffer from the checkpoint list to disk.
226 * 227 *
227 * Return 1 if something happened which requires us to abort the current 228 * Return 1 if something happened which requires us to abort the current
228 * scan of the checkpoint list. 229 * scan of the checkpoint list.
229 * 230 *
230 * Called with j_list_lock held and drops it if 1 is returned 231 * Called with j_list_lock held and drops it if 1 is returned
231 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -269,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
269 * possibly block, while still holding the journal lock. 270 * possibly block, while still holding the journal lock.
270 * We cannot afford to let the transaction logic start 271 * We cannot afford to let the transaction logic start
271 * messing around with this buffer before we write it to 272 * messing around with this buffer before we write it to
272 * disk, as that would break recoverability. 273 * disk, as that would break recoverability.
273 */ 274 */
274 BUFFER_TRACE(bh, "queue"); 275 BUFFER_TRACE(bh, "queue");
275 get_bh(bh); 276 get_bh(bh);
@@ -292,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
292 * Perform an actual checkpoint. We take the first transaction on the 293 * Perform an actual checkpoint. We take the first transaction on the
293 * list of transactions to be checkpointed and send all its buffers 294 * list of transactions to be checkpointed and send all its buffers
294 * to disk. We submit larger chunks of data at once. 295 * to disk. We submit larger chunks of data at once.
295 * 296 *
296 * The journal should be locked before calling this function. 297 * The journal should be locked before calling this function.
297 */ 298 */
298int log_do_checkpoint(journal_t *journal) 299int log_do_checkpoint(journal_t *journal)
@@ -303,10 +304,10 @@ int log_do_checkpoint(journal_t *journal)
303 304
304 jbd_debug(1, "Start checkpoint\n"); 305 jbd_debug(1, "Start checkpoint\n");
305 306
306 /* 307 /*
307 * First thing: if there are any transactions in the log which 308 * First thing: if there are any transactions in the log which
308 * don't need checkpointing, just eliminate them from the 309 * don't need checkpointing, just eliminate them from the
309 * journal straight away. 310 * journal straight away.
310 */ 311 */
311 result = cleanup_journal_tail(journal); 312 result = cleanup_journal_tail(journal);
312 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 313 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -384,9 +385,9 @@ out:
384 * we have already got rid of any since the last update of the log tail 385 * we have already got rid of any since the last update of the log tail
385 * in the journal superblock. If so, we can instantly roll the 386 * in the journal superblock. If so, we can instantly roll the
386 * superblock forward to remove those transactions from the log. 387 * superblock forward to remove those transactions from the log.
387 * 388 *
388 * Return <0 on error, 0 on success, 1 if there was nothing to clean up. 389 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
389 * 390 *
390 * Called with the journal lock held. 391 * Called with the journal lock held.
391 * 392 *
392 * This is the only part of the journaling code which really needs to be 393 * This is the only part of the journaling code which really needs to be
@@ -403,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal)
403 unsigned long blocknr, freed; 404 unsigned long blocknr, freed;
404 405
405 /* OK, work out the oldest transaction remaining in the log, and 406 /* OK, work out the oldest transaction remaining in the log, and
406 * the log block it starts at. 407 * the log block it starts at.
407 * 408 *
408 * If the log is now empty, we need to work out which is the 409 * If the log is now empty, we need to work out which is the
409 * next transaction ID we will write, and where it will 410 * next transaction ID we will write, and where it will
410 * start. */ 411 * start. */
@@ -479,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
479 if (!jh) 480 if (!jh)
480 return 0; 481 return 0;
481 482
482 last_jh = jh->b_cpprev; 483 last_jh = jh->b_cpprev;
483 do { 484 do {
484 jh = next_jh; 485 jh = next_jh;
485 next_jh = jh->b_cpnext; 486 next_jh = jh->b_cpnext;
@@ -557,7 +558,7 @@ out:
557 return ret; 558 return ret;
558} 559}
559 560
560/* 561/*
561 * journal_remove_checkpoint: called after a buffer has been committed 562 * journal_remove_checkpoint: called after a buffer has been committed
562 * to disk (either by being write-back flushed to disk, or being 563 * to disk (either by being write-back flushed to disk, or being
563 * committed to the log). 564 * committed to the log).
@@ -635,7 +636,7 @@ out:
635 * Called with the journal locked. 636 * Called with the journal locked.
636 * Called with j_list_lock held. 637 * Called with j_list_lock held.
637 */ 638 */
638void __journal_insert_checkpoint(struct journal_head *jh, 639void __journal_insert_checkpoint(struct journal_head *jh,
639 transaction_t *transaction) 640 transaction_t *transaction)
640{ 641{
641 JBUFFER_TRACE(jh, "entry"); 642 JBUFFER_TRACE(jh, "entry");
@@ -657,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
657 658
658/* 659/*
659 * We've finished with this transaction structure: adios... 660 * We've finished with this transaction structure: adios...
660 * 661 *
661 * The transaction must have no links except for the checkpoint by this 662 * The transaction must have no links except for the checkpoint by this
662 * point. 663 * point.
663 * 664 *
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 42da60784311..32a8caf0c41e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
160 return (ret == -EIO); 160 return (ret == -EIO);
161} 161}
162 162
163static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
164{
165 int i;
166
167 for (i = 0; i < bufs; i++) {
168 wbuf[i]->b_end_io = end_buffer_write_sync;
169 /* We use-up our safety reference in submit_bh() */
170 submit_bh(WRITE, wbuf[i]);
171 }
172}
173
174/*
175 * Submit all the data buffers to disk
176 */
177static void journal_submit_data_buffers(journal_t *journal,
178 transaction_t *commit_transaction)
179{
180 struct journal_head *jh;
181 struct buffer_head *bh;
182 int locked;
183 int bufs = 0;
184 struct buffer_head **wbuf = journal->j_wbuf;
185
186 /*
187 * Whenever we unlock the journal and sleep, things can get added
188 * onto ->t_sync_datalist, so we have to keep looping back to
189 * write_out_data until we *know* that the list is empty.
190 *
191 * Cleanup any flushed data buffers from the data list. Even in
192 * abort mode, we want to flush this out as soon as possible.
193 */
194write_out_data:
195 cond_resched();
196 spin_lock(&journal->j_list_lock);
197
198 while (commit_transaction->t_sync_datalist) {
199 jh = commit_transaction->t_sync_datalist;
200 bh = jh2bh(jh);
201 locked = 0;
202
203 /* Get reference just to make sure buffer does not disappear
204 * when we are forced to drop various locks */
205 get_bh(bh);
206 /* If the buffer is dirty, we need to submit IO and hence
207 * we need the buffer lock. We try to lock the buffer without
208 * blocking. If we fail, we need to drop j_list_lock and do
209 * blocking lock_buffer().
210 */
211 if (buffer_dirty(bh)) {
212 if (test_set_buffer_locked(bh)) {
213 BUFFER_TRACE(bh, "needs blocking lock");
214 spin_unlock(&journal->j_list_lock);
215 /* Write out all data to prevent deadlocks */
216 journal_do_submit_data(wbuf, bufs);
217 bufs = 0;
218 lock_buffer(bh);
219 spin_lock(&journal->j_list_lock);
220 }
221 locked = 1;
222 }
223 /* We have to get bh_state lock. Again out of order, sigh. */
224 if (!inverted_lock(journal, bh)) {
225 jbd_lock_bh_state(bh);
226 spin_lock(&journal->j_list_lock);
227 }
228 /* Someone already cleaned up the buffer? */
229 if (!buffer_jbd(bh)
230 || jh->b_transaction != commit_transaction
231 || jh->b_jlist != BJ_SyncData) {
232 jbd_unlock_bh_state(bh);
233 if (locked)
234 unlock_buffer(bh);
235 BUFFER_TRACE(bh, "already cleaned up");
236 put_bh(bh);
237 continue;
238 }
239 if (locked && test_clear_buffer_dirty(bh)) {
240 BUFFER_TRACE(bh, "needs writeout, adding to array");
241 wbuf[bufs++] = bh;
242 __journal_file_buffer(jh, commit_transaction,
243 BJ_Locked);
244 jbd_unlock_bh_state(bh);
245 if (bufs == journal->j_wbufsize) {
246 spin_unlock(&journal->j_list_lock);
247 journal_do_submit_data(wbuf, bufs);
248 bufs = 0;
249 goto write_out_data;
250 }
251 }
252 else {
253 BUFFER_TRACE(bh, "writeout complete: unfile");
254 __journal_unfile_buffer(jh);
255 jbd_unlock_bh_state(bh);
256 if (locked)
257 unlock_buffer(bh);
258 journal_remove_journal_head(bh);
259 /* Once for our safety reference, once for
260 * journal_remove_journal_head() */
261 put_bh(bh);
262 put_bh(bh);
263 }
264
265 if (lock_need_resched(&journal->j_list_lock)) {
266 spin_unlock(&journal->j_list_lock);
267 goto write_out_data;
268 }
269 }
270 spin_unlock(&journal->j_list_lock);
271 journal_do_submit_data(wbuf, bufs);
272}
273
163/* 274/*
164 * journal_commit_transaction 275 * journal_commit_transaction
165 * 276 *
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
313 * Now start flushing things to disk, in the order they appear 424 * Now start flushing things to disk, in the order they appear
314 * on the transaction lists. Data blocks go first. 425 * on the transaction lists. Data blocks go first.
315 */ 426 */
316
317 err = 0; 427 err = 0;
318 /* 428 journal_submit_data_buffers(journal, commit_transaction);
319 * Whenever we unlock the journal and sleep, things can get added
320 * onto ->t_sync_datalist, so we have to keep looping back to
321 * write_out_data until we *know* that the list is empty.
322 */
323 bufs = 0;
324 /*
325 * Cleanup any flushed data buffers from the data list. Even in
326 * abort mode, we want to flush this out as soon as possible.
327 */
328write_out_data:
329 cond_resched();
330 spin_lock(&journal->j_list_lock);
331
332 while (commit_transaction->t_sync_datalist) {
333 struct buffer_head *bh;
334
335 jh = commit_transaction->t_sync_datalist;
336 commit_transaction->t_sync_datalist = jh->b_tnext;
337 bh = jh2bh(jh);
338 if (buffer_locked(bh)) {
339 BUFFER_TRACE(bh, "locked");
340 if (!inverted_lock(journal, bh))
341 goto write_out_data;
342 __journal_temp_unlink_buffer(jh);
343 __journal_file_buffer(jh, commit_transaction,
344 BJ_Locked);
345 jbd_unlock_bh_state(bh);
346 if (lock_need_resched(&journal->j_list_lock)) {
347 spin_unlock(&journal->j_list_lock);
348 goto write_out_data;
349 }
350 } else {
351 if (buffer_dirty(bh)) {
352 BUFFER_TRACE(bh, "start journal writeout");
353 get_bh(bh);
354 wbuf[bufs++] = bh;
355 if (bufs == journal->j_wbufsize) {
356 jbd_debug(2, "submit %d writes\n",
357 bufs);
358 spin_unlock(&journal->j_list_lock);
359 ll_rw_block(SWRITE, bufs, wbuf);
360 journal_brelse_array(wbuf, bufs);
361 bufs = 0;
362 goto write_out_data;
363 }
364 } else {
365 BUFFER_TRACE(bh, "writeout complete: unfile");
366 if (!inverted_lock(journal, bh))
367 goto write_out_data;
368 __journal_unfile_buffer(jh);
369 jbd_unlock_bh_state(bh);
370 journal_remove_journal_head(bh);
371 put_bh(bh);
372 if (lock_need_resched(&journal->j_list_lock)) {
373 spin_unlock(&journal->j_list_lock);
374 goto write_out_data;
375 }
376 }
377 }
378 }
379
380 if (bufs) {
381 spin_unlock(&journal->j_list_lock);
382 ll_rw_block(SWRITE, bufs, wbuf);
383 journal_brelse_array(wbuf, bufs);
384 spin_lock(&journal->j_list_lock);
385 }
386 429
387 /* 430 /*
388 * Wait for all previously submitted IO to complete. 431 * Wait for all previously submitted IO to complete.
389 */ 432 */
433 spin_lock(&journal->j_list_lock);
390 while (commit_transaction->t_locked_list) { 434 while (commit_transaction->t_locked_list) {
391 struct buffer_head *bh; 435 struct buffer_head *bh;
392 436
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f66724ce443a..7af6099c911c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -181,7 +181,7 @@ loop:
181 transaction->t_expires)) 181 transaction->t_expires))
182 should_sleep = 0; 182 should_sleep = 0;
183 if (journal->j_flags & JFS_UNMOUNT) 183 if (journal->j_flags & JFS_UNMOUNT)
184 should_sleep = 0; 184 should_sleep = 0;
185 if (should_sleep) { 185 if (should_sleep) {
186 spin_unlock(&journal->j_state_lock); 186 spin_unlock(&journal->j_state_lock);
187 schedule(); 187 schedule();
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
271int journal_write_metadata_buffer(transaction_t *transaction, 271int journal_write_metadata_buffer(transaction_t *transaction,
272 struct journal_head *jh_in, 272 struct journal_head *jh_in,
273 struct journal_head **jh_out, 273 struct journal_head **jh_out,
274 int blocknr) 274 unsigned long blocknr)
275{ 275{
276 int need_copy_out = 0; 276 int need_copy_out = 0;
277 int done_copy_out = 0; 277 int done_copy_out = 0;
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
578 * this is a no-op. If needed, we can use j_blk_offset - everything is 578 * this is a no-op. If needed, we can use j_blk_offset - everything is
579 * ready. 579 * ready.
580 */ 580 */
581int journal_bmap(journal_t *journal, unsigned long blocknr, 581int journal_bmap(journal_t *journal, unsigned long blocknr,
582 unsigned long *retp) 582 unsigned long *retp)
583{ 583{
584 int err = 0; 584 int err = 0;
@@ -696,13 +696,13 @@ fail:
696 * @bdev: Block device on which to create the journal 696 * @bdev: Block device on which to create the journal
697 * @fs_dev: Device which hold journalled filesystem for this journal. 697 * @fs_dev: Device which hold journalled filesystem for this journal.
698 * @start: Block nr Start of journal. 698 * @start: Block nr Start of journal.
699 * @len: Lenght of the journal in blocks. 699 * @len: Length of the journal in blocks.
700 * @blocksize: blocksize of journalling device 700 * @blocksize: blocksize of journalling device
701 * @returns: a newly created journal_t * 701 * @returns: a newly created journal_t *
702 * 702 *
703 * journal_init_dev creates a journal which maps a fixed contiguous 703 * journal_init_dev creates a journal which maps a fixed contiguous
704 * range of blocks on an arbitrary block device. 704 * range of blocks on an arbitrary block device.
705 * 705 *
706 */ 706 */
707journal_t * journal_init_dev(struct block_device *bdev, 707journal_t * journal_init_dev(struct block_device *bdev,
708 struct block_device *fs_dev, 708 struct block_device *fs_dev,
@@ -715,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev,
715 if (!journal) 715 if (!journal)
716 return NULL; 716 return NULL;
717 717
718 journal->j_dev = bdev;
719 journal->j_fs_dev = fs_dev;
720 journal->j_blk_offset = start;
721 journal->j_maxlen = len;
722 journal->j_blocksize = blocksize;
723
724 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
725 J_ASSERT(bh != NULL);
726 journal->j_sb_buffer = bh;
727 journal->j_superblock = (journal_superblock_t *)bh->b_data;
728
729 /* journal descriptor can store up to n blocks -bzzz */ 718 /* journal descriptor can store up to n blocks -bzzz */
719 journal->j_blocksize = blocksize;
730 n = journal->j_blocksize / sizeof(journal_block_tag_t); 720 n = journal->j_blocksize / sizeof(journal_block_tag_t);
731 journal->j_wbufsize = n; 721 journal->j_wbufsize = n;
732 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 722 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
@@ -736,14 +726,23 @@ journal_t * journal_init_dev(struct block_device *bdev,
736 kfree(journal); 726 kfree(journal);
737 journal = NULL; 727 journal = NULL;
738 } 728 }
729 journal->j_dev = bdev;
730 journal->j_fs_dev = fs_dev;
731 journal->j_blk_offset = start;
732 journal->j_maxlen = len;
733
734 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
735 J_ASSERT(bh != NULL);
736 journal->j_sb_buffer = bh;
737 journal->j_superblock = (journal_superblock_t *)bh->b_data;
739 738
740 return journal; 739 return journal;
741} 740}
742 741
743/** 742/**
744 * journal_t * journal_init_inode () - creates a journal which maps to a inode. 743 * journal_t * journal_init_inode () - creates a journal which maps to a inode.
745 * @inode: An inode to create the journal in 744 * @inode: An inode to create the journal in
746 * 745 *
747 * journal_init_inode creates a journal which maps an on-disk inode as 746 * journal_init_inode creates a journal which maps an on-disk inode as
748 * the journal. The inode must exist already, must support bmap() and 747 * the journal. The inode must exist already, must support bmap() and
749 * must have all data blocks preallocated. 748 * must have all data blocks preallocated.
@@ -763,7 +762,7 @@ journal_t * journal_init_inode (struct inode *inode)
763 journal->j_inode = inode; 762 journal->j_inode = inode;
764 jbd_debug(1, 763 jbd_debug(1,
765 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 764 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
766 journal, inode->i_sb->s_id, inode->i_ino, 765 journal, inode->i_sb->s_id, inode->i_ino,
767 (long long) inode->i_size, 766 (long long) inode->i_size,
768 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 767 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
769 768
@@ -798,10 +797,10 @@ journal_t * journal_init_inode (struct inode *inode)
798 return journal; 797 return journal;
799} 798}
800 799
801/* 800/*
802 * If the journal init or create aborts, we need to mark the journal 801 * If the journal init or create aborts, we need to mark the journal
803 * superblock as being NULL to prevent the journal destroy from writing 802 * superblock as being NULL to prevent the journal destroy from writing
804 * back a bogus superblock. 803 * back a bogus superblock.
805 */ 804 */
806static void journal_fail_superblock (journal_t *journal) 805static void journal_fail_superblock (journal_t *journal)
807{ 806{
@@ -820,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal)
820static int journal_reset(journal_t *journal) 819static int journal_reset(journal_t *journal)
821{ 820{
822 journal_superblock_t *sb = journal->j_superblock; 821 journal_superblock_t *sb = journal->j_superblock;
823 unsigned int first, last; 822 unsigned long first, last;
824 823
825 first = be32_to_cpu(sb->s_first); 824 first = be32_to_cpu(sb->s_first);
826 last = be32_to_cpu(sb->s_maxlen); 825 last = be32_to_cpu(sb->s_maxlen);
@@ -844,13 +843,13 @@ static int journal_reset(journal_t *journal)
844 return 0; 843 return 0;
845} 844}
846 845
847/** 846/**
848 * int journal_create() - Initialise the new journal file 847 * int journal_create() - Initialise the new journal file
849 * @journal: Journal to create. This structure must have been initialised 848 * @journal: Journal to create. This structure must have been initialised
850 * 849 *
851 * Given a journal_t structure which tells us which disk blocks we can 850 * Given a journal_t structure which tells us which disk blocks we can
852 * use, create a new journal superblock and initialise all of the 851 * use, create a new journal superblock and initialise all of the
853 * journal fields from scratch. 852 * journal fields from scratch.
854 **/ 853 **/
855int journal_create(journal_t *journal) 854int journal_create(journal_t *journal)
856{ 855{
@@ -915,7 +914,7 @@ int journal_create(journal_t *journal)
915 return journal_reset(journal); 914 return journal_reset(journal);
916} 915}
917 916
918/** 917/**
919 * void journal_update_superblock() - Update journal sb on disk. 918 * void journal_update_superblock() - Update journal sb on disk.
920 * @journal: The journal to update. 919 * @journal: The journal to update.
921 * @wait: Set to '0' if you don't want to wait for IO completion. 920 * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -939,7 +938,7 @@ void journal_update_superblock(journal_t *journal, int wait)
939 journal->j_transaction_sequence) { 938 journal->j_transaction_sequence) {
940 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 939 jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
941 "(start %ld, seq %d, errno %d)\n", 940 "(start %ld, seq %d, errno %d)\n",
942 journal->j_tail, journal->j_tail_sequence, 941 journal->j_tail, journal->j_tail_sequence,
943 journal->j_errno); 942 journal->j_errno);
944 goto out; 943 goto out;
945 } 944 }
@@ -1062,7 +1061,7 @@ static int load_superblock(journal_t *journal)
1062/** 1061/**
1063 * int journal_load() - Read journal from disk. 1062 * int journal_load() - Read journal from disk.
1064 * @journal: Journal to act on. 1063 * @journal: Journal to act on.
1065 * 1064 *
1066 * Given a journal_t structure which tells us which disk blocks contain 1065 * Given a journal_t structure which tells us which disk blocks contain
1067 * a journal, read the journal from disk to initialise the in-memory 1066 * a journal, read the journal from disk to initialise the in-memory
1068 * structures. 1067 * structures.
@@ -1094,7 +1093,7 @@ int journal_load(journal_t *journal)
1094 /* 1093 /*
1095 * Create a slab for this blocksize 1094 * Create a slab for this blocksize
1096 */ 1095 */
1097 err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); 1096 err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
1098 if (err) 1097 if (err)
1099 return err; 1098 return err;
1100 1099
@@ -1172,9 +1171,9 @@ void journal_destroy(journal_t *journal)
1172 * @compat: bitmask of compatible features 1171 * @compat: bitmask of compatible features
1173 * @ro: bitmask of features that force read-only mount 1172 * @ro: bitmask of features that force read-only mount
1174 * @incompat: bitmask of incompatible features 1173 * @incompat: bitmask of incompatible features
1175 * 1174 *
1176 * Check whether the journal uses all of a given set of 1175 * Check whether the journal uses all of a given set of
1177 * features. Return true (non-zero) if it does. 1176 * features. Return true (non-zero) if it does.
1178 **/ 1177 **/
1179 1178
1180int journal_check_used_features (journal_t *journal, unsigned long compat, 1179int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1203,7 +1202,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
1203 * @compat: bitmask of compatible features 1202 * @compat: bitmask of compatible features
1204 * @ro: bitmask of features that force read-only mount 1203 * @ro: bitmask of features that force read-only mount
1205 * @incompat: bitmask of incompatible features 1204 * @incompat: bitmask of incompatible features
1206 * 1205 *
1207 * Check whether the journaling code supports the use of 1206 * Check whether the journaling code supports the use of
1208 * all of a given set of features on this journal. Return true 1207 * all of a given set of features on this journal. Return true
1209 * (non-zero) if it can. */ 1208 * (non-zero) if it can. */
@@ -1241,7 +1240,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
1241 * @incompat: bitmask of incompatible features 1240 * @incompat: bitmask of incompatible features
1242 * 1241 *
1243 * Mark a given journal feature as present on the 1242 * Mark a given journal feature as present on the
1244 * superblock. Returns true if the requested features could be set. 1243 * superblock. Returns true if the requested features could be set.
1245 * 1244 *
1246 */ 1245 */
1247 1246
@@ -1327,7 +1326,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
1327/** 1326/**
1328 * int journal_flush () - Flush journal 1327 * int journal_flush () - Flush journal
1329 * @journal: Journal to act on. 1328 * @journal: Journal to act on.
1330 * 1329 *
1331 * Flush all data for a given journal to disk and empty the journal. 1330 * Flush all data for a given journal to disk and empty the journal.
1332 * Filesystems can use this when remounting readonly to ensure that 1331 * Filesystems can use this when remounting readonly to ensure that
1333 * recovery does not need to happen on remount. 1332 * recovery does not need to happen on remount.
@@ -1394,7 +1393,7 @@ int journal_flush(journal_t *journal)
1394 * int journal_wipe() - Wipe journal contents 1393 * int journal_wipe() - Wipe journal contents
1395 * @journal: Journal to act on. 1394 * @journal: Journal to act on.
1396 * @write: flag (see below) 1395 * @write: flag (see below)
1397 * 1396 *
1398 * Wipe out all of the contents of a journal, safely. This will produce 1397 * Wipe out all of the contents of a journal, safely. This will produce
1399 * a warning if the journal contains any valid recovery information. 1398 * a warning if the journal contains any valid recovery information.
1400 * Must be called between journal_init_*() and journal_load(). 1399 * Must be called between journal_init_*() and journal_load().
@@ -1449,7 +1448,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
1449 1448
1450/* 1449/*
1451 * Journal abort has very specific semantics, which we describe 1450 * Journal abort has very specific semantics, which we describe
1452 * for journal abort. 1451 * for journal abort.
1453 * 1452 *
1454 * Two internal function, which provide abort to te jbd layer 1453 * Two internal function, which provide abort to te jbd layer
1455 * itself are here. 1454 * itself are here.
@@ -1504,7 +1503,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1504 * Perform a complete, immediate shutdown of the ENTIRE 1503 * Perform a complete, immediate shutdown of the ENTIRE
1505 * journal (not of a single transaction). This operation cannot be 1504 * journal (not of a single transaction). This operation cannot be
1506 * undone without closing and reopening the journal. 1505 * undone without closing and reopening the journal.
1507 * 1506 *
1508 * The journal_abort function is intended to support higher level error 1507 * The journal_abort function is intended to support higher level error
1509 * recovery mechanisms such as the ext2/ext3 remount-readonly error 1508 * recovery mechanisms such as the ext2/ext3 remount-readonly error
1510 * mode. 1509 * mode.
@@ -1538,7 +1537,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
1538 * supply an errno; a null errno implies that absolutely no further 1537 * supply an errno; a null errno implies that absolutely no further
1539 * writes are done to the journal (unless there are any already in 1538 * writes are done to the journal (unless there are any already in
1540 * progress). 1539 * progress).
1541 * 1540 *
1542 */ 1541 */
1543 1542
1544void journal_abort(journal_t *journal, int errno) 1543void journal_abort(journal_t *journal, int errno)
@@ -1546,7 +1545,7 @@ void journal_abort(journal_t *journal, int errno)
1546 __journal_abort_soft(journal, errno); 1545 __journal_abort_soft(journal, errno);
1547} 1546}
1548 1547
1549/** 1548/**
1550 * int journal_errno () - returns the journal's error state. 1549 * int journal_errno () - returns the journal's error state.
1551 * @journal: journal to examine. 1550 * @journal: journal to examine.
1552 * 1551 *
@@ -1570,7 +1569,7 @@ int journal_errno(journal_t *journal)
1570 return err; 1569 return err;
1571} 1570}
1572 1571
1573/** 1572/**
1574 * int journal_clear_err () - clears the journal's error state 1573 * int journal_clear_err () - clears the journal's error state
1575 * @journal: journal to act on. 1574 * @journal: journal to act on.
1576 * 1575 *
@@ -1590,7 +1589,7 @@ int journal_clear_err(journal_t *journal)
1590 return err; 1589 return err;
1591} 1590}
1592 1591
1593/** 1592/**
1594 * void journal_ack_err() - Ack journal err. 1593 * void journal_ack_err() - Ack journal err.
1595 * @journal: journal to act on. 1594 * @journal: journal to act on.
1596 * 1595 *
@@ -1612,7 +1611,7 @@ int journal_blocks_per_page(struct inode *inode)
1612 1611
1613/* 1612/*
1614 * Simple support for retrying memory allocations. Introduced to help to 1613 * Simple support for retrying memory allocations. Introduced to help to
1615 * debug different VM deadlock avoidance strategies. 1614 * debug different VM deadlock avoidance strategies.
1616 */ 1615 */
1617void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) 1616void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
1618{ 1617{
@@ -2047,13 +2046,7 @@ static int __init journal_init(void)
2047{ 2046{
2048 int ret; 2047 int ret;
2049 2048
2050/* Static check for data structure consistency. There's no code 2049 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2051 * invoked --- we'll just get a linker failure if things aren't right.
2052 */
2053 extern void journal_bad_superblock_size(void);
2054 if (sizeof(struct journal_superblock_s) != 1024)
2055 journal_bad_superblock_size();
2056
2057 2050
2058 ret = journal_init_caches(); 2051 ret = journal_init_caches();
2059 if (ret != 0) 2052 if (ret != 0)
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index de5bafb4e853..11563fe2a52b 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/recovery.c 2 * linux/fs/recovery.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Journal recovery routines for the generic filesystem journaling code; 12 * Journal recovery routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 13 * part of the ext2fs journaling system.
14 */ 14 */
15 15
16#ifndef __KERNEL__ 16#ifndef __KERNEL__
@@ -25,9 +25,9 @@
25 25
26/* 26/*
27 * Maintain information about the progress of the recovery job, so that 27 * Maintain information about the progress of the recovery job, so that
28 * the different passes can carry information between them. 28 * the different passes can carry information between them.
29 */ 29 */
30struct recovery_info 30struct recovery_info
31{ 31{
32 tid_t start_transaction; 32 tid_t start_transaction;
33 tid_t end_transaction; 33 tid_t end_transaction;
@@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *,
46#ifdef __KERNEL__ 46#ifdef __KERNEL__
47 47
48/* Release readahead buffers after use */ 48/* Release readahead buffers after use */
49void journal_brelse_array(struct buffer_head *b[], int n) 49static void journal_brelse_array(struct buffer_head *b[], int n)
50{ 50{
51 while (--n >= 0) 51 while (--n >= 0)
52 brelse (b[n]); 52 brelse (b[n]);
@@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
116 err = 0; 116 err = 0;
117 117
118failed: 118failed:
119 if (nbufs) 119 if (nbufs)
120 journal_brelse_array(bufs, nbufs); 120 journal_brelse_array(bufs, nbufs);
121 return err; 121 return err;
122} 122}
@@ -128,7 +128,7 @@ failed:
128 * Read a block from the journal 128 * Read a block from the journal
129 */ 129 */
130 130
131static int jread(struct buffer_head **bhp, journal_t *journal, 131static int jread(struct buffer_head **bhp, journal_t *journal,
132 unsigned int offset) 132 unsigned int offset)
133{ 133{
134 int err; 134 int err;
@@ -212,14 +212,14 @@ do { \
212/** 212/**
213 * journal_recover - recovers a on-disk journal 213 * journal_recover - recovers a on-disk journal
214 * @journal: the journal to recover 214 * @journal: the journal to recover
215 * 215 *
216 * The primary function for recovering the log contents when mounting a 216 * The primary function for recovering the log contents when mounting a
217 * journaled device. 217 * journaled device.
218 * 218 *
219 * Recovery is done in three passes. In the first pass, we look for the 219 * Recovery is done in three passes. In the first pass, we look for the
220 * end of the log. In the second, we assemble the list of revoke 220 * end of the log. In the second, we assemble the list of revoke
221 * blocks. In the third and final pass, we replay any un-revoked blocks 221 * blocks. In the third and final pass, we replay any un-revoked blocks
222 * in the log. 222 * in the log.
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
@@ -231,10 +231,10 @@ int journal_recover(journal_t *journal)
231 memset(&info, 0, sizeof(info)); 231 memset(&info, 0, sizeof(info));
232 sb = journal->j_superblock; 232 sb = journal->j_superblock;
233 233
234 /* 234 /*
235 * The journal superblock's s_start field (the current log head) 235 * The journal superblock's s_start field (the current log head)
236 * is always zero if, and only if, the journal was cleanly 236 * is always zero if, and only if, the journal was cleanly
237 * unmounted. 237 * unmounted.
238 */ 238 */
239 239
240 if (!sb->s_start) { 240 if (!sb->s_start) {
@@ -253,7 +253,7 @@ int journal_recover(journal_t *journal)
253 jbd_debug(0, "JBD: recovery, exit status %d, " 253 jbd_debug(0, "JBD: recovery, exit status %d, "
254 "recovered transactions %u to %u\n", 254 "recovered transactions %u to %u\n",
255 err, info.start_transaction, info.end_transaction); 255 err, info.start_transaction, info.end_transaction);
256 jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 256 jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
258 258
259 /* Restart the log at the next transaction ID, thus invalidating 259 /* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@ int journal_recover(journal_t *journal)
268/** 268/**
269 * journal_skip_recovery - Start journal and wipe exiting records 269 * journal_skip_recovery - Start journal and wipe exiting records
270 * @journal: journal to startup 270 * @journal: journal to startup
271 * 271 *
272 * Locate any valid recovery information from the journal and set up the 272 * Locate any valid recovery information from the journal and set up the
273 * journal structures in memory to ignore it (presumably because the 273 * journal structures in memory to ignore it (presumably because the
274 * caller has evidence that it is out of date). 274 * caller has evidence that it is out of date).
275 * This function does'nt appear to be exorted.. 275 * This function does'nt appear to be exorted..
276 * 276 *
277 * We perform one pass over the journal to allow us to tell the user how 277 * We perform one pass over the journal to allow us to tell the user how
278 * much recovery information is being erased, and to let us initialise 278 * much recovery information is being erased, and to let us initialise
279 * the journal transaction sequence numbers to the next unused ID. 279 * the journal transaction sequence numbers to the next unused ID.
280 */ 280 */
281int journal_skip_recovery(journal_t *journal) 281int journal_skip_recovery(journal_t *journal)
282{ 282{
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
297#ifdef CONFIG_JBD_DEBUG 297#ifdef CONFIG_JBD_DEBUG
298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
299#endif 299#endif
300 jbd_debug(0, 300 jbd_debug(0,
301 "JBD: ignoring %d transaction%s from the journal.\n", 301 "JBD: ignoring %d transaction%s from the journal.\n",
302 dropped, (dropped == 1) ? "" : "s"); 302 dropped, (dropped == 1) ? "" : "s");
303 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
314 unsigned long next_log_block; 314 unsigned long next_log_block;
315 int err, success = 0; 315 int err, success = 0;
316 journal_superblock_t * sb; 316 journal_superblock_t * sb;
317 journal_header_t * tmp; 317 journal_header_t * tmp;
318 struct buffer_head * bh; 318 struct buffer_head * bh;
319 unsigned int sequence; 319 unsigned int sequence;
320 int blocktype; 320 int blocktype;
@@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal,
324 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) 324 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
325 / sizeof(journal_block_tag_t)); 325 / sizeof(journal_block_tag_t));
326 326
327 /* 327 /*
328 * First thing is to establish what we expect to find in the log 328 * First thing is to establish what we expect to find in the log
329 * (in terms of transaction IDs), and where (in terms of log 329 * (in terms of transaction IDs), and where (in terms of log
330 * block offsets): query the superblock. 330 * block offsets): query the superblock.
331 */ 331 */
332 332
333 sb = journal->j_superblock; 333 sb = journal->j_superblock;
@@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal,
344 * Now we walk through the log, transaction by transaction, 344 * Now we walk through the log, transaction by transaction,
345 * making sure that each transaction has a commit block in the 345 * making sure that each transaction has a commit block in the
346 * expected place. Each complete transaction gets replayed back 346 * expected place. Each complete transaction gets replayed back
347 * into the main filesystem. 347 * into the main filesystem.
348 */ 348 */
349 349
350 while (1) { 350 while (1) {
@@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal,
379 next_log_block++; 379 next_log_block++;
380 wrap(journal, next_log_block); 380 wrap(journal, next_log_block);
381 381
382 /* What kind of buffer is it? 382 /* What kind of buffer is it?
383 * 383 *
384 * If it is a descriptor block, check that it has the 384 * If it is a descriptor block, check that it has the
385 * expected sequence number. Otherwise, we're all done 385 * expected sequence number. Otherwise, we're all done
386 * here. */ 386 * here. */
@@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal,
394 394
395 blocktype = be32_to_cpu(tmp->h_blocktype); 395 blocktype = be32_to_cpu(tmp->h_blocktype);
396 sequence = be32_to_cpu(tmp->h_sequence); 396 sequence = be32_to_cpu(tmp->h_sequence);
397 jbd_debug(3, "Found magic %d, sequence %d\n", 397 jbd_debug(3, "Found magic %d, sequence %d\n",
398 blocktype, sequence); 398 blocktype, sequence);
399 399
400 if (sequence != next_commit_ID) { 400 if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal,
438 /* Recover what we can, but 438 /* Recover what we can, but
439 * report failure at the end. */ 439 * report failure at the end. */
440 success = err; 440 success = err;
441 printk (KERN_ERR 441 printk (KERN_ERR
442 "JBD: IO error %d recovering " 442 "JBD: IO error %d recovering "
443 "block %ld in log\n", 443 "block %ld in log\n",
444 err, io_block); 444 err, io_block);
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
452 * revoked, then we're all done 452 * revoked, then we're all done
453 * here. */ 453 * here. */
454 if (journal_test_revoke 454 if (journal_test_revoke
455 (journal, blocknr, 455 (journal, blocknr,
456 next_commit_ID)) { 456 next_commit_ID)) {
457 brelse(obh); 457 brelse(obh);
458 ++info->nr_revoke_hits; 458 ++info->nr_revoke_hits;
@@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal,
465 blocknr, 465 blocknr,
466 journal->j_blocksize); 466 journal->j_blocksize);
467 if (nbh == NULL) { 467 if (nbh == NULL) {
468 printk(KERN_ERR 468 printk(KERN_ERR
469 "JBD: Out of memory " 469 "JBD: Out of memory "
470 "during recovery.\n"); 470 "during recovery.\n");
471 err = -ENOMEM; 471 err = -ENOMEM;
@@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal,
537 } 537 }
538 538
539 done: 539 done:
540 /* 540 /*
541 * We broke out of the log scan loop: either we came to the 541 * We broke out of the log scan loop: either we came to the
542 * known end of the log or we found an unexpected block in the 542 * known end of the log or we found an unexpected block in the
543 * log. If the latter happened, then we know that the "current" 543 * log. If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal,
567 567
568/* Scan a revoke record, marking all blocks mentioned as revoked. */ 568/* Scan a revoke record, marking all blocks mentioned as revoked. */
569 569
570static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 570static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
571 tid_t sequence, struct recovery_info *info) 571 tid_t sequence, struct recovery_info *info)
572{ 572{
573 journal_revoke_header_t *header; 573 journal_revoke_header_t *header;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index a56144183462..c532429d8d9b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/revoke.c 2 * linux/fs/revoke.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
5 * 5 *
6 * Copyright 2000 Red Hat corp --- All Rights Reserved 6 * Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
15 * Revoke is the mechanism used to prevent old log records for deleted 15 * Revoke is the mechanism used to prevent old log records for deleted
16 * metadata from being replayed on top of newer data using the same 16 * metadata from being replayed on top of newer data using the same
17 * blocks. The revoke mechanism is used in two separate places: 17 * blocks. The revoke mechanism is used in two separate places:
18 * 18 *
19 * + Commit: during commit we write the entire list of the current 19 * + Commit: during commit we write the entire list of the current
20 * transaction's revoked blocks to the journal 20 * transaction's revoked blocks to the journal
21 * 21 *
22 * + Recovery: during recovery we record the transaction ID of all 22 * + Recovery: during recovery we record the transaction ID of all
23 * revoked blocks. If there are multiple revoke records in the log 23 * revoked blocks. If there are multiple revoke records in the log
24 * for a single block, only the last one counts, and if there is a log 24 * for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
29 * single transaction: 29 * single transaction:
30 * 30 *
31 * Block is revoked and then journaled: 31 * Block is revoked and then journaled:
32 * The desired end result is the journaling of the new block, so we 32 * The desired end result is the journaling of the new block, so we
33 * cancel the revoke before the transaction commits. 33 * cancel the revoke before the transaction commits.
34 * 34 *
35 * Block is journaled and then revoked: 35 * Block is journaled and then revoked:
@@ -41,7 +41,7 @@
41 * transaction must have happened after the block was journaled and so 41 * transaction must have happened after the block was journaled and so
42 * the revoke must take precedence. 42 * the revoke must take precedence.
43 * 43 *
44 * Block is revoked and then written as data: 44 * Block is revoked and then written as data:
45 * The data write is allowed to succeed, but the revoke is _not_ 45 * The data write is allowed to succeed, but the revoke is _not_
46 * cancelled. We still need to prevent old log records from 46 * cancelled. We still need to prevent old log records from
47 * overwriting the new data. We don't even need to clear the revoke 47 * overwriting the new data. We don't even need to clear the revoke
@@ -54,7 +54,7 @@
54 * buffer has not been revoked, and cancel_revoke 54 * buffer has not been revoked, and cancel_revoke
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 */ 58 */
59 59
60#ifndef __KERNEL__ 60#ifndef __KERNEL__
@@ -77,7 +77,7 @@ static kmem_cache_t *revoke_table_cache;
77 journal replay, this involves recording the transaction ID of the 77 journal replay, this involves recording the transaction ID of the
78 last transaction to revoke this block. */ 78 last transaction to revoke this block. */
79 79
80struct jbd_revoke_record_s 80struct jbd_revoke_record_s
81{ 81{
82 struct list_head hash; 82 struct list_head hash;
83 tid_t sequence; /* Used for recovery only */ 83 tid_t sequence; /* Used for recovery only */
@@ -90,8 +90,8 @@ struct jbd_revoke_table_s
90{ 90{
91 /* It is conceivable that we might want a larger hash table 91 /* It is conceivable that we might want a larger hash table
92 * for recovery. Must be a power of two. */ 92 * for recovery. Must be a power of two. */
93 int hash_size; 93 int hash_size;
94 int hash_shift; 94 int hash_shift;
95 struct list_head *hash_table; 95 struct list_head *hash_table;
96}; 96};
97 97
@@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal)
301 301
302#ifdef __KERNEL__ 302#ifdef __KERNEL__
303 303
304/* 304/*
305 * journal_revoke: revoke a given buffer_head from the journal. This 305 * journal_revoke: revoke a given buffer_head from the journal. This
306 * prevents the block from being replayed during recovery if we take a 306 * prevents the block from being replayed during recovery if we take a
307 * crash after this current transaction commits. Any subsequent 307 * crash after this current transaction commits. Any subsequent
308 * metadata writes of the buffer in this transaction cancel the 308 * metadata writes of the buffer in this transaction cancel the
309 * revoke. 309 * revoke.
310 * 310 *
311 * Note that this call may block --- it is up to the caller to make 311 * Note that this call may block --- it is up to the caller to make
312 * sure that there are no further calls to journal_write_metadata 312 * sure that there are no further calls to journal_write_metadata
313 * before the revoke is complete. In ext3, this implies calling the 313 * before the revoke is complete. In ext3, this implies calling the
314 * revoke before clearing the block bitmap when we are deleting 314 * revoke before clearing the block bitmap when we are deleting
315 * metadata. 315 * metadata.
316 * 316 *
317 * Revoke performs a journal_forget on any buffer_head passed in as a 317 * Revoke performs a journal_forget on any buffer_head passed in as a
318 * parameter, but does _not_ forget the buffer_head if the bh was only 318 * parameter, but does _not_ forget the buffer_head if the bh was only
319 * found implicitly. 319 * found implicitly.
320 * 320 *
321 * bh_in may not be a journalled buffer - it may have come off 321 * bh_in may not be a journalled buffer - it may have come off
322 * the hash tables without an attached journal_head. 322 * the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal)
325 * by one. 325 * by one.
326 */ 326 */
327 327
328int journal_revoke(handle_t *handle, unsigned long blocknr, 328int journal_revoke(handle_t *handle, unsigned long blocknr,
329 struct buffer_head *bh_in) 329 struct buffer_head *bh_in)
330{ 330{
331 struct buffer_head *bh = NULL; 331 struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal)
487 else 487 else
488 journal->j_revoke = journal->j_revoke_table[0]; 488 journal->j_revoke = journal->j_revoke_table[0];
489 489
490 for (i = 0; i < journal->j_revoke->hash_size; i++) 490 for (i = 0; i < journal->j_revoke->hash_size; i++)
491 INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); 491 INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
492} 492}
493 493
@@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
498 * Called with the journal lock held. 498 * Called with the journal lock held.
499 */ 499 */
500 500
501void journal_write_revoke_records(journal_t *journal, 501void journal_write_revoke_records(journal_t *journal,
502 transaction_t *transaction) 502 transaction_t *transaction)
503{ 503{
504 struct journal_head *descriptor; 504 struct journal_head *descriptor;
@@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal,
507 struct list_head *hash_list; 507 struct list_head *hash_list;
508 int i, offset, count; 508 int i, offset, count;
509 509
510 descriptor = NULL; 510 descriptor = NULL;
511 offset = 0; 511 offset = 0;
512 count = 0; 512 count = 0;
513 513
@@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal,
519 hash_list = &revoke->hash_table[i]; 519 hash_list = &revoke->hash_table[i];
520 520
521 while (!list_empty(hash_list)) { 521 while (!list_empty(hash_list)) {
522 record = (struct jbd_revoke_record_s *) 522 record = (struct jbd_revoke_record_s *)
523 hash_list->next; 523 hash_list->next;
524 write_one_revoke_record(journal, transaction, 524 write_one_revoke_record(journal, transaction,
525 &descriptor, &offset, 525 &descriptor, &offset,
526 record); 526 record);
527 count++; 527 count++;
528 list_del(&record->hash); 528 list_del(&record->hash);
@@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal,
534 jbd_debug(1, "Wrote %d revoke records\n", count); 534 jbd_debug(1, "Wrote %d revoke records\n", count);
535} 535}
536 536
537/* 537/*
538 * Write out one revoke record. We need to create a new descriptor 538 * Write out one revoke record. We need to create a new descriptor
539 * block if the old one is full or if we have not already created one. 539 * block if the old one is full or if we have not already created one.
540 */ 540 */
541 541
542static void write_one_revoke_record(journal_t *journal, 542static void write_one_revoke_record(journal_t *journal,
543 transaction_t *transaction, 543 transaction_t *transaction,
544 struct journal_head **descriptorp, 544 struct journal_head **descriptorp,
545 int *offsetp, 545 int *offsetp,
546 struct jbd_revoke_record_s *record) 546 struct jbd_revoke_record_s *record)
547{ 547{
@@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal,
584 *descriptorp = descriptor; 584 *descriptorp = descriptor;
585 } 585 }
586 586
587 * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = 587 * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
588 cpu_to_be32(record->blocknr); 588 cpu_to_be32(record->blocknr);
589 offset += 4; 589 offset += 4;
590 *offsetp = offset; 590 *offsetp = offset;
591} 591}
592 592
593/* 593/*
594 * Flush a revoke descriptor out to the journal. If we are aborting, 594 * Flush a revoke descriptor out to the journal. If we are aborting,
595 * this is a noop; otherwise we are generating a buffer which needs to 595 * this is a noop; otherwise we are generating a buffer which needs to
596 * be waited for during commit, so it has to go onto the appropriate 596 * be waited for during commit, so it has to go onto the appropriate
597 * journal buffer list. 597 * journal buffer list.
598 */ 598 */
599 599
600static void flush_descriptor(journal_t *journal, 600static void flush_descriptor(journal_t *journal,
601 struct journal_head *descriptor, 601 struct journal_head *descriptor,
602 int offset) 602 int offset)
603{ 603{
604 journal_revoke_header_t *header; 604 journal_revoke_header_t *header;
@@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal,
618} 618}
619#endif 619#endif
620 620
621/* 621/*
622 * Revoke support for recovery. 622 * Revoke support for recovery.
623 * 623 *
624 * Recovery needs to be able to: 624 * Recovery needs to be able to:
@@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal,
629 * check whether a given block in a given transaction should be replayed 629 * check whether a given block in a given transaction should be replayed
630 * (ie. has not been revoked by a revoke record in that or a subsequent 630 * (ie. has not been revoked by a revoke record in that or a subsequent
631 * transaction) 631 * transaction)
632 * 632 *
633 * empty the revoke table after recovery. 633 * empty the revoke table after recovery.
634 */ 634 */
635 635
@@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal,
637 * First, setting revoke records. We create a new revoke record for 637 * First, setting revoke records. We create a new revoke record for
638 * every block ever revoked in the log as we scan it for recovery, and 638 * every block ever revoked in the log as we scan it for recovery, and
639 * we update the existing records if we find multiple revokes for a 639 * we update the existing records if we find multiple revokes for a
640 * single block. 640 * single block.
641 */ 641 */
642 642
643int journal_set_revoke(journal_t *journal, 643int journal_set_revoke(journal_t *journal,
644 unsigned long blocknr, 644 unsigned long blocknr,
645 tid_t sequence) 645 tid_t sequence)
646{ 646{
647 struct jbd_revoke_record_s *record; 647 struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal,
653 if (tid_gt(sequence, record->sequence)) 653 if (tid_gt(sequence, record->sequence))
654 record->sequence = sequence; 654 record->sequence = sequence;
655 return 0; 655 return 0;
656 } 656 }
657 return insert_revoke_hash(journal, blocknr, sequence); 657 return insert_revoke_hash(journal, blocknr, sequence);
658} 658}
659 659
660/* 660/*
661 * Test revoke records. For a given block referenced in the log, has 661 * Test revoke records. For a given block referenced in the log, has
662 * that block been revoked? A revoke record with a given transaction 662 * that block been revoked? A revoke record with a given transaction
663 * sequence number revokes all blocks in that transaction and earlier 663 * sequence number revokes all blocks in that transaction and earlier
664 * ones, but later transactions still need replayed. 664 * ones, but later transactions still need replayed.
665 */ 665 */
666 666
667int journal_test_revoke(journal_t *journal, 667int journal_test_revoke(journal_t *journal,
668 unsigned long blocknr, 668 unsigned long blocknr,
669 tid_t sequence) 669 tid_t sequence)
670{ 670{
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f5169a96260e..e1b3c8af4d17 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * linux/fs/transaction.c 2 * linux/fs/transaction.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 * 5 *
6 * Copyright 1998 Red Hat corp --- All Rights Reserved 6 * Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
10 * option, any later version, incorporated herein by reference. 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Generic filesystem transaction handling code; part of the ext2fs 12 * Generic filesystem transaction handling code; part of the ext2fs
13 * journaling system. 13 * journaling system.
14 * 14 *
15 * This file manages transactions (compound commits managed by the 15 * This file manages transactions (compound commits managed by the
16 * journaling code) and handles (individual atomic operations by the 16 * journaling code) and handles (individual atomic operations by the
@@ -74,7 +74,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
74 * start_this_handle: Given a handle, deal with any locking or stalling 74 * start_this_handle: Given a handle, deal with any locking or stalling
75 * needed to make sure that there is enough journal space for the handle 75 * needed to make sure that there is enough journal space for the handle
76 * to begin. Attach the handle to a transaction and set up the 76 * to begin. Attach the handle to a transaction and set up the
77 * transaction's buffer credits. 77 * transaction's buffer credits.
78 */ 78 */
79 79
80static int start_this_handle(journal_t *journal, handle_t *handle) 80static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +117,7 @@ repeat_locked:
117 if (is_journal_aborted(journal) || 117 if (is_journal_aborted(journal) ||
118 (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { 118 (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
119 spin_unlock(&journal->j_state_lock); 119 spin_unlock(&journal->j_state_lock);
120 ret = -EROFS; 120 ret = -EROFS;
121 goto out; 121 goto out;
122 } 122 }
123 123
@@ -182,7 +182,7 @@ repeat_locked:
182 goto repeat; 182 goto repeat;
183 } 183 }
184 184
185 /* 185 /*
186 * The commit code assumes that it can get enough log space 186 * The commit code assumes that it can get enough log space
187 * without forcing a checkpoint. This is *critical* for 187 * without forcing a checkpoint. This is *critical* for
188 * correctness: a checkpoint of a buffer which is also 188 * correctness: a checkpoint of a buffer which is also
@@ -191,7 +191,7 @@ repeat_locked:
191 * 191 *
192 * We must therefore ensure the necessary space in the journal 192 * We must therefore ensure the necessary space in the journal
193 * *before* starting to dirty potentially checkpointed buffers 193 * *before* starting to dirty potentially checkpointed buffers
194 * in the new transaction. 194 * in the new transaction.
195 * 195 *
196 * The worst part is, any transaction currently committing can 196 * The worst part is, any transaction currently committing can
197 * reduce the free space arbitrarily. Be careful to account for 197 * reduce the free space arbitrarily. Be careful to account for
@@ -246,13 +246,13 @@ static handle_t *new_handle(int nblocks)
246} 246}
247 247
248/** 248/**
249 * handle_t *journal_start() - Obtain a new handle. 249 * handle_t *journal_start() - Obtain a new handle.
250 * @journal: Journal to start transaction on. 250 * @journal: Journal to start transaction on.
251 * @nblocks: number of block buffer we might modify 251 * @nblocks: number of block buffer we might modify
252 * 252 *
253 * We make sure that the transaction can guarantee at least nblocks of 253 * We make sure that the transaction can guarantee at least nblocks of
254 * modified buffers in the log. We block until the log can guarantee 254 * modified buffers in the log. We block until the log can guarantee
255 * that much space. 255 * that much space.
256 * 256 *
257 * This function is visible to journal users (like ext3fs), so is not 257 * This function is visible to journal users (like ext3fs), so is not
258 * called with the journal already locked. 258 * called with the journal already locked.
@@ -292,11 +292,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
292 * int journal_extend() - extend buffer credits. 292 * int journal_extend() - extend buffer credits.
293 * @handle: handle to 'extend' 293 * @handle: handle to 'extend'
294 * @nblocks: nr blocks to try to extend by. 294 * @nblocks: nr blocks to try to extend by.
295 * 295 *
296 * Some transactions, such as large extends and truncates, can be done 296 * Some transactions, such as large extends and truncates, can be done
297 * atomically all at once or in several stages. The operation requests 297 * atomically all at once or in several stages. The operation requests
298 * a credit for a number of buffer modications in advance, but can 298 * a credit for a number of buffer modications in advance, but can
299 * extend its credit if it needs more. 299 * extend its credit if it needs more.
300 * 300 *
301 * journal_extend tries to give the running handle more buffer credits. 301 * journal_extend tries to give the running handle more buffer credits.
302 * It does not guarantee that allocation - this is a best-effort only. 302 * It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +363,7 @@ out:
363 * int journal_restart() - restart a handle . 363 * int journal_restart() - restart a handle .
364 * @handle: handle to restart 364 * @handle: handle to restart
365 * @nblocks: nr credits requested 365 * @nblocks: nr credits requested
366 * 366 *
367 * Restart a handle for a multi-transaction filesystem 367 * Restart a handle for a multi-transaction filesystem
368 * operation. 368 * operation.
369 * 369 *
@@ -462,7 +462,7 @@ void journal_lock_updates(journal_t *journal)
462/** 462/**
463 * void journal_unlock_updates (journal_t* journal) - release barrier 463 * void journal_unlock_updates (journal_t* journal) - release barrier
464 * @journal: Journal to release the barrier on. 464 * @journal: Journal to release the barrier on.
465 * 465 *
466 * Release a transaction barrier obtained with journal_lock_updates(). 466 * Release a transaction barrier obtained with journal_lock_updates().
467 * 467 *
468 * Should be called without the journal lock held. 468 * Should be called without the journal lock held.
@@ -547,8 +547,8 @@ repeat:
547 jbd_lock_bh_state(bh); 547 jbd_lock_bh_state(bh);
548 548
549 /* We now hold the buffer lock so it is safe to query the buffer 549 /* We now hold the buffer lock so it is safe to query the buffer
550 * state. Is the buffer dirty? 550 * state. Is the buffer dirty?
551 * 551 *
552 * If so, there are two possibilities. The buffer may be 552 * If so, there are two possibilities. The buffer may be
553 * non-journaled, and undergoing a quite legitimate writeback. 553 * non-journaled, and undergoing a quite legitimate writeback.
554 * Otherwise, it is journaled, and we don't expect dirty buffers 554 * Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +566,7 @@ repeat:
566 */ 566 */
567 if (jh->b_transaction) { 567 if (jh->b_transaction) {
568 J_ASSERT_JH(jh, 568 J_ASSERT_JH(jh,
569 jh->b_transaction == transaction || 569 jh->b_transaction == transaction ||
570 jh->b_transaction == 570 jh->b_transaction ==
571 journal->j_committing_transaction); 571 journal->j_committing_transaction);
572 if (jh->b_next_transaction) 572 if (jh->b_next_transaction)
@@ -580,7 +580,7 @@ repeat:
580 */ 580 */
581 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 581 JBUFFER_TRACE(jh, "Unexpected dirty buffer");
582 jbd_unexpected_dirty_buffer(jh); 582 jbd_unexpected_dirty_buffer(jh);
583 } 583 }
584 584
585 unlock_buffer(bh); 585 unlock_buffer(bh);
586 586
@@ -653,7 +653,7 @@ repeat:
653 * buffer had better remain locked during the kmalloc, 653 * buffer had better remain locked during the kmalloc,
654 * but that should be true --- we hold the journal lock 654 * but that should be true --- we hold the journal lock
655 * still and the buffer is already on the BUF_JOURNAL 655 * still and the buffer is already on the BUF_JOURNAL
656 * list so won't be flushed. 656 * list so won't be flushed.
657 * 657 *
658 * Subtle point, though: if this is a get_undo_access, 658 * Subtle point, though: if this is a get_undo_access,
659 * then we will be relying on the frozen_data to contain 659 * then we will be relying on the frozen_data to contain
@@ -765,8 +765,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
765 * manually rather than reading off disk), then we need to keep the 765 * manually rather than reading off disk), then we need to keep the
766 * buffer_head locked until it has been completely filled with new 766 * buffer_head locked until it has been completely filled with new
767 * data. In this case, we should be able to make the assertion that 767 * data. In this case, we should be able to make the assertion that
768 * the bh is not already part of an existing transaction. 768 * the bh is not already part of an existing transaction.
769 * 769 *
770 * The buffer should already be locked by the caller by this point. 770 * The buffer should already be locked by the caller by this point.
771 * There is no lock ranking violation: it was a newly created, 771 * There is no lock ranking violation: it was a newly created,
772 * unlocked buffer beforehand. */ 772 * unlocked buffer beforehand. */
@@ -778,7 +778,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
778 * 778 *
779 * Call this if you create a new bh. 779 * Call this if you create a new bh.
780 */ 780 */
781int journal_get_create_access(handle_t *handle, struct buffer_head *bh) 781int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
782{ 782{
783 transaction_t *transaction = handle->h_transaction; 783 transaction_t *transaction = handle->h_transaction;
784 journal_t *journal = transaction->t_journal; 784 journal_t *journal = transaction->t_journal;
@@ -847,13 +847,13 @@ out:
847 * do not reuse freed space until the deallocation has been committed, 847 * do not reuse freed space until the deallocation has been committed,
848 * since if we overwrote that space we would make the delete 848 * since if we overwrote that space we would make the delete
849 * un-rewindable in case of a crash. 849 * un-rewindable in case of a crash.
850 * 850 *
851 * To deal with that, journal_get_undo_access requests write access to a 851 * To deal with that, journal_get_undo_access requests write access to a
852 * buffer for parts of non-rewindable operations such as delete 852 * buffer for parts of non-rewindable operations such as delete
853 * operations on the bitmaps. The journaling code must keep a copy of 853 * operations on the bitmaps. The journaling code must keep a copy of
854 * the buffer's contents prior to the undo_access call until such time 854 * the buffer's contents prior to the undo_access call until such time
855 * as we know that the buffer has definitely been committed to disk. 855 * as we know that the buffer has definitely been committed to disk.
856 * 856 *
857 * We never need to know which transaction the committed data is part 857 * We never need to know which transaction the committed data is part
858 * of, buffers touched here are guaranteed to be dirtied later and so 858 * of, buffers touched here are guaranteed to be dirtied later and so
859 * will be committed to a new transaction in due course, at which point 859 * will be committed to a new transaction in due course, at which point
@@ -911,13 +911,13 @@ out:
911 return err; 911 return err;
912} 912}
913 913
914/** 914/**
915 * int journal_dirty_data() - mark a buffer as containing dirty data which 915 * int journal_dirty_data() - mark a buffer as containing dirty data which
916 * needs to be flushed before we can commit the 916 * needs to be flushed before we can commit the
917 * current transaction. 917 * current transaction.
918 * @handle: transaction 918 * @handle: transaction
919 * @bh: bufferhead to mark 919 * @bh: bufferhead to mark
920 * 920 *
921 * The buffer is placed on the transaction's data list and is marked as 921 * The buffer is placed on the transaction's data list and is marked as
922 * belonging to the transaction. 922 * belonging to the transaction.
923 * 923 *
@@ -946,15 +946,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
946 946
947 /* 947 /*
948 * What if the buffer is already part of a running transaction? 948 * What if the buffer is already part of a running transaction?
949 * 949 *
950 * There are two cases: 950 * There are two cases:
951 * 1) It is part of the current running transaction. Refile it, 951 * 1) It is part of the current running transaction. Refile it,
952 * just in case we have allocated it as metadata, deallocated 952 * just in case we have allocated it as metadata, deallocated
953 * it, then reallocated it as data. 953 * it, then reallocated it as data.
954 * 2) It is part of the previous, still-committing transaction. 954 * 2) It is part of the previous, still-committing transaction.
955 * If all we want to do is to guarantee that the buffer will be 955 * If all we want to do is to guarantee that the buffer will be
956 * written to disk before this new transaction commits, then 956 * written to disk before this new transaction commits, then
957 * being sure that the *previous* transaction has this same 957 * being sure that the *previous* transaction has this same
958 * property is sufficient for us! Just leave it on its old 958 * property is sufficient for us! Just leave it on its old
959 * transaction. 959 * transaction.
960 * 960 *
@@ -1076,18 +1076,18 @@ no_journal:
1076 return 0; 1076 return 0;
1077} 1077}
1078 1078
1079/** 1079/**
1080 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata 1080 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
1081 * @handle: transaction to add buffer to. 1081 * @handle: transaction to add buffer to.
1082 * @bh: buffer to mark 1082 * @bh: buffer to mark
1083 * 1083 *
1084 * mark dirty metadata which needs to be journaled as part of the current 1084 * mark dirty metadata which needs to be journaled as part of the current
1085 * transaction. 1085 * transaction.
1086 * 1086 *
1087 * The buffer is placed on the transaction's metadata list and is marked 1087 * The buffer is placed on the transaction's metadata list and is marked
1088 * as belonging to the transaction. 1088 * as belonging to the transaction.
1089 * 1089 *
1090 * Returns error number or 0 on success. 1090 * Returns error number or 0 on success.
1091 * 1091 *
1092 * Special care needs to be taken if the buffer already belongs to the 1092 * Special care needs to be taken if the buffer already belongs to the
1093 * current committing transaction (in which case we should have frozen 1093 * current committing transaction (in which case we should have frozen
@@ -1135,11 +1135,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1135 1135
1136 set_buffer_jbddirty(bh); 1136 set_buffer_jbddirty(bh);
1137 1137
1138 /* 1138 /*
1139 * Metadata already on the current transaction list doesn't 1139 * Metadata already on the current transaction list doesn't
1140 * need to be filed. Metadata on another transaction's list must 1140 * need to be filed. Metadata on another transaction's list must
1141 * be committing, and will be refiled once the commit completes: 1141 * be committing, and will be refiled once the commit completes:
1142 * leave it alone for now. 1142 * leave it alone for now.
1143 */ 1143 */
1144 if (jh->b_transaction != transaction) { 1144 if (jh->b_transaction != transaction) {
1145 JBUFFER_TRACE(jh, "already on other transaction"); 1145 JBUFFER_TRACE(jh, "already on other transaction");
@@ -1165,7 +1165,7 @@ out:
1165 return 0; 1165 return 0;
1166} 1166}
1167 1167
1168/* 1168/*
1169 * journal_release_buffer: undo a get_write_access without any buffer 1169 * journal_release_buffer: undo a get_write_access without any buffer
1170 * updates, if the update decided in the end that it didn't need access. 1170 * updates, if the update decided in the end that it didn't need access.
1171 * 1171 *
@@ -1176,20 +1176,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
1176 BUFFER_TRACE(bh, "entry"); 1176 BUFFER_TRACE(bh, "entry");
1177} 1177}
1178 1178
1179/** 1179/**
1180 * void journal_forget() - bforget() for potentially-journaled buffers. 1180 * void journal_forget() - bforget() for potentially-journaled buffers.
1181 * @handle: transaction handle 1181 * @handle: transaction handle
1182 * @bh: bh to 'forget' 1182 * @bh: bh to 'forget'
1183 * 1183 *
1184 * We can only do the bforget if there are no commits pending against the 1184 * We can only do the bforget if there are no commits pending against the
1185 * buffer. If the buffer is dirty in the current running transaction we 1185 * buffer. If the buffer is dirty in the current running transaction we
1186 * can safely unlink it. 1186 * can safely unlink it.
1187 * 1187 *
1188 * bh may not be a journalled buffer at all - it may be a non-JBD 1188 * bh may not be a journalled buffer at all - it may be a non-JBD
1189 * buffer which came off the hashtable. Check for this. 1189 * buffer which came off the hashtable. Check for this.
1190 * 1190 *
1191 * Decrements bh->b_count by one. 1191 * Decrements bh->b_count by one.
1192 * 1192 *
1193 * Allow this call even if the handle has aborted --- it may be part of 1193 * Allow this call even if the handle has aborted --- it may be part of
1194 * the caller's cleanup after an abort. 1194 * the caller's cleanup after an abort.
1195 */ 1195 */
@@ -1237,7 +1237,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1237 1237
1238 drop_reserve = 1; 1238 drop_reserve = 1;
1239 1239
1240 /* 1240 /*
1241 * We are no longer going to journal this buffer. 1241 * We are no longer going to journal this buffer.
1242 * However, the commit of this transaction is still 1242 * However, the commit of this transaction is still
1243 * important to the buffer: the delete that we are now 1243 * important to the buffer: the delete that we are now
@@ -1246,7 +1246,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1246 * 1246 *
1247 * So, if we have a checkpoint on the buffer, we should 1247 * So, if we have a checkpoint on the buffer, we should
1248 * now refile the buffer on our BJ_Forget list so that 1248 * now refile the buffer on our BJ_Forget list so that
1249 * we know to remove the checkpoint after we commit. 1249 * we know to remove the checkpoint after we commit.
1250 */ 1250 */
1251 1251
1252 if (jh->b_cp_transaction) { 1252 if (jh->b_cp_transaction) {
@@ -1264,7 +1264,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1264 } 1264 }
1265 } 1265 }
1266 } else if (jh->b_transaction) { 1266 } else if (jh->b_transaction) {
1267 J_ASSERT_JH(jh, (jh->b_transaction == 1267 J_ASSERT_JH(jh, (jh->b_transaction ==
1268 journal->j_committing_transaction)); 1268 journal->j_committing_transaction));
1269 /* However, if the buffer is still owned by a prior 1269 /* However, if the buffer is still owned by a prior
1270 * (committing) transaction, we can't drop it yet... */ 1270 * (committing) transaction, we can't drop it yet... */
@@ -1294,7 +1294,7 @@ drop:
1294/** 1294/**
1295 * int journal_stop() - complete a transaction 1295 * int journal_stop() - complete a transaction
1296 * @handle: tranaction to complete. 1296 * @handle: tranaction to complete.
1297 * 1297 *
1298 * All done for a particular handle. 1298 * All done for a particular handle.
1299 * 1299 *
1300 * There is not much action needed here. We just return any remaining 1300 * There is not much action needed here. We just return any remaining
@@ -1303,7 +1303,7 @@ drop:
1303 * filesystem is marked for synchronous update. 1303 * filesystem is marked for synchronous update.
1304 * 1304 *
1305 * journal_stop itself will not usually return an error, but it may 1305 * journal_stop itself will not usually return an error, but it may
1306 * do so in unusual circumstances. In particular, expect it to 1306 * do so in unusual circumstances. In particular, expect it to
1307 * return -EIO if a journal_abort has been executed since the 1307 * return -EIO if a journal_abort has been executed since the
1308 * transaction began. 1308 * transaction began.
1309 */ 1309 */
@@ -1373,7 +1373,7 @@ int journal_stop(handle_t *handle)
1373 if (handle->h_sync || 1373 if (handle->h_sync ||
1374 transaction->t_outstanding_credits > 1374 transaction->t_outstanding_credits >
1375 journal->j_max_transaction_buffers || 1375 journal->j_max_transaction_buffers ||
1376 time_after_eq(jiffies, transaction->t_expires)) { 1376 time_after_eq(jiffies, transaction->t_expires)) {
1377 /* Do this even for aborted journals: an abort still 1377 /* Do this even for aborted journals: an abort still
1378 * completes the commit thread, it just doesn't write 1378 * completes the commit thread, it just doesn't write
1379 * anything to disk. */ 1379 * anything to disk. */
@@ -1388,7 +1388,7 @@ int journal_stop(handle_t *handle)
1388 1388
1389 /* 1389 /*
1390 * Special case: JFS_SYNC synchronous updates require us 1390 * Special case: JFS_SYNC synchronous updates require us
1391 * to wait for the commit to complete. 1391 * to wait for the commit to complete.
1392 */ 1392 */
1393 if (handle->h_sync && !(current->flags & PF_MEMALLOC)) 1393 if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1394 err = log_wait_commit(journal, tid); 1394 err = log_wait_commit(journal, tid);
@@ -1439,7 +1439,7 @@ int journal_force_commit(journal_t *journal)
1439 * jbd_lock_bh_state(jh2bh(jh)) is held. 1439 * jbd_lock_bh_state(jh2bh(jh)) is held.
1440 */ 1440 */
1441 1441
1442static inline void 1442static inline void
1443__blist_add_buffer(struct journal_head **list, struct journal_head *jh) 1443__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1444{ 1444{
1445 if (!*list) { 1445 if (!*list) {
@@ -1454,7 +1454,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1454 } 1454 }
1455} 1455}
1456 1456
1457/* 1457/*
1458 * Remove a buffer from a transaction list, given the transaction's list 1458 * Remove a buffer from a transaction list, given the transaction's list
1459 * head pointer. 1459 * head pointer.
1460 * 1460 *
@@ -1475,7 +1475,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1475 jh->b_tnext->b_tprev = jh->b_tprev; 1475 jh->b_tnext->b_tprev = jh->b_tprev;
1476} 1476}
1477 1477
1478/* 1478/*
1479 * Remove a buffer from the appropriate transaction list. 1479 * Remove a buffer from the appropriate transaction list.
1480 * 1480 *
1481 * Note that this function can *change* the value of 1481 * Note that this function can *change* the value of
@@ -1595,17 +1595,17 @@ out:
1595} 1595}
1596 1596
1597 1597
1598/** 1598/**
1599 * int journal_try_to_free_buffers() - try to free page buffers. 1599 * int journal_try_to_free_buffers() - try to free page buffers.
1600 * @journal: journal for operation 1600 * @journal: journal for operation
1601 * @page: to try and free 1601 * @page: to try and free
1602 * @unused_gfp_mask: unused 1602 * @unused_gfp_mask: unused
1603 * 1603 *
1604 * 1604 *
1605 * For all the buffers on this page, 1605 * For all the buffers on this page,
1606 * if they are fully written out ordered data, move them onto BUF_CLEAN 1606 * if they are fully written out ordered data, move them onto BUF_CLEAN
1607 * so try_to_free_buffers() can reap them. 1607 * so try_to_free_buffers() can reap them.
1608 * 1608 *
1609 * This function returns non-zero if we wish try_to_free_buffers() 1609 * This function returns non-zero if we wish try_to_free_buffers()
1610 * to be called. We do this if the page is releasable by try_to_free_buffers(). 1610 * to be called. We do this if the page is releasable by try_to_free_buffers().
1611 * We also do it if the page has locked or dirty buffers and the caller wants 1611 * We also do it if the page has locked or dirty buffers and the caller wants
@@ -1629,7 +1629,7 @@ out:
1629 * cannot happen because we never reallocate freed data as metadata 1629 * cannot happen because we never reallocate freed data as metadata
1630 * while the data is part of a transaction. Yes? 1630 * while the data is part of a transaction. Yes?
1631 */ 1631 */
1632int journal_try_to_free_buffers(journal_t *journal, 1632int journal_try_to_free_buffers(journal_t *journal,
1633 struct page *page, gfp_t unused_gfp_mask) 1633 struct page *page, gfp_t unused_gfp_mask)
1634{ 1634{
1635 struct buffer_head *head; 1635 struct buffer_head *head;
@@ -1697,7 +1697,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1697} 1697}
1698 1698
1699/* 1699/*
1700 * journal_invalidatepage 1700 * journal_invalidatepage
1701 * 1701 *
1702 * This code is tricky. It has a number of cases to deal with. 1702 * This code is tricky. It has a number of cases to deal with.
1703 * 1703 *
@@ -1705,15 +1705,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1705 * 1705 *
1706 * i_size must be updated on disk before we start calling invalidatepage on the 1706 * i_size must be updated on disk before we start calling invalidatepage on the
1707 * data. 1707 * data.
1708 * 1708 *
1709 * This is done in ext3 by defining an ext3_setattr method which 1709 * This is done in ext3 by defining an ext3_setattr method which
1710 * updates i_size before truncate gets going. By maintaining this 1710 * updates i_size before truncate gets going. By maintaining this
1711 * invariant, we can be sure that it is safe to throw away any buffers 1711 * invariant, we can be sure that it is safe to throw away any buffers
1712 * attached to the current transaction: once the transaction commits, 1712 * attached to the current transaction: once the transaction commits,
1713 * we know that the data will not be needed. 1713 * we know that the data will not be needed.
1714 * 1714 *
1715 * Note however that we can *not* throw away data belonging to the 1715 * Note however that we can *not* throw away data belonging to the
1716 * previous, committing transaction! 1716 * previous, committing transaction!
1717 * 1717 *
1718 * Any disk blocks which *are* part of the previous, committing 1718 * Any disk blocks which *are* part of the previous, committing
1719 * transaction (and which therefore cannot be discarded immediately) are 1719 * transaction (and which therefore cannot be discarded immediately) are
@@ -1732,7 +1732,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1732 * don't make guarantees about the order in which data hits disk --- in 1732 * don't make guarantees about the order in which data hits disk --- in
1733 * particular we don't guarantee that new dirty data is flushed before 1733 * particular we don't guarantee that new dirty data is flushed before
1734 * transaction commit --- so it is always safe just to discard data 1734 * transaction commit --- so it is always safe just to discard data
1735 * immediately in that mode. --sct 1735 * immediately in that mode. --sct
1736 */ 1736 */
1737 1737
1738/* 1738/*
@@ -1876,9 +1876,9 @@ zap_buffer_unlocked:
1876 return may_free; 1876 return may_free;
1877} 1877}
1878 1878
1879/** 1879/**
1880 * void journal_invalidatepage() 1880 * void journal_invalidatepage()
1881 * @journal: journal to use for flush... 1881 * @journal: journal to use for flush...
1882 * @page: page to flush 1882 * @page: page to flush
1883 * @offset: length of page to invalidate. 1883 * @offset: length of page to invalidate.
1884 * 1884 *
@@ -1886,7 +1886,7 @@ zap_buffer_unlocked:
1886 * 1886 *
1887 */ 1887 */
1888void journal_invalidatepage(journal_t *journal, 1888void journal_invalidatepage(journal_t *journal,
1889 struct page *page, 1889 struct page *page,
1890 unsigned long offset) 1890 unsigned long offset)
1891{ 1891{
1892 struct buffer_head *head, *bh, *next; 1892 struct buffer_head *head, *bh, *next;
@@ -1908,7 +1908,7 @@ void journal_invalidatepage(journal_t *journal,
1908 next = bh->b_this_page; 1908 next = bh->b_this_page;
1909 1909
1910 if (offset <= curr_off) { 1910 if (offset <= curr_off) {
1911 /* This block is wholly outside the truncation point */ 1911 /* This block is wholly outside the truncation point */
1912 lock_buffer(bh); 1912 lock_buffer(bh);
1913 may_free &= journal_unmap_buffer(journal, bh); 1913 may_free &= journal_unmap_buffer(journal, bh);
1914 unlock_buffer(bh); 1914 unlock_buffer(bh);
@@ -1924,8 +1924,8 @@ void journal_invalidatepage(journal_t *journal,
1924 } 1924 }
1925} 1925}
1926 1926
1927/* 1927/*
1928 * File a buffer on the given transaction list. 1928 * File a buffer on the given transaction list.
1929 */ 1929 */
1930void __journal_file_buffer(struct journal_head *jh, 1930void __journal_file_buffer(struct journal_head *jh,
1931 transaction_t *transaction, int jlist) 1931 transaction_t *transaction, int jlist)
@@ -1948,7 +1948,7 @@ void __journal_file_buffer(struct journal_head *jh,
1948 * with __jbd_unexpected_dirty_buffer()'s handling of dirty 1948 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1949 * state. */ 1949 * state. */
1950 1950
1951 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1951 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1952 jlist == BJ_Shadow || jlist == BJ_Forget) { 1952 jlist == BJ_Shadow || jlist == BJ_Forget) {
1953 if (test_clear_buffer_dirty(bh) || 1953 if (test_clear_buffer_dirty(bh) ||
1954 test_clear_buffer_jbddirty(bh)) 1954 test_clear_buffer_jbddirty(bh))
@@ -2008,7 +2008,7 @@ void journal_file_buffer(struct journal_head *jh,
2008 jbd_unlock_bh_state(jh2bh(jh)); 2008 jbd_unlock_bh_state(jh2bh(jh));
2009} 2009}
2010 2010
2011/* 2011/*
2012 * Remove a buffer from its current buffer list in preparation for 2012 * Remove a buffer from its current buffer list in preparation for
2013 * dropping it from its current transaction entirely. If the buffer has 2013 * dropping it from its current transaction entirely. If the buffer has
2014 * already started to be used by a subsequent transaction, refile the 2014 * already started to be used by a subsequent transaction, refile the
@@ -2060,7 +2060,7 @@ void __journal_refile_buffer(struct journal_head *jh)
2060 * to the caller to remove the journal_head if necessary. For the 2060 * to the caller to remove the journal_head if necessary. For the
2061 * unlocked journal_refile_buffer call, the caller isn't going to be 2061 * unlocked journal_refile_buffer call, the caller isn't going to be
2062 * doing anything else to the buffer so we need to do the cleanup 2062 * doing anything else to the buffer so we need to do the cleanup
2063 * ourselves to avoid a jh leak. 2063 * ourselves to avoid a jh leak.
2064 * 2064 *
2065 * *** The journal_head may be freed by this call! *** 2065 * *** The journal_head may be freed by this call! ***
2066 */ 2066 */
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 93068697a9bf..f5cf9c93e243 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -364,12 +364,11 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
364 inode->i_ctime.tv_nsec = 0; 364 inode->i_ctime.tv_nsec = 0;
365 inode->i_mtime.tv_nsec = 0; 365 inode->i_mtime.tv_nsec = 0;
366 inode->i_atime.tv_nsec = 0; 366 inode->i_atime.tv_nsec = 0;
367 inode->i_blksize = PAGE_SIZE;
368 inode->i_blocks = (inode->i_size + 511) >> 9; 367 inode->i_blocks = (inode->i_size + 511) >> 9;
369 368
370 f = jffs_find_file(c, raw_inode->ino); 369 f = jffs_find_file(c, raw_inode->ino);
371 370
372 inode->u.generic_ip = (void *)f; 371 inode->i_private = (void *)f;
373 insert_inode_hash(inode); 372 insert_inode_hash(inode);
374 373
375 return inode; 374 return inode;
@@ -442,7 +441,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
442 }); 441 });
443 442
444 result = -ENOTDIR; 443 result = -ENOTDIR;
445 if (!(old_dir_f = (struct jffs_file *)old_dir->u.generic_ip)) { 444 if (!(old_dir_f = old_dir->i_private)) {
446 D(printk("jffs_rename(): Old dir invalid.\n")); 445 D(printk("jffs_rename(): Old dir invalid.\n"));
447 goto jffs_rename_end; 446 goto jffs_rename_end;
448 } 447 }
@@ -456,7 +455,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
456 455
457 /* Find the new directory. */ 456 /* Find the new directory. */
458 result = -ENOTDIR; 457 result = -ENOTDIR;
459 if (!(new_dir_f = (struct jffs_file *)new_dir->u.generic_ip)) { 458 if (!(new_dir_f = new_dir->i_private)) {
460 D(printk("jffs_rename(): New dir invalid.\n")); 459 D(printk("jffs_rename(): New dir invalid.\n"));
461 goto jffs_rename_end; 460 goto jffs_rename_end;
462 } 461 }
@@ -593,7 +592,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
593 } 592 }
594 else { 593 else {
595 ddino = ((struct jffs_file *) 594 ddino = ((struct jffs_file *)
596 inode->u.generic_ip)->pino; 595 inode->i_private)->pino;
597 } 596 }
598 D3(printk("jffs_readdir(): \"..\" %u\n", ddino)); 597 D3(printk("jffs_readdir(): \"..\" %u\n", ddino));
599 if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) { 598 if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) {
@@ -604,7 +603,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
604 } 603 }
605 filp->f_pos++; 604 filp->f_pos++;
606 } 605 }
607 f = ((struct jffs_file *)inode->u.generic_ip)->children; 606 f = ((struct jffs_file *)inode->i_private)->children;
608 607
609 j = 2; 608 j = 2;
610 while(f && (f->deleted || j++ < filp->f_pos )) { 609 while(f && (f->deleted || j++ < filp->f_pos )) {
@@ -652,7 +651,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
652 lock_kernel(); 651 lock_kernel();
653 652
654 D3({ 653 D3({
655 char *s = (char *)kmalloc(len + 1, GFP_KERNEL); 654 char *s = kmalloc(len + 1, GFP_KERNEL);
656 memcpy(s, name, len); 655 memcpy(s, name, len);
657 s[len] = '\0'; 656 s[len] = '\0';
658 printk("jffs_lookup(): dir: 0x%p, name: \"%s\"\n", dir, s); 657 printk("jffs_lookup(): dir: 0x%p, name: \"%s\"\n", dir, s);
@@ -668,7 +667,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
668 } 667 }
669 668
670 r = -EACCES; 669 r = -EACCES;
671 if (!(d = (struct jffs_file *)dir->u.generic_ip)) { 670 if (!(d = (struct jffs_file *)dir->i_private)) {
672 D(printk("jffs_lookup(): No such inode! (%lu)\n", 671 D(printk("jffs_lookup(): No such inode! (%lu)\n",
673 dir->i_ino)); 672 dir->i_ino));
674 goto jffs_lookup_end; 673 goto jffs_lookup_end;
@@ -739,7 +738,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
739 unsigned long read_len; 738 unsigned long read_len;
740 int result; 739 int result;
741 struct inode *inode = (struct inode*)page->mapping->host; 740 struct inode *inode = (struct inode*)page->mapping->host;
742 struct jffs_file *f = (struct jffs_file *)inode->u.generic_ip; 741 struct jffs_file *f = (struct jffs_file *)inode->i_private;
743 struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info; 742 struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
744 int r; 743 int r;
745 loff_t offset; 744 loff_t offset;
@@ -828,7 +827,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
828 }); 827 });
829 828
830 lock_kernel(); 829 lock_kernel();
831 dir_f = (struct jffs_file *)dir->u.generic_ip; 830 dir_f = dir->i_private;
832 831
833 ASSERT(if (!dir_f) { 832 ASSERT(if (!dir_f) {
834 printk(KERN_ERR "jffs_mkdir(): No reference to a " 833 printk(KERN_ERR "jffs_mkdir(): No reference to a "
@@ -972,7 +971,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type)
972 kfree(_name); 971 kfree(_name);
973 }); 972 });
974 973
975 dir_f = (struct jffs_file *) dir->u.generic_ip; 974 dir_f = dir->i_private;
976 c = dir_f->c; 975 c = dir_f->c;
977 976
978 result = -ENOENT; 977 result = -ENOENT;
@@ -1082,7 +1081,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1082 if (!old_valid_dev(rdev)) 1081 if (!old_valid_dev(rdev))
1083 return -EINVAL; 1082 return -EINVAL;
1084 lock_kernel(); 1083 lock_kernel();
1085 dir_f = (struct jffs_file *)dir->u.generic_ip; 1084 dir_f = dir->i_private;
1086 c = dir_f->c; 1085 c = dir_f->c;
1087 1086
1088 D3(printk (KERN_NOTICE "mknod(): down biglock\n")); 1087 D3(printk (KERN_NOTICE "mknod(): down biglock\n"));
@@ -1173,8 +1172,8 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1173 lock_kernel(); 1172 lock_kernel();
1174 D1({ 1173 D1({
1175 int len = dentry->d_name.len; 1174 int len = dentry->d_name.len;
1176 char *_name = (char *)kmalloc(len + 1, GFP_KERNEL); 1175 char *_name = kmalloc(len + 1, GFP_KERNEL);
1177 char *_symname = (char *)kmalloc(symname_len + 1, GFP_KERNEL); 1176 char *_symname = kmalloc(symname_len + 1, GFP_KERNEL);
1178 memcpy(_name, dentry->d_name.name, len); 1177 memcpy(_name, dentry->d_name.name, len);
1179 _name[len] = '\0'; 1178 _name[len] = '\0';
1180 memcpy(_symname, symname, symname_len); 1179 memcpy(_symname, symname, symname_len);
@@ -1186,7 +1185,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1186 kfree(_symname); 1185 kfree(_symname);
1187 }); 1186 });
1188 1187
1189 dir_f = (struct jffs_file *)dir->u.generic_ip; 1188 dir_f = dir->i_private;
1190 ASSERT(if (!dir_f) { 1189 ASSERT(if (!dir_f) {
1191 printk(KERN_ERR "jffs_symlink(): No reference to a " 1190 printk(KERN_ERR "jffs_symlink(): No reference to a "
1192 "jffs_file struct in inode.\n"); 1191 "jffs_file struct in inode.\n");
@@ -1282,14 +1281,14 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode,
1282 lock_kernel(); 1281 lock_kernel();
1283 D1({ 1282 D1({
1284 int len = dentry->d_name.len; 1283 int len = dentry->d_name.len;
1285 char *s = (char *)kmalloc(len + 1, GFP_KERNEL); 1284 char *s = kmalloc(len + 1, GFP_KERNEL);
1286 memcpy(s, dentry->d_name.name, len); 1285 memcpy(s, dentry->d_name.name, len);
1287 s[len] = '\0'; 1286 s[len] = '\0';
1288 printk("jffs_create(): dir: 0x%p, name: \"%s\"\n", dir, s); 1287 printk("jffs_create(): dir: 0x%p, name: \"%s\"\n", dir, s);
1289 kfree(s); 1288 kfree(s);
1290 }); 1289 });
1291 1290
1292 dir_f = (struct jffs_file *)dir->u.generic_ip; 1291 dir_f = dir->i_private;
1293 ASSERT(if (!dir_f) { 1292 ASSERT(if (!dir_f) {
1294 printk(KERN_ERR "jffs_create(): No reference to a " 1293 printk(KERN_ERR "jffs_create(): No reference to a "
1295 "jffs_file struct in inode.\n"); 1294 "jffs_file struct in inode.\n");
@@ -1403,9 +1402,9 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
1403 goto out_isem; 1402 goto out_isem;
1404 } 1403 }
1405 1404
1406 if (!(f = (struct jffs_file *)inode->u.generic_ip)) { 1405 if (!(f = inode->i_private)) {
1407 D(printk("jffs_file_write(): inode->u.generic_ip = 0x%p\n", 1406 D(printk("jffs_file_write(): inode->i_private = 0x%p\n",
1408 inode->u.generic_ip)); 1407 inode->i_private));
1409 goto out_isem; 1408 goto out_isem;
1410 } 1409 }
1411 1410
@@ -1693,7 +1692,7 @@ jffs_read_inode(struct inode *inode)
1693 mutex_unlock(&c->fmc->biglock); 1692 mutex_unlock(&c->fmc->biglock);
1694 return; 1693 return;
1695 } 1694 }
1696 inode->u.generic_ip = (void *)f; 1695 inode->i_private = f;
1697 inode->i_mode = f->mode; 1696 inode->i_mode = f->mode;
1698 inode->i_nlink = f->nlink; 1697 inode->i_nlink = f->nlink;
1699 inode->i_uid = f->uid; 1698 inode->i_uid = f->uid;
@@ -1706,7 +1705,6 @@ jffs_read_inode(struct inode *inode)
1706 inode->i_mtime.tv_nsec = 1705 inode->i_mtime.tv_nsec =
1707 inode->i_ctime.tv_nsec = 0; 1706 inode->i_ctime.tv_nsec = 0;
1708 1707
1709 inode->i_blksize = PAGE_SIZE;
1710 inode->i_blocks = (inode->i_size + 511) >> 9; 1708 inode->i_blocks = (inode->i_size + 511) >> 9;
1711 if (S_ISREG(inode->i_mode)) { 1709 if (S_ISREG(inode->i_mode)) {
1712 inode->i_op = &jffs_file_inode_operations; 1710 inode->i_op = &jffs_file_inode_operations;
@@ -1748,7 +1746,7 @@ jffs_delete_inode(struct inode *inode)
1748 lock_kernel(); 1746 lock_kernel();
1749 inode->i_size = 0; 1747 inode->i_size = 0;
1750 inode->i_blocks = 0; 1748 inode->i_blocks = 0;
1751 inode->u.generic_ip = NULL; 1749 inode->i_private = NULL;
1752 clear_inode(inode); 1750 clear_inode(inode);
1753 if (inode->i_nlink == 0) { 1751 if (inode->i_nlink == 0) {
1754 c = (struct jffs_control *) inode->i_sb->s_fs_info; 1752 c = (struct jffs_control *) inode->i_sb->s_fs_info;
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 9000f1effedf..4a543e114970 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -488,13 +488,11 @@ jffs_create_file(struct jffs_control *c,
488{ 488{
489 struct jffs_file *f; 489 struct jffs_file *f;
490 490
491 if (!(f = (struct jffs_file *)kmalloc(sizeof(struct jffs_file), 491 if (!(f = kzalloc(sizeof(*f), GFP_KERNEL))) {
492 GFP_KERNEL))) {
493 D(printk("jffs_create_file(): Failed!\n")); 492 D(printk("jffs_create_file(): Failed!\n"));
494 return NULL; 493 return NULL;
495 } 494 }
496 no_jffs_file++; 495 no_jffs_file++;
497 memset(f, 0, sizeof(struct jffs_file));
498 f->ino = raw_inode->ino; 496 f->ino = raw_inode->ino;
499 f->pino = raw_inode->pino; 497 f->pino = raw_inode->pino;
500 f->nlink = raw_inode->nlink; 498 f->nlink = raw_inode->nlink;
@@ -516,7 +514,7 @@ jffs_create_control(struct super_block *sb)
516 514
517 D2(printk("jffs_create_control()\n")); 515 D2(printk("jffs_create_control()\n"));
518 516
519 if (!(c = (struct jffs_control *)kmalloc(s, GFP_KERNEL))) { 517 if (!(c = kmalloc(s, GFP_KERNEL))) {
520 goto fail_control; 518 goto fail_control;
521 } 519 }
522 DJM(no_jffs_control++); 520 DJM(no_jffs_control++);
@@ -524,7 +522,7 @@ jffs_create_control(struct super_block *sb)
524 c->gc_task = NULL; 522 c->gc_task = NULL;
525 c->hash_len = JFFS_HASH_SIZE; 523 c->hash_len = JFFS_HASH_SIZE;
526 s = sizeof(struct list_head) * c->hash_len; 524 s = sizeof(struct list_head) * c->hash_len;
527 if (!(c->hash = (struct list_head *)kmalloc(s, GFP_KERNEL))) { 525 if (!(c->hash = kmalloc(s, GFP_KERNEL))) {
528 goto fail_hash; 526 goto fail_hash;
529 } 527 }
530 DJM(no_hash++); 528 DJM(no_hash++);
@@ -593,8 +591,7 @@ jffs_add_virtual_root(struct jffs_control *c)
593 D2(printk("jffs_add_virtual_root(): " 591 D2(printk("jffs_add_virtual_root(): "
594 "Creating a virtual root directory.\n")); 592 "Creating a virtual root directory.\n"));
595 593
596 if (!(root = (struct jffs_file *)kmalloc(sizeof(struct jffs_file), 594 if (!(root = kmalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
597 GFP_KERNEL))) {
598 return -ENOMEM; 595 return -ENOMEM;
599 } 596 }
600 no_jffs_file++; 597 no_jffs_file++;
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 7d8ca1aeace2..29b68d939bd9 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -94,8 +94,7 @@ jffs_build_begin(struct jffs_control *c, int unit)
94 struct mtd_info *mtd; 94 struct mtd_info *mtd;
95 95
96 D3(printk("jffs_build_begin()\n")); 96 D3(printk("jffs_build_begin()\n"));
97 fmc = (struct jffs_fmcontrol *)kmalloc(sizeof(struct jffs_fmcontrol), 97 fmc = kmalloc(sizeof(*fmc), GFP_KERNEL);
98 GFP_KERNEL);
99 if (!fmc) { 98 if (!fmc) {
100 D(printk("jffs_build_begin(): Allocation of " 99 D(printk("jffs_build_begin(): Allocation of "
101 "struct jffs_fmcontrol failed!\n")); 100 "struct jffs_fmcontrol failed!\n"));
@@ -486,8 +485,7 @@ jffs_add_node(struct jffs_node *node)
486 485
487 D3(printk("jffs_add_node(): ino = %u\n", node->ino)); 486 D3(printk("jffs_add_node(): ino = %u\n", node->ino));
488 487
489 ref = (struct jffs_node_ref *)kmalloc(sizeof(struct jffs_node_ref), 488 ref = kmalloc(sizeof(*ref), GFP_KERNEL);
490 GFP_KERNEL);
491 if (!ref) 489 if (!ref)
492 return -ENOMEM; 490 return -ENOMEM;
493 491
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 4780f82825d6..72d9909d95ff 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -263,7 +263,6 @@ void jffs2_read_inode (struct inode *inode)
263 263
264 inode->i_nlink = f->inocache->nlink; 264 inode->i_nlink = f->inocache->nlink;
265 265
266 inode->i_blksize = PAGE_SIZE;
267 inode->i_blocks = (inode->i_size + 511) >> 9; 266 inode->i_blocks = (inode->i_size + 511) >> 9;
268 267
269 switch (inode->i_mode & S_IFMT) { 268 switch (inode->i_mode & S_IFMT) {
@@ -449,7 +448,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
449 inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 448 inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
450 ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); 449 ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
451 450
452 inode->i_blksize = PAGE_SIZE;
453 inode->i_blocks = 0; 451 inode->i_blocks = 0;
454 inode->i_size = 0; 452 inode->i_size = 0;
455 453
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 68e3953419b4..6de374513c01 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -119,10 +119,9 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
119 struct jffs2_sb_info *c; 119 struct jffs2_sb_info *c;
120 int ret; 120 int ret;
121 121
122 c = kmalloc(sizeof(*c), GFP_KERNEL); 122 c = kzalloc(sizeof(*c), GFP_KERNEL);
123 if (!c) 123 if (!c)
124 return -ENOMEM; 124 return -ENOMEM;
125 memset(c, 0, sizeof(*c));
126 c->mtd = mtd; 125 c->mtd = mtd;
127 126
128 sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); 127 sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 4d52593a5fc6..4c74f0944f7e 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -468,7 +468,7 @@ int extRecord(struct inode *ip, xad_t * xp)
468int extFill(struct inode *ip, xad_t * xp) 468int extFill(struct inode *ip, xad_t * xp)
469{ 469{
470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; 470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
471 s64 blkno = offsetXAD(xp) >> ip->i_blksize; 471 s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
472 472
473// assert(ISSPARSE(ip)); 473// assert(ISSPARSE(ip));
474 474
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ccbe60aff83d..369d7f39c040 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3115,7 +3115,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3115 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3115 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3116 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3116 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3117 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3117 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3118 ip->i_blksize = ip->i_sb->s_blocksize;
3119 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3118 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3120 ip->i_generation = le32_to_cpu(dip->di_gen); 3119 ip->i_generation = le32_to_cpu(dip->di_gen);
3121 3120
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 495df402916d..bffaca9ae3a2 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -115,7 +115,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
115 } 115 }
116 jfs_inode->mode2 |= mode; 116 jfs_inode->mode2 |= mode;
117 117
118 inode->i_blksize = sb->s_blocksize;
119 inode->i_blocks = 0; 118 inode->i_blocks = 0;
120 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 119 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
121 jfs_inode->otime = inode->i_ctime.tv_sec; 120 jfs_inode->otime = inode->i_ctime.tv_sec;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index e1e0a6e6ebdf..f5afc129d6b1 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -257,7 +257,7 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
257 int rc = 0; 257 int rc = 0;
258 int xflag; 258 int xflag;
259 s64 xaddr; 259 s64 xaddr;
260 sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> 260 sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
261 inode->i_blkbits; 261 inode->i_blkbits;
262 262
263 if (lblock >= file_blocks) 263 if (lblock >= file_blocks)
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index efbb586bed4b..3856efc399c1 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -282,7 +282,7 @@ int txInit(void)
282 TxLockVHWM = (nTxLock * 8) / 10; 282 TxLockVHWM = (nTxLock * 8) / 10;
283 283
284 size = sizeof(struct tblock) * nTxBlock; 284 size = sizeof(struct tblock) * nTxBlock;
285 TxBlock = (struct tblock *) vmalloc(size); 285 TxBlock = vmalloc(size);
286 if (TxBlock == NULL) 286 if (TxBlock == NULL)
287 return -ENOMEM; 287 return -ENOMEM;
288 288
@@ -307,7 +307,7 @@ int txInit(void)
307 * tlock id = 0 is reserved. 307 * tlock id = 0 is reserved.
308 */ 308 */
309 size = sizeof(struct tlock) * nTxLock; 309 size = sizeof(struct tlock) * nTxLock;
310 TxLock = (struct tlock *) vmalloc(size); 310 TxLock = vmalloc(size);
311 if (TxLock == NULL) { 311 if (TxLock == NULL) {
312 vfree(TxBlock); 312 vfree(TxBlock);
313 return -ENOMEM; 313 return -ENOMEM;
diff --git a/fs/libfs.c b/fs/libfs.c
index ac02ea602c3d..3793aaa14577 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -317,17 +317,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
317 317
318int simple_readpage(struct file *file, struct page *page) 318int simple_readpage(struct file *file, struct page *page)
319{ 319{
320 void *kaddr; 320 clear_highpage(page);
321
322 if (PageUptodate(page))
323 goto out;
324
325 kaddr = kmap_atomic(page, KM_USER0);
326 memset(kaddr, 0, PAGE_CACHE_SIZE);
327 kunmap_atomic(kaddr, KM_USER0);
328 flush_dcache_page(page); 321 flush_dcache_page(page);
329 SetPageUptodate(page); 322 SetPageUptodate(page);
330out:
331 unlock_page(page); 323 unlock_page(page);
332 return 0; 324 return 0;
333} 325}
@@ -383,7 +375,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
383 return -ENOMEM; 375 return -ENOMEM;
384 inode->i_mode = S_IFDIR | 0755; 376 inode->i_mode = S_IFDIR | 0755;
385 inode->i_uid = inode->i_gid = 0; 377 inode->i_uid = inode->i_gid = 0;
386 inode->i_blksize = PAGE_CACHE_SIZE;
387 inode->i_blocks = 0; 378 inode->i_blocks = 0;
388 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 379 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
389 inode->i_op = &simple_dir_inode_operations; 380 inode->i_op = &simple_dir_inode_operations;
@@ -405,7 +396,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
405 goto out; 396 goto out;
406 inode->i_mode = S_IFREG | files->mode; 397 inode->i_mode = S_IFREG | files->mode;
407 inode->i_uid = inode->i_gid = 0; 398 inode->i_uid = inode->i_gid = 0;
408 inode->i_blksize = PAGE_CACHE_SIZE;
409 inode->i_blocks = 0; 399 inode->i_blocks = 0;
410 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 400 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
411 inode->i_fop = files->ops; 401 inode->i_fop = files->ops;
@@ -547,7 +537,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
547 537
548 attr->get = get; 538 attr->get = get;
549 attr->set = set; 539 attr->set = set;
550 attr->data = inode->u.generic_ip; 540 attr->data = inode->i_private;
551 attr->fmt = fmt; 541 attr->fmt = fmt;
552 mutex_init(&attr->mutex); 542 mutex_init(&attr->mutex);
553 543
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 52774feab93f..f95cc3f3c42d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -160,7 +160,7 @@ static void nlmclnt_prepare_reclaim(struct nlm_host *host)
160 */ 160 */
161 list_splice_init(&host->h_granted, &host->h_reclaim); 161 list_splice_init(&host->h_granted, &host->h_reclaim);
162 162
163 dprintk("NLM: reclaiming locks for host %s", host->h_name); 163 dprintk("NLM: reclaiming locks for host %s\n", host->h_name);
164} 164}
165 165
166static void nlmclnt_finish_reclaim(struct nlm_host *host) 166static void nlmclnt_finish_reclaim(struct nlm_host *host)
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df14c22..271e2165fff6 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -100,7 +100,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_
100 res = __nlm_find_lockowner(host, owner); 100 res = __nlm_find_lockowner(host, owner);
101 if (res == NULL) { 101 if (res == NULL) {
102 spin_unlock(&host->h_lock); 102 spin_unlock(&host->h_lock);
103 new = (struct nlm_lockowner *)kmalloc(sizeof(*new), GFP_KERNEL); 103 new = kmalloc(sizeof(*new), GFP_KERNEL);
104 spin_lock(&host->h_lock); 104 spin_lock(&host->h_lock);
105 res = __nlm_find_lockowner(host, owner); 105 res = __nlm_find_lockowner(host, owner);
106 if (res == NULL && new != NULL) { 106 if (res == NULL && new != NULL) {
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
151int 151int
152nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) 152nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
153{ 153{
154 struct rpc_clnt *client = NFS_CLIENT(inode);
155 struct sockaddr_in addr;
154 struct nlm_host *host; 156 struct nlm_host *host;
155 struct nlm_rqst *call; 157 struct nlm_rqst *call;
156 sigset_t oldset; 158 sigset_t oldset;
157 unsigned long flags; 159 unsigned long flags;
158 int status, proto, vers; 160 int status, vers;
159 161
160 vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; 162 vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
161 if (NFS_PROTO(inode)->version > 3) { 163 if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
163 return -ENOLCK; 165 return -ENOLCK;
164 } 166 }
165 167
166 /* Retrieve transport protocol from NFS client */ 168 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
167 proto = NFS_CLIENT(inode)->cl_xprt->prot; 169 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
168
169 host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
170 if (host == NULL) 170 if (host == NULL)
171 return -ENOLCK; 171 return -ENOLCK;
172 172
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a1aec0..a0d0b58ce7a4 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
26#define NLM_HOST_REBIND (60 * HZ) 26#define NLM_HOST_REBIND (60 * HZ)
27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) 27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) 28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
29#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr)
30 29
31static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; 30static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH];
32static unsigned long next_gc; 31static unsigned long next_gc;
@@ -100,9 +99,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
100 /* Ooops, no host found, create it */ 99 /* Ooops, no host found, create it */
101 dprintk("lockd: creating host entry\n"); 100 dprintk("lockd: creating host entry\n");
102 101
103 if (!(host = (struct nlm_host *) kmalloc(sizeof(*host), GFP_KERNEL))) 102 host = kzalloc(sizeof(*host), GFP_KERNEL);
103 if (!host)
104 goto nohost; 104 goto nohost;
105 memset(host, 0, sizeof(*host));
106 105
107 addr = sin->sin_addr.s_addr; 106 addr = sin->sin_addr.s_addr;
108 sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr)); 107 sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr));
@@ -167,7 +166,6 @@ struct rpc_clnt *
167nlm_bind_host(struct nlm_host *host) 166nlm_bind_host(struct nlm_host *host)
168{ 167{
169 struct rpc_clnt *clnt; 168 struct rpc_clnt *clnt;
170 struct rpc_xprt *xprt;
171 169
172 dprintk("lockd: nlm_bind_host(%08x)\n", 170 dprintk("lockd: nlm_bind_host(%08x)\n",
173 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 171 (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -179,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
179 * RPC rebind is required 177 * RPC rebind is required
180 */ 178 */
181 if ((clnt = host->h_rpcclnt) != NULL) { 179 if ((clnt = host->h_rpcclnt) != NULL) {
182 xprt = clnt->cl_xprt;
183 if (time_after_eq(jiffies, host->h_nextrebind)) { 180 if (time_after_eq(jiffies, host->h_nextrebind)) {
184 rpc_force_rebind(clnt); 181 rpc_force_rebind(clnt);
185 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 182 host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -187,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
187 host->h_nextrebind - jiffies); 184 host->h_nextrebind - jiffies);
188 } 185 }
189 } else { 186 } else {
190 xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); 187 unsigned long increment = nlmsvc_timeout * HZ;
191 if (IS_ERR(xprt)) 188 struct rpc_timeout timeparms = {
192 goto forgetit; 189 .to_initval = increment,
193 190 .to_increment = increment,
194 xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); 191 .to_maxval = increment * 6UL,
195 xprt->resvport = 1; /* NLM requires a reserved port */ 192 .to_retries = 5U,
196 193 };
197 /* Existing NLM servers accept AUTH_UNIX only */ 194 struct rpc_create_args args = {
198 clnt = rpc_new_client(xprt, host->h_name, &nlm_program, 195 .protocol = host->h_proto,
199 host->h_version, RPC_AUTH_UNIX); 196 .address = (struct sockaddr *)&host->h_addr,
200 if (IS_ERR(clnt)) 197 .addrsize = sizeof(host->h_addr),
201 goto forgetit; 198 .timeout = &timeparms,
202 clnt->cl_autobind = 1; /* turn on pmap queries */ 199 .servername = host->h_name,
203 clnt->cl_softrtry = 1; /* All queries are soft */ 200 .program = &nlm_program,
204 201 .version = host->h_version,
205 host->h_rpcclnt = clnt; 202 .authflavor = RPC_AUTH_UNIX,
203 .flags = (RPC_CLNT_CREATE_HARDRTRY |
204 RPC_CLNT_CREATE_AUTOBIND),
205 };
206
207 clnt = rpc_create(&args);
208 if (!IS_ERR(clnt))
209 host->h_rpcclnt = clnt;
210 else {
211 printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
212 clnt = NULL;
213 }
206 } 214 }
207 215
208 mutex_unlock(&host->h_mutex); 216 mutex_unlock(&host->h_mutex);
209 return clnt; 217 return clnt;
210
211forgetit:
212 printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
213 mutex_unlock(&host->h_mutex);
214 return NULL;
215} 218}
216 219
217/* 220/*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f46b3e..5954dcb497e4 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
109static struct rpc_clnt * 109static struct rpc_clnt *
110nsm_create(void) 110nsm_create(void)
111{ 111{
112 struct rpc_xprt *xprt; 112 struct sockaddr_in sin = {
113 struct rpc_clnt *clnt; 113 .sin_family = AF_INET,
114 struct sockaddr_in sin; 114 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
115 115 .sin_port = 0,
116 sin.sin_family = AF_INET; 116 };
117 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 117 struct rpc_create_args args = {
118 sin.sin_port = 0; 118 .protocol = IPPROTO_UDP,
119 119 .address = (struct sockaddr *)&sin,
120 xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); 120 .addrsize = sizeof(sin),
121 if (IS_ERR(xprt)) 121 .servername = "localhost",
122 return (struct rpc_clnt *)xprt; 122 .program = &nsm_program,
123 xprt->resvport = 1; /* NSM requires a reserved port */ 123 .version = SM_VERSION,
124 124 .authflavor = RPC_AUTH_NULL,
125 clnt = rpc_create_client(xprt, "localhost", 125 .flags = (RPC_CLNT_CREATE_ONESHOT),
126 &nsm_program, SM_VERSION, 126 };
127 RPC_AUTH_NULL); 127
128 if (IS_ERR(clnt)) 128 return rpc_create(&args);
129 goto out_err;
130 clnt->cl_softrtry = 1;
131 clnt->cl_oneshot = 1;
132 return clnt;
133
134out_err:
135 return clnt;
136} 129}
137 130
138/* 131/*
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 01b4db9e5466..a92dd98f8401 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -100,11 +100,10 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
100 nlm_debug_print_fh("creating file for", f); 100 nlm_debug_print_fh("creating file for", f);
101 101
102 nfserr = nlm_lck_denied_nolocks; 102 nfserr = nlm_lck_denied_nolocks;
103 file = (struct nlm_file *) kmalloc(sizeof(*file), GFP_KERNEL); 103 file = kzalloc(sizeof(*file), GFP_KERNEL);
104 if (!file) 104 if (!file)
105 goto out_unlock; 105 goto out_unlock;
106 106
107 memset(file, 0, sizeof(*file));
108 memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); 107 memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
109 file->f_hash = hash; 108 file->f_hash = hash;
110 init_MUTEX(&file->f_sema); 109 init_MUTEX(&file->f_sema);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e4fde1ab22cd..0ff71256e65b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -160,6 +160,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
160 160
161static void 161static void
162__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) 162__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
163 __releases(mb_cache_spinlock)
163{ 164{
164 /* Wake up all processes queuing for this cache entry. */ 165 /* Wake up all processes queuing for this cache entry. */
165 if (ce->e_queued) 166 if (ce->e_queued)
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4a6abc49418e..df6b1075b549 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -254,7 +254,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
254 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; 254 inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
255 inode->i_ino = j; 255 inode->i_ino = j;
256 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 256 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
257 inode->i_blocks = inode->i_blksize = 0; 257 inode->i_blocks = 0;
258 memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u)); 258 memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
259 insert_inode_hash(inode); 259 insert_inode_hash(inode);
260 mark_inode_dirty(inode); 260 mark_inode_dirty(inode);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 330ff9fc7cf0..c11a4b9fb863 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -90,8 +90,7 @@ static int init_inodecache(void)
90 90
91static void destroy_inodecache(void) 91static void destroy_inodecache(void)
92{ 92{
93 if (kmem_cache_destroy(minix_inode_cachep)) 93 kmem_cache_destroy(minix_inode_cachep);
94 printk(KERN_INFO "minix_inode_cache: not all structures were freed\n");
95} 94}
96 95
97static struct super_operations minix_sops = { 96static struct super_operations minix_sops = {
@@ -145,11 +144,10 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
145 struct inode *root_inode; 144 struct inode *root_inode;
146 struct minix_sb_info *sbi; 145 struct minix_sb_info *sbi;
147 146
148 sbi = kmalloc(sizeof(struct minix_sb_info), GFP_KERNEL); 147 sbi = kzalloc(sizeof(struct minix_sb_info), GFP_KERNEL);
149 if (!sbi) 148 if (!sbi)
150 return -ENOMEM; 149 return -ENOMEM;
151 s->s_fs_info = sbi; 150 s->s_fs_info = sbi;
152 memset(sbi, 0, sizeof(struct minix_sb_info));
153 151
154 /* N.B. These should be compile-time tests. 152 /* N.B. These should be compile-time tests.
155 Unfortunately that is impossible. */ 153 Unfortunately that is impossible. */
@@ -207,10 +205,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
207 if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) 205 if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
208 goto out_illegal_sb; 206 goto out_illegal_sb;
209 i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); 207 i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
210 map = kmalloc(i, GFP_KERNEL); 208 map = kzalloc(i, GFP_KERNEL);
211 if (!map) 209 if (!map)
212 goto out_no_map; 210 goto out_no_map;
213 memset(map, 0, i);
214 sbi->s_imap = &map[0]; 211 sbi->s_imap = &map[0];
215 sbi->s_zmap = &map[sbi->s_imap_blocks]; 212 sbi->s_zmap = &map[sbi->s_imap_blocks];
216 213
@@ -399,7 +396,7 @@ static void V1_minix_read_inode(struct inode * inode)
399 inode->i_mtime.tv_nsec = 0; 396 inode->i_mtime.tv_nsec = 0;
400 inode->i_atime.tv_nsec = 0; 397 inode->i_atime.tv_nsec = 0;
401 inode->i_ctime.tv_nsec = 0; 398 inode->i_ctime.tv_nsec = 0;
402 inode->i_blocks = inode->i_blksize = 0; 399 inode->i_blocks = 0;
403 for (i = 0; i < 9; i++) 400 for (i = 0; i < 9; i++)
404 minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; 401 minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
405 minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); 402 minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
@@ -432,7 +429,7 @@ static void V2_minix_read_inode(struct inode * inode)
432 inode->i_mtime.tv_nsec = 0; 429 inode->i_mtime.tv_nsec = 0;
433 inode->i_atime.tv_nsec = 0; 430 inode->i_atime.tv_nsec = 0;
434 inode->i_ctime.tv_nsec = 0; 431 inode->i_ctime.tv_nsec = 0;
435 inode->i_blocks = inode->i_blksize = 0; 432 inode->i_blocks = 0;
436 for (i = 0; i < 10; i++) 433 for (i = 0; i < 10; i++)
437 minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; 434 minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
438 minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); 435 minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 9e44158a7540..d220165d4918 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
280 struct nameidata *nd) 280 struct nameidata *nd)
281{ 281{
282 struct super_block *sb = dir->i_sb; 282 struct super_block *sb = dir->i_sb;
283 struct inode *inode; 283 struct inode *inode = NULL;
284 struct fat_slot_info sinfo; 284 struct fat_slot_info sinfo;
285 struct timespec ts; 285 struct timespec ts;
286 unsigned char msdos_name[MSDOS_NAME]; 286 unsigned char msdos_name[MSDOS_NAME];
@@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
316 d_instantiate(dentry, inode); 316 d_instantiate(dentry, inode);
317out: 317out:
318 unlock_kernel(); 318 unlock_kernel();
319 if (!err)
320 err = fat_flush_inodes(sb, dir, inode);
319 return err; 321 return err;
320} 322}
321 323
@@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
348 fat_detach(inode); 350 fat_detach(inode);
349out: 351out:
350 unlock_kernel(); 352 unlock_kernel();
353 if (!err)
354 err = fat_flush_inodes(inode->i_sb, dir, inode);
351 355
352 return err; 356 return err;
353} 357}
@@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
401 d_instantiate(dentry, inode); 405 d_instantiate(dentry, inode);
402 406
403 unlock_kernel(); 407 unlock_kernel();
408 fat_flush_inodes(sb, dir, inode);
404 return 0; 409 return 0;
405 410
406out_free: 411out_free:
@@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry)
430 fat_detach(inode); 435 fat_detach(inode);
431out: 436out:
432 unlock_kernel(); 437 unlock_kernel();
438 if (!err)
439 err = fat_flush_inodes(inode->i_sb, dir, inode);
433 440
434 return err; 441 return err;
435} 442}
@@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
635 new_dir, new_msdos_name, new_dentry, is_hid); 642 new_dir, new_msdos_name, new_dentry, is_hid);
636out: 643out:
637 unlock_kernel(); 644 unlock_kernel();
645 if (!err)
646 err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir);
638 return err; 647 return err;
639} 648}
640 649
diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc6fab0..2892e68d3a86 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -372,6 +372,30 @@ void release_open_intent(struct nameidata *nd)
372 fput(nd->intent.open.file); 372 fput(nd->intent.open.file);
373} 373}
374 374
375static inline struct dentry *
376do_revalidate(struct dentry *dentry, struct nameidata *nd)
377{
378 int status = dentry->d_op->d_revalidate(dentry, nd);
379 if (unlikely(status <= 0)) {
380 /*
381 * The dentry failed validation.
382 * If d_revalidate returned 0 attempt to invalidate
383 * the dentry otherwise d_revalidate is asking us
384 * to return a fail status.
385 */
386 if (!status) {
387 if (!d_invalidate(dentry)) {
388 dput(dentry);
389 dentry = NULL;
390 }
391 } else {
392 dput(dentry);
393 dentry = ERR_PTR(status);
394 }
395 }
396 return dentry;
397}
398
375/* 399/*
376 * Internal lookup() using the new generic dcache. 400 * Internal lookup() using the new generic dcache.
377 * SMP-safe 401 * SMP-safe
@@ -386,12 +410,9 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
386 if (!dentry) 410 if (!dentry)
387 dentry = d_lookup(parent, name); 411 dentry = d_lookup(parent, name);
388 412
389 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 413 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
390 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { 414 dentry = do_revalidate(dentry, nd);
391 dput(dentry); 415
392 dentry = NULL;
393 }
394 }
395 return dentry; 416 return dentry;
396} 417}
397 418
@@ -484,10 +505,9 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
484 */ 505 */
485 mutex_unlock(&dir->i_mutex); 506 mutex_unlock(&dir->i_mutex);
486 if (result->d_op && result->d_op->d_revalidate) { 507 if (result->d_op && result->d_op->d_revalidate) {
487 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { 508 result = do_revalidate(result, nd);
488 dput(result); 509 if (!result)
489 result = ERR_PTR(-ENOENT); 510 result = ERR_PTR(-ENOENT);
490 }
491 } 511 }
492 return result; 512 return result;
493} 513}
@@ -498,18 +518,20 @@ static int __emul_lookup_dentry(const char *, struct nameidata *);
498static __always_inline int 518static __always_inline int
499walk_init_root(const char *name, struct nameidata *nd) 519walk_init_root(const char *name, struct nameidata *nd)
500{ 520{
501 read_lock(&current->fs->lock); 521 struct fs_struct *fs = current->fs;
502 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 522
503 nd->mnt = mntget(current->fs->altrootmnt); 523 read_lock(&fs->lock);
504 nd->dentry = dget(current->fs->altroot); 524 if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
505 read_unlock(&current->fs->lock); 525 nd->mnt = mntget(fs->altrootmnt);
526 nd->dentry = dget(fs->altroot);
527 read_unlock(&fs->lock);
506 if (__emul_lookup_dentry(name,nd)) 528 if (__emul_lookup_dentry(name,nd))
507 return 0; 529 return 0;
508 read_lock(&current->fs->lock); 530 read_lock(&fs->lock);
509 } 531 }
510 nd->mnt = mntget(current->fs->rootmnt); 532 nd->mnt = mntget(fs->rootmnt);
511 nd->dentry = dget(current->fs->root); 533 nd->dentry = dget(fs->root);
512 read_unlock(&current->fs->lock); 534 read_unlock(&fs->lock);
513 return 1; 535 return 1;
514} 536}
515 537
@@ -704,17 +726,19 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
704 726
705static __always_inline void follow_dotdot(struct nameidata *nd) 727static __always_inline void follow_dotdot(struct nameidata *nd)
706{ 728{
729 struct fs_struct *fs = current->fs;
730
707 while(1) { 731 while(1) {
708 struct vfsmount *parent; 732 struct vfsmount *parent;
709 struct dentry *old = nd->dentry; 733 struct dentry *old = nd->dentry;
710 734
711 read_lock(&current->fs->lock); 735 read_lock(&fs->lock);
712 if (nd->dentry == current->fs->root && 736 if (nd->dentry == fs->root &&
713 nd->mnt == current->fs->rootmnt) { 737 nd->mnt == fs->rootmnt) {
714 read_unlock(&current->fs->lock); 738 read_unlock(&fs->lock);
715 break; 739 break;
716 } 740 }
717 read_unlock(&current->fs->lock); 741 read_unlock(&fs->lock);
718 spin_lock(&dcache_lock); 742 spin_lock(&dcache_lock);
719 if (nd->dentry != nd->mnt->mnt_root) { 743 if (nd->dentry != nd->mnt->mnt_root) {
720 nd->dentry = dget(nd->dentry->d_parent); 744 nd->dentry = dget(nd->dentry->d_parent);
@@ -767,12 +791,12 @@ need_lookup:
767 goto done; 791 goto done;
768 792
769need_revalidate: 793need_revalidate:
770 if (dentry->d_op->d_revalidate(dentry, nd)) 794 dentry = do_revalidate(dentry, nd);
771 goto done; 795 if (!dentry)
772 if (d_invalidate(dentry)) 796 goto need_lookup;
773 goto done; 797 if (IS_ERR(dentry))
774 dput(dentry); 798 goto fail;
775 goto need_lookup; 799 goto done;
776 800
777fail: 801fail:
778 return PTR_ERR(dentry); 802 return PTR_ERR(dentry);
@@ -1022,15 +1046,17 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
1022 struct vfsmount *old_mnt = nd->mnt; 1046 struct vfsmount *old_mnt = nd->mnt;
1023 struct qstr last = nd->last; 1047 struct qstr last = nd->last;
1024 int last_type = nd->last_type; 1048 int last_type = nd->last_type;
1049 struct fs_struct *fs = current->fs;
1050
1025 /* 1051 /*
1026 * NAME was not found in alternate root or it's a directory. Try to find 1052 * NAME was not found in alternate root or it's a directory.
1027 * it in the normal root: 1053 * Try to find it in the normal root:
1028 */ 1054 */
1029 nd->last_type = LAST_ROOT; 1055 nd->last_type = LAST_ROOT;
1030 read_lock(&current->fs->lock); 1056 read_lock(&fs->lock);
1031 nd->mnt = mntget(current->fs->rootmnt); 1057 nd->mnt = mntget(fs->rootmnt);
1032 nd->dentry = dget(current->fs->root); 1058 nd->dentry = dget(fs->root);
1033 read_unlock(&current->fs->lock); 1059 read_unlock(&fs->lock);
1034 if (path_walk(name, nd) == 0) { 1060 if (path_walk(name, nd) == 0) {
1035 if (nd->dentry->d_inode) { 1061 if (nd->dentry->d_inode) {
1036 dput(old_dentry); 1062 dput(old_dentry);
@@ -1054,6 +1080,7 @@ void set_fs_altroot(void)
1054 struct vfsmount *mnt = NULL, *oldmnt; 1080 struct vfsmount *mnt = NULL, *oldmnt;
1055 struct dentry *dentry = NULL, *olddentry; 1081 struct dentry *dentry = NULL, *olddentry;
1056 int err; 1082 int err;
1083 struct fs_struct *fs = current->fs;
1057 1084
1058 if (!emul) 1085 if (!emul)
1059 goto set_it; 1086 goto set_it;
@@ -1063,12 +1090,12 @@ void set_fs_altroot(void)
1063 dentry = nd.dentry; 1090 dentry = nd.dentry;
1064 } 1091 }
1065set_it: 1092set_it:
1066 write_lock(&current->fs->lock); 1093 write_lock(&fs->lock);
1067 oldmnt = current->fs->altrootmnt; 1094 oldmnt = fs->altrootmnt;
1068 olddentry = current->fs->altroot; 1095 olddentry = fs->altroot;
1069 current->fs->altrootmnt = mnt; 1096 fs->altrootmnt = mnt;
1070 current->fs->altroot = dentry; 1097 fs->altroot = dentry;
1071 write_unlock(&current->fs->lock); 1098 write_unlock(&fs->lock);
1072 if (olddentry) { 1099 if (olddentry) {
1073 dput(olddentry); 1100 dput(olddentry);
1074 mntput(oldmnt); 1101 mntput(oldmnt);
@@ -1082,29 +1109,30 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1082 int retval = 0; 1109 int retval = 0;
1083 int fput_needed; 1110 int fput_needed;
1084 struct file *file; 1111 struct file *file;
1112 struct fs_struct *fs = current->fs;
1085 1113
1086 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1114 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1087 nd->flags = flags; 1115 nd->flags = flags;
1088 nd->depth = 0; 1116 nd->depth = 0;
1089 1117
1090 if (*name=='/') { 1118 if (*name=='/') {
1091 read_lock(&current->fs->lock); 1119 read_lock(&fs->lock);
1092 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1120 if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
1093 nd->mnt = mntget(current->fs->altrootmnt); 1121 nd->mnt = mntget(fs->altrootmnt);
1094 nd->dentry = dget(current->fs->altroot); 1122 nd->dentry = dget(fs->altroot);
1095 read_unlock(&current->fs->lock); 1123 read_unlock(&fs->lock);
1096 if (__emul_lookup_dentry(name,nd)) 1124 if (__emul_lookup_dentry(name,nd))
1097 goto out; /* found in altroot */ 1125 goto out; /* found in altroot */
1098 read_lock(&current->fs->lock); 1126 read_lock(&fs->lock);
1099 } 1127 }
1100 nd->mnt = mntget(current->fs->rootmnt); 1128 nd->mnt = mntget(fs->rootmnt);
1101 nd->dentry = dget(current->fs->root); 1129 nd->dentry = dget(fs->root);
1102 read_unlock(&current->fs->lock); 1130 read_unlock(&fs->lock);
1103 } else if (dfd == AT_FDCWD) { 1131 } else if (dfd == AT_FDCWD) {
1104 read_lock(&current->fs->lock); 1132 read_lock(&fs->lock);
1105 nd->mnt = mntget(current->fs->pwdmnt); 1133 nd->mnt = mntget(fs->pwdmnt);
1106 nd->dentry = dget(current->fs->pwd); 1134 nd->dentry = dget(fs->pwd);
1107 read_unlock(&current->fs->lock); 1135 read_unlock(&fs->lock);
1108 } else { 1136 } else {
1109 struct dentry *dentry; 1137 struct dentry *dentry;
1110 1138
@@ -2370,7 +2398,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2370 dput(new_dentry); 2398 dput(new_dentry);
2371 } 2399 }
2372 if (!error) 2400 if (!error)
2373 d_move(old_dentry,new_dentry); 2401 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2402 d_move(old_dentry,new_dentry);
2374 return error; 2403 return error;
2375} 2404}
2376 2405
@@ -2393,8 +2422,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2393 else 2422 else
2394 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2423 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2395 if (!error) { 2424 if (!error) {
2396 /* The following d_move() should become unconditional */ 2425 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2397 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2398 d_move(old_dentry, new_dentry); 2426 d_move(old_dentry, new_dentry);
2399 } 2427 }
2400 if (target) 2428 if (target)
diff --git a/fs/namespace.c b/fs/namespace.c
index fa7ed6a9fc2d..6ede3a539ed8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,10 +13,12 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/kernel.h>
16#include <linux/quotaops.h> 17#include <linux/quotaops.h>
17#include <linux/acct.h> 18#include <linux/acct.h>
18#include <linux/capability.h> 19#include <linux/capability.h>
19#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/sysfs.h>
20#include <linux/seq_file.h> 22#include <linux/seq_file.h>
21#include <linux/namespace.h> 23#include <linux/namespace.h>
22#include <linux/namei.h> 24#include <linux/namei.h>
@@ -28,15 +30,6 @@
28 30
29extern int __init init_rootfs(void); 31extern int __init init_rootfs(void);
30 32
31#ifdef CONFIG_SYSFS
32extern int __init sysfs_init(void);
33#else
34static inline int sysfs_init(void)
35{
36 return 0;
37}
38#endif
39
40/* spinlock for vfsmount related operations, inplace of dcache_lock */ 33/* spinlock for vfsmount related operations, inplace of dcache_lock */
41__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 34__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42 35
@@ -1821,6 +1814,7 @@ void __init mnt_init(unsigned long mempages)
1821 struct list_head *d; 1814 struct list_head *d;
1822 unsigned int nr_hash; 1815 unsigned int nr_hash;
1823 int i; 1816 int i;
1817 int err;
1824 1818
1825 init_rwsem(&namespace_sem); 1819 init_rwsem(&namespace_sem);
1826 1820
@@ -1861,8 +1855,14 @@ void __init mnt_init(unsigned long mempages)
1861 d++; 1855 d++;
1862 i--; 1856 i--;
1863 } while (i); 1857 } while (i);
1864 sysfs_init(); 1858 err = sysfs_init();
1865 subsystem_register(&fs_subsys); 1859 if (err)
1860 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
1861 __FUNCTION__, err);
1862 err = subsystem_register(&fs_subsys);
1863 if (err)
1864 printk(KERN_WARNING "%s: subsystem_register error: %d\n",
1865 __FUNCTION__, err);
1866 init_rootfs(); 1866 init_rootfs();
1867 init_mount_tree(); 1867 init_mount_tree();
1868} 1868}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1ddf77b0b825..42e3bef270c9 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -81,8 +81,7 @@ static int init_inodecache(void)
81 81
82static void destroy_inodecache(void) 82static void destroy_inodecache(void)
83{ 83{
84 if (kmem_cache_destroy(ncp_inode_cachep)) 84 kmem_cache_destroy(ncp_inode_cachep);
85 printk(KERN_INFO "ncp_inode_cache: not all structures were freed\n");
86} 85}
87 86
88static int ncp_remount(struct super_block *sb, int *flags, char* data) 87static int ncp_remount(struct super_block *sb, int *flags, char* data)
@@ -224,7 +223,6 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
224 inode->i_nlink = 1; 223 inode->i_nlink = 1;
225 inode->i_uid = server->m.uid; 224 inode->i_uid = server->m.uid;
226 inode->i_gid = server->m.gid; 225 inode->i_gid = server->m.gid;
227 inode->i_blksize = NCP_BLOCK_SIZE;
228 226
229 ncp_update_dates(inode, &nwinfo->i); 227 ncp_update_dates(inode, &nwinfo->i);
230 ncp_update_inode(inode, nwinfo); 228 ncp_update_inode(inode, nwinfo);
@@ -411,11 +409,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
411#endif 409#endif
412 struct ncp_entry_info finfo; 410 struct ncp_entry_info finfo;
413 411
414 server = kmalloc(sizeof(struct ncp_server), GFP_KERNEL); 412 server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
415 if (!server) 413 if (!server)
416 return -ENOMEM; 414 return -ENOMEM;
417 sb->s_fs_info = server; 415 sb->s_fs_info = server;
418 memset(server, 0, sizeof(struct ncp_server));
419 416
420 error = -EFAULT; 417 error = -EFAULT;
421 if (raw_data == NULL) 418 if (raw_data == NULL)
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index ca92c2406635..e3d26c1bd105 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -48,7 +48,7 @@ static int ncp_symlink_readpage(struct file *file, struct page *page)
48 char *buf = kmap(page); 48 char *buf = kmap(page);
49 49
50 error = -ENOMEM; 50 error = -ENOMEM;
51 rawlink=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL); 51 rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
52 if (!rawlink) 52 if (!rawlink)
53 goto fail; 53 goto fail;
54 54
@@ -126,7 +126,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
126 /* EPERM is returned by VFS if symlink procedure does not exist */ 126 /* EPERM is returned by VFS if symlink procedure does not exist */
127 return -EPERM; 127 return -EPERM;
128 128
129 rawlink=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL); 129 rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
130 if (!rawlink) 130 if (!rawlink)
131 return -ENOMEM; 131 return -ENOMEM;
132 132
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0c1967..f4580b44eef4 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 proc.o read.o symlink.o unlink.o write.o \ 8 pagelist.o proc.o read.o symlink.o unlink.o \
9 namespace.o 9 write.o namespace.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8ac149..a3ee11364db0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
19 19
20#include "nfs4_fs.h" 20#include "nfs4_fs.h"
21#include "callback.h" 21#include "callback.h"
22#include "internal.h"
22 23
23#define NFSDBG_FACILITY NFSDBG_CALLBACK 24#define NFSDBG_FACILITY NFSDBG_CALLBACK
24 25
@@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_program;
36 37
37unsigned int nfs_callback_set_tcpport; 38unsigned int nfs_callback_set_tcpport;
38unsigned short nfs_callback_tcpport; 39unsigned short nfs_callback_tcpport;
40static const int nfs_set_port_min = 0;
41static const int nfs_set_port_max = 65535;
42
43static int param_set_port(const char *val, struct kernel_param *kp)
44{
45 char *endp;
46 int num = simple_strtol(val, &endp, 0);
47 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
48 return -EINVAL;
49 *((int *)kp->arg) = num;
50 return 0;
51}
52
53module_param_call(callback_tcpport, param_set_port, param_get_int,
54 &nfs_callback_set_tcpport, 0644);
39 55
40/* 56/*
41 * This is the callback kernel thread. 57 * This is the callback kernel thread.
@@ -134,10 +150,8 @@ out_err:
134/* 150/*
135 * Kill the server process if it is not already up. 151 * Kill the server process if it is not already up.
136 */ 152 */
137int nfs_callback_down(void) 153void nfs_callback_down(void)
138{ 154{
139 int ret = 0;
140
141 lock_kernel(); 155 lock_kernel();
142 mutex_lock(&nfs_callback_mutex); 156 mutex_lock(&nfs_callback_mutex);
143 nfs_callback_info.users--; 157 nfs_callback_info.users--;
@@ -149,20 +163,19 @@ int nfs_callback_down(void)
149 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); 163 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
150 mutex_unlock(&nfs_callback_mutex); 164 mutex_unlock(&nfs_callback_mutex);
151 unlock_kernel(); 165 unlock_kernel();
152 return ret;
153} 166}
154 167
155static int nfs_callback_authenticate(struct svc_rqst *rqstp) 168static int nfs_callback_authenticate(struct svc_rqst *rqstp)
156{ 169{
157 struct in_addr *addr = &rqstp->rq_addr.sin_addr; 170 struct sockaddr_in *addr = &rqstp->rq_addr;
158 struct nfs4_client *clp; 171 struct nfs_client *clp;
159 172
160 /* Don't talk to strangers */ 173 /* Don't talk to strangers */
161 clp = nfs4_find_client(addr); 174 clp = nfs_find_client(addr, 4);
162 if (clp == NULL) 175 if (clp == NULL)
163 return SVC_DROP; 176 return SVC_DROP;
164 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); 177 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
165 nfs4_put_client(clp); 178 nfs_put_client(clp);
166 switch (rqstp->rq_authop->flavour) { 179 switch (rqstp->rq_authop->flavour) {
167 case RPC_AUTH_NULL: 180 case RPC_AUTH_NULL:
168 if (rqstp->rq_proc != CB_NULL) 181 if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7fe53a5..5676163d26e8 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
62extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 62extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
63extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); 63extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
64 64
65#ifdef CONFIG_NFS_V4
65extern int nfs_callback_up(void); 66extern int nfs_callback_up(void);
66extern int nfs_callback_down(void); 67extern void nfs_callback_down(void);
68#else
69#define nfs_callback_up() (0)
70#define nfs_callback_down() do {} while(0)
71#endif
67 72
68extern unsigned int nfs_callback_set_tcpport; 73extern unsigned int nfs_callback_set_tcpport;
69extern unsigned short nfs_callback_tcpport; 74extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483ecdfc..97cf8f71451f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,19 +10,20 @@
10#include "nfs4_fs.h" 10#include "nfs4_fs.h"
11#include "callback.h" 11#include "callback.h"
12#include "delegation.h" 12#include "delegation.h"
13#include "internal.h"
13 14
14#define NFSDBG_FACILITY NFSDBG_CALLBACK 15#define NFSDBG_FACILITY NFSDBG_CALLBACK
15 16
16unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) 17unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
17{ 18{
18 struct nfs4_client *clp; 19 struct nfs_client *clp;
19 struct nfs_delegation *delegation; 20 struct nfs_delegation *delegation;
20 struct nfs_inode *nfsi; 21 struct nfs_inode *nfsi;
21 struct inode *inode; 22 struct inode *inode;
22 23
23 res->bitmap[0] = res->bitmap[1] = 0; 24 res->bitmap[0] = res->bitmap[1] = 0;
24 res->status = htonl(NFS4ERR_BADHANDLE); 25 res->status = htonl(NFS4ERR_BADHANDLE);
25 clp = nfs4_find_client(&args->addr->sin_addr); 26 clp = nfs_find_client(args->addr, 4);
26 if (clp == NULL) 27 if (clp == NULL)
27 goto out; 28 goto out;
28 inode = nfs_delegation_find_inode(clp, &args->fh); 29 inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ out_iput:
48 up_read(&nfsi->rwsem); 49 up_read(&nfsi->rwsem);
49 iput(inode); 50 iput(inode);
50out_putclient: 51out_putclient:
51 nfs4_put_client(clp); 52 nfs_put_client(clp);
52out: 53out:
53 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); 54 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
54 return res->status; 55 return res->status;
@@ -56,12 +57,12 @@ out:
56 57
57unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) 58unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
58{ 59{
59 struct nfs4_client *clp; 60 struct nfs_client *clp;
60 struct inode *inode; 61 struct inode *inode;
61 unsigned res; 62 unsigned res;
62 63
63 res = htonl(NFS4ERR_BADHANDLE); 64 res = htonl(NFS4ERR_BADHANDLE);
64 clp = nfs4_find_client(&args->addr->sin_addr); 65 clp = nfs_find_client(args->addr, 4);
65 if (clp == NULL) 66 if (clp == NULL)
66 goto out; 67 goto out;
67 inode = nfs_delegation_find_inode(clp, &args->fh); 68 inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
80 } 81 }
81 iput(inode); 82 iput(inode);
82out_putclient: 83out_putclient:
83 nfs4_put_client(clp); 84 nfs_put_client(clp);
84out: 85out:
85 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); 86 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
86 return res; 87 return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 000000000000..ec1938d4b814
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,1448 @@
1/* client.c: NFS client sharing and management code
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13#include <linux/config.h>
14#include <linux/module.h>
15#include <linux/init.h>
16
17#include <linux/time.h>
18#include <linux/kernel.h>
19#include <linux/mm.h>
20#include <linux/string.h>
21#include <linux/stat.h>
22#include <linux/errno.h>
23#include <linux/unistd.h>
24#include <linux/sunrpc/clnt.h>
25#include <linux/sunrpc/stats.h>
26#include <linux/sunrpc/metrics.h>
27#include <linux/nfs_fs.h>
28#include <linux/nfs_mount.h>
29#include <linux/nfs4_mount.h>
30#include <linux/lockd/bind.h>
31#include <linux/smp_lock.h>
32#include <linux/seq_file.h>
33#include <linux/mount.h>
34#include <linux/nfs_idmap.h>
35#include <linux/vfs.h>
36#include <linux/inet.h>
37#include <linux/nfs_xdr.h>
38
39#include <asm/system.h>
40
41#include "nfs4_fs.h"
42#include "callback.h"
43#include "delegation.h"
44#include "iostat.h"
45#include "internal.h"
46
47#define NFSDBG_FACILITY NFSDBG_CLIENT
48
49static DEFINE_SPINLOCK(nfs_client_lock);
50static LIST_HEAD(nfs_client_list);
51static LIST_HEAD(nfs_volume_list);
52static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
53
54/*
55 * RPC cruft for NFS
56 */
57static struct rpc_version *nfs_version[5] = {
58 [2] = &nfs_version2,
59#ifdef CONFIG_NFS_V3
60 [3] = &nfs_version3,
61#endif
62#ifdef CONFIG_NFS_V4
63 [4] = &nfs_version4,
64#endif
65};
66
67struct rpc_program nfs_program = {
68 .name = "nfs",
69 .number = NFS_PROGRAM,
70 .nrvers = ARRAY_SIZE(nfs_version),
71 .version = nfs_version,
72 .stats = &nfs_rpcstat,
73 .pipe_dir_name = "/nfs",
74};
75
76struct rpc_stat nfs_rpcstat = {
77 .program = &nfs_program
78};
79
80
81#ifdef CONFIG_NFS_V3_ACL
82static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
83static struct rpc_version * nfsacl_version[] = {
84 [3] = &nfsacl_version3,
85};
86
87struct rpc_program nfsacl_program = {
88 .name = "nfsacl",
89 .number = NFS_ACL_PROGRAM,
90 .nrvers = ARRAY_SIZE(nfsacl_version),
91 .version = nfsacl_version,
92 .stats = &nfsacl_rpcstat,
93};
94#endif /* CONFIG_NFS_V3_ACL */
95
96/*
97 * Allocate a shared client record
98 *
99 * Since these are allocated/deallocated very rarely, we don't
100 * bother putting them in a slab cache...
101 */
102static struct nfs_client *nfs_alloc_client(const char *hostname,
103 const struct sockaddr_in *addr,
104 int nfsversion)
105{
106 struct nfs_client *clp;
107 int error;
108
109 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
110 goto error_0;
111
112 error = rpciod_up();
113 if (error < 0) {
114 dprintk("%s: couldn't start rpciod! Error = %d\n",
115 __FUNCTION__, error);
116 goto error_1;
117 }
118 __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
119
120 if (nfsversion == 4) {
121 if (nfs_callback_up() < 0)
122 goto error_2;
123 __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
124 }
125
126 atomic_set(&clp->cl_count, 1);
127 clp->cl_cons_state = NFS_CS_INITING;
128
129 clp->cl_nfsversion = nfsversion;
130 memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
131
132 if (hostname) {
133 clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
134 if (!clp->cl_hostname)
135 goto error_3;
136 }
137
138 INIT_LIST_HEAD(&clp->cl_superblocks);
139 clp->cl_rpcclient = ERR_PTR(-EINVAL);
140
141#ifdef CONFIG_NFS_V4
142 init_rwsem(&clp->cl_sem);
143 INIT_LIST_HEAD(&clp->cl_delegations);
144 INIT_LIST_HEAD(&clp->cl_state_owners);
145 INIT_LIST_HEAD(&clp->cl_unused);
146 spin_lock_init(&clp->cl_lock);
147 INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
148 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
149 clp->cl_boot_time = CURRENT_TIME;
150 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
151#endif
152
153 return clp;
154
155error_3:
156 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
157 nfs_callback_down();
158error_2:
159 rpciod_down();
160 __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
161error_1:
162 kfree(clp);
163error_0:
164 return NULL;
165}
166
/*
 * Tear down the NFSv4-specific parts of a client record: stop lease
 * renewal, free the cache of unused state owners and drop the idmapper.
 * Compiles to a no-op on !CONFIG_NFS_V4 builds.
 */
static void nfs4_shutdown_client(struct nfs_client *clp)
{
#ifdef CONFIG_NFS_V4
	struct nfs4_state_owner *owner;

	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
		nfs4_kill_renewd(clp);

	/* drain the cache of unused state owners */
	while (!list_empty(&clp->cl_unused)) {
		owner = list_entry(clp->cl_unused.next,
				   struct nfs4_state_owner, so_list);
		list_del(&owner->so_list);
		kfree(owner);
	}

	/* any state owner still in use here is a refcounting bug */
	BUG_ON(!list_empty(&clp->cl_state_owners));

	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
		nfs_idmap_delete(clp);
#endif
}
186
187/*
188 * Destroy a shared client record
189 */
190static void nfs_free_client(struct nfs_client *clp)
191{
192 dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
193
194 nfs4_shutdown_client(clp);
195
196 /* -EIO all pending I/O */
197 if (!IS_ERR(clp->cl_rpcclient))
198 rpc_shutdown_client(clp->cl_rpcclient);
199
200 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
201 nfs_callback_down();
202
203 if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
204 rpciod_down();
205
206 kfree(clp->cl_hostname);
207 kfree(clp);
208
209 dprintk("<-- nfs_free_client()\n");
210}
211
212/*
213 * Release a reference to a shared client record
214 */
215void nfs_put_client(struct nfs_client *clp)
216{
217 if (!clp)
218 return;
219
220 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
221
222 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
223 list_del(&clp->cl_share_link);
224 spin_unlock(&nfs_client_lock);
225
226 BUG_ON(!list_empty(&clp->cl_superblocks));
227
228 nfs_free_client(clp);
229 }
230}
231
232/*
233 * Find a client by address
234 * - caller must hold nfs_client_lock
235 */
236static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
237{
238 struct nfs_client *clp;
239
240 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
241 /* Different NFS versions cannot share the same nfs_client */
242 if (clp->cl_nfsversion != nfsversion)
243 continue;
244
245 if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
246 sizeof(clp->cl_addr.sin_addr)) != 0)
247 continue;
248
249 if (clp->cl_addr.sin_port == addr->sin_port)
250 goto found;
251 }
252
253 return NULL;
254
255found:
256 atomic_inc(&clp->cl_count);
257 return clp;
258}
259
260/*
261 * Find a client by IP address and protocol version
262 * - returns NULL if no such client
263 */
264struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
265{
266 struct nfs_client *clp;
267
268 spin_lock(&nfs_client_lock);
269 clp = __nfs_find_client(addr, nfsversion);
270 spin_unlock(&nfs_client_lock);
271
272 BUG_ON(clp && clp->cl_cons_state == 0);
273
274 return clp;
275}
276
277/*
278 * Look up a client by IP address and protocol version
279 * - creates a new record if one doesn't yet exist
280 */
281static struct nfs_client *nfs_get_client(const char *hostname,
282 const struct sockaddr_in *addr,
283 int nfsversion)
284{
285 struct nfs_client *clp, *new = NULL;
286 int error;
287
288 dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
289 hostname ?: "", NIPQUAD(addr->sin_addr),
290 addr->sin_port, nfsversion);
291
292 /* see if the client already exists */
293 do {
294 spin_lock(&nfs_client_lock);
295
296 clp = __nfs_find_client(addr, nfsversion);
297 if (clp)
298 goto found_client;
299 if (new)
300 goto install_client;
301
302 spin_unlock(&nfs_client_lock);
303
304 new = nfs_alloc_client(hostname, addr, nfsversion);
305 } while (new);
306
307 return ERR_PTR(-ENOMEM);
308
309 /* install a new client and return with it unready */
310install_client:
311 clp = new;
312 list_add(&clp->cl_share_link, &nfs_client_list);
313 spin_unlock(&nfs_client_lock);
314 dprintk("--> nfs_get_client() = %p [new]\n", clp);
315 return clp;
316
317 /* found an existing client
318 * - make sure it's ready before returning
319 */
320found_client:
321 spin_unlock(&nfs_client_lock);
322
323 if (new)
324 nfs_free_client(new);
325
326 if (clp->cl_cons_state == NFS_CS_INITING) {
327 DECLARE_WAITQUEUE(myself, current);
328
329 add_wait_queue(&nfs_client_active_wq, &myself);
330
331 for (;;) {
332 set_current_state(TASK_INTERRUPTIBLE);
333 if (signal_pending(current) ||
334 clp->cl_cons_state > NFS_CS_READY)
335 break;
336 schedule();
337 }
338
339 remove_wait_queue(&nfs_client_active_wq, &myself);
340
341 if (signal_pending(current)) {
342 nfs_put_client(clp);
343 return ERR_PTR(-ERESTARTSYS);
344 }
345 }
346
347 if (clp->cl_cons_state < NFS_CS_READY) {
348 error = clp->cl_cons_state;
349 nfs_put_client(clp);
350 return ERR_PTR(error);
351 }
352
353 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
354
355 dprintk("--> nfs_get_client() = %p [share]\n", clp);
356 return clp;
357}
358
359/*
360 * Mark a server as ready or failed
361 */
362static void nfs_mark_client_ready(struct nfs_client *clp, int state)
363{
364 clp->cl_cons_state = state;
365 wake_up_all(&nfs_client_active_wq);
366}
367
368/*
369 * Initialise the timeout values for a connection
370 */
371static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
372 unsigned int timeo, unsigned int retrans)
373{
374 to->to_initval = timeo * HZ / 10;
375 to->to_retries = retrans;
376 if (!to->to_retries)
377 to->to_retries = 2;
378
379 switch (proto) {
380 case IPPROTO_TCP:
381 if (!to->to_initval)
382 to->to_initval = 60 * HZ;
383 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
384 to->to_initval = NFS_MAX_TCP_TIMEOUT;
385 to->to_increment = to->to_initval;
386 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
387 to->to_exponential = 0;
388 break;
389 case IPPROTO_UDP:
390 default:
391 if (!to->to_initval)
392 to->to_initval = 11 * HZ / 10;
393 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
394 to->to_initval = NFS_MAX_UDP_TIMEOUT;
395 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
396 to->to_exponential = 1;
397 break;
398 }
399}
400
401/*
402 * Create an RPC client handle
403 */
404static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
405 unsigned int timeo,
406 unsigned int retrans,
407 rpc_authflavor_t flavor)
408{
409 struct rpc_timeout timeparms;
410 struct rpc_clnt *clnt = NULL;
411 struct rpc_create_args args = {
412 .protocol = proto,
413 .address = (struct sockaddr *)&clp->cl_addr,
414 .addrsize = sizeof(clp->cl_addr),
415 .timeout = &timeparms,
416 .servername = clp->cl_hostname,
417 .program = &nfs_program,
418 .version = clp->rpc_ops->version,
419 .authflavor = flavor,
420 };
421
422 if (!IS_ERR(clp->cl_rpcclient))
423 return 0;
424
425 nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
426 clp->retrans_timeo = timeparms.to_initval;
427 clp->retrans_count = timeparms.to_retries;
428
429 clnt = rpc_create(&args);
430 if (IS_ERR(clnt)) {
431 dprintk("%s: cannot create RPC client. Error = %ld\n",
432 __FUNCTION__, PTR_ERR(clnt));
433 return PTR_ERR(clnt);
434 }
435
436 clp->cl_rpcclient = clnt;
437 return 0;
438}
439
440/*
441 * Version 2 or 3 client destruction
442 */
443static void nfs_destroy_server(struct nfs_server *server)
444{
445 if (!IS_ERR(server->client_acl))
446 rpc_shutdown_client(server->client_acl);
447
448 if (!(server->flags & NFS_MOUNT_NONLM))
449 lockd_down(); /* release rpc.lockd */
450}
451
452/*
453 * Version 2 or 3 lockd setup
454 */
455static int nfs_start_lockd(struct nfs_server *server)
456{
457 int error = 0;
458
459 if (server->nfs_client->cl_nfsversion > 3)
460 goto out;
461 if (server->flags & NFS_MOUNT_NONLM)
462 goto out;
463 error = lockd_up();
464 if (error < 0)
465 server->flags |= NFS_MOUNT_NONLM;
466 else
467 server->destroy = nfs_destroy_server;
468out:
469 return error;
470}
471
472/*
473 * Initialise an NFSv3 ACL client connection
474 */
475#ifdef CONFIG_NFS_V3_ACL
476static void nfs_init_server_aclclient(struct nfs_server *server)
477{
478 if (server->nfs_client->cl_nfsversion != 3)
479 goto out_noacl;
480 if (server->flags & NFS_MOUNT_NOACL)
481 goto out_noacl;
482
483 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
484 if (IS_ERR(server->client_acl))
485 goto out_noacl;
486
487 /* No errors! Assume that Sun nfsacls are supported */
488 server->caps |= NFS_CAP_ACLS;
489 return;
490
491out_noacl:
492 server->caps &= ~NFS_CAP_ACLS;
493}
494#else
495static inline void nfs_init_server_aclclient(struct nfs_server *server)
496{
497 server->flags &= ~NFS_MOUNT_NOACL;
498 server->caps &= ~NFS_CAP_ACLS;
499}
500#endif
501
502/*
503 * Create a general RPC client
504 */
505static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
506{
507 struct nfs_client *clp = server->nfs_client;
508
509 server->client = rpc_clone_client(clp->cl_rpcclient);
510 if (IS_ERR(server->client)) {
511 dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
512 return PTR_ERR(server->client);
513 }
514
515 if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
516 struct rpc_auth *auth;
517
518 auth = rpcauth_create(pseudoflavour, server->client);
519 if (IS_ERR(auth)) {
520 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
521 return PTR_ERR(auth);
522 }
523 }
524 server->client->cl_softrtry = 0;
525 if (server->flags & NFS_MOUNT_SOFT)
526 server->client->cl_softrtry = 1;
527
528 server->client->cl_intr = 0;
529 if (server->flags & NFS4_MOUNT_INTR)
530 server->client->cl_intr = 1;
531
532 return 0;
533}
534
535/*
536 * Initialise an NFS2 or NFS3 client
537 */
538static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
539{
540 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
541 int error;
542
543 if (clp->cl_cons_state == NFS_CS_READY) {
544 /* the client is already initialised */
545 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
546 return 0;
547 }
548
549 /* Check NFS protocol revision and initialize RPC op vector */
550 clp->rpc_ops = &nfs_v2_clientops;
551#ifdef CONFIG_NFS_V3
552 if (clp->cl_nfsversion == 3)
553 clp->rpc_ops = &nfs_v3_clientops;
554#endif
555 /*
556 * Create a client RPC handle for doing FSSTAT with UNIX auth only
557 * - RFC 2623, sec 2.3.2
558 */
559 error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
560 RPC_AUTH_UNIX);
561 if (error < 0)
562 goto error;
563 nfs_mark_client_ready(clp, NFS_CS_READY);
564 return 0;
565
566error:
567 nfs_mark_client_ready(clp, error);
568 dprintk("<-- nfs_init_client() = xerror %d\n", error);
569 return error;
570}
571
572/*
573 * Create a version 2 or 3 client
574 */
575static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
576{
577 struct nfs_client *clp;
578 int error, nfsvers = 2;
579
580 dprintk("--> nfs_init_server()\n");
581
582#ifdef CONFIG_NFS_V3
583 if (data->flags & NFS_MOUNT_VER3)
584 nfsvers = 3;
585#endif
586
587 /* Allocate or find a client reference we can use */
588 clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
589 if (IS_ERR(clp)) {
590 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
591 return PTR_ERR(clp);
592 }
593
594 error = nfs_init_client(clp, data);
595 if (error < 0)
596 goto error;
597
598 server->nfs_client = clp;
599
600 /* Initialise the client representation from the mount data */
601 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
602
603 if (data->rsize)
604 server->rsize = nfs_block_size(data->rsize, NULL);
605 if (data->wsize)
606 server->wsize = nfs_block_size(data->wsize, NULL);
607
608 server->acregmin = data->acregmin * HZ;
609 server->acregmax = data->acregmax * HZ;
610 server->acdirmin = data->acdirmin * HZ;
611 server->acdirmax = data->acdirmax * HZ;
612
613 /* Start lockd here, before we might error out */
614 error = nfs_start_lockd(server);
615 if (error < 0)
616 goto error;
617
618 error = nfs_init_server_rpcclient(server, data->pseudoflavor);
619 if (error < 0)
620 goto error;
621
622 server->namelen = data->namlen;
623 /* Create a client RPC handle for the NFSv3 ACL management interface */
624 nfs_init_server_aclclient(server);
625 if (clp->cl_nfsversion == 3) {
626 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
627 server->namelen = NFS3_MAXNAMLEN;
628 server->caps |= NFS_CAP_READDIRPLUS;
629 } else {
630 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
631 server->namelen = NFS2_MAXNAMLEN;
632 }
633
634 dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
635 return 0;
636
637error:
638 server->nfs_client = NULL;
639 nfs_put_client(clp);
640 dprintk("<-- nfs_init_server() = xerror %d\n", error);
641 return error;
642}
643
644/*
645 * Load up the server record from information gained in an fsinfo record
646 */
647static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
648{
649 unsigned long max_rpc_payload;
650
651 /* Work out a lot of parameters */
652 if (server->rsize == 0)
653 server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
654 if (server->wsize == 0)
655 server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
656
657 if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
658 server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
659 if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
660 server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
661
662 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
663 if (server->rsize > max_rpc_payload)
664 server->rsize = max_rpc_payload;
665 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
666 server->rsize = NFS_MAX_FILE_IO_SIZE;
667 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
668 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
669
670 if (server->wsize > max_rpc_payload)
671 server->wsize = max_rpc_payload;
672 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
673 server->wsize = NFS_MAX_FILE_IO_SIZE;
674 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
675 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
676
677 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
678 if (server->dtsize > PAGE_CACHE_SIZE)
679 server->dtsize = PAGE_CACHE_SIZE;
680 if (server->dtsize > server->rsize)
681 server->dtsize = server->rsize;
682
683 if (server->flags & NFS_MOUNT_NOAC) {
684 server->acregmin = server->acregmax = 0;
685 server->acdirmin = server->acdirmax = 0;
686 }
687
688 server->maxfilesize = fsinfo->maxfilesize;
689
690 /* We're airborne Set socket buffersize */
691 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
692}
693
694/*
695 * Probe filesystem information, including the FSID on v2/v3
696 */
697static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
698{
699 struct nfs_fsinfo fsinfo;
700 struct nfs_client *clp = server->nfs_client;
701 int error;
702
703 dprintk("--> nfs_probe_fsinfo()\n");
704
705 if (clp->rpc_ops->set_capabilities != NULL) {
706 error = clp->rpc_ops->set_capabilities(server, mntfh);
707 if (error < 0)
708 goto out_error;
709 }
710
711 fsinfo.fattr = fattr;
712 nfs_fattr_init(fattr);
713 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
714 if (error < 0)
715 goto out_error;
716
717 nfs_server_set_fsinfo(server, &fsinfo);
718
719 /* Get some general file system info */
720 if (server->namelen == 0) {
721 struct nfs_pathconf pathinfo;
722
723 pathinfo.fattr = fattr;
724 nfs_fattr_init(fattr);
725
726 if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
727 server->namelen = pathinfo.max_namelen;
728 }
729
730 dprintk("<-- nfs_probe_fsinfo() = 0\n");
731 return 0;
732
733out_error:
734 dprintk("nfs_probe_fsinfo: error = %d\n", -error);
735 return error;
736}
737
738/*
739 * Copy useful information when duplicating a server record
740 */
741static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
742{
743 target->flags = source->flags;
744 target->acregmin = source->acregmin;
745 target->acregmax = source->acregmax;
746 target->acdirmin = source->acdirmin;
747 target->acdirmax = source->acdirmax;
748 target->caps = source->caps;
749}
750
751/*
752 * Allocate and initialise a server record
753 */
754static struct nfs_server *nfs_alloc_server(void)
755{
756 struct nfs_server *server;
757
758 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
759 if (!server)
760 return NULL;
761
762 server->client = server->client_acl = ERR_PTR(-EINVAL);
763
764 /* Zero out the NFS state stuff */
765 INIT_LIST_HEAD(&server->client_link);
766 INIT_LIST_HEAD(&server->master_link);
767
768 server->io_stats = nfs_alloc_iostats();
769 if (!server->io_stats) {
770 kfree(server);
771 return NULL;
772 }
773
774 return server;
775}
776
/*
 * Free up a server record
 *
 * Unlinks the record from the global bookkeeping lists, runs the
 * version-specific destructor (if any), shuts down the RPC transport,
 * drops the reference on the shared nfs_client and frees the memory.
 */
void nfs_free_server(struct nfs_server *server)
{
	dprintk("--> nfs_free_server()\n");

	/* make the record invisible before tearing it down */
	spin_lock(&nfs_client_lock);
	list_del(&server->client_link);
	list_del(&server->master_link);
	spin_unlock(&nfs_client_lock);

	if (server->destroy != NULL)
		server->destroy(server);
	/* ->client is ERR_PTR(-EINVAL) until the RPC client is created,
	 * so a partially initialised record is handled safely here */
	if (!IS_ERR(server->client))
		rpc_shutdown_client(server->client);

	nfs_put_client(server->nfs_client);

	nfs_free_iostats(server->io_stats);
	kfree(server);
	/* one fewer server in existence; let the automount timer expire */
	nfs_release_automount_timer();
	dprintk("<-- nfs_free_server()\n");
}
801
/*
 * Create a version 2 or 3 volume record
 * - keyed on server and FSID
 *
 * Allocates a server record, binds it to an nfs_client from the mount
 * data, probes the root filehandle for its FSID and publishes the record
 * on the global per-client and per-volume lists.
 * Returns the record or an ERR_PTR on failure.
 */
struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
				     struct nfs_fh *mntfh)
{
	struct nfs_server *server;
	struct nfs_fattr fattr;
	int error;

	server = nfs_alloc_server();
	if (!server)
		return ERR_PTR(-ENOMEM);

	/* Get a client representation */
	error = nfs_init_server(server, data);
	if (error < 0)
		goto error;

	BUG_ON(!server->nfs_client);
	BUG_ON(!server->nfs_client->rpc_ops);
	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);

	/* Probe the root fh to retrieve its FSID */
	error = nfs_probe_fsinfo(server, mntfh, &fattr);
	if (error < 0)
		goto error;
	/* if fsinfo did not fill in the attributes, fall back to an
	 * explicit getattr of the mount point */
	if (!(fattr.valid & NFS_ATTR_FATTR)) {
		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
		if (error < 0) {
			dprintk("nfs_create_server: getattr error = %d\n", -error);
			goto error;
		}
	}
	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));

	dprintk("Server FSID: %llx:%llx\n",
		(unsigned long long) server->fsid.major,
		(unsigned long long) server->fsid.minor);

	BUG_ON(!server->nfs_client);
	BUG_ON(!server->nfs_client->rpc_ops);
	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);

	/* publish the record on the global bookkeeping lists */
	spin_lock(&nfs_client_lock);
	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
	list_add_tail(&server->master_link, &nfs_volume_list);
	spin_unlock(&nfs_client_lock);

	server->mount_time = jiffies;
	return server;

error:
	/* nfs_free_server() copes with a partially initialised record */
	nfs_free_server(server);
	return ERR_PTR(error);
}
859
860#ifdef CONFIG_NFS_V4
/*
 * Initialise an NFS4 client record
 *
 * Idempotent on shared clients: if another mount already brought this
 * nfs_client to the READY state, return immediately.  Otherwise install
 * the v4 RPC op vector, create the RPC transport and the idmapper, then
 * mark the client ready — or mark it with the error so that any waiters
 * see the failure too.
 */
static int nfs4_init_client(struct nfs_client *clp,
		int proto, int timeo, int retrans,
		rpc_authflavor_t authflavour)
{
	int error;

	if (clp->cl_cons_state == NFS_CS_READY) {
		/* the client is initialised already */
		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
		return 0;
	}

	/* Check NFS protocol revision and initialize RPC op vector */
	clp->rpc_ops = &nfs_v4_clientops;

	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
	if (error < 0)
		goto error;

	error = nfs_idmap_new(clp);
	if (error < 0) {
		dprintk("%s: failed to create idmapper. Error = %d\n",
			__FUNCTION__, error);
		goto error;
	}
	/* note that the idmapper must be torn down on release */
	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);

	nfs_mark_client_ready(clp, NFS_CS_READY);
	return 0;

error:
	/* propagate the error to anybody waiting on this client */
	nfs_mark_client_ready(clp, error);
	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
	return error;
}
899
/*
 * Set up an NFS4 client
 *
 * Finds or allocates a shared nfs_client record for the given
 * hostname/address, ensures it is initialised for v4, and attaches it to
 * the server record.  The client reference is dropped again on failure.
 */
static int nfs4_set_client(struct nfs_server *server,
		const char *hostname, const struct sockaddr_in *addr,
		rpc_authflavor_t authflavour,
		int proto, int timeo, int retrans)
{
	struct nfs_client *clp;
	int error;

	dprintk("--> nfs4_set_client()\n");

	/* Allocate or find a client reference we can use */
	clp = nfs_get_client(hostname, addr, 4);
	if (IS_ERR(clp)) {
		error = PTR_ERR(clp);
		goto error;
	}
	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
	if (error < 0)
		goto error_put;

	/* the server now owns the reference obtained above */
	server->nfs_client = clp;
	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
	return 0;

error_put:
	nfs_put_client(clp);
error:
	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
	return error;
}
933
/*
 * Initialise a version 4 server record from the mount data
 * (fills in the cache tunables and creates the general RPC client)
 */
static int nfs4_init_server(struct nfs_server *server,
		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
{
	int error;

	dprintk("--> nfs4_init_server()\n");

	/* Initialise the client representation from the mount data */
	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
	server->caps |= NFS_CAP_ATOMIC_OPEN;

	/* zero rsize/wsize means "use the defaults" */
	if (data->rsize)
		server->rsize = nfs_block_size(data->rsize, NULL);
	if (data->wsize)
		server->wsize = nfs_block_size(data->wsize, NULL);

	/* attribute cache timeouts arrive in seconds; store them in jiffies */
	server->acregmin = data->acregmin * HZ;
	server->acregmax = data->acregmax * HZ;
	server->acdirmin = data->acdirmin * HZ;
	server->acdirmax = data->acdirmax * HZ;

	error = nfs_init_server_rpcclient(server, authflavour);

	/* Done */
	dprintk("<-- nfs4_init_server() = %d\n", error);
	return error;
}
964
/*
 * Create a version 4 volume record
 * - keyed on server and FSID
 *
 * Allocates a server record, attaches a shared v4 nfs_client, walks the
 * mount path to locate the root filehandle (which also yields the FSID),
 * probes fsinfo, and publishes the record on the global lists.
 * Returns the record or an ERR_PTR on failure.
 */
struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
				      const char *hostname,
				      const struct sockaddr_in *addr,
				      const char *mntpath,
				      const char *ip_addr,
				      rpc_authflavor_t authflavour,
				      struct nfs_fh *mntfh)
{
	struct nfs_fattr fattr;
	struct nfs_server *server;
	int error;

	dprintk("--> nfs4_create_server()\n");

	server = nfs_alloc_server();
	if (!server)
		return ERR_PTR(-ENOMEM);

	/* Get a client record */
	error = nfs4_set_client(server, hostname, addr, authflavour,
				data->proto, data->timeo, data->retrans);
	if (error < 0)
		goto error;

	/* set up the general RPC client */
	error = nfs4_init_server(server, data, authflavour);
	if (error < 0)
		goto error;

	BUG_ON(!server->nfs_client);
	BUG_ON(!server->nfs_client->rpc_ops);
	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);

	/* Probe the root fh to retrieve its FSID */
	error = nfs4_path_walk(server, mntfh, mntpath);
	if (error < 0)
		goto error;

	dprintk("Server FSID: %llx:%llx\n",
		(unsigned long long) server->fsid.major,
		(unsigned long long) server->fsid.minor);
	dprintk("Mount FH: %d\n", mntfh->size);

	error = nfs_probe_fsinfo(server, mntfh, &fattr);
	if (error < 0)
		goto error;

	BUG_ON(!server->nfs_client);
	BUG_ON(!server->nfs_client->rpc_ops);
	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);

	/* publish the record on the global bookkeeping lists */
	spin_lock(&nfs_client_lock);
	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
	list_add_tail(&server->master_link, &nfs_volume_list);
	spin_unlock(&nfs_client_lock);

	server->mount_time = jiffies;
	dprintk("<-- nfs4_create_server() = %p\n", server);
	return server;

error:
	/* nfs_free_server() copes with a partially initialised record */
	nfs_free_server(server);
	dprintk("<-- nfs4_create_server() = error %d\n", error);
	return ERR_PTR(error);
}
1034
1035/*
1036 * Create an NFS4 referral server record
1037 */
1038struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1039 struct nfs_fh *fh)
1040{
1041 struct nfs_client *parent_client;
1042 struct nfs_server *server, *parent_server;
1043 struct nfs_fattr fattr;
1044 int error;
1045
1046 dprintk("--> nfs4_create_referral_server()\n");
1047
1048 server = nfs_alloc_server();
1049 if (!server)
1050 return ERR_PTR(-ENOMEM);
1051
1052 parent_server = NFS_SB(data->sb);
1053 parent_client = parent_server->nfs_client;
1054
1055 /* Get a client representation.
1056 * Note: NFSv4 always uses TCP, */
1057 error = nfs4_set_client(server, data->hostname, data->addr,
1058 data->authflavor,
1059 parent_server->client->cl_xprt->prot,
1060 parent_client->retrans_timeo,
1061 parent_client->retrans_count);
1062 if (error < 0)
1063 goto error;
1064
1065 /* Initialise the client representation from the parent server */
1066 nfs_server_copy_userdata(server, parent_server);
1067 server->caps |= NFS_CAP_ATOMIC_OPEN;
1068
1069 error = nfs_init_server_rpcclient(server, data->authflavor);
1070 if (error < 0)
1071 goto error;
1072
1073 BUG_ON(!server->nfs_client);
1074 BUG_ON(!server->nfs_client->rpc_ops);
1075 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1076
1077 /* probe the filesystem info for this server filesystem */
1078 error = nfs_probe_fsinfo(server, fh, &fattr);
1079 if (error < 0)
1080 goto error;
1081
1082 dprintk("Referral FSID: %llx:%llx\n",
1083 (unsigned long long) server->fsid.major,
1084 (unsigned long long) server->fsid.minor);
1085
1086 spin_lock(&nfs_client_lock);
1087 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1088 list_add_tail(&server->master_link, &nfs_volume_list);
1089 spin_unlock(&nfs_client_lock);
1090
1091 server->mount_time = jiffies;
1092
1093 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1094 return server;
1095
1096error:
1097 nfs_free_server(server);
1098 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1099 return ERR_PTR(error);
1100}
1101
1102#endif /* CONFIG_NFS_V4 */
1103
/*
 * Clone an NFS2, NFS3 or NFS4 server record
 *
 * Used for submounts that share the source's nfs_client: copies the
 * user-visible tunables, takes an extra reference on the shared client,
 * creates a fresh RPC client (and ACL client if the source had one),
 * probes fsinfo for the new fh and starts lockd.
 * Returns the new record or an ERR_PTR on failure.
 */
struct nfs_server *nfs_clone_server(struct nfs_server *source,
				    struct nfs_fh *fh,
				    struct nfs_fattr *fattr)
{
	struct nfs_server *server;
	struct nfs_fattr fattr_fsinfo;
	int error;

	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
		(unsigned long long) fattr->fsid.major,
		(unsigned long long) fattr->fsid.minor);

	server = nfs_alloc_server();
	if (!server)
		return ERR_PTR(-ENOMEM);

	/* Copy data from the source */
	server->nfs_client = source->nfs_client;
	/* the clone holds its own reference on the shared client */
	atomic_inc(&server->nfs_client->cl_count);
	nfs_server_copy_userdata(server, source);

	server->fsid = fattr->fsid;

	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
	if (error < 0)
		goto out_free_server;
	/* only set up an ACL client if the source had one */
	if (!IS_ERR(source->client_acl))
		nfs_init_server_aclclient(server);

	/* probe the filesystem info for this server filesystem */
	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
	if (error < 0)
		goto out_free_server;

	dprintk("Cloned FSID: %llx:%llx\n",
		(unsigned long long) server->fsid.major,
		(unsigned long long) server->fsid.minor);

	error = nfs_start_lockd(server);
	if (error < 0)
		goto out_free_server;

	/* publish the record on the global bookkeeping lists */
	spin_lock(&nfs_client_lock);
	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
	list_add_tail(&server->master_link, &nfs_volume_list);
	spin_unlock(&nfs_client_lock);

	server->mount_time = jiffies;

	dprintk("<-- nfs_clone_server() = %p\n", server);
	return server;

out_free_server:
	/* nfs_free_server() drops the client reference taken above */
	nfs_free_server(server);
	dprintk("<-- nfs_clone_server() = error %d\n", error);
	return ERR_PTR(error);
}
1164
1165#ifdef CONFIG_PROC_FS
1166static struct proc_dir_entry *proc_fs_nfs;
1167
/* seq_file callbacks for /proc/fs/nfsfs/servers */
static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_server_list_stop(struct seq_file *p, void *v);
static int nfs_server_list_show(struct seq_file *m, void *v);

/* iterator callbacks used by seq_read() on the servers file */
static struct seq_operations nfs_server_list_ops = {
	.start	= nfs_server_list_start,
	.next	= nfs_server_list_next,
	.stop	= nfs_server_list_stop,
	.show	= nfs_server_list_show,
};

/* file operations wired to the servers proc entry */
static struct file_operations nfs_server_list_fops = {
	.open		= nfs_server_list_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1187
/* seq_file callbacks for /proc/fs/nfsfs/volumes */
static int nfs_volume_list_open(struct inode *inode, struct file *file);
static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
static void nfs_volume_list_stop(struct seq_file *p, void *v);
static int nfs_volume_list_show(struct seq_file *m, void *v);

/* iterator callbacks used by seq_read() on the volumes file */
static struct seq_operations nfs_volume_list_ops = {
	.start	= nfs_volume_list_start,
	.next	= nfs_volume_list_next,
	.stop	= nfs_volume_list_stop,
	.show	= nfs_volume_list_show,
};

/* file operations wired to the volumes proc entry */
static struct file_operations nfs_volume_list_fops = {
	.open		= nfs_volume_list_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1207
1208/*
1209 * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
1210 * we're dealing
1211 */
1212static int nfs_server_list_open(struct inode *inode, struct file *file)
1213{
1214 struct seq_file *m;
1215 int ret;
1216
1217 ret = seq_open(file, &nfs_server_list_ops);
1218 if (ret < 0)
1219 return ret;
1220
1221 m = file->private_data;
1222 m->private = PDE(inode)->data;
1223
1224 return 0;
1225}
1226
/*
 * set up the iterator to start reading from the server list and return the first item
 *
 * Takes nfs_client_lock; the lock is held across the whole iteration and
 * is released by nfs_server_list_stop().
 */
static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
{
	struct list_head *_p;
	loff_t pos = *_pos;

	/* lock the list against modification */
	spin_lock(&nfs_client_lock);

	/* allow for the header line */
	if (!pos)
		return SEQ_START_TOKEN;
	pos--;

	/* find the n'th element in the list */
	list_for_each(_p, &nfs_client_list)
		if (!pos--)
			break;

	/* NULL (end of sequence) if pos ran past the last element */
	return _p != &nfs_client_list ? _p : NULL;
}
1250
1251/*
1252 * move to next server
1253 */
1254static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1255{
1256 struct list_head *_p;
1257
1258 (*pos)++;
1259
1260 _p = v;
1261 _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
1262
1263 return _p != &nfs_client_list ? _p : NULL;
1264}
1265
/*
 * clean up after reading from the transports list
 *
 * Releases the lock taken by nfs_server_list_start().
 */
static void nfs_server_list_stop(struct seq_file *p, void *v)
{
	spin_unlock(&nfs_client_lock);
}
1273
/*
 * display a header line followed by a load of call lines
 *
 * One line per nfs_client: NFS version, IP address, port, reference
 * count and hostname.
 */
static int nfs_server_list_show(struct seq_file *m, void *v)
{
	struct nfs_client *clp;

	/* display header on line 1 */
	if (v == SEQ_START_TOKEN) {
		seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
		return 0;
	}

	/* display one transport per line on subsequent lines */
	clp = list_entry(v, struct nfs_client, cl_share_link);

	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
		   clp->cl_nfsversion,
		   NIPQUAD(clp->cl_addr.sin_addr),
		   ntohs(clp->cl_addr.sin_port),
		   atomic_read(&clp->cl_count),
		   clp->cl_hostname);

	return 0;
}
1299
1300/*
1301 * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
1302 */
1303static int nfs_volume_list_open(struct inode *inode, struct file *file)
1304{
1305 struct seq_file *m;
1306 int ret;
1307
1308 ret = seq_open(file, &nfs_volume_list_ops);
1309 if (ret < 0)
1310 return ret;
1311
1312 m = file->private_data;
1313 m->private = PDE(inode)->data;
1314
1315 return 0;
1316}
1317
/*
 * set up the iterator to start reading from the volume list and return the first item
 *
 * Takes nfs_client_lock; the lock is held across the whole iteration and
 * is released by nfs_volume_list_stop().
 */
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
{
	struct list_head *_p;
	loff_t pos = *_pos;

	/* lock the list against modification */
	spin_lock(&nfs_client_lock);

	/* allow for the header line */
	if (!pos)
		return SEQ_START_TOKEN;
	pos--;

	/* find the n'th element in the list */
	list_for_each(_p, &nfs_volume_list)
		if (!pos--)
			break;

	/* NULL (end of sequence) if pos ran past the last element */
	return _p != &nfs_volume_list ? _p : NULL;
}
1341
1342/*
1343 * move to next volume
1344 */
1345static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1346{
1347 struct list_head *_p;
1348
1349 (*pos)++;
1350
1351 _p = v;
1352 _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
1353
1354 return _p != &nfs_volume_list ? _p : NULL;
1355}
1356
/*
 * clean up after reading from the transports list
 *
 * Releases the lock taken by nfs_volume_list_start().
 */
static void nfs_volume_list_stop(struct seq_file *p, void *v)
{
	spin_unlock(&nfs_client_lock);
}
1364
1365/*
1366 * display a header line followed by a load of call lines
1367 */
1368static int nfs_volume_list_show(struct seq_file *m, void *v)
1369{
1370 struct nfs_server *server;
1371 struct nfs_client *clp;
1372 char dev[8], fsid[17];
1373
1374 /* display header on line 1 */
1375 if (v == SEQ_START_TOKEN) {
1376 seq_puts(m, "NV SERVER PORT DEV FSID\n");
1377 return 0;
1378 }
1379 /* display one transport per line on subsequent lines */
1380 server = list_entry(v, struct nfs_server, master_link);
1381 clp = server->nfs_client;
1382
1383 snprintf(dev, 8, "%u:%u",
1384 MAJOR(server->s_dev), MINOR(server->s_dev));
1385
1386 snprintf(fsid, 17, "%llx:%llx",
1387 (unsigned long long) server->fsid.major,
1388 (unsigned long long) server->fsid.minor);
1389
1390 seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
1391 clp->cl_nfsversion,
1392 NIPQUAD(clp->cl_addr.sin_addr),
1393 ntohs(clp->cl_addr.sin_port),
1394 dev,
1395 fsid);
1396
1397 return 0;
1398}
1399
/*
 * initialise the /proc/fs/nfsfs/ directory
 *
 * Creates the "servers" and "volumes" entries.  On any failure the
 * entries created so far are unwound again and -ENOMEM is returned.
 */
int __init nfs_fs_proc_init(void)
{
	struct proc_dir_entry *p;

	proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
	if (!proc_fs_nfs)
		goto error_0;

	proc_fs_nfs->owner = THIS_MODULE;

	/* a file of servers with which we're dealing */
	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
	if (!p)
		goto error_1;

	p->proc_fops = &nfs_server_list_fops;
	p->owner = THIS_MODULE;

	/* a file of volumes that we have mounted */
	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
	if (!p)
		goto error_2;

	p->proc_fops = &nfs_volume_list_fops;
	p->owner = THIS_MODULE;
	return 0;

	/* unwind in reverse order of creation */
error_2:
	remove_proc_entry("servers", proc_fs_nfs);
error_1:
	remove_proc_entry("nfsfs", proc_root_fs);
error_0:
	return -ENOMEM;
}
1437
/*
 * clean up the /proc/fs/nfsfs/ directory
 *
 * Entries are removed in reverse order of creation; the directory itself
 * goes last.
 */
void nfs_fs_proc_exit(void)
{
	remove_proc_entry("volumes", proc_fs_nfs);
	remove_proc_entry("servers", proc_fs_nfs);
	remove_proc_entry("nfsfs", proc_root_fs);
}
1447
1448#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a316c05e..841c99a9b11c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,11 +18,7 @@
18 18
19#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "delegation.h" 20#include "delegation.h"
21 21#include "internal.h"
22static struct nfs_delegation *nfs_alloc_delegation(void)
23{
24 return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
25}
26 22
27static void nfs_free_delegation(struct nfs_delegation *delegation) 23static void nfs_free_delegation(struct nfs_delegation *delegation)
28{ 24{
@@ -52,7 +48,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
52 case -NFS4ERR_EXPIRED: 48 case -NFS4ERR_EXPIRED:
53 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 49 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
54 case -NFS4ERR_STALE_CLIENTID: 50 case -NFS4ERR_STALE_CLIENTID:
55 nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); 51 nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
56 goto out_err; 52 goto out_err;
57 } 53 }
58 } 54 }
@@ -114,7 +110,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
114 */ 110 */
115int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 111int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
116{ 112{
117 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 113 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
118 struct nfs_inode *nfsi = NFS_I(inode); 114 struct nfs_inode *nfsi = NFS_I(inode);
119 struct nfs_delegation *delegation; 115 struct nfs_delegation *delegation;
120 int status = 0; 116 int status = 0;
@@ -123,7 +119,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
123 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) 119 if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
124 __nfs_revalidate_inode(NFS_SERVER(inode), inode); 120 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
125 121
126 delegation = nfs_alloc_delegation(); 122 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
127 if (delegation == NULL) 123 if (delegation == NULL)
128 return -ENOMEM; 124 return -ENOMEM;
129 memcpy(delegation->stateid.data, res->delegation.data, 125 memcpy(delegation->stateid.data, res->delegation.data,
@@ -145,7 +141,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
145 sizeof(delegation->stateid)) != 0 || 141 sizeof(delegation->stateid)) != 0 ||
146 delegation->type != nfsi->delegation->type) { 142 delegation->type != nfsi->delegation->type) {
147 printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", 143 printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
148 __FUNCTION__, NIPQUAD(clp->cl_addr)); 144 __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
149 status = -EIO; 145 status = -EIO;
150 } 146 }
151 } 147 }
@@ -176,7 +172,7 @@ static void nfs_msync_inode(struct inode *inode)
176 */ 172 */
177int __nfs_inode_return_delegation(struct inode *inode) 173int __nfs_inode_return_delegation(struct inode *inode)
178{ 174{
179 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 175 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
180 struct nfs_inode *nfsi = NFS_I(inode); 176 struct nfs_inode *nfsi = NFS_I(inode);
181 struct nfs_delegation *delegation; 177 struct nfs_delegation *delegation;
182 int res = 0; 178 int res = 0;
@@ -208,7 +204,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
208 */ 204 */
209void nfs_return_all_delegations(struct super_block *sb) 205void nfs_return_all_delegations(struct super_block *sb)
210{ 206{
211 struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; 207 struct nfs_client *clp = NFS_SB(sb)->nfs_client;
212 struct nfs_delegation *delegation; 208 struct nfs_delegation *delegation;
213 struct inode *inode; 209 struct inode *inode;
214 210
@@ -232,7 +228,7 @@ restart:
232 228
233int nfs_do_expire_all_delegations(void *ptr) 229int nfs_do_expire_all_delegations(void *ptr)
234{ 230{
235 struct nfs4_client *clp = ptr; 231 struct nfs_client *clp = ptr;
236 struct nfs_delegation *delegation; 232 struct nfs_delegation *delegation;
237 struct inode *inode; 233 struct inode *inode;
238 234
@@ -254,11 +250,11 @@ restart:
254 } 250 }
255out: 251out:
256 spin_unlock(&clp->cl_lock); 252 spin_unlock(&clp->cl_lock);
257 nfs4_put_client(clp); 253 nfs_put_client(clp);
258 module_put_and_exit(0); 254 module_put_and_exit(0);
259} 255}
260 256
261void nfs_expire_all_delegations(struct nfs4_client *clp) 257void nfs_expire_all_delegations(struct nfs_client *clp)
262{ 258{
263 struct task_struct *task; 259 struct task_struct *task;
264 260
@@ -266,17 +262,17 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
266 atomic_inc(&clp->cl_count); 262 atomic_inc(&clp->cl_count);
267 task = kthread_run(nfs_do_expire_all_delegations, clp, 263 task = kthread_run(nfs_do_expire_all_delegations, clp,
268 "%u.%u.%u.%u-delegreturn", 264 "%u.%u.%u.%u-delegreturn",
269 NIPQUAD(clp->cl_addr)); 265 NIPQUAD(clp->cl_addr.sin_addr));
270 if (!IS_ERR(task)) 266 if (!IS_ERR(task))
271 return; 267 return;
272 nfs4_put_client(clp); 268 nfs_put_client(clp);
273 module_put(THIS_MODULE); 269 module_put(THIS_MODULE);
274} 270}
275 271
276/* 272/*
277 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. 273 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
278 */ 274 */
279void nfs_handle_cb_pathdown(struct nfs4_client *clp) 275void nfs_handle_cb_pathdown(struct nfs_client *clp)
280{ 276{
281 struct nfs_delegation *delegation; 277 struct nfs_delegation *delegation;
282 struct inode *inode; 278 struct inode *inode;
@@ -299,7 +295,7 @@ restart:
299 295
300struct recall_threadargs { 296struct recall_threadargs {
301 struct inode *inode; 297 struct inode *inode;
302 struct nfs4_client *clp; 298 struct nfs_client *clp;
303 const nfs4_stateid *stateid; 299 const nfs4_stateid *stateid;
304 300
305 struct completion started; 301 struct completion started;
@@ -310,7 +306,7 @@ static int recall_thread(void *data)
310{ 306{
311 struct recall_threadargs *args = (struct recall_threadargs *)data; 307 struct recall_threadargs *args = (struct recall_threadargs *)data;
312 struct inode *inode = igrab(args->inode); 308 struct inode *inode = igrab(args->inode);
313 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 309 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
314 struct nfs_inode *nfsi = NFS_I(inode); 310 struct nfs_inode *nfsi = NFS_I(inode);
315 struct nfs_delegation *delegation; 311 struct nfs_delegation *delegation;
316 312
@@ -371,7 +367,7 @@ out_module_put:
371/* 367/*
372 * Retrieve the inode associated with a delegation 368 * Retrieve the inode associated with a delegation
373 */ 369 */
374struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) 370struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
375{ 371{
376 struct nfs_delegation *delegation; 372 struct nfs_delegation *delegation;
377 struct inode *res = NULL; 373 struct inode *res = NULL;
@@ -389,7 +385,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
389/* 385/*
390 * Mark all delegations as needing to be reclaimed 386 * Mark all delegations as needing to be reclaimed
391 */ 387 */
392void nfs_delegation_mark_reclaim(struct nfs4_client *clp) 388void nfs_delegation_mark_reclaim(struct nfs_client *clp)
393{ 389{
394 struct nfs_delegation *delegation; 390 struct nfs_delegation *delegation;
395 spin_lock(&clp->cl_lock); 391 spin_lock(&clp->cl_lock);
@@ -401,7 +397,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
401/* 397/*
402 * Reap all unclaimed delegations after reboot recovery is done 398 * Reap all unclaimed delegations after reboot recovery is done
403 */ 399 */
404void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) 400void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
405{ 401{
406 struct nfs_delegation *delegation, *n; 402 struct nfs_delegation *delegation, *n;
407 LIST_HEAD(head); 403 LIST_HEAD(head);
@@ -423,7 +419,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
423 419
424int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 420int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
425{ 421{
426 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 422 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
427 struct nfs_inode *nfsi = NFS_I(inode); 423 struct nfs_inode *nfsi = NFS_I(inode);
428 struct nfs_delegation *delegation; 424 struct nfs_delegation *delegation;
429 int res = 0; 425 int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694652fa..2cfd4b24c7fe 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
29int __nfs_inode_return_delegation(struct inode *inode); 29int __nfs_inode_return_delegation(struct inode *inode);
30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
31 31
32struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); 32struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
33void nfs_return_all_delegations(struct super_block *sb); 33void nfs_return_all_delegations(struct super_block *sb);
34void nfs_expire_all_delegations(struct nfs4_client *clp); 34void nfs_expire_all_delegations(struct nfs_client *clp);
35void nfs_handle_cb_pathdown(struct nfs4_client *clp); 35void nfs_handle_cb_pathdown(struct nfs_client *clp);
36 36
37void nfs_delegation_mark_reclaim(struct nfs4_client *clp); 37void nfs_delegation_mark_reclaim(struct nfs_client *clp);
38void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); 38void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
39 39
40/* NFSv4 delegation-related procedures */ 40/* NFSv4 delegation-related procedures */
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4deb3e5..7432f1a43f3d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,7 +30,9 @@
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/smp_lock.h> 32#include <linux/smp_lock.h>
33#include <linux/pagevec.h>
33#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/mount.h>
34 36
35#include "nfs4_fs.h" 37#include "nfs4_fs.h"
36#include "delegation.h" 38#include "delegation.h"
@@ -870,14 +872,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
870 return (nd->intent.open.flags & O_EXCL) != 0; 872 return (nd->intent.open.flags & O_EXCL) != 0;
871} 873}
872 874
873static inline int nfs_reval_fsid(struct inode *dir, 875static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
874 struct nfs_fh *fh, struct nfs_fattr *fattr) 876 struct nfs_fh *fh, struct nfs_fattr *fattr)
875{ 877{
876 struct nfs_server *server = NFS_SERVER(dir); 878 struct nfs_server *server = NFS_SERVER(dir);
877 879
878 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) 880 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
879 /* Revalidate fsid on root dir */ 881 /* Revalidate fsid on root dir */
880 return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); 882 return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
881 return 0; 883 return 0;
882} 884}
883 885
@@ -902,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
902 904
903 lock_kernel(); 905 lock_kernel();
904 906
905 /* If we're doing an exclusive create, optimize away the lookup */ 907 /*
906 if (nfs_is_exclusive_create(dir, nd)) 908 * If we're doing an exclusive create, optimize away the lookup
907 goto no_entry; 909 * but don't hash the dentry.
910 */
911 if (nfs_is_exclusive_create(dir, nd)) {
912 d_instantiate(dentry, NULL);
913 res = NULL;
914 goto out_unlock;
915 }
908 916
909 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 917 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
910 if (error == -ENOENT) 918 if (error == -ENOENT)
@@ -913,7 +921,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
913 res = ERR_PTR(error); 921 res = ERR_PTR(error);
914 goto out_unlock; 922 goto out_unlock;
915 } 923 }
916 error = nfs_reval_fsid(dir, &fhandle, &fattr); 924 error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
917 if (error < 0) { 925 if (error < 0) {
918 res = ERR_PTR(error); 926 res = ERR_PTR(error);
919 goto out_unlock; 927 goto out_unlock;
@@ -922,8 +930,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
922 res = (struct dentry *)inode; 930 res = (struct dentry *)inode;
923 if (IS_ERR(res)) 931 if (IS_ERR(res))
924 goto out_unlock; 932 goto out_unlock;
933
925no_entry: 934no_entry:
926 res = d_add_unique(dentry, inode); 935 res = d_materialise_unique(dentry, inode);
927 if (res != NULL) 936 if (res != NULL)
928 dentry = res; 937 dentry = res;
929 nfs_renew_times(dentry); 938 nfs_renew_times(dentry);
@@ -1117,11 +1126,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1117 dput(dentry); 1126 dput(dentry);
1118 return NULL; 1127 return NULL;
1119 } 1128 }
1120 alias = d_add_unique(dentry, inode); 1129
1130 alias = d_materialise_unique(dentry, inode);
1121 if (alias != NULL) { 1131 if (alias != NULL) {
1122 dput(dentry); 1132 dput(dentry);
1123 dentry = alias; 1133 dentry = alias;
1124 } 1134 }
1135
1125 nfs_renew_times(dentry); 1136 nfs_renew_times(dentry);
1126 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1137 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1127 return dentry; 1138 return dentry;
@@ -1143,23 +1154,22 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1143 struct inode *dir = dentry->d_parent->d_inode; 1154 struct inode *dir = dentry->d_parent->d_inode;
1144 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1155 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1145 if (error) 1156 if (error)
1146 goto out_err; 1157 return error;
1147 } 1158 }
1148 if (!(fattr->valid & NFS_ATTR_FATTR)) { 1159 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1149 struct nfs_server *server = NFS_SB(dentry->d_sb); 1160 struct nfs_server *server = NFS_SB(dentry->d_sb);
1150 error = server->rpc_ops->getattr(server, fhandle, fattr); 1161 error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
1151 if (error < 0) 1162 if (error < 0)
1152 goto out_err; 1163 return error;
1153 } 1164 }
1154 inode = nfs_fhget(dentry->d_sb, fhandle, fattr); 1165 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1155 error = PTR_ERR(inode); 1166 error = PTR_ERR(inode);
1156 if (IS_ERR(inode)) 1167 if (IS_ERR(inode))
1157 goto out_err; 1168 return error;
1158 d_instantiate(dentry, inode); 1169 d_instantiate(dentry, inode);
1170 if (d_unhashed(dentry))
1171 d_rehash(dentry);
1159 return 0; 1172 return 0;
1160out_err:
1161 d_drop(dentry);
1162 return error;
1163} 1173}
1164 1174
1165/* 1175/*
@@ -1440,48 +1450,82 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1440 return error; 1450 return error;
1441} 1451}
1442 1452
1443static int 1453/*
1444nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1454 * To create a symbolic link, most file systems instantiate a new inode,
1455 * add a page to it containing the path, then write it out to the disk
1456 * using prepare_write/commit_write.
1457 *
1458 * Unfortunately the NFS client can't create the in-core inode first
1459 * because it needs a file handle to create an in-core inode (see
1460 * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
1461 * symlink request has completed on the server.
1462 *
1463 * So instead we allocate a raw page, copy the symname into it, then do
1464 * the SYMLINK request with the page as the buffer. If it succeeds, we
1465 * now have a new file handle and can instantiate an in-core NFS inode
1466 * and move the raw page into its mapping.
1467 */
1468static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1445{ 1469{
1470 struct pagevec lru_pvec;
1471 struct page *page;
1472 char *kaddr;
1446 struct iattr attr; 1473 struct iattr attr;
1447 struct nfs_fattr sym_attr; 1474 unsigned int pathlen = strlen(symname);
1448 struct nfs_fh sym_fh;
1449 struct qstr qsymname;
1450 int error; 1475 int error;
1451 1476
1452 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, 1477 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
1453 dir->i_ino, dentry->d_name.name, symname); 1478 dir->i_ino, dentry->d_name.name, symname);
1454 1479
1455#ifdef NFS_PARANOIA 1480 if (pathlen > PAGE_SIZE)
1456if (dentry->d_inode) 1481 return -ENAMETOOLONG;
1457printk("nfs_proc_symlink: %s/%s not negative!\n",
1458dentry->d_parent->d_name.name, dentry->d_name.name);
1459#endif
1460 /*
1461 * Fill in the sattr for the call.
1462 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
1463 */
1464 attr.ia_valid = ATTR_MODE;
1465 attr.ia_mode = S_IFLNK | S_IRWXUGO;
1466 1482
1467 qsymname.name = symname; 1483 attr.ia_mode = S_IFLNK | S_IRWXUGO;
1468 qsymname.len = strlen(symname); 1484 attr.ia_valid = ATTR_MODE;
1469 1485
1470 lock_kernel(); 1486 lock_kernel();
1487
1488 page = alloc_page(GFP_KERNEL);
1489 if (!page) {
1490 unlock_kernel();
1491 return -ENOMEM;
1492 }
1493
1494 kaddr = kmap_atomic(page, KM_USER0);
1495 memcpy(kaddr, symname, pathlen);
1496 if (pathlen < PAGE_SIZE)
1497 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1498 kunmap_atomic(kaddr, KM_USER0);
1499
1471 nfs_begin_data_update(dir); 1500 nfs_begin_data_update(dir);
1472 error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, 1501 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1473 &attr, &sym_fh, &sym_attr);
1474 nfs_end_data_update(dir); 1502 nfs_end_data_update(dir);
1475 if (!error) { 1503 if (error != 0) {
1476 error = nfs_instantiate(dentry, &sym_fh, &sym_attr); 1504 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
1477 } else { 1505 dir->i_sb->s_id, dir->i_ino,
1478 if (error == -EEXIST) 1506 dentry->d_name.name, symname, error);
1479 printk("nfs_proc_symlink: %s/%s already exists??\n",
1480 dentry->d_parent->d_name.name, dentry->d_name.name);
1481 d_drop(dentry); 1507 d_drop(dentry);
1508 __free_page(page);
1509 unlock_kernel();
1510 return error;
1482 } 1511 }
1512
1513 /*
1514 * No big deal if we can't add this page to the page cache here.
1515 * READLINK will get the missing page from the server if needed.
1516 */
1517 pagevec_init(&lru_pvec, 0);
1518 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1519 GFP_KERNEL)) {
1520 if (!pagevec_add(&lru_pvec, page))
1521 __pagevec_lru_add(&lru_pvec);
1522 SetPageUptodate(page);
1523 unlock_page(page);
1524 } else
1525 __free_page(page);
1526
1483 unlock_kernel(); 1527 unlock_kernel();
1484 return error; 1528 return 0;
1485} 1529}
1486 1530
1487static int 1531static int
@@ -1625,8 +1669,7 @@ out:
1625 if (rehash) 1669 if (rehash)
1626 d_rehash(rehash); 1670 d_rehash(rehash);
1627 if (!error) { 1671 if (!error) {
1628 if (!S_ISDIR(old_inode->i_mode)) 1672 d_move(old_dentry, new_dentry);
1629 d_move(old_dentry, new_dentry);
1630 nfs_renew_times(new_dentry); 1673 nfs_renew_times(new_dentry);
1631 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); 1674 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
1632 } 1675 }
@@ -1638,35 +1681,211 @@ out:
1638 return error; 1681 return error;
1639} 1682}
1640 1683
1684static DEFINE_SPINLOCK(nfs_access_lru_lock);
1685static LIST_HEAD(nfs_access_lru_list);
1686static atomic_long_t nfs_access_nr_entries;
1687
1688static void nfs_access_free_entry(struct nfs_access_entry *entry)
1689{
1690 put_rpccred(entry->cred);
1691 kfree(entry);
1692 smp_mb__before_atomic_dec();
1693 atomic_long_dec(&nfs_access_nr_entries);
1694 smp_mb__after_atomic_dec();
1695}
1696
1697int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1698{
1699 LIST_HEAD(head);
1700 struct nfs_inode *nfsi;
1701 struct nfs_access_entry *cache;
1702
1703 spin_lock(&nfs_access_lru_lock);
1704restart:
1705 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1706 struct inode *inode;
1707
1708 if (nr_to_scan-- == 0)
1709 break;
1710 inode = igrab(&nfsi->vfs_inode);
1711 if (inode == NULL)
1712 continue;
1713 spin_lock(&inode->i_lock);
1714 if (list_empty(&nfsi->access_cache_entry_lru))
1715 goto remove_lru_entry;
1716 cache = list_entry(nfsi->access_cache_entry_lru.next,
1717 struct nfs_access_entry, lru);
1718 list_move(&cache->lru, &head);
1719 rb_erase(&cache->rb_node, &nfsi->access_cache);
1720 if (!list_empty(&nfsi->access_cache_entry_lru))
1721 list_move_tail(&nfsi->access_cache_inode_lru,
1722 &nfs_access_lru_list);
1723 else {
1724remove_lru_entry:
1725 list_del_init(&nfsi->access_cache_inode_lru);
1726 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1727 }
1728 spin_unlock(&inode->i_lock);
1729 iput(inode);
1730 goto restart;
1731 }
1732 spin_unlock(&nfs_access_lru_lock);
1733 while (!list_empty(&head)) {
1734 cache = list_entry(head.next, struct nfs_access_entry, lru);
1735 list_del(&cache->lru);
1736 nfs_access_free_entry(cache);
1737 }
1738 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
1739}
1740
1741static void __nfs_access_zap_cache(struct inode *inode)
1742{
1743 struct nfs_inode *nfsi = NFS_I(inode);
1744 struct rb_root *root_node = &nfsi->access_cache;
1745 struct rb_node *n, *dispose = NULL;
1746 struct nfs_access_entry *entry;
1747
1748 /* Unhook entries from the cache */
1749 while ((n = rb_first(root_node)) != NULL) {
1750 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1751 rb_erase(n, root_node);
1752 list_del(&entry->lru);
1753 n->rb_left = dispose;
1754 dispose = n;
1755 }
1756 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1757 spin_unlock(&inode->i_lock);
1758
1759 /* Now kill them all! */
1760 while (dispose != NULL) {
1761 n = dispose;
1762 dispose = n->rb_left;
1763 nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
1764 }
1765}
1766
1767void nfs_access_zap_cache(struct inode *inode)
1768{
1769 /* Remove from global LRU init */
1770 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
1771 spin_lock(&nfs_access_lru_lock);
1772 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
1773 spin_unlock(&nfs_access_lru_lock);
1774 }
1775
1776 spin_lock(&inode->i_lock);
1777 /* This will release the spinlock */
1778 __nfs_access_zap_cache(inode);
1779}
1780
1781static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
1782{
1783 struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
1784 struct nfs_access_entry *entry;
1785
1786 while (n != NULL) {
1787 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1788
1789 if (cred < entry->cred)
1790 n = n->rb_left;
1791 else if (cred > entry->cred)
1792 n = n->rb_right;
1793 else
1794 return entry;
1795 }
1796 return NULL;
1797}
1798
1641int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) 1799int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
1642{ 1800{
1643 struct nfs_inode *nfsi = NFS_I(inode); 1801 struct nfs_inode *nfsi = NFS_I(inode);
1644 struct nfs_access_entry *cache = &nfsi->cache_access; 1802 struct nfs_access_entry *cache;
1803 int err = -ENOENT;
1645 1804
1646 if (cache->cred != cred 1805 spin_lock(&inode->i_lock);
1647 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) 1806 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
1648 || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) 1807 goto out_zap;
1649 return -ENOENT; 1808 cache = nfs_access_search_rbtree(inode, cred);
1650 memcpy(res, cache, sizeof(*res)); 1809 if (cache == NULL)
1651 return 0; 1810 goto out;
1811 if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
1812 goto out_stale;
1813 res->jiffies = cache->jiffies;
1814 res->cred = cache->cred;
1815 res->mask = cache->mask;
1816 list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
1817 err = 0;
1818out:
1819 spin_unlock(&inode->i_lock);
1820 return err;
1821out_stale:
1822 rb_erase(&cache->rb_node, &nfsi->access_cache);
1823 list_del(&cache->lru);
1824 spin_unlock(&inode->i_lock);
1825 nfs_access_free_entry(cache);
1826 return -ENOENT;
1827out_zap:
1828 /* This will release the spinlock */
1829 __nfs_access_zap_cache(inode);
1830 return -ENOENT;
1652} 1831}
1653 1832
1654void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) 1833static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
1655{ 1834{
1656 struct nfs_inode *nfsi = NFS_I(inode); 1835 struct nfs_inode *nfsi = NFS_I(inode);
1657 struct nfs_access_entry *cache = &nfsi->cache_access; 1836 struct rb_root *root_node = &nfsi->access_cache;
1837 struct rb_node **p = &root_node->rb_node;
1838 struct rb_node *parent = NULL;
1839 struct nfs_access_entry *entry;
1658 1840
1659 if (cache->cred != set->cred) {
1660 if (cache->cred)
1661 put_rpccred(cache->cred);
1662 cache->cred = get_rpccred(set->cred);
1663 }
1664 /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
1665 spin_lock(&inode->i_lock); 1841 spin_lock(&inode->i_lock);
1666 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; 1842 while (*p != NULL) {
1843 parent = *p;
1844 entry = rb_entry(parent, struct nfs_access_entry, rb_node);
1845
1846 if (set->cred < entry->cred)
1847 p = &parent->rb_left;
1848 else if (set->cred > entry->cred)
1849 p = &parent->rb_right;
1850 else
1851 goto found;
1852 }
1853 rb_link_node(&set->rb_node, parent, p);
1854 rb_insert_color(&set->rb_node, root_node);
1855 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
1667 spin_unlock(&inode->i_lock); 1856 spin_unlock(&inode->i_lock);
1857 return;
1858found:
1859 rb_replace_node(parent, &set->rb_node, root_node);
1860 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
1861 list_del(&entry->lru);
1862 spin_unlock(&inode->i_lock);
1863 nfs_access_free_entry(entry);
1864}
1865
1866void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1867{
1868 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
1869 if (cache == NULL)
1870 return;
1871 RB_CLEAR_NODE(&cache->rb_node);
1668 cache->jiffies = set->jiffies; 1872 cache->jiffies = set->jiffies;
1873 cache->cred = get_rpccred(set->cred);
1669 cache->mask = set->mask; 1874 cache->mask = set->mask;
1875
1876 nfs_access_add_rbtree(inode, cache);
1877
1878 /* Update accounting */
1879 smp_mb__before_atomic_inc();
1880 atomic_long_inc(&nfs_access_nr_entries);
1881 smp_mb__after_atomic_inc();
1882
1883 /* Add inode to global LRU list */
1884 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
1885 spin_lock(&nfs_access_lru_lock);
1886 list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
1887 spin_unlock(&nfs_access_lru_lock);
1888 }
1670} 1889}
1671 1890
1672static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) 1891static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 76ca1cbc38f9..377839bed172 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -855,6 +855,5 @@ int __init nfs_init_directcache(void)
855 */ 855 */
856void nfs_destroy_directcache(void) 856void nfs_destroy_directcache(void)
857{ 857{
858 if (kmem_cache_destroy(nfs_direct_cachep)) 858 kmem_cache_destroy(nfs_direct_cachep);
859 printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
860} 859}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e892880d5b..be997d649127 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
111 111
112 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 112 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
113 lock_kernel(); 113 lock_kernel();
114 res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); 114 res = NFS_PROTO(inode)->file_open(inode, filp);
115 unlock_kernel(); 115 unlock_kernel();
116 return res; 116 return res;
117} 117}
@@ -157,7 +157,7 @@ force_reval:
157static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) 157static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
158{ 158{
159 /* origin == SEEK_END => we must revalidate the cached file length */ 159 /* origin == SEEK_END => we must revalidate the cached file length */
160 if (origin == 2) { 160 if (origin == SEEK_END) {
161 struct inode *inode = filp->f_mapping->host; 161 struct inode *inode = filp->f_mapping->host;
162 int retval = nfs_revalidate_file_size(inode, filp); 162 int retval = nfs_revalidate_file_size(inode, filp);
163 if (retval < 0) 163 if (retval < 0)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 000000000000..76b08ae9ed82
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,311 @@
1/* getroot.c: get the root dentry for an NFS mount
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/config.h>
13#include <linux/module.h>
14#include <linux/init.h>
15
16#include <linux/time.h>
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/string.h>
20#include <linux/stat.h>
21#include <linux/errno.h>
22#include <linux/unistd.h>
23#include <linux/sunrpc/clnt.h>
24#include <linux/sunrpc/stats.h>
25#include <linux/nfs_fs.h>
26#include <linux/nfs_mount.h>
27#include <linux/nfs4_mount.h>
28#include <linux/lockd/bind.h>
29#include <linux/smp_lock.h>
30#include <linux/seq_file.h>
31#include <linux/mount.h>
32#include <linux/nfs_idmap.h>
33#include <linux/vfs.h>
34#include <linux/namei.h>
35#include <linux/namespace.h>
36#include <linux/security.h>
37
38#include <asm/system.h>
39#include <asm/uaccess.h>
40
41#include "nfs4_fs.h"
42#include "delegation.h"
43#include "internal.h"
44
45#define NFSDBG_FACILITY NFSDBG_CLIENT
46#define NFS_PARANOIA 1
47
48/*
49 * get an NFS2/NFS3 root dentry from the root filehandle
50 */
51struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
52{
53 struct nfs_server *server = NFS_SB(sb);
54 struct nfs_fsinfo fsinfo;
55 struct nfs_fattr fattr;
56 struct dentry *mntroot;
57 struct inode *inode;
58 int error;
59
60 /* create a dummy root dentry with dummy inode for this superblock */
61 if (!sb->s_root) {
62 struct nfs_fh dummyfh;
63 struct dentry *root;
64 struct inode *iroot;
65
66 memset(&dummyfh, 0, sizeof(dummyfh));
67 memset(&fattr, 0, sizeof(fattr));
68 nfs_fattr_init(&fattr);
69 fattr.valid = NFS_ATTR_FATTR;
70 fattr.type = NFDIR;
71 fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
72 fattr.nlink = 2;
73
74 iroot = nfs_fhget(sb, &dummyfh, &fattr);
75 if (IS_ERR(iroot))
76 return ERR_PTR(PTR_ERR(iroot));
77
78 root = d_alloc_root(iroot);
79 if (!root) {
80 iput(iroot);
81 return ERR_PTR(-ENOMEM);
82 }
83
84 sb->s_root = root;
85 }
86
87 /* get the actual root for this mount */
88 fsinfo.fattr = &fattr;
89
90 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
91 if (error < 0) {
92 dprintk("nfs_get_root: getattr error = %d\n", -error);
93 return ERR_PTR(error);
94 }
95
96 inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
97 if (IS_ERR(inode)) {
98 dprintk("nfs_get_root: get root inode failed\n");
99 return ERR_PTR(PTR_ERR(inode));
100 }
101
102 /* root dentries normally start off anonymous and get spliced in later
103 * if the dentry tree reaches them; however if the dentry already
104 * exists, we'll pick it up at this point and use it as the root
105 */
106 mntroot = d_alloc_anon(inode);
107 if (!mntroot) {
108 iput(inode);
109 dprintk("nfs_get_root: get root dentry failed\n");
110 return ERR_PTR(-ENOMEM);
111 }
112
113 security_d_instantiate(mntroot, inode);
114
115 if (!mntroot->d_op)
116 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
117
118 return mntroot;
119}
120
121#ifdef CONFIG_NFS_V4
122
123/*
124 * Do a simple pathwalk from the root FH of the server to the nominated target
125 * of the mountpoint
126 * - give error on symlinks
127 * - give error on ".." occurring in the path
128 * - follow traversals
129 */
130int nfs4_path_walk(struct nfs_server *server,
131 struct nfs_fh *mntfh,
132 const char *path)
133{
134 struct nfs_fsinfo fsinfo;
135 struct nfs_fattr fattr;
136 struct nfs_fh lastfh;
137 struct qstr name;
138 int ret;
139 //int referral_count = 0;
140
141 dprintk("--> nfs4_path_walk(,,%s)\n", path);
142
143 fsinfo.fattr = &fattr;
144 nfs_fattr_init(&fattr);
145
146 if (*path++ != '/') {
147 dprintk("nfs4_get_root: Path does not begin with a slash\n");
148 return -EINVAL;
149 }
150
151 /* Start by getting the root filehandle from the server */
152 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
153 if (ret < 0) {
154 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
155 return ret;
156 }
157
158 if (fattr.type != NFDIR) {
159 printk(KERN_ERR "nfs4_get_root:"
160 " getroot encountered non-directory\n");
161 return -ENOTDIR;
162 }
163
164 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
165 printk(KERN_ERR "nfs4_get_root:"
166 " getroot obtained referral\n");
167 return -EREMOTE;
168 }
169
170next_component:
171 dprintk("Next: %s\n", path);
172
173 /* extract the next bit of the path */
174 if (!*path)
175 goto path_walk_complete;
176
177 name.name = path;
178 while (*path && *path != '/')
179 path++;
180 name.len = path - (const char *) name.name;
181
182eat_dot_dir:
183 while (*path == '/')
184 path++;
185
186 if (path[0] == '.' && (path[1] == '/' || !path[1])) {
187 path += 2;
188 goto eat_dot_dir;
189 }
190
191 if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
192 ) {
193 printk(KERN_ERR "nfs4_get_root:"
194 " Mount path contains reference to \"..\"\n");
195 return -EINVAL;
196 }
197
198 /* lookup the next FH in the sequence */
199 memcpy(&lastfh, mntfh, sizeof(lastfh));
200
201 dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
202
203 ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
204 mntfh, &fattr);
205 if (ret < 0) {
206 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
207 return ret;
208 }
209
210 if (fattr.type != NFDIR) {
211 printk(KERN_ERR "nfs4_get_root:"
212 " lookupfh encountered non-directory\n");
213 return -ENOTDIR;
214 }
215
216 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
217 printk(KERN_ERR "nfs4_get_root:"
218 " lookupfh obtained referral\n");
219 return -EREMOTE;
220 }
221
222 goto next_component;
223
224path_walk_complete:
225 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
226 dprintk("<-- nfs4_path_walk() = 0\n");
227 return 0;
228}
229
230/*
231 * get an NFS4 root dentry from the root filehandle
232 */
233struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
234{
235 struct nfs_server *server = NFS_SB(sb);
236 struct nfs_fattr fattr;
237 struct dentry *mntroot;
238 struct inode *inode;
239 int error;
240
241 dprintk("--> nfs4_get_root()\n");
242
243 /* create a dummy root dentry with dummy inode for this superblock */
244 if (!sb->s_root) {
245 struct nfs_fh dummyfh;
246 struct dentry *root;
247 struct inode *iroot;
248
249 memset(&dummyfh, 0, sizeof(dummyfh));
250 memset(&fattr, 0, sizeof(fattr));
251 nfs_fattr_init(&fattr);
252 fattr.valid = NFS_ATTR_FATTR;
253 fattr.type = NFDIR;
254 fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
255 fattr.nlink = 2;
256
257 iroot = nfs_fhget(sb, &dummyfh, &fattr);
258 if (IS_ERR(iroot))
259 return ERR_PTR(PTR_ERR(iroot));
260
261 root = d_alloc_root(iroot);
262 if (!root) {
263 iput(iroot);
264 return ERR_PTR(-ENOMEM);
265 }
266
267 sb->s_root = root;
268 }
269
270 /* get the info about the server and filesystem */
271 error = nfs4_server_capabilities(server, mntfh);
272 if (error < 0) {
273 dprintk("nfs_get_root: getcaps error = %d\n",
274 -error);
275 return ERR_PTR(error);
276 }
277
278 /* get the actual root for this mount */
279 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
280 if (error < 0) {
281 dprintk("nfs_get_root: getattr error = %d\n", -error);
282 return ERR_PTR(error);
283 }
284
285 inode = nfs_fhget(sb, mntfh, &fattr);
286 if (IS_ERR(inode)) {
287 dprintk("nfs_get_root: get root inode failed\n");
288 return ERR_PTR(PTR_ERR(inode));
289 }
290
291 /* root dentries normally start off anonymous and get spliced in later
292 * if the dentry tree reaches them; however if the dentry already
293 * exists, we'll pick it up at this point and use it as the root
294 */
295 mntroot = d_alloc_anon(inode);
296 if (!mntroot) {
297 iput(inode);
298 dprintk("nfs_get_root: get root dentry failed\n");
299 return ERR_PTR(-ENOMEM);
300 }
301
302 security_d_instantiate(mntroot, inode);
303
304 if (!mntroot->d_op)
305 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
306
307 dprintk("<-- nfs4_get_root()\n");
308 return mntroot;
309}
310
311#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd57646e..82ad7110a1c0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
57/* Default cache timeout is 10 minutes */ 57/* Default cache timeout is 10 minutes */
58unsigned int nfs_idmap_cache_timeout = 600 * HZ; 58unsigned int nfs_idmap_cache_timeout = 600 * HZ;
59 59
60static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
61{
62 char *endp;
63 int num = simple_strtol(val, &endp, 0);
64 int jif = num * HZ;
65 if (endp == val || *endp || num < 0 || jif < num)
66 return -EINVAL;
67 *((int *)kp->arg) = jif;
68 return 0;
69}
70
71module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
72 &nfs_idmap_cache_timeout, 0644);
73
60struct idmap_hashent { 74struct idmap_hashent {
61 unsigned long ih_expires; 75 unsigned long ih_expires;
62 __u32 ih_id; 76 __u32 ih_id;
@@ -70,7 +84,6 @@ struct idmap_hashtable {
70}; 84};
71 85
72struct idmap { 86struct idmap {
73 char idmap_path[48];
74 struct dentry *idmap_dentry; 87 struct dentry *idmap_dentry;
75 wait_queue_head_t idmap_wq; 88 wait_queue_head_t idmap_wq;
76 struct idmap_msg idmap_im; 89 struct idmap_msg idmap_im;
@@ -94,24 +107,23 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
94 .destroy_msg = idmap_pipe_destroy_msg, 107 .destroy_msg = idmap_pipe_destroy_msg,
95}; 108};
96 109
97void 110int
98nfs_idmap_new(struct nfs4_client *clp) 111nfs_idmap_new(struct nfs_client *clp)
99{ 112{
100 struct idmap *idmap; 113 struct idmap *idmap;
114 int error;
101 115
102 if (clp->cl_idmap != NULL) 116 BUG_ON(clp->cl_idmap != NULL);
103 return;
104 if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
105 return;
106 117
107 snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), 118 if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
108 "%s/idmap", clp->cl_rpcclient->cl_pathname); 119 return -ENOMEM;
109 120
110 idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, 121 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
111 idmap, &idmap_upcall_ops, 0); 122 idmap, &idmap_upcall_ops, 0);
112 if (IS_ERR(idmap->idmap_dentry)) { 123 if (IS_ERR(idmap->idmap_dentry)) {
124 error = PTR_ERR(idmap->idmap_dentry);
113 kfree(idmap); 125 kfree(idmap);
114 return; 126 return error;
115 } 127 }
116 128
117 mutex_init(&idmap->idmap_lock); 129 mutex_init(&idmap->idmap_lock);
@@ -121,10 +133,11 @@ nfs_idmap_new(struct nfs4_client *clp)
121 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; 133 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
122 134
123 clp->cl_idmap = idmap; 135 clp->cl_idmap = idmap;
136 return 0;
124} 137}
125 138
126void 139void
127nfs_idmap_delete(struct nfs4_client *clp) 140nfs_idmap_delete(struct nfs_client *clp)
128{ 141{
129 struct idmap *idmap = clp->cl_idmap; 142 struct idmap *idmap = clp->cl_idmap;
130 143
@@ -477,27 +490,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
477 return (hash); 490 return (hash);
478} 491}
479 492
480int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) 493int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
481{ 494{
482 struct idmap *idmap = clp->cl_idmap; 495 struct idmap *idmap = clp->cl_idmap;
483 496
484 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 497 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
485} 498}
486 499
487int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) 500int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
488{ 501{
489 struct idmap *idmap = clp->cl_idmap; 502 struct idmap *idmap = clp->cl_idmap;
490 503
491 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 504 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
492} 505}
493 506
494int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) 507int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
495{ 508{
496 struct idmap *idmap = clp->cl_idmap; 509 struct idmap *idmap = clp->cl_idmap;
497 510
498 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 511 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
499} 512}
500int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) 513int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
501{ 514{
502 struct idmap *idmap = clp->cl_idmap; 515 struct idmap *idmap = clp->cl_idmap;
503 516
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2245da..bc9376ca86cd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
76 76
77void nfs_clear_inode(struct inode *inode) 77void nfs_clear_inode(struct inode *inode)
78{ 78{
79 struct nfs_inode *nfsi = NFS_I(inode);
80 struct rpc_cred *cred;
81
82 /* 79 /*
83 * The following should never happen... 80 * The following should never happen...
84 */ 81 */
85 BUG_ON(nfs_have_writebacks(inode)); 82 BUG_ON(nfs_have_writebacks(inode));
86 BUG_ON (!list_empty(&nfsi->open_files)); 83 BUG_ON(!list_empty(&NFS_I(inode)->open_files));
84 BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
87 nfs_zap_acl_cache(inode); 85 nfs_zap_acl_cache(inode);
88 cred = nfsi->cache_access.cred; 86 nfs_access_zap_cache(inode);
89 if (cred)
90 put_rpccred(cred);
91 BUG_ON(atomic_read(&nfsi->data_updates) != 0);
92} 87}
93 88
94/** 89/**
@@ -242,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
242 /* Why so? Because we want revalidate for devices/FIFOs, and 237 /* Why so? Because we want revalidate for devices/FIFOs, and
243 * that's precisely what we have in nfs_file_inode_operations. 238 * that's precisely what we have in nfs_file_inode_operations.
244 */ 239 */
245 inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; 240 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
246 if (S_ISREG(inode->i_mode)) { 241 if (S_ISREG(inode->i_mode)) {
247 inode->i_fop = &nfs_file_operations; 242 inode->i_fop = &nfs_file_operations;
248 inode->i_data.a_ops = &nfs_file_aops; 243 inode->i_data.a_ops = &nfs_file_aops;
249 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; 244 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
250 } else if (S_ISDIR(inode->i_mode)) { 245 } else if (S_ISDIR(inode->i_mode)) {
251 inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; 246 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
252 inode->i_fop = &nfs_dir_operations; 247 inode->i_fop = &nfs_dir_operations;
253 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 248 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
254 && fattr->size <= NFS_LIMIT_READDIRPLUS) 249 && fattr->size <= NFS_LIMIT_READDIRPLUS)
@@ -282,15 +277,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
282 * report the blocks in 512byte units 277 * report the blocks in 512byte units
283 */ 278 */
284 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 279 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
285 inode->i_blksize = inode->i_sb->s_blocksize;
286 } else { 280 } else {
287 inode->i_blocks = fattr->du.nfs2.blocks; 281 inode->i_blocks = fattr->du.nfs2.blocks;
288 inode->i_blksize = fattr->du.nfs2.blocksize;
289 } 282 }
290 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 283 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
291 nfsi->attrtimeo_timestamp = jiffies; 284 nfsi->attrtimeo_timestamp = jiffies;
292 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 285 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
293 nfsi->cache_access.cred = NULL; 286 nfsi->access_cache = RB_ROOT;
294 287
295 unlock_new_inode(inode); 288 unlock_new_inode(inode);
296 } else 289 } else
@@ -448,7 +441,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
448{ 441{
449 struct nfs_open_context *ctx; 442 struct nfs_open_context *ctx;
450 443
451 ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); 444 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
452 if (ctx != NULL) { 445 if (ctx != NULL) {
453 atomic_set(&ctx->count, 1); 446 atomic_set(&ctx->count, 1);
454 ctx->dentry = dget(dentry); 447 ctx->dentry = dget(dentry);
@@ -722,13 +715,11 @@ void nfs_end_data_update(struct inode *inode)
722{ 715{
723 struct nfs_inode *nfsi = NFS_I(inode); 716 struct nfs_inode *nfsi = NFS_I(inode);
724 717
725 if (!nfs_have_delegation(inode, FMODE_READ)) { 718 /* Directories: invalidate page cache */
726 /* Directories and symlinks: invalidate page cache */ 719 if (S_ISDIR(inode->i_mode)) {
727 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { 720 spin_lock(&inode->i_lock);
728 spin_lock(&inode->i_lock); 721 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
729 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 722 spin_unlock(&inode->i_lock);
730 spin_unlock(&inode->i_lock);
731 }
732 } 723 }
733 nfsi->cache_change_attribute = jiffies; 724 nfsi->cache_change_attribute = jiffies;
734 atomic_dec(&nfsi->data_updates); 725 atomic_dec(&nfsi->data_updates);
@@ -847,6 +838,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
847 * 838 *
848 * After an operation that has changed the inode metadata, mark the 839 * After an operation that has changed the inode metadata, mark the
849 * attribute cache as being invalid, then try to update it. 840 * attribute cache as being invalid, then try to update it.
841 *
842 * NB: if the server didn't return any post op attributes, this
843 * function will force the retrieval of attributes before the next
844 * NFS request. Thus it should be used only for operations that
845 * are expected to change one or more attributes, to avoid
846 * unnecessary NFS requests and trips through nfs_update_inode().
850 */ 847 */
851int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) 848int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
852{ 849{
@@ -970,10 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
970 * report the blocks in 512byte units 967 * report the blocks in 512byte units
971 */ 968 */
972 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 969 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
973 inode->i_blksize = inode->i_sb->s_blocksize;
974 } else { 970 } else {
975 inode->i_blocks = fattr->du.nfs2.blocks; 971 inode->i_blocks = fattr->du.nfs2.blocks;
976 inode->i_blksize = fattr->du.nfs2.blocksize;
977 } 972 }
978 973
979 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 974 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
@@ -1025,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1025 out_fileid: 1020 out_fileid:
1026 printk(KERN_ERR "NFS: server %s error: fileid changed\n" 1021 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1027 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", 1022 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1028 NFS_SERVER(inode)->hostname, inode->i_sb->s_id, 1023 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1029 (long long)nfsi->fileid, (long long)fattr->fileid); 1024 (long long)nfsi->fileid, (long long)fattr->fileid);
1030 goto out_err; 1025 goto out_err;
1031} 1026}
@@ -1109,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
1109 INIT_LIST_HEAD(&nfsi->dirty); 1104 INIT_LIST_HEAD(&nfsi->dirty);
1110 INIT_LIST_HEAD(&nfsi->commit); 1105 INIT_LIST_HEAD(&nfsi->commit);
1111 INIT_LIST_HEAD(&nfsi->open_files); 1106 INIT_LIST_HEAD(&nfsi->open_files);
1107 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1108 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1112 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1109 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1113 atomic_set(&nfsi->data_updates, 0); 1110 atomic_set(&nfsi->data_updates, 0);
1114 nfsi->ndirty = 0; 1111 nfsi->ndirty = 0;
@@ -1133,8 +1130,7 @@ static int __init nfs_init_inodecache(void)
1133 1130
1134static void nfs_destroy_inodecache(void) 1131static void nfs_destroy_inodecache(void)
1135{ 1132{
1136 if (kmem_cache_destroy(nfs_inode_cachep)) 1133 kmem_cache_destroy(nfs_inode_cachep);
1137 printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
1138} 1134}
1139 1135
1140/* 1136/*
@@ -1144,6 +1140,10 @@ static int __init init_nfs_fs(void)
1144{ 1140{
1145 int err; 1141 int err;
1146 1142
1143 err = nfs_fs_proc_init();
1144 if (err)
1145 goto out5;
1146
1147 err = nfs_init_nfspagecache(); 1147 err = nfs_init_nfspagecache();
1148 if (err) 1148 if (err)
1149 goto out4; 1149 goto out4;
@@ -1184,6 +1184,8 @@ out2:
1184out3: 1184out3:
1185 nfs_destroy_nfspagecache(); 1185 nfs_destroy_nfspagecache();
1186out4: 1186out4:
1187 nfs_fs_proc_exit();
1188out5:
1187 return err; 1189 return err;
1188} 1190}
1189 1191
@@ -1198,6 +1200,7 @@ static void __exit exit_nfs_fs(void)
1198 rpc_proc_unregister("nfs"); 1200 rpc_proc_unregister("nfs");
1199#endif 1201#endif
1200 unregister_nfs_fs(); 1202 unregister_nfs_fs();
1203 nfs_fs_proc_exit();
1201} 1204}
1202 1205
1203/* Not quite true; I just maintain it */ 1206/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5def0fc..bea0b016bd70 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
4 4
5#include <linux/mount.h> 5#include <linux/mount.h>
6 6
7struct nfs_string;
8struct nfs_mount_data;
9struct nfs4_mount_data;
10
11/* Maximum number of readahead requests
12 * FIXME: this should really be a sysctl so that users may tune it to suit
13 * their needs. People that do NFS over a slow network, might for
14 * instance want to reduce it to something closer to 1 for improved
15 * interactive response.
16 */
17#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
18
7struct nfs_clone_mount { 19struct nfs_clone_mount {
8 const struct super_block *sb; 20 const struct super_block *sb;
9 const struct dentry *dentry; 21 const struct dentry *dentry;
@@ -15,7 +27,40 @@ struct nfs_clone_mount {
15 rpc_authflavor_t authflavor; 27 rpc_authflavor_t authflavor;
16}; 28};
17 29
18/* namespace-nfs4.c */ 30/* client.c */
31extern struct rpc_program nfs_program;
32
33extern void nfs_put_client(struct nfs_client *);
34extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
35extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
36 struct nfs_fh *);
37extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
38 const char *,
39 const struct sockaddr_in *,
40 const char *,
41 const char *,
42 rpc_authflavor_t,
43 struct nfs_fh *);
44extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
45 struct nfs_fh *);
46extern void nfs_free_server(struct nfs_server *server);
47extern struct nfs_server *nfs_clone_server(struct nfs_server *,
48 struct nfs_fh *,
49 struct nfs_fattr *);
50#ifdef CONFIG_PROC_FS
51extern int __init nfs_fs_proc_init(void);
52extern void nfs_fs_proc_exit(void);
53#else
54static inline int nfs_fs_proc_init(void)
55{
56 return 0;
57}
58static inline void nfs_fs_proc_exit(void)
59{
60}
61#endif
62
63/* nfs4namespace.c */
19#ifdef CONFIG_NFS_V4 64#ifdef CONFIG_NFS_V4
20extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 65extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
21#else 66#else
@@ -46,6 +91,7 @@ extern void nfs_destroy_directcache(void);
46#endif 91#endif
47 92
48/* nfs2xdr.c */ 93/* nfs2xdr.c */
94extern int nfs_stat_to_errno(int);
49extern struct rpc_procinfo nfs_procedures[]; 95extern struct rpc_procinfo nfs_procedures[];
50extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); 96extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
51 97
@@ -54,8 +100,9 @@ extern struct rpc_procinfo nfs3_procedures[];
54extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); 100extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
55 101
56/* nfs4xdr.c */ 102/* nfs4xdr.c */
57extern int nfs_stat_to_errno(int); 103#ifdef CONFIG_NFS_V4
58extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); 104extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
105#endif
59 106
60/* nfs4proc.c */ 107/* nfs4proc.c */
61#ifdef CONFIG_NFS_V4 108#ifdef CONFIG_NFS_V4
@@ -66,6 +113,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
66 struct page *page); 113 struct page *page);
67#endif 114#endif
68 115
116/* dir.c */
117extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
118
69/* inode.c */ 119/* inode.c */
70extern struct inode *nfs_alloc_inode(struct super_block *sb); 120extern struct inode *nfs_alloc_inode(struct super_block *sb);
71extern void nfs_destroy_inode(struct inode *); 121extern void nfs_destroy_inode(struct inode *);
@@ -76,10 +126,10 @@ extern void nfs4_clear_inode(struct inode *);
76#endif 126#endif
77 127
78/* super.c */ 128/* super.c */
79extern struct file_system_type nfs_referral_nfs4_fs_type; 129extern struct file_system_type nfs_xdev_fs_type;
80extern struct file_system_type clone_nfs_fs_type;
81#ifdef CONFIG_NFS_V4 130#ifdef CONFIG_NFS_V4
82extern struct file_system_type clone_nfs4_fs_type; 131extern struct file_system_type nfs4_xdev_fs_type;
132extern struct file_system_type nfs4_referral_fs_type;
83#endif 133#endif
84 134
85extern struct rpc_stat nfs_rpcstat; 135extern struct rpc_stat nfs_rpcstat;
@@ -88,30 +138,30 @@ extern int __init register_nfs_fs(void);
88extern void __exit unregister_nfs_fs(void); 138extern void __exit unregister_nfs_fs(void);
89 139
90/* namespace.c */ 140/* namespace.c */
91extern char *nfs_path(const char *base, const struct dentry *dentry, 141extern char *nfs_path(const char *base,
142 const struct dentry *droot,
143 const struct dentry *dentry,
92 char *buffer, ssize_t buflen); 144 char *buffer, ssize_t buflen);
93 145
94/* 146/* getroot.c */
95 * Determine the mount path as a string 147extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
96 */
97static inline char *
98nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
99{
100#ifdef CONFIG_NFS_V4 148#ifdef CONFIG_NFS_V4
101 return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); 149extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
102#else 150
103 return NULL; 151extern int nfs4_path_walk(struct nfs_server *server,
152 struct nfs_fh *mntfh,
153 const char *path);
104#endif 154#endif
105}
106 155
107/* 156/*
108 * Determine the device name as a string 157 * Determine the device name as a string
109 */ 158 */
110static inline char *nfs_devname(const struct vfsmount *mnt_parent, 159static inline char *nfs_devname(const struct vfsmount *mnt_parent,
111 const struct dentry *dentry, 160 const struct dentry *dentry,
112 char *buffer, ssize_t buflen) 161 char *buffer, ssize_t buflen)
113{ 162{
114 return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); 163 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
164 dentry, buffer, buflen);
115} 165}
116 166
117/* 167/*
@@ -167,20 +217,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
167 if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) 217 if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
168 sb->s_maxbytes = MAX_LFS_FILESIZE; 218 sb->s_maxbytes = MAX_LFS_FILESIZE;
169} 219}
170
171/*
172 * Check if the string represents a "valid" IPv4 address
173 */
174static inline int valid_ipaddr4(const char *buf)
175{
176 int rc, count, in[4];
177
178 rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
179 if (rc != 4)
180 return -EINVAL;
181 for (count = 0; count < 4; count++) {
182 if (in[count] > 255)
183 return -EINVAL;
184 }
185 return 0;
186}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4d4214..d507b021207f 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
14#include <linux/net.h> 14#include <linux/net.h>
15#include <linux/in.h> 15#include <linux/in.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/sched.h> 17#include <linux/sunrpc/sched.h>
19#include <linux/nfs_fs.h> 18#include <linux/nfs_fs.h>
20 19
@@ -77,22 +76,19 @@ static struct rpc_clnt *
77mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, 76mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
78 int protocol) 77 int protocol)
79{ 78{
80 struct rpc_xprt *xprt; 79 struct rpc_create_args args = {
81 struct rpc_clnt *clnt; 80 .protocol = protocol,
82 81 .address = (struct sockaddr *)srvaddr,
83 xprt = xprt_create_proto(protocol, srvaddr, NULL); 82 .addrsize = sizeof(*srvaddr),
84 if (IS_ERR(xprt)) 83 .servername = hostname,
85 return (struct rpc_clnt *)xprt; 84 .program = &mnt_program,
86 85 .version = version,
87 clnt = rpc_create_client(xprt, hostname, 86 .authflavor = RPC_AUTH_UNIX,
88 &mnt_program, version, 87 .flags = (RPC_CLNT_CREATE_ONESHOT |
89 RPC_AUTH_UNIX); 88 RPC_CLNT_CREATE_INTR),
90 if (!IS_ERR(clnt)) { 89 };
91 clnt->cl_softrtry = 1; 90
92 clnt->cl_oneshot = 1; 91 return rpc_create(&args);
93 clnt->cl_intr = 1;
94 }
95 return clnt;
96} 92}
97 93
98/* 94/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169c8cac..60408646176b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
2 * linux/fs/nfs/namespace.c 2 * linux/fs/nfs/namespace.c
3 * 3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> 4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 * - Modified by David Howells <dhowells@redhat.com>
5 * 6 *
6 * NFS namespace 7 * NFS namespace
7 */ 8 */
@@ -25,9 +26,15 @@ LIST_HEAD(nfs_automount_list);
25static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); 26static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
26int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 28
29static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
30 const struct dentry *dentry,
31 struct nfs_fh *fh,
32 struct nfs_fattr *fattr);
33
28/* 34/*
29 * nfs_path - reconstruct the path given an arbitrary dentry 35 * nfs_path - reconstruct the path given an arbitrary dentry
30 * @base - arbitrary string to prepend to the path 36 * @base - arbitrary string to prepend to the path
37 * @droot - pointer to root dentry for mountpoint
31 * @dentry - pointer to dentry 38 * @dentry - pointer to dentry
32 * @buffer - result buffer 39 * @buffer - result buffer
33 * @buflen - length of buffer 40 * @buflen - length of buffer
@@ -38,7 +45,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
38 * This is mainly for use in figuring out the path on the 45 * This is mainly for use in figuring out the path on the
39 * server side when automounting on top of an existing partition. 46 * server side when automounting on top of an existing partition.
40 */ 47 */
41char *nfs_path(const char *base, const struct dentry *dentry, 48char *nfs_path(const char *base,
49 const struct dentry *droot,
50 const struct dentry *dentry,
42 char *buffer, ssize_t buflen) 51 char *buffer, ssize_t buflen)
43{ 52{
44 char *end = buffer+buflen; 53 char *end = buffer+buflen;
@@ -47,7 +56,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
47 *--end = '\0'; 56 *--end = '\0';
48 buflen--; 57 buflen--;
49 spin_lock(&dcache_lock); 58 spin_lock(&dcache_lock);
50 while (!IS_ROOT(dentry)) { 59 while (!IS_ROOT(dentry) && dentry != droot) {
51 namelen = dentry->d_name.len; 60 namelen = dentry->d_name.len;
52 buflen -= namelen + 1; 61 buflen -= namelen + 1;
53 if (buflen < 0) 62 if (buflen < 0)
@@ -96,15 +105,18 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
96 struct nfs_fattr fattr; 105 struct nfs_fattr fattr;
97 int err; 106 int err;
98 107
108 dprintk("--> nfs_follow_mountpoint()\n");
109
99 BUG_ON(IS_ROOT(dentry)); 110 BUG_ON(IS_ROOT(dentry));
100 dprintk("%s: enter\n", __FUNCTION__); 111 dprintk("%s: enter\n", __FUNCTION__);
101 dput(nd->dentry); 112 dput(nd->dentry);
102 nd->dentry = dget(dentry); 113 nd->dentry = dget(dentry);
103 if (d_mountpoint(nd->dentry)) 114
104 goto out_follow;
105 /* Look it up again */ 115 /* Look it up again */
106 parent = dget_parent(nd->dentry); 116 parent = dget_parent(nd->dentry);
107 err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); 117 err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
118 &nd->dentry->d_name,
119 &fh, &fattr);
108 dput(parent); 120 dput(parent);
109 if (err != 0) 121 if (err != 0)
110 goto out_err; 122 goto out_err;
@@ -132,6 +144,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
132 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); 144 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
133out: 145out:
134 dprintk("%s: done, returned %d\n", __FUNCTION__, err); 146 dprintk("%s: done, returned %d\n", __FUNCTION__, err);
147
148 dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
135 return ERR_PTR(err); 149 return ERR_PTR(err);
136out_err: 150out_err:
137 path_release(nd); 151 path_release(nd);
@@ -172,22 +186,23 @@ void nfs_release_automount_timer(void)
172/* 186/*
173 * Clone a mountpoint of the appropriate type 187 * Clone a mountpoint of the appropriate type
174 */ 188 */
175static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, 189static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
190 const char *devname,
176 struct nfs_clone_mount *mountdata) 191 struct nfs_clone_mount *mountdata)
177{ 192{
178#ifdef CONFIG_NFS_V4 193#ifdef CONFIG_NFS_V4
179 struct vfsmount *mnt = NULL; 194 struct vfsmount *mnt = NULL;
180 switch (server->rpc_ops->version) { 195 switch (server->nfs_client->cl_nfsversion) {
181 case 2: 196 case 2:
182 case 3: 197 case 3:
183 mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); 198 mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
184 break; 199 break;
185 case 4: 200 case 4:
186 mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); 201 mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
187 } 202 }
188 return mnt; 203 return mnt;
189#else 204#else
190 return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); 205 return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
191#endif 206#endif
192} 207}
193 208
@@ -199,9 +214,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devn
199 * @fattr - attributes for new root inode 214 * @fattr - attributes for new root inode
200 * 215 *
201 */ 216 */
202struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 217static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
203 const struct dentry *dentry, struct nfs_fh *fh, 218 const struct dentry *dentry,
204 struct nfs_fattr *fattr) 219 struct nfs_fh *fh,
220 struct nfs_fattr *fattr)
205{ 221{
206 struct nfs_clone_mount mountdata = { 222 struct nfs_clone_mount mountdata = {
207 .sb = mnt_parent->mnt_sb, 223 .sb = mnt_parent->mnt_sb,
@@ -213,6 +229,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
213 char *page = (char *) __get_free_page(GFP_USER); 229 char *page = (char *) __get_free_page(GFP_USER);
214 char *devname; 230 char *devname;
215 231
232 dprintk("--> nfs_do_submount()\n");
233
216 dprintk("%s: submounting on %s/%s\n", __FUNCTION__, 234 dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
217 dentry->d_parent->d_name.name, 235 dentry->d_parent->d_name.name,
218 dentry->d_name.name); 236 dentry->d_name.name);
@@ -227,5 +245,7 @@ free_page:
227 free_page((unsigned long)page); 245 free_page((unsigned long)page);
228out: 246out:
229 dprintk("%s: done\n", __FUNCTION__); 247 dprintk("%s: done\n", __FUNCTION__);
248
249 dprintk("<-- nfs_do_submount() = %p\n", mnt);
230 return mnt; 250 return mnt;
231} 251}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391eef6b93..b49501fc0a79 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
51#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) 51#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
52#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) 52#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
53#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) 53#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
54#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) 54#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz)
55#define NFS_readdirargs_sz (NFS_fhandle_sz+2) 55#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
56 56
57#define NFS_attrstat_sz (1+NFS_fattr_sz) 57#define NFS_attrstat_sz (1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
351static int 351static int
352nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) 352nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
353{ 353{
354 struct xdr_buf *sndbuf = &req->rq_snd_buf;
355 size_t pad;
356
354 p = xdr_encode_fhandle(p, args->fromfh); 357 p = xdr_encode_fhandle(p, args->fromfh);
355 p = xdr_encode_array(p, args->fromname, args->fromlen); 358 p = xdr_encode_array(p, args->fromname, args->fromlen);
356 p = xdr_encode_array(p, args->topath, args->tolen); 359 *p++ = htonl(args->pathlen);
360 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
361
362 xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
363
364 /*
365 * xdr_encode_pages may have added a few bytes to ensure the
366 * pathname ends on a 4-byte boundary. Start encoding the
367 * attributes after the pad bytes.
368 */
369 pad = sndbuf->tail->iov_len;
370 if (pad > 0)
371 p++;
357 p = xdr_encode_sattr(p, args->sattr); 372 p = xdr_encode_sattr(p, args->sattr);
358 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 373 sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
359 return 0; 374 return 0;
360} 375}
361 376
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f82cea..3b234d4601e7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
81} 81}
82 82
83/* 83/*
84 * Bare-bones access to getattr: this is for nfs_read_super. 84 * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
85 */ 85 */
86static int 86static int
87nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 87nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
90 int status; 90 int status;
91 91
92 status = do_proc_get_root(server->client, fhandle, info); 92 status = do_proc_get_root(server->client, fhandle, info);
93 if (status && server->client_sys != server->client) 93 if (status && server->nfs_client->cl_rpcclient != server->client)
94 status = do_proc_get_root(server->client_sys, fhandle, info); 94 status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
95 return status; 95 return status;
96} 96}
97 97
@@ -449,7 +449,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr
449 struct nfs_fattr res; 449 struct nfs_fattr res;
450 } *ptr; 450 } *ptr;
451 451
452 ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL); 452 ptr = kmalloc(sizeof(*ptr), GFP_KERNEL);
453 if (!ptr) 453 if (!ptr)
454 return -ENOMEM; 454 return -ENOMEM;
455 ptr->arg.fh = NFS_FH(dir->d_inode); 455 ptr->arg.fh = NFS_FH(dir->d_inode);
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
544} 544}
545 545
546static int 546static int
547nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, 547nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
548 struct iattr *sattr, struct nfs_fh *fhandle, 548 unsigned int len, struct iattr *sattr)
549 struct nfs_fattr *fattr)
550{ 549{
551 struct nfs_fattr dir_attr; 550 struct nfs_fh fhandle;
551 struct nfs_fattr fattr, dir_attr;
552 struct nfs3_symlinkargs arg = { 552 struct nfs3_symlinkargs arg = {
553 .fromfh = NFS_FH(dir), 553 .fromfh = NFS_FH(dir),
554 .fromname = name->name, 554 .fromname = dentry->d_name.name,
555 .fromlen = name->len, 555 .fromlen = dentry->d_name.len,
556 .topath = path->name, 556 .pages = &page,
557 .tolen = path->len, 557 .pathlen = len,
558 .sattr = sattr 558 .sattr = sattr
559 }; 559 };
560 struct nfs3_diropres res = { 560 struct nfs3_diropres res = {
561 .dir_attr = &dir_attr, 561 .dir_attr = &dir_attr,
562 .fh = fhandle, 562 .fh = &fhandle,
563 .fattr = fattr 563 .fattr = &fattr
564 }; 564 };
565 struct rpc_message msg = { 565 struct rpc_message msg = {
566 .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], 566 .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -569,13 +569,19 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
569 }; 569 };
570 int status; 570 int status;
571 571
572 if (path->len > NFS3_MAXPATHLEN) 572 if (len > NFS3_MAXPATHLEN)
573 return -ENAMETOOLONG; 573 return -ENAMETOOLONG;
574 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 574
575 dprintk("NFS call symlink %s\n", dentry->d_name.name);
576
575 nfs_fattr_init(&dir_attr); 577 nfs_fattr_init(&dir_attr);
576 nfs_fattr_init(fattr); 578 nfs_fattr_init(&fattr);
577 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 579 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
578 nfs_post_op_update_inode(dir, &dir_attr); 580 nfs_post_op_update_inode(dir, &dir_attr);
581 if (status != 0)
582 goto out;
583 status = nfs_instantiate(dentry, &fhandle, &fattr);
584out:
579 dprintk("NFS reply symlink: %d\n", status); 585 dprintk("NFS reply symlink: %d\n", status);
580 return status; 586 return status;
581} 587}
@@ -785,7 +791,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
785 791
786 dprintk("NFS call fsinfo\n"); 792 dprintk("NFS call fsinfo\n");
787 nfs_fattr_init(info->fattr); 793 nfs_fattr_init(info->fattr);
788 status = rpc_call_sync(server->client_sys, &msg, 0); 794 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
789 dprintk("NFS reply fsinfo: %d\n", status); 795 dprintk("NFS reply fsinfo: %d\n", status);
790 return status; 796 return status;
791} 797}
@@ -886,7 +892,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
886 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); 892 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
887} 893}
888 894
889struct nfs_rpc_ops nfs_v3_clientops = { 895const struct nfs_rpc_ops nfs_v3_clientops = {
890 .version = 3, /* protocol version */ 896 .version = 3, /* protocol version */
891 .dentry_ops = &nfs_dentry_operations, 897 .dentry_ops = &nfs_dentry_operations,
892 .dir_inode_ops = &nfs3_dir_inode_operations, 898 .dir_inode_ops = &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269e9753..16556fa4effb 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
56#define NFS3_writeargs_sz (NFS3_fh_sz+5) 56#define NFS3_writeargs_sz (NFS3_fh_sz+5)
57#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) 57#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
58#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) 58#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
59#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) 59#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
60#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) 60#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
61#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) 61#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
62#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) 62#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
398 p = xdr_encode_fhandle(p, args->fromfh); 398 p = xdr_encode_fhandle(p, args->fromfh);
399 p = xdr_encode_array(p, args->fromname, args->fromlen); 399 p = xdr_encode_array(p, args->fromname, args->fromlen);
400 p = xdr_encode_sattr(p, args->sattr); 400 p = xdr_encode_sattr(p, args->sattr);
401 p = xdr_encode_array(p, args->topath, args->tolen); 401 *p++ = htonl(args->pathlen);
402 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 402 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
403
404 /* Copy the page */
405 xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
403 return 0; 406 return 0;
404} 407}
405 408
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a102860df37..61095fe4b5ca 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@ enum nfs4_client_state {
43}; 43};
44 44
45/* 45/*
46 * The nfs4_client identifies our client state to the server.
47 */
48struct nfs4_client {
49 struct list_head cl_servers; /* Global list of servers */
50 struct in_addr cl_addr; /* Server identifier */
51 u64 cl_clientid; /* constant */
52 nfs4_verifier cl_confirm;
53 unsigned long cl_state;
54
55 u32 cl_lockowner_id;
56
57 /*
58 * The following rwsem ensures exclusive access to the server
59 * while we recover the state following a lease expiration.
60 */
61 struct rw_semaphore cl_sem;
62
63 struct list_head cl_delegations;
64 struct list_head cl_state_owners;
65 struct list_head cl_unused;
66 int cl_nunused;
67 spinlock_t cl_lock;
68 atomic_t cl_count;
69
70 struct rpc_clnt * cl_rpcclient;
71
72 struct list_head cl_superblocks; /* List of nfs_server structs */
73
74 unsigned long cl_lease_time;
75 unsigned long cl_last_renewal;
76 struct work_struct cl_renewd;
77 struct work_struct cl_recoverd;
78
79 struct rpc_wait_queue cl_rpcwaitq;
80
81 /* used for the setclientid verifier */
82 struct timespec cl_boot_time;
83
84 /* idmapper */
85 struct idmap * cl_idmap;
86
87 /* Our own IP address, as a null-terminated string.
88 * This is used to generate the clientid, and the callback address.
89 */
90 char cl_ipaddr[16];
91 unsigned char cl_id_uniquifier;
92};
93
94/*
95 * struct rpc_sequence ensures that RPC calls are sent in the exact 46 * struct rpc_sequence ensures that RPC calls are sent in the exact
96 * order that they appear on the list. 47 * order that they appear on the list.
97 */ 48 */
@@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
127struct nfs4_state_owner { 78struct nfs4_state_owner {
128 spinlock_t so_lock; 79 spinlock_t so_lock;
129 struct list_head so_list; /* per-clientid list of state_owners */ 80 struct list_head so_list; /* per-clientid list of state_owners */
130 struct nfs4_client *so_client; 81 struct nfs_client *so_client;
131 u32 so_id; /* 32-bit identifier, unique */ 82 u32 so_id; /* 32-bit identifier, unique */
132 atomic_t so_count; 83 atomic_t so_count;
133 84
@@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
210 161
211/* nfs4proc.c */ 162/* nfs4proc.c */
212extern int nfs4_map_errors(int err); 163extern int nfs4_map_errors(int err);
213extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); 164extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
214extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); 165extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
215extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); 166extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
216extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); 167extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 168extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
218extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 169extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
219extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 170extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2];
231extern const u32 nfs4_fs_locations_bitmap[2]; 182extern const u32 nfs4_fs_locations_bitmap[2];
232 183
233/* nfs4renewd.c */ 184/* nfs4renewd.c */
234extern void nfs4_schedule_state_renewal(struct nfs4_client *); 185extern void nfs4_schedule_state_renewal(struct nfs_client *);
235extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); 186extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
236extern void nfs4_kill_renewd(struct nfs4_client *); 187extern void nfs4_kill_renewd(struct nfs_client *);
237extern void nfs4_renew_state(void *); 188extern void nfs4_renew_state(void *);
238 189
239/* nfs4state.c */ 190/* nfs4state.c */
240extern void init_nfsv4_state(struct nfs_server *); 191struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
241extern void destroy_nfsv4_state(struct nfs_server *); 192extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
242extern struct nfs4_client *nfs4_get_client(struct in_addr *);
243extern void nfs4_put_client(struct nfs4_client *clp);
244extern struct nfs4_client *nfs4_find_client(struct in_addr *);
245struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
246extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
247 193
248extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 194extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
249extern void nfs4_put_state_owner(struct nfs4_state_owner *); 195extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
252extern void nfs4_put_open_state(struct nfs4_state *); 198extern void nfs4_put_open_state(struct nfs4_state *);
253extern void nfs4_close_state(struct nfs4_state *, mode_t); 199extern void nfs4_close_state(struct nfs4_state *, mode_t);
254extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 200extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
255extern void nfs4_schedule_state_recovery(struct nfs4_client *); 201extern void nfs4_schedule_state_recovery(struct nfs_client *);
256extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 202extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
257extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 203extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
258extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 204extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
276 222
277#else 223#else
278 224
279#define init_nfsv4_state(server) do { } while (0)
280#define destroy_nfsv4_state(server) do { } while (0)
281#define nfs4_put_state_owner(inode, owner) do { } while (0)
282#define nfs4_put_open_state(state) do { } while (0)
283#define nfs4_close_state(a, b) do { } while (0) 225#define nfs4_close_state(a, b) do { } while (0)
284 226
285#endif /* CONFIG_NFS_V4 */ 227#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27b74e6..24e47f3bbd17 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
2 * linux/fs/nfs/nfs4namespace.c 2 * linux/fs/nfs/nfs4namespace.c
3 * 3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> 4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 * - Modified by David Howells <dhowells@redhat.com>
5 * 6 *
6 * NFSv4 namespace 7 * NFSv4 namespace
7 */ 8 */
@@ -23,7 +24,7 @@
23/* 24/*
24 * Check if fs_root is valid 25 * Check if fs_root is valid
25 */ 26 */
26static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, 27static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
27 char *buffer, ssize_t buflen) 28 char *buffer, ssize_t buflen)
28{ 29{
29 char *end = buffer + buflen; 30 char *end = buffer + buflen;
@@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
34 35
35 n = pathname->ncomponents; 36 n = pathname->ncomponents;
36 while (--n >= 0) { 37 while (--n >= 0) {
37 struct nfs4_string *component = &pathname->components[n]; 38 const struct nfs4_string *component = &pathname->components[n];
38 buflen -= component->len + 1; 39 buflen -= component->len + 1;
39 if (buflen < 0) 40 if (buflen < 0)
40 goto Elong; 41 goto Elong;
@@ -47,6 +48,68 @@ Elong:
47 return ERR_PTR(-ENAMETOOLONG); 48 return ERR_PTR(-ENAMETOOLONG);
48} 49}
49 50
51/*
52 * Determine the mount path as a string
53 */
54static char *nfs4_path(const struct vfsmount *mnt_parent,
55 const struct dentry *dentry,
56 char *buffer, ssize_t buflen)
57{
58 const char *srvpath;
59
60 srvpath = strchr(mnt_parent->mnt_devname, ':');
61 if (srvpath)
62 srvpath++;
63 else
64 srvpath = mnt_parent->mnt_devname;
65
66 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
67}
68
69/*
70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
71 * believe to be the server path to this dentry
72 */
73static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
74 const struct dentry *dentry,
75 const struct nfs4_fs_locations *locations,
76 char *page, char *page2)
77{
78 const char *path, *fs_path;
79
80 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
81 if (IS_ERR(path))
82 return PTR_ERR(path);
83
84 fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
85 if (IS_ERR(fs_path))
86 return PTR_ERR(fs_path);
87
88 if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
89 dprintk("%s: path %s does not begin with fsroot %s\n",
90 __FUNCTION__, path, fs_path);
91 return -ENOENT;
92 }
93
94 return 0;
95}
96
97/*
98 * Check if the string represents a "valid" IPv4 address
99 */
100static inline int valid_ipaddr4(const char *buf)
101{
102 int rc, count, in[4];
103
104 rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
105 if (rc != 4)
106 return -EINVAL;
107 for (count = 0; count < 4; count++) {
108 if (in[count] > 255)
109 return -EINVAL;
110 }
111 return 0;
112}
50 113
51/** 114/**
52 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 115 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -60,7 +123,7 @@ Elong:
60 */ 123 */
61static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 124static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
62 const struct dentry *dentry, 125 const struct dentry *dentry,
63 struct nfs4_fs_locations *locations) 126 const struct nfs4_fs_locations *locations)
64{ 127{
65 struct vfsmount *mnt = ERR_PTR(-ENOENT); 128 struct vfsmount *mnt = ERR_PTR(-ENOENT);
66 struct nfs_clone_mount mountdata = { 129 struct nfs_clone_mount mountdata = {
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
68 .dentry = dentry, 131 .dentry = dentry,
69 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 132 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
70 }; 133 };
71 char *page, *page2; 134 char *page = NULL, *page2 = NULL;
72 char *path, *fs_path;
73 char *devname; 135 char *devname;
74 int loc, s; 136 int loc, s, error;
75 137
76 if (locations == NULL || locations->nlocations <= 0) 138 if (locations == NULL || locations->nlocations <= 0)
77 goto out; 139 goto out;
@@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
79 dprintk("%s: referral at %s/%s\n", __FUNCTION__, 141 dprintk("%s: referral at %s/%s\n", __FUNCTION__,
80 dentry->d_parent->d_name.name, dentry->d_name.name); 142 dentry->d_parent->d_name.name, dentry->d_name.name);
81 143
82 /* Ensure fs path is a prefix of current dentry path */
83 page = (char *) __get_free_page(GFP_USER); 144 page = (char *) __get_free_page(GFP_USER);
84 if (page == NULL) 145 if (!page)
85 goto out; 146 goto out;
147
86 page2 = (char *) __get_free_page(GFP_USER); 148 page2 = (char *) __get_free_page(GFP_USER);
87 if (page2 == NULL) 149 if (!page2)
88 goto out; 150 goto out;
89 151
90 path = nfs4_path(dentry, page, PAGE_SIZE); 152 /* Ensure fs path is a prefix of current dentry path */
91 if (IS_ERR(path)) 153 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
92 goto out_free; 154 if (error < 0) {
93 155 mnt = ERR_PTR(error);
94 fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); 156 goto out;
95 if (IS_ERR(fs_path))
96 goto out_free;
97
98 if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
99 dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
100 goto out_free;
101 } 157 }
102 158
103 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 159 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
104 if (IS_ERR(devname)) { 160 if (IS_ERR(devname)) {
105 mnt = (struct vfsmount *)devname; 161 mnt = (struct vfsmount *)devname;
106 goto out_free; 162 goto out;
107 } 163 }
108 164
109 loc = 0; 165 loc = 0;
110 while (loc < locations->nlocations && IS_ERR(mnt)) { 166 while (loc < locations->nlocations && IS_ERR(mnt)) {
111 struct nfs4_fs_location *location = &locations->locations[loc]; 167 const struct nfs4_fs_location *location = &locations->locations[loc];
112 char *mnt_path; 168 char *mnt_path;
113 169
114 if (location == NULL || location->nservers <= 0 || 170 if (location == NULL || location->nservers <= 0 ||
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
140 addr.sin_port = htons(NFS_PORT); 196 addr.sin_port = htons(NFS_PORT);
141 mountdata.addr = &addr; 197 mountdata.addr = &addr;
142 198
143 mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); 199 mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
144 if (!IS_ERR(mnt)) { 200 if (!IS_ERR(mnt)) {
145 break; 201 break;
146 } 202 }
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
149 loc++; 205 loc++;
150 } 206 }
151 207
152out_free:
153 free_page((unsigned long)page);
154 free_page((unsigned long)page2);
155out: 208out:
209 free_page((unsigned long) page);
210 free_page((unsigned long) page2);
156 dprintk("%s: done\n", __FUNCTION__); 211 dprintk("%s: done\n", __FUNCTION__);
157 return mnt; 212 return mnt;
158} 213}
@@ -165,7 +220,7 @@ out:
165 */ 220 */
166struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 221struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
167{ 222{
168 struct vfsmount *mnt = ERR_PTR(-ENOENT); 223 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
169 struct dentry *parent; 224 struct dentry *parent;
170 struct nfs4_fs_locations *fs_locations = NULL; 225 struct nfs4_fs_locations *fs_locations = NULL;
171 struct page *page; 226 struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
183 goto out_free; 238 goto out_free;
184 239
185 /* Get locations */ 240 /* Get locations */
241 mnt = ERR_PTR(-ENOENT);
242
186 parent = dget_parent(dentry); 243 parent = dget_parent(dentry);
187 dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); 244 dprintk("%s: getting locations for %s/%s\n",
245 __FUNCTION__, parent->d_name.name, dentry->d_name.name);
246
188 err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); 247 err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
189 dput(parent); 248 dput(parent);
190 if (err != 0 || fs_locations->nlocations <= 0 || 249 if (err != 0 ||
250 fs_locations->nlocations <= 0 ||
191 fs_locations->fs_path.ncomponents <= 0) 251 fs_locations->fs_path.ncomponents <= 0)
192 goto out_free; 252 goto out_free;
193 253
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b14145b7b87f..47c7e6e3910d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
55 55
56#define NFSDBG_FACILITY NFSDBG_PROC 56#define NFSDBG_FACILITY NFSDBG_PROC
57 57
58#define NFS4_POLL_RETRY_MIN (1*HZ) 58#define NFS4_POLL_RETRY_MIN (HZ/10)
59#define NFS4_POLL_RETRY_MAX (15*HZ) 59#define NFS4_POLL_RETRY_MAX (15*HZ)
60 60
61struct nfs4_opendata; 61struct nfs4_opendata;
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); 64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68 68
69/* Prevent leaks of NFSv4 errors into userland */ 69/* Prevent leaks of NFSv4 errors into userland */
70int nfs4_map_errors(int err) 70int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
195 195
196static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 196static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
197{ 197{
198 struct nfs4_client *clp = server->nfs4_state; 198 struct nfs_client *clp = server->nfs_client;
199 spin_lock(&clp->cl_lock); 199 spin_lock(&clp->cl_lock);
200 if (time_before(clp->cl_last_renewal,timestamp)) 200 if (time_before(clp->cl_last_renewal,timestamp))
201 clp->cl_last_renewal = timestamp; 201 clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
252 atomic_inc(&sp->so_count); 252 atomic_inc(&sp->so_count);
253 p->o_arg.fh = NFS_FH(dir); 253 p->o_arg.fh = NFS_FH(dir);
254 p->o_arg.open_flags = flags, 254 p->o_arg.open_flags = flags,
255 p->o_arg.clientid = server->nfs4_state->cl_clientid; 255 p->o_arg.clientid = server->nfs_client->cl_clientid;
256 p->o_arg.id = sp->so_id; 256 p->o_arg.id = sp->so_id;
257 p->o_arg.name = &dentry->d_name; 257 p->o_arg.name = &dentry->d_name;
258 p->o_arg.server = server; 258 p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
550 case -NFS4ERR_STALE_STATEID: 550 case -NFS4ERR_STALE_STATEID:
551 case -NFS4ERR_EXPIRED: 551 case -NFS4ERR_EXPIRED:
552 /* Don't recall a delegation if it was lost */ 552 /* Don't recall a delegation if it was lost */
553 nfs4_schedule_state_recovery(server->nfs4_state); 553 nfs4_schedule_state_recovery(server->nfs_client);
554 return err; 554 return err;
555 } 555 }
556 err = nfs4_handle_exception(server, err, &exception); 556 err = nfs4_handle_exception(server, err, &exception);
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
758 } 758 }
759 nfs_confirm_seqid(&data->owner->so_seqid, 0); 759 nfs_confirm_seqid(&data->owner->so_seqid, 0);
760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
761 return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 761 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
762 return 0; 762 return 0;
763} 763}
764 764
@@ -792,11 +792,18 @@ out:
792 792
793int nfs4_recover_expired_lease(struct nfs_server *server) 793int nfs4_recover_expired_lease(struct nfs_server *server)
794{ 794{
795 struct nfs4_client *clp = server->nfs4_state; 795 struct nfs_client *clp = server->nfs_client;
796 int ret;
796 797
797 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 798 for (;;) {
799 ret = nfs4_wait_clnt_recover(server->client, clp);
800 if (ret != 0)
801 return ret;
802 if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
803 break;
798 nfs4_schedule_state_recovery(clp); 804 nfs4_schedule_state_recovery(clp);
799 return nfs4_wait_clnt_recover(server->client, clp); 805 }
806 return 0;
800} 807}
801 808
802/* 809/*
@@ -867,7 +874,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
867{ 874{
868 struct nfs_delegation *delegation; 875 struct nfs_delegation *delegation;
869 struct nfs_server *server = NFS_SERVER(inode); 876 struct nfs_server *server = NFS_SERVER(inode);
870 struct nfs4_client *clp = server->nfs4_state; 877 struct nfs_client *clp = server->nfs_client;
871 struct nfs_inode *nfsi = NFS_I(inode); 878 struct nfs_inode *nfsi = NFS_I(inode);
872 struct nfs4_state_owner *sp = NULL; 879 struct nfs4_state_owner *sp = NULL;
873 struct nfs4_state *state = NULL; 880 struct nfs4_state *state = NULL;
@@ -953,7 +960,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
953 struct nfs4_state_owner *sp; 960 struct nfs4_state_owner *sp;
954 struct nfs4_state *state = NULL; 961 struct nfs4_state *state = NULL;
955 struct nfs_server *server = NFS_SERVER(dir); 962 struct nfs_server *server = NFS_SERVER(dir);
956 struct nfs4_client *clp = server->nfs4_state; 963 struct nfs_client *clp = server->nfs_client;
957 struct nfs4_opendata *opendata; 964 struct nfs4_opendata *opendata;
958 int status; 965 int status;
959 966
@@ -1133,7 +1140,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1133 break; 1140 break;
1134 case -NFS4ERR_STALE_STATEID: 1141 case -NFS4ERR_STALE_STATEID:
1135 case -NFS4ERR_EXPIRED: 1142 case -NFS4ERR_EXPIRED:
1136 nfs4_schedule_state_recovery(server->nfs4_state); 1143 nfs4_schedule_state_recovery(server->nfs_client);
1137 break; 1144 break;
1138 default: 1145 default:
1139 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 1146 if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1268,7 +1275,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1268 BUG_ON(nd->intent.open.flags & O_CREAT); 1275 BUG_ON(nd->intent.open.flags & O_CREAT);
1269 } 1276 }
1270 1277
1271 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1278 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1272 if (IS_ERR(cred)) 1279 if (IS_ERR(cred))
1273 return (struct dentry *)cred; 1280 return (struct dentry *)cred;
1274 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1281 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1298,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1291 struct rpc_cred *cred; 1298 struct rpc_cred *cred;
1292 struct nfs4_state *state; 1299 struct nfs4_state *state;
1293 1300
1294 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1301 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1295 if (IS_ERR(cred)) 1302 if (IS_ERR(cred))
1296 return PTR_ERR(cred); 1303 return PTR_ERR(cred);
1297 state = nfs4_open_delegated(dentry->d_inode, openflags, cred); 1304 state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1393,70 +1400,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
1393 return err; 1400 return err;
1394} 1401}
1395 1402
1403/*
1404 * get the file handle for the "/" directory on the server
1405 */
1396static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 1406static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
1397 struct nfs_fsinfo *info) 1407 struct nfs_fsinfo *info)
1398{ 1408{
1399 struct nfs_fattr * fattr = info->fattr;
1400 unsigned char * p;
1401 struct qstr q;
1402 struct nfs4_lookup_arg args = {
1403 .dir_fh = fhandle,
1404 .name = &q,
1405 .bitmask = nfs4_fattr_bitmap,
1406 };
1407 struct nfs4_lookup_res res = {
1408 .server = server,
1409 .fattr = fattr,
1410 .fh = fhandle,
1411 };
1412 struct rpc_message msg = {
1413 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1414 .rpc_argp = &args,
1415 .rpc_resp = &res,
1416 };
1417 int status; 1409 int status;
1418 1410
1419 /*
1420 * Now we do a separate LOOKUP for each component of the mount path.
1421 * The LOOKUPs are done separately so that we can conveniently
1422 * catch an ERR_WRONGSEC if it occurs along the way...
1423 */
1424 status = nfs4_lookup_root(server, fhandle, info); 1411 status = nfs4_lookup_root(server, fhandle, info);
1425 if (status)
1426 goto out;
1427
1428 p = server->mnt_path;
1429 for (;;) {
1430 struct nfs4_exception exception = { };
1431
1432 while (*p == '/')
1433 p++;
1434 if (!*p)
1435 break;
1436 q.name = p;
1437 while (*p && (*p != '/'))
1438 p++;
1439 q.len = p - q.name;
1440
1441 do {
1442 nfs_fattr_init(fattr);
1443 status = nfs4_handle_exception(server,
1444 rpc_call_sync(server->client, &msg, 0),
1445 &exception);
1446 } while (exception.retry);
1447 if (status == 0)
1448 continue;
1449 if (status == -ENOENT) {
1450 printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
1451 printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
1452 }
1453 break;
1454 }
1455 if (status == 0) 1412 if (status == 0)
1456 status = nfs4_server_capabilities(server, fhandle); 1413 status = nfs4_server_capabilities(server, fhandle);
1457 if (status == 0) 1414 if (status == 0)
1458 status = nfs4_do_fsinfo(server, fhandle, info); 1415 status = nfs4_do_fsinfo(server, fhandle, info);
1459out:
1460 return nfs4_map_errors(status); 1416 return nfs4_map_errors(status);
1461} 1417}
1462 1418
@@ -1565,7 +1521,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1565 1521
1566 nfs_fattr_init(fattr); 1522 nfs_fattr_init(fattr);
1567 1523
1568 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); 1524 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
1569 if (IS_ERR(cred)) 1525 if (IS_ERR(cred))
1570 return PTR_ERR(cred); 1526 return PTR_ERR(cred);
1571 1527
@@ -1583,6 +1539,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1583 return status; 1539 return status;
1584} 1540}
1585 1541
1542static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1543 struct qstr *name, struct nfs_fh *fhandle,
1544 struct nfs_fattr *fattr)
1545{
1546 int status;
1547 struct nfs4_lookup_arg args = {
1548 .bitmask = server->attr_bitmask,
1549 .dir_fh = dirfh,
1550 .name = name,
1551 };
1552 struct nfs4_lookup_res res = {
1553 .server = server,
1554 .fattr = fattr,
1555 .fh = fhandle,
1556 };
1557 struct rpc_message msg = {
1558 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1559 .rpc_argp = &args,
1560 .rpc_resp = &res,
1561 };
1562
1563 nfs_fattr_init(fattr);
1564
1565 dprintk("NFS call lookupfh %s\n", name->name);
1566 status = rpc_call_sync(server->client, &msg, 0);
1567 dprintk("NFS reply lookupfh: %d\n", status);
1568 if (status == -NFS4ERR_MOVED)
1569 status = -EREMOTE;
1570 return status;
1571}
1572
1573static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1574 struct qstr *name, struct nfs_fh *fhandle,
1575 struct nfs_fattr *fattr)
1576{
1577 struct nfs4_exception exception = { };
1578 int err;
1579 do {
1580 err = nfs4_handle_exception(server,
1581 _nfs4_proc_lookupfh(server, dirfh, name,
1582 fhandle, fattr),
1583 &exception);
1584 } while (exception.retry);
1585 return err;
1586}
1587
1586static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, 1588static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1587 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1589 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1588{ 1590{
@@ -1881,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1881 struct rpc_cred *cred; 1883 struct rpc_cred *cred;
1882 int status = 0; 1884 int status = 0;
1883 1885
1884 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1886 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1885 if (IS_ERR(cred)) { 1887 if (IS_ERR(cred)) {
1886 status = PTR_ERR(cred); 1888 status = PTR_ERR(cred);
1887 goto out; 1889 goto out;
@@ -2089,24 +2091,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
2089 return err; 2091 return err;
2090} 2092}
2091 2093
2092static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, 2094static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
2093 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, 2095 struct page *page, unsigned int len, struct iattr *sattr)
2094 struct nfs_fattr *fattr)
2095{ 2096{
2096 struct nfs_server *server = NFS_SERVER(dir); 2097 struct nfs_server *server = NFS_SERVER(dir);
2097 struct nfs_fattr dir_fattr; 2098 struct nfs_fh fhandle;
2099 struct nfs_fattr fattr, dir_fattr;
2098 struct nfs4_create_arg arg = { 2100 struct nfs4_create_arg arg = {
2099 .dir_fh = NFS_FH(dir), 2101 .dir_fh = NFS_FH(dir),
2100 .server = server, 2102 .server = server,
2101 .name = name, 2103 .name = &dentry->d_name,
2102 .attrs = sattr, 2104 .attrs = sattr,
2103 .ftype = NF4LNK, 2105 .ftype = NF4LNK,
2104 .bitmask = server->attr_bitmask, 2106 .bitmask = server->attr_bitmask,
2105 }; 2107 };
2106 struct nfs4_create_res res = { 2108 struct nfs4_create_res res = {
2107 .server = server, 2109 .server = server,
2108 .fh = fhandle, 2110 .fh = &fhandle,
2109 .fattr = fattr, 2111 .fattr = &fattr,
2110 .dir_fattr = &dir_fattr, 2112 .dir_fattr = &dir_fattr,
2111 }; 2113 };
2112 struct rpc_message msg = { 2114 struct rpc_message msg = {
@@ -2116,29 +2118,32 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
2116 }; 2118 };
2117 int status; 2119 int status;
2118 2120
2119 if (path->len > NFS4_MAXPATHLEN) 2121 if (len > NFS4_MAXPATHLEN)
2120 return -ENAMETOOLONG; 2122 return -ENAMETOOLONG;
2121 arg.u.symlink = path; 2123
2122 nfs_fattr_init(fattr); 2124 arg.u.symlink.pages = &page;
2125 arg.u.symlink.len = len;
2126 nfs_fattr_init(&fattr);
2123 nfs_fattr_init(&dir_fattr); 2127 nfs_fattr_init(&dir_fattr);
2124 2128
2125 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 2129 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
2126 if (!status) 2130 if (!status) {
2127 update_changeattr(dir, &res.dir_cinfo); 2131 update_changeattr(dir, &res.dir_cinfo);
2128 nfs_post_op_update_inode(dir, res.dir_fattr); 2132 nfs_post_op_update_inode(dir, res.dir_fattr);
2133 status = nfs_instantiate(dentry, &fhandle, &fattr);
2134 }
2129 return status; 2135 return status;
2130} 2136}
2131 2137
2132static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, 2138static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
2133 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, 2139 struct page *page, unsigned int len, struct iattr *sattr)
2134 struct nfs_fattr *fattr)
2135{ 2140{
2136 struct nfs4_exception exception = { }; 2141 struct nfs4_exception exception = { };
2137 int err; 2142 int err;
2138 do { 2143 do {
2139 err = nfs4_handle_exception(NFS_SERVER(dir), 2144 err = nfs4_handle_exception(NFS_SERVER(dir),
2140 _nfs4_proc_symlink(dir, name, path, sattr, 2145 _nfs4_proc_symlink(dir, dentry, page,
2141 fhandle, fattr), 2146 len, sattr),
2142 &exception); 2147 &exception);
2143 } while (exception.retry); 2148 } while (exception.retry);
2144 return err; 2149 return err;
@@ -2521,7 +2526,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2521 */ 2526 */
2522static void nfs4_renew_done(struct rpc_task *task, void *data) 2527static void nfs4_renew_done(struct rpc_task *task, void *data)
2523{ 2528{
2524 struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; 2529 struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
2525 unsigned long timestamp = (unsigned long)data; 2530 unsigned long timestamp = (unsigned long)data;
2526 2531
2527 if (task->tk_status < 0) { 2532 if (task->tk_status < 0) {
@@ -2543,7 +2548,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
2543 .rpc_call_done = nfs4_renew_done, 2548 .rpc_call_done = nfs4_renew_done,
2544}; 2549};
2545 2550
2546int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) 2551int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
2547{ 2552{
2548 struct rpc_message msg = { 2553 struct rpc_message msg = {
2549 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 2554 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2560,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
2555 &nfs4_renew_ops, (void *)jiffies); 2560 &nfs4_renew_ops, (void *)jiffies);
2556} 2561}
2557 2562
2558int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) 2563int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
2559{ 2564{
2560 struct rpc_message msg = { 2565 struct rpc_message msg = {
2561 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 2566 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2770,7 +2775,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2770 return -EOPNOTSUPP; 2775 return -EOPNOTSUPP;
2771 nfs_inode_return_delegation(inode); 2776 nfs_inode_return_delegation(inode);
2772 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 2777 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2773 ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); 2778 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
2774 if (ret == 0) 2779 if (ret == 0)
2775 nfs4_write_cached_acl(inode, buf, buflen); 2780 nfs4_write_cached_acl(inode, buf, buflen);
2776 return ret; 2781 return ret;
@@ -2791,7 +2796,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2791static int 2796static int
2792nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) 2797nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2793{ 2798{
2794 struct nfs4_client *clp = server->nfs4_state; 2799 struct nfs_client *clp = server->nfs_client;
2795 2800
2796 if (!clp || task->tk_status >= 0) 2801 if (!clp || task->tk_status >= 0)
2797 return 0; 2802 return 0;
@@ -2828,7 +2833,7 @@ static int nfs4_wait_bit_interruptible(void *word)
2828 return 0; 2833 return 0;
2829} 2834}
2830 2835
2831static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) 2836static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
2832{ 2837{
2833 sigset_t oldset; 2838 sigset_t oldset;
2834 int res; 2839 int res;
@@ -2871,7 +2876,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2871 */ 2876 */
2872int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 2877int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2873{ 2878{
2874 struct nfs4_client *clp = server->nfs4_state; 2879 struct nfs_client *clp = server->nfs_client;
2875 int ret = errorcode; 2880 int ret = errorcode;
2876 2881
2877 exception->retry = 0; 2882 exception->retry = 0;
@@ -2886,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
2886 if (ret == 0) 2891 if (ret == 0)
2887 exception->retry = 1; 2892 exception->retry = 1;
2888 break; 2893 break;
2894 case -NFS4ERR_FILE_OPEN:
2889 case -NFS4ERR_GRACE: 2895 case -NFS4ERR_GRACE:
2890 case -NFS4ERR_DELAY: 2896 case -NFS4ERR_DELAY:
2891 ret = nfs4_delay(server->client, &exception->timeout); 2897 ret = nfs4_delay(server->client, &exception->timeout);
@@ -2898,7 +2904,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
2898 return nfs4_map_errors(ret); 2904 return nfs4_map_errors(ret);
2899} 2905}
2900 2906
2901int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 2907int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
2902{ 2908{
2903 nfs4_verifier sc_verifier; 2909 nfs4_verifier sc_verifier;
2904 struct nfs4_setclientid setclientid = { 2910 struct nfs4_setclientid setclientid = {
@@ -2922,7 +2928,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
2922 for(;;) { 2928 for(;;) {
2923 setclientid.sc_name_len = scnprintf(setclientid.sc_name, 2929 setclientid.sc_name_len = scnprintf(setclientid.sc_name,
2924 sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", 2930 sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
2925 clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), 2931 clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
2926 cred->cr_ops->cr_name, 2932 cred->cr_ops->cr_name,
2927 clp->cl_id_uniquifier); 2933 clp->cl_id_uniquifier);
2928 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 2934 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
@@ -2945,7 +2951,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
2945 return status; 2951 return status;
2946} 2952}
2947 2953
2948static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) 2954static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
2949{ 2955{
2950 struct nfs_fsinfo fsinfo; 2956 struct nfs_fsinfo fsinfo;
2951 struct rpc_message msg = { 2957 struct rpc_message msg = {
@@ -2969,7 +2975,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
2969 return status; 2975 return status;
2970} 2976}
2971 2977
2972int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) 2978int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
2973{ 2979{
2974 long timeout; 2980 long timeout;
2975 int err; 2981 int err;
@@ -3077,7 +3083,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
3077 switch (err) { 3083 switch (err) {
3078 case -NFS4ERR_STALE_STATEID: 3084 case -NFS4ERR_STALE_STATEID:
3079 case -NFS4ERR_EXPIRED: 3085 case -NFS4ERR_EXPIRED:
3080 nfs4_schedule_state_recovery(server->nfs4_state); 3086 nfs4_schedule_state_recovery(server->nfs_client);
3081 case 0: 3087 case 0:
3082 return 0; 3088 return 0;
3083 } 3089 }
@@ -3106,7 +3112,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3106{ 3112{
3107 struct inode *inode = state->inode; 3113 struct inode *inode = state->inode;
3108 struct nfs_server *server = NFS_SERVER(inode); 3114 struct nfs_server *server = NFS_SERVER(inode);
3109 struct nfs4_client *clp = server->nfs4_state; 3115 struct nfs_client *clp = server->nfs_client;
3110 struct nfs_lockt_args arg = { 3116 struct nfs_lockt_args arg = {
3111 .fh = NFS_FH(inode), 3117 .fh = NFS_FH(inode),
3112 .fl = request, 3118 .fl = request,
@@ -3231,7 +3237,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3231 break; 3237 break;
3232 case -NFS4ERR_STALE_STATEID: 3238 case -NFS4ERR_STALE_STATEID:
3233 case -NFS4ERR_EXPIRED: 3239 case -NFS4ERR_EXPIRED:
3234 nfs4_schedule_state_recovery(calldata->server->nfs4_state); 3240 nfs4_schedule_state_recovery(calldata->server->nfs_client);
3235 break; 3241 break;
3236 default: 3242 default:
3237 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { 3243 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3349,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3343 if (p->arg.lock_seqid == NULL) 3349 if (p->arg.lock_seqid == NULL)
3344 goto out_free; 3350 goto out_free;
3345 p->arg.lock_stateid = &lsp->ls_stateid; 3351 p->arg.lock_stateid = &lsp->ls_stateid;
3346 p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; 3352 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3347 p->arg.lock_owner.id = lsp->ls_id; 3353 p->arg.lock_owner.id = lsp->ls_id;
3348 p->lsp = lsp; 3354 p->lsp = lsp;
3349 atomic_inc(&lsp->ls_count); 3355 atomic_inc(&lsp->ls_count);
@@ -3513,7 +3519,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3513 3519
3514static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 3520static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
3515{ 3521{
3516 struct nfs4_client *clp = state->owner->so_client; 3522 struct nfs_client *clp = state->owner->so_client;
3517 unsigned char fl_flags = request->fl_flags; 3523 unsigned char fl_flags = request->fl_flags;
3518 int status; 3524 int status;
3519 3525
@@ -3715,7 +3721,7 @@ static struct inode_operations nfs4_file_inode_operations = {
3715 .listxattr = nfs4_listxattr, 3721 .listxattr = nfs4_listxattr,
3716}; 3722};
3717 3723
3718struct nfs_rpc_ops nfs_v4_clientops = { 3724const struct nfs_rpc_ops nfs_v4_clientops = {
3719 .version = 4, /* protocol version */ 3725 .version = 4, /* protocol version */
3720 .dentry_ops = &nfs4_dentry_operations, 3726 .dentry_ops = &nfs4_dentry_operations,
3721 .dir_inode_ops = &nfs4_dir_inode_operations, 3727 .dir_inode_ops = &nfs4_dir_inode_operations,
@@ -3723,6 +3729,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3723 .getroot = nfs4_proc_get_root, 3729 .getroot = nfs4_proc_get_root,
3724 .getattr = nfs4_proc_getattr, 3730 .getattr = nfs4_proc_getattr,
3725 .setattr = nfs4_proc_setattr, 3731 .setattr = nfs4_proc_setattr,
3732 .lookupfh = nfs4_proc_lookupfh,
3726 .lookup = nfs4_proc_lookup, 3733 .lookup = nfs4_proc_lookup,
3727 .access = nfs4_proc_access, 3734 .access = nfs4_proc_access,
3728 .readlink = nfs4_proc_readlink, 3735 .readlink = nfs4_proc_readlink,
@@ -3743,6 +3750,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3743 .statfs = nfs4_proc_statfs, 3750 .statfs = nfs4_proc_statfs,
3744 .fsinfo = nfs4_proc_fsinfo, 3751 .fsinfo = nfs4_proc_fsinfo,
3745 .pathconf = nfs4_proc_pathconf, 3752 .pathconf = nfs4_proc_pathconf,
3753 .set_capabilities = nfs4_server_capabilities,
3746 .decode_dirent = nfs4_decode_dirent, 3754 .decode_dirent = nfs4_decode_dirent,
3747 .read_setup = nfs4_proc_read_setup, 3755 .read_setup = nfs4_proc_read_setup,
3748 .read_done = nfs4_read_done, 3756 .read_done = nfs4_read_done,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8e6d8a..7b6df1852e75 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
61void 61void
62nfs4_renew_state(void *data) 62nfs4_renew_state(void *data)
63{ 63{
64 struct nfs4_client *clp = (struct nfs4_client *)data; 64 struct nfs_client *clp = (struct nfs_client *)data;
65 struct rpc_cred *cred; 65 struct rpc_cred *cred;
66 long lease, timeout; 66 long lease, timeout;
67 unsigned long last, now; 67 unsigned long last, now;
@@ -108,7 +108,7 @@ out:
108 108
109/* Must be called with clp->cl_sem locked for writes */ 109/* Must be called with clp->cl_sem locked for writes */
110void 110void
111nfs4_schedule_state_renewal(struct nfs4_client *clp) 111nfs4_schedule_state_renewal(struct nfs_client *clp)
112{ 112{
113 long timeout; 113 long timeout;
114 114
@@ -121,32 +121,20 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
121 __FUNCTION__, (timeout + HZ - 1) / HZ); 121 __FUNCTION__, (timeout + HZ - 1) / HZ);
122 cancel_delayed_work(&clp->cl_renewd); 122 cancel_delayed_work(&clp->cl_renewd);
123 schedule_delayed_work(&clp->cl_renewd, timeout); 123 schedule_delayed_work(&clp->cl_renewd, timeout);
124 set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
124 spin_unlock(&clp->cl_lock); 125 spin_unlock(&clp->cl_lock);
125} 126}
126 127
127void 128void
128nfs4_renewd_prepare_shutdown(struct nfs_server *server) 129nfs4_renewd_prepare_shutdown(struct nfs_server *server)
129{ 130{
130 struct nfs4_client *clp = server->nfs4_state;
131
132 if (!clp)
133 return;
134 flush_scheduled_work(); 131 flush_scheduled_work();
135 down_write(&clp->cl_sem);
136 if (!list_empty(&server->nfs4_siblings))
137 list_del_init(&server->nfs4_siblings);
138 up_write(&clp->cl_sem);
139} 132}
140 133
141/* Must be called with clp->cl_sem locked for writes */
142void 134void
143nfs4_kill_renewd(struct nfs4_client *clp) 135nfs4_kill_renewd(struct nfs_client *clp)
144{ 136{
145 down_read(&clp->cl_sem); 137 down_read(&clp->cl_sem);
146 if (!list_empty(&clp->cl_superblocks)) {
147 up_read(&clp->cl_sem);
148 return;
149 }
150 cancel_delayed_work(&clp->cl_renewd); 138 cancel_delayed_work(&clp->cl_renewd);
151 up_read(&clp->cl_sem); 139 up_read(&clp->cl_sem);
152 flush_scheduled_work(); 140 flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b07a22..5fffbdfa971f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,149 +50,15 @@
50#include "nfs4_fs.h" 50#include "nfs4_fs.h"
51#include "callback.h" 51#include "callback.h"
52#include "delegation.h" 52#include "delegation.h"
53#include "internal.h"
53 54
54#define OPENOWNER_POOL_SIZE 8 55#define OPENOWNER_POOL_SIZE 8
55 56
56const nfs4_stateid zero_stateid; 57const nfs4_stateid zero_stateid;
57 58
58static DEFINE_SPINLOCK(state_spinlock);
59static LIST_HEAD(nfs4_clientid_list); 59static LIST_HEAD(nfs4_clientid_list);
60 60
61void 61static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
62init_nfsv4_state(struct nfs_server *server)
63{
64 server->nfs4_state = NULL;
65 INIT_LIST_HEAD(&server->nfs4_siblings);
66}
67
68void
69destroy_nfsv4_state(struct nfs_server *server)
70{
71 kfree(server->mnt_path);
72 server->mnt_path = NULL;
73 if (server->nfs4_state) {
74 nfs4_put_client(server->nfs4_state);
75 server->nfs4_state = NULL;
76 }
77}
78
79/*
80 * nfs4_get_client(): returns an empty client structure
81 * nfs4_put_client(): drops reference to client structure
82 *
83 * Since these are allocated/deallocated very rarely, we don't
84 * bother putting them in a slab cache...
85 */
86static struct nfs4_client *
87nfs4_alloc_client(struct in_addr *addr)
88{
89 struct nfs4_client *clp;
90
91 if (nfs_callback_up() < 0)
92 return NULL;
93 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
94 nfs_callback_down();
95 return NULL;
96 }
97 memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
98 init_rwsem(&clp->cl_sem);
99 INIT_LIST_HEAD(&clp->cl_delegations);
100 INIT_LIST_HEAD(&clp->cl_state_owners);
101 INIT_LIST_HEAD(&clp->cl_unused);
102 spin_lock_init(&clp->cl_lock);
103 atomic_set(&clp->cl_count, 1);
104 INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
105 INIT_LIST_HEAD(&clp->cl_superblocks);
106 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
107 clp->cl_rpcclient = ERR_PTR(-EINVAL);
108 clp->cl_boot_time = CURRENT_TIME;
109 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
110 return clp;
111}
112
113static void
114nfs4_free_client(struct nfs4_client *clp)
115{
116 struct nfs4_state_owner *sp;
117
118 while (!list_empty(&clp->cl_unused)) {
119 sp = list_entry(clp->cl_unused.next,
120 struct nfs4_state_owner,
121 so_list);
122 list_del(&sp->so_list);
123 kfree(sp);
124 }
125 BUG_ON(!list_empty(&clp->cl_state_owners));
126 nfs_idmap_delete(clp);
127 if (!IS_ERR(clp->cl_rpcclient))
128 rpc_shutdown_client(clp->cl_rpcclient);
129 kfree(clp);
130 nfs_callback_down();
131}
132
133static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
134{
135 struct nfs4_client *clp;
136 list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
137 if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
138 atomic_inc(&clp->cl_count);
139 return clp;
140 }
141 }
142 return NULL;
143}
144
145struct nfs4_client *nfs4_find_client(struct in_addr *addr)
146{
147 struct nfs4_client *clp;
148 spin_lock(&state_spinlock);
149 clp = __nfs4_find_client(addr);
150 spin_unlock(&state_spinlock);
151 return clp;
152}
153
154struct nfs4_client *
155nfs4_get_client(struct in_addr *addr)
156{
157 struct nfs4_client *clp, *new = NULL;
158
159 spin_lock(&state_spinlock);
160 for (;;) {
161 clp = __nfs4_find_client(addr);
162 if (clp != NULL)
163 break;
164 clp = new;
165 if (clp != NULL) {
166 list_add(&clp->cl_servers, &nfs4_clientid_list);
167 new = NULL;
168 break;
169 }
170 spin_unlock(&state_spinlock);
171 new = nfs4_alloc_client(addr);
172 spin_lock(&state_spinlock);
173 if (new == NULL)
174 break;
175 }
176 spin_unlock(&state_spinlock);
177 if (new)
178 nfs4_free_client(new);
179 return clp;
180}
181
182void
183nfs4_put_client(struct nfs4_client *clp)
184{
185 if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
186 return;
187 list_del(&clp->cl_servers);
188 spin_unlock(&state_spinlock);
189 BUG_ON(!list_empty(&clp->cl_superblocks));
190 rpc_wake_up(&clp->cl_rpcwaitq);
191 nfs4_kill_renewd(clp);
192 nfs4_free_client(clp);
193}
194
195static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
196{ 62{
197 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, 63 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
198 nfs_callback_tcpport, cred); 64 nfs_callback_tcpport, cred);
@@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
204} 70}
205 71
206u32 72u32
207nfs4_alloc_lockowner_id(struct nfs4_client *clp) 73nfs4_alloc_lockowner_id(struct nfs_client *clp)
208{ 74{
209 return clp->cl_lockowner_id ++; 75 return clp->cl_lockowner_id ++;
210} 76}
211 77
212static struct nfs4_state_owner * 78static struct nfs4_state_owner *
213nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) 79nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
214{ 80{
215 struct nfs4_state_owner *sp = NULL; 81 struct nfs4_state_owner *sp = NULL;
216 82
@@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
224 return sp; 90 return sp;
225} 91}
226 92
227struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) 93struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
228{ 94{
229 struct nfs4_state_owner *sp; 95 struct nfs4_state_owner *sp;
230 struct rpc_cred *cred = NULL; 96 struct rpc_cred *cred = NULL;
@@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
238 return cred; 104 return cred;
239} 105}
240 106
241struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) 107struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
242{ 108{
243 struct nfs4_state_owner *sp; 109 struct nfs4_state_owner *sp;
244 110
@@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
251} 117}
252 118
253static struct nfs4_state_owner * 119static struct nfs4_state_owner *
254nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) 120nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
255{ 121{
256 struct nfs4_state_owner *sp, *res = NULL; 122 struct nfs4_state_owner *sp, *res = NULL;
257 123
@@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void)
294void 160void
295nfs4_drop_state_owner(struct nfs4_state_owner *sp) 161nfs4_drop_state_owner(struct nfs4_state_owner *sp)
296{ 162{
297 struct nfs4_client *clp = sp->so_client; 163 struct nfs_client *clp = sp->so_client;
298 spin_lock(&clp->cl_lock); 164 spin_lock(&clp->cl_lock);
299 list_del_init(&sp->so_list); 165 list_del_init(&sp->so_list);
300 spin_unlock(&clp->cl_lock); 166 spin_unlock(&clp->cl_lock);
@@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
306 */ 172 */
307struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) 173struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
308{ 174{
309 struct nfs4_client *clp = server->nfs4_state; 175 struct nfs_client *clp = server->nfs_client;
310 struct nfs4_state_owner *sp, *new; 176 struct nfs4_state_owner *sp, *new;
311 177
312 get_rpccred(cred); 178 get_rpccred(cred);
@@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
337 */ 203 */
338void nfs4_put_state_owner(struct nfs4_state_owner *sp) 204void nfs4_put_state_owner(struct nfs4_state_owner *sp)
339{ 205{
340 struct nfs4_client *clp = sp->so_client; 206 struct nfs_client *clp = sp->so_client;
341 struct rpc_cred *cred = sp->so_cred; 207 struct rpc_cred *cred = sp->so_cred;
342 208
343 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 209 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
540static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 406static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
541{ 407{
542 struct nfs4_lock_state *lsp; 408 struct nfs4_lock_state *lsp;
543 struct nfs4_client *clp = state->owner->so_client; 409 struct nfs_client *clp = state->owner->so_client;
544 410
545 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); 411 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
546 if (lsp == NULL) 412 if (lsp == NULL)
@@ -752,7 +618,7 @@ out:
752 618
753static int reclaimer(void *); 619static int reclaimer(void *);
754 620
755static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) 621static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
756{ 622{
757 smp_mb__before_clear_bit(); 623 smp_mb__before_clear_bit();
758 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); 624 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
764/* 630/*
765 * State recovery routine 631 * State recovery routine
766 */ 632 */
767static void nfs4_recover_state(struct nfs4_client *clp) 633static void nfs4_recover_state(struct nfs_client *clp)
768{ 634{
769 struct task_struct *task; 635 struct task_struct *task;
770 636
771 __module_get(THIS_MODULE); 637 __module_get(THIS_MODULE);
772 atomic_inc(&clp->cl_count); 638 atomic_inc(&clp->cl_count);
773 task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", 639 task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
774 NIPQUAD(clp->cl_addr)); 640 NIPQUAD(clp->cl_addr.sin_addr));
775 if (!IS_ERR(task)) 641 if (!IS_ERR(task))
776 return; 642 return;
777 nfs4_clear_recover_bit(clp); 643 nfs4_clear_recover_bit(clp);
778 nfs4_put_client(clp); 644 nfs_put_client(clp);
779 module_put(THIS_MODULE); 645 module_put(THIS_MODULE);
780} 646}
781 647
782/* 648/*
783 * Schedule a state recovery attempt 649 * Schedule a state recovery attempt
784 */ 650 */
785void nfs4_schedule_state_recovery(struct nfs4_client *clp) 651void nfs4_schedule_state_recovery(struct nfs_client *clp)
786{ 652{
787 if (!clp) 653 if (!clp)
788 return; 654 return;
@@ -879,7 +745,7 @@ out_err:
879 return status; 745 return status;
880} 746}
881 747
882static void nfs4_state_mark_reclaim(struct nfs4_client *clp) 748static void nfs4_state_mark_reclaim(struct nfs_client *clp)
883{ 749{
884 struct nfs4_state_owner *sp; 750 struct nfs4_state_owner *sp;
885 struct nfs4_state *state; 751 struct nfs4_state *state;
@@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
903 769
904static int reclaimer(void *ptr) 770static int reclaimer(void *ptr)
905{ 771{
906 struct nfs4_client *clp = ptr; 772 struct nfs_client *clp = ptr;
907 struct nfs4_state_owner *sp; 773 struct nfs4_state_owner *sp;
908 struct nfs4_state_recovery_ops *ops; 774 struct nfs4_state_recovery_ops *ops;
909 struct rpc_cred *cred; 775 struct rpc_cred *cred;
@@ -970,12 +836,12 @@ out:
970 if (status == -NFS4ERR_CB_PATH_DOWN) 836 if (status == -NFS4ERR_CB_PATH_DOWN)
971 nfs_handle_cb_pathdown(clp); 837 nfs_handle_cb_pathdown(clp);
972 nfs4_clear_recover_bit(clp); 838 nfs4_clear_recover_bit(clp);
973 nfs4_put_client(clp); 839 nfs_put_client(clp);
974 module_put_and_exit(0); 840 module_put_and_exit(0);
975 return 0; 841 return 0;
976out_error: 842out_error:
977 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", 843 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
978 NIPQUAD(clp->cl_addr.s_addr), -status); 844 NIPQUAD(clp->cl_addr.sin_addr), -status);
979 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 845 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
980 goto out; 846 goto out;
981} 847}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8fb31c6..3dd413f52da1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
58/* Mapping from NFS error code to "errno" error code. */ 58/* Mapping from NFS error code to "errno" error code. */
59#define errno_NFSERR_IO EIO 59#define errno_NFSERR_IO EIO
60 60
61static int nfs_stat_to_errno(int); 61static int nfs4_stat_to_errno(int);
62 62
63/* NFSv4 COMPOUND tags are only wanted for debugging purposes */ 63/* NFSv4 COMPOUND tags are only wanted for debugging purposes */
64#ifdef DEBUG 64#ifdef DEBUG
@@ -128,7 +128,7 @@ static int nfs_stat_to_errno(int);
128#define decode_link_maxsz (op_decode_hdr_maxsz + 5) 128#define decode_link_maxsz (op_decode_hdr_maxsz + 5)
129#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 129#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
130 1 + nfs4_name_maxsz + \ 130 1 + nfs4_name_maxsz + \
131 nfs4_path_maxsz + \ 131 1 + \
132 nfs4_fattr_maxsz) 132 nfs4_fattr_maxsz)
133#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) 133#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
134#define encode_create_maxsz (op_encode_hdr_maxsz + \ 134#define encode_create_maxsz (op_encode_hdr_maxsz + \
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
529 if (iap->ia_valid & ATTR_MODE) 529 if (iap->ia_valid & ATTR_MODE)
530 len += 4; 530 len += 4;
531 if (iap->ia_valid & ATTR_UID) { 531 if (iap->ia_valid & ATTR_UID) {
532 owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); 532 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
533 if (owner_namelen < 0) { 533 if (owner_namelen < 0) {
534 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", 534 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
535 iap->ia_uid); 535 iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
541 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 541 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
542 } 542 }
543 if (iap->ia_valid & ATTR_GID) { 543 if (iap->ia_valid & ATTR_GID) {
544 owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); 544 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
545 if (owner_grouplen < 0) { 545 if (owner_grouplen < 0) {
546 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", 546 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
547 iap->ia_gid); 547 iap->ia_gid);
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
673 673
674 switch (create->ftype) { 674 switch (create->ftype) {
675 case NF4LNK: 675 case NF4LNK:
676 RESERVE_SPACE(4 + create->u.symlink->len); 676 RESERVE_SPACE(4);
677 WRITE32(create->u.symlink->len); 677 WRITE32(create->u.symlink.len);
678 WRITEMEM(create->u.symlink->name, create->u.symlink->len); 678 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
679 break; 679 break;
680 680
681 case NF4BLK: case NF4CHR: 681 case NF4BLK: case NF4CHR:
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
1160 return 0; 1160 return 0;
1161} 1161}
1162 1162
1163static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) 1163static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
1164{ 1164{
1165 uint32_t *p; 1165 uint32_t *p;
1166 1166
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
1246 return 0; 1246 return 0;
1247} 1247}
1248 1248
1249static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) 1249static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
1250{ 1250{
1251 uint32_t *p; 1251 uint32_t *p;
1252 1252
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
1945/* 1945/*
1946 * a RENEW request 1946 * a RENEW request
1947 */ 1947 */
1948static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) 1948static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
1949{ 1949{
1950 struct xdr_stream xdr; 1950 struct xdr_stream xdr;
1951 struct compound_hdr hdr = { 1951 struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
1975/* 1975/*
1976 * a SETCLIENTID_CONFIRM request 1976 * a SETCLIENTID_CONFIRM request
1977 */ 1977 */
1978static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) 1978static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
1979{ 1979{
1980 struct xdr_stream xdr; 1980 struct xdr_stream xdr;
1981 struct compound_hdr hdr = { 1981 struct compound_hdr hdr = {
@@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2127 } 2127 }
2128 READ32(nfserr); 2128 READ32(nfserr);
2129 if (nfserr != NFS_OK) 2129 if (nfserr != NFS_OK)
2130 return -nfs_stat_to_errno(nfserr); 2130 return -nfs4_stat_to_errno(nfserr);
2131 return 0; 2131 return 0;
2132} 2132}
2133 2133
2134/* Dummy routine */ 2134/* Dummy routine */
2135static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) 2135static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2136{ 2136{
2137 uint32_t *p; 2137 uint32_t *p;
2138 unsigned int strlen; 2138 unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
2636 return 0; 2636 return 0;
2637} 2637}
2638 2638
2639static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) 2639static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
2640{ 2640{
2641 uint32_t len, *p; 2641 uint32_t len, *p;
2642 2642
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2660 return 0; 2660 return 0;
2661} 2661}
2662 2662
2663static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) 2663static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
2664{ 2664{
2665 uint32_t len, *p; 2665 uint32_t len, *p;
2666 2666
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
3051 fattr->mode |= fmode; 3051 fattr->mode |= fmode;
3052 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) 3052 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
3053 goto xdr_error; 3053 goto xdr_error;
3054 if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) 3054 if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
3055 goto xdr_error; 3055 goto xdr_error;
3056 if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) 3056 if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
3057 goto xdr_error; 3057 goto xdr_error;
3058 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) 3058 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
3059 goto xdr_error; 3059 goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3254 if (decode_space_limit(xdr, &res->maxsize) < 0) 3254 if (decode_space_limit(xdr, &res->maxsize) < 0)
3255 return -EIO; 3255 return -EIO;
3256 } 3256 }
3257 return decode_ace(xdr, NULL, res->server->nfs4_state); 3257 return decode_ace(xdr, NULL, res->server->nfs_client);
3258} 3258}
3259 3259
3260static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3260static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
3565 return 0; 3565 return 0;
3566} 3566}
3567 3567
3568static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) 3568static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3569{ 3569{
3570 uint32_t *p; 3570 uint32_t *p;
3571 uint32_t opnum; 3571 uint32_t opnum;
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
3598 READ_BUF(len); 3598 READ_BUF(len);
3599 return -NFSERR_CLID_INUSE; 3599 return -NFSERR_CLID_INUSE;
3600 } else 3600 } else
3601 return -nfs_stat_to_errno(nfserr); 3601 return -nfs4_stat_to_errno(nfserr);
3602 3602
3603 return 0; 3603 return 0;
3604} 3604}
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
4256 if (!status) 4256 if (!status)
4257 status = decode_fsinfo(&xdr, fsinfo); 4257 status = decode_fsinfo(&xdr, fsinfo);
4258 if (!status) 4258 if (!status)
4259 status = -nfs_stat_to_errno(hdr.status); 4259 status = -nfs4_stat_to_errno(hdr.status);
4260 return status; 4260 return status;
4261} 4261}
4262 4262
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
4335 * a SETCLIENTID request 4335 * a SETCLIENTID request
4336 */ 4336 */
4337static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, 4337static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
4338 struct nfs4_client *clp) 4338 struct nfs_client *clp)
4339{ 4339{
4340 struct xdr_stream xdr; 4340 struct xdr_stream xdr;
4341 struct compound_hdr hdr; 4341 struct compound_hdr hdr;
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
4346 if (!status) 4346 if (!status)
4347 status = decode_setclientid(&xdr, clp); 4347 status = decode_setclientid(&xdr, clp);
4348 if (!status) 4348 if (!status)
4349 status = -nfs_stat_to_errno(hdr.status); 4349 status = -nfs4_stat_to_errno(hdr.status);
4350 return status; 4350 return status;
4351} 4351}
4352 4352
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
4368 if (!status) 4368 if (!status)
4369 status = decode_fsinfo(&xdr, fsinfo); 4369 status = decode_fsinfo(&xdr, fsinfo);
4370 if (!status) 4370 if (!status)
4371 status = -nfs_stat_to_errno(hdr.status); 4371 status = -nfs4_stat_to_errno(hdr.status);
4372 return status; 4372 return status;
4373} 4373}
4374 4374
@@ -4521,7 +4521,7 @@ static struct {
4521 * This one is used jointly by NFSv2 and NFSv3. 4521 * This one is used jointly by NFSv2 and NFSv3.
4522 */ 4522 */
4523static int 4523static int
4524nfs_stat_to_errno(int stat) 4524nfs4_stat_to_errno(int stat)
4525{ 4525{
4526 int i; 4526 int i;
4527 for (i = 0; nfs_errtbl[i].stat != -1; i++) { 4527 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 36e902a88ca1..829af323f288 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -392,7 +392,6 @@ int __init nfs_init_nfspagecache(void)
392 392
393void nfs_destroy_nfspagecache(void) 393void nfs_destroy_nfspagecache(void)
394{ 394{
395 if (kmem_cache_destroy(nfs_page_cachep)) 395 kmem_cache_destroy(nfs_page_cachep);
396 printk(KERN_INFO "nfs_page: not all structures were freed\n");
397} 396}
398 397
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea3229e..4529cc4f3f8f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
66 66
67 dprintk("%s: call getattr\n", __FUNCTION__); 67 dprintk("%s: call getattr\n", __FUNCTION__);
68 nfs_fattr_init(fattr); 68 nfs_fattr_init(fattr);
69 status = rpc_call_sync(server->client_sys, &msg, 0); 69 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
70 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); 70 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
71 if (status) 71 if (status)
72 return status; 72 return status;
73 dprintk("%s: call statfs\n", __FUNCTION__); 73 dprintk("%s: call statfs\n", __FUNCTION__);
74 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; 74 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
75 msg.rpc_resp = &fsinfo; 75 msg.rpc_resp = &fsinfo;
76 status = rpc_call_sync(server->client_sys, &msg, 0); 76 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
77 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); 77 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
78 if (status) 78 if (status)
79 return status; 79 return status;
@@ -352,7 +352,7 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *
352{ 352{
353 struct nfs_diropargs *arg; 353 struct nfs_diropargs *arg;
354 354
355 arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL); 355 arg = kmalloc(sizeof(*arg), GFP_KERNEL);
356 if (!arg) 356 if (!arg)
357 return -ENOMEM; 357 return -ENOMEM;
358 arg->fh = NFS_FH(dir->d_inode); 358 arg->fh = NFS_FH(dir->d_inode);
@@ -425,16 +425,17 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
425} 425}
426 426
427static int 427static int
428nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, 428nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
429 struct iattr *sattr, struct nfs_fh *fhandle, 429 unsigned int len, struct iattr *sattr)
430 struct nfs_fattr *fattr)
431{ 430{
431 struct nfs_fh fhandle;
432 struct nfs_fattr fattr;
432 struct nfs_symlinkargs arg = { 433 struct nfs_symlinkargs arg = {
433 .fromfh = NFS_FH(dir), 434 .fromfh = NFS_FH(dir),
434 .fromname = name->name, 435 .fromname = dentry->d_name.name,
435 .fromlen = name->len, 436 .fromlen = dentry->d_name.len,
436 .topath = path->name, 437 .pages = &page,
437 .tolen = path->len, 438 .pathlen = len,
438 .sattr = sattr 439 .sattr = sattr
439 }; 440 };
440 struct rpc_message msg = { 441 struct rpc_message msg = {
@@ -443,13 +444,25 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
443 }; 444 };
444 int status; 445 int status;
445 446
446 if (path->len > NFS2_MAXPATHLEN) 447 if (len > NFS2_MAXPATHLEN)
447 return -ENAMETOOLONG; 448 return -ENAMETOOLONG;
448 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 449
449 nfs_fattr_init(fattr); 450 dprintk("NFS call symlink %s\n", dentry->d_name.name);
450 fhandle->size = 0; 451
451 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 452 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
452 nfs_mark_for_revalidate(dir); 453 nfs_mark_for_revalidate(dir);
454
455 /*
456 * V2 SYMLINK requests don't return any attributes. Setting the
457 * filehandle size to zero indicates to nfs_instantiate that it
458 * should fill in the data with a LOOKUP call on the wire.
459 */
460 if (status == 0) {
461 nfs_fattr_init(&fattr);
462 fhandle.size = 0;
463 status = nfs_instantiate(dentry, &fhandle, &fattr);
464 }
465
453 dprintk("NFS reply symlink: %d\n", status); 466 dprintk("NFS reply symlink: %d\n", status);
454 return status; 467 return status;
455} 468}
@@ -671,7 +684,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
671} 684}
672 685
673 686
674struct nfs_rpc_ops nfs_v2_clientops = { 687const struct nfs_rpc_ops nfs_v2_clientops = {
675 .version = 2, /* protocol version */ 688 .version = 2, /* protocol version */
676 .dentry_ops = &nfs_dentry_operations, 689 .dentry_ops = &nfs_dentry_operations,
677 .dir_inode_ops = &nfs_dir_inode_operations, 690 .dir_inode_ops = &nfs_dir_inode_operations,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f0aff824a291..c2e49c397a27 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
171 rdata->args.offset = page_offset(page) + rdata->args.pgbase; 171 rdata->args.offset = page_offset(page) + rdata->args.pgbase;
172 172
173 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", 173 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
174 NFS_SERVER(inode)->hostname, 174 NFS_SERVER(inode)->nfs_client->cl_hostname,
175 inode->i_sb->s_id, 175 inode->i_sb->s_id,
176 (long long)NFS_FILEID(inode), 176 (long long)NFS_FILEID(inode),
177 (unsigned long long)rdata->args.pgbase, 177 (unsigned long long)rdata->args.pgbase,
@@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
568 568
569 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); 569 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
570 570
571 /* Is this a short read? */ 571 if (task->tk_status < 0) {
572 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { 572 if (task->tk_status == -ESTALE) {
573 set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
574 nfs_mark_for_revalidate(data->inode);
575 }
576 } else if (resp->count < argp->count && !resp->eof) {
577 /* This is a short read! */
573 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 578 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
574 /* Has the server at least made some progress? */ 579 /* Has the server at least made some progress? */
575 if (resp->count != 0) { 580 if (resp->count != 0) {
@@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
616 if (error) 621 if (error)
617 goto out_error; 622 goto out_error;
618 623
624 error = -ESTALE;
625 if (NFS_STALE(inode))
626 goto out_error;
627
619 if (file == NULL) { 628 if (file == NULL) {
620 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 629 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
621 if (ctx == NULL) 630 if (ctx == NULL)
@@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
678 }; 687 };
679 struct inode *inode = mapping->host; 688 struct inode *inode = mapping->host;
680 struct nfs_server *server = NFS_SERVER(inode); 689 struct nfs_server *server = NFS_SERVER(inode);
681 int ret; 690 int ret = -ESTALE;
682 691
683 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 692 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
684 inode->i_sb->s_id, 693 inode->i_sb->s_id,
@@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
686 nr_pages); 695 nr_pages);
687 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 696 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
688 697
698 if (NFS_STALE(inode))
699 goto out;
700
689 if (filp == NULL) { 701 if (filp == NULL) {
690 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 702 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
691 if (desc.ctx == NULL) 703 if (desc.ctx == NULL)
@@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
701 ret = err; 713 ret = err;
702 } 714 }
703 put_nfs_open_context(desc.ctx); 715 put_nfs_open_context(desc.ctx);
716out:
704 return ret; 717 return ret;
705} 718}
706 719
@@ -724,6 +737,5 @@ int __init nfs_init_readpagecache(void)
724void nfs_destroy_readpagecache(void) 737void nfs_destroy_readpagecache(void)
725{ 738{
726 mempool_destroy(nfs_rdata_mempool); 739 mempool_destroy(nfs_rdata_mempool);
727 if (kmem_cache_destroy(nfs_rdata_cachep)) 740 kmem_cache_destroy(nfs_rdata_cachep);
728 printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
729} 741}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee74d9d..e8d40030cab4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
13 * 13 *
14 * Split from inode.c by David Howells <dhowells@redhat.com> 14 * Split from inode.c by David Howells <dhowells@redhat.com>
15 * 15 *
16 * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
17 * particular server are held in the same superblock
18 * - NFS superblocks can have several effective roots to the dentry tree
19 * - directory type roots are spliced into the tree when a path from one root reaches the root
20 * of another (see nfs_lookup())
16 */ 21 */
17 22
18#include <linux/config.h> 23#include <linux/config.h>
@@ -52,66 +57,12 @@
52 57
53#define NFSDBG_FACILITY NFSDBG_VFS 58#define NFSDBG_FACILITY NFSDBG_VFS
54 59
55/* Maximum number of readahead requests
56 * FIXME: this should really be a sysctl so that users may tune it to suit
57 * their needs. People that do NFS over a slow network, might for
58 * instance want to reduce it to something closer to 1 for improved
59 * interactive response.
60 */
61#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
62
63/*
64 * RPC cruft for NFS
65 */
66static struct rpc_version * nfs_version[] = {
67 NULL,
68 NULL,
69 &nfs_version2,
70#if defined(CONFIG_NFS_V3)
71 &nfs_version3,
72#elif defined(CONFIG_NFS_V4)
73 NULL,
74#endif
75#if defined(CONFIG_NFS_V4)
76 &nfs_version4,
77#endif
78};
79
80static struct rpc_program nfs_program = {
81 .name = "nfs",
82 .number = NFS_PROGRAM,
83 .nrvers = ARRAY_SIZE(nfs_version),
84 .version = nfs_version,
85 .stats = &nfs_rpcstat,
86 .pipe_dir_name = "/nfs",
87};
88
89struct rpc_stat nfs_rpcstat = {
90 .program = &nfs_program
91};
92
93
94#ifdef CONFIG_NFS_V3_ACL
95static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
96static struct rpc_version * nfsacl_version[] = {
97 [3] = &nfsacl_version3,
98};
99
100struct rpc_program nfsacl_program = {
101 .name = "nfsacl",
102 .number = NFS_ACL_PROGRAM,
103 .nrvers = ARRAY_SIZE(nfsacl_version),
104 .version = nfsacl_version,
105 .stats = &nfsacl_rpcstat,
106};
107#endif /* CONFIG_NFS_V3_ACL */
108
109static void nfs_umount_begin(struct vfsmount *, int); 60static void nfs_umount_begin(struct vfsmount *, int);
110static int nfs_statfs(struct dentry *, struct kstatfs *); 61static int nfs_statfs(struct dentry *, struct kstatfs *);
111static int nfs_show_options(struct seq_file *, struct vfsmount *); 62static int nfs_show_options(struct seq_file *, struct vfsmount *);
112static int nfs_show_stats(struct seq_file *, struct vfsmount *); 63static int nfs_show_stats(struct seq_file *, struct vfsmount *);
113static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 64static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
114static int nfs_clone_nfs_sb(struct file_system_type *fs_type, 65static int nfs_xdev_get_sb(struct file_system_type *fs_type,
115 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 66 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
116static void nfs_kill_super(struct super_block *); 67static void nfs_kill_super(struct super_block *);
117 68
@@ -120,15 +71,15 @@ static struct file_system_type nfs_fs_type = {
120 .name = "nfs", 71 .name = "nfs",
121 .get_sb = nfs_get_sb, 72 .get_sb = nfs_get_sb,
122 .kill_sb = nfs_kill_super, 73 .kill_sb = nfs_kill_super,
123 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 74 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
124}; 75};
125 76
126struct file_system_type clone_nfs_fs_type = { 77struct file_system_type nfs_xdev_fs_type = {
127 .owner = THIS_MODULE, 78 .owner = THIS_MODULE,
128 .name = "nfs", 79 .name = "nfs",
129 .get_sb = nfs_clone_nfs_sb, 80 .get_sb = nfs_xdev_get_sb,
130 .kill_sb = nfs_kill_super, 81 .kill_sb = nfs_kill_super,
131 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 82 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
132}; 83};
133 84
134static struct super_operations nfs_sops = { 85static struct super_operations nfs_sops = {
@@ -145,10 +96,10 @@ static struct super_operations nfs_sops = {
145#ifdef CONFIG_NFS_V4 96#ifdef CONFIG_NFS_V4
146static int nfs4_get_sb(struct file_system_type *fs_type, 97static int nfs4_get_sb(struct file_system_type *fs_type,
147 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 98 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
148static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, 99static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
149 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 100 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
150static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, 101static int nfs4_referral_get_sb(struct file_system_type *fs_type,
151 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 102 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
152static void nfs4_kill_super(struct super_block *sb); 103static void nfs4_kill_super(struct super_block *sb);
153 104
154static struct file_system_type nfs4_fs_type = { 105static struct file_system_type nfs4_fs_type = {
@@ -156,23 +107,23 @@ static struct file_system_type nfs4_fs_type = {
156 .name = "nfs4", 107 .name = "nfs4",
157 .get_sb = nfs4_get_sb, 108 .get_sb = nfs4_get_sb,
158 .kill_sb = nfs4_kill_super, 109 .kill_sb = nfs4_kill_super,
159 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 110 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
160}; 111};
161 112
162struct file_system_type clone_nfs4_fs_type = { 113struct file_system_type nfs4_xdev_fs_type = {
163 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
164 .name = "nfs4", 115 .name = "nfs4",
165 .get_sb = nfs_clone_nfs4_sb, 116 .get_sb = nfs4_xdev_get_sb,
166 .kill_sb = nfs4_kill_super, 117 .kill_sb = nfs4_kill_super,
167 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 118 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
168}; 119};
169 120
170struct file_system_type nfs_referral_nfs4_fs_type = { 121struct file_system_type nfs4_referral_fs_type = {
171 .owner = THIS_MODULE, 122 .owner = THIS_MODULE,
172 .name = "nfs4", 123 .name = "nfs4",
173 .get_sb = nfs_referral_nfs4_sb, 124 .get_sb = nfs4_referral_get_sb,
174 .kill_sb = nfs4_kill_super, 125 .kill_sb = nfs4_kill_super,
175 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 126 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
176}; 127};
177 128
178static struct super_operations nfs4_sops = { 129static struct super_operations nfs4_sops = {
@@ -187,39 +138,7 @@ static struct super_operations nfs4_sops = {
187}; 138};
188#endif 139#endif
189 140
190#ifdef CONFIG_NFS_V4 141static struct shrinker *acl_shrinker;
191static const int nfs_set_port_min = 0;
192static const int nfs_set_port_max = 65535;
193
194static int param_set_port(const char *val, struct kernel_param *kp)
195{
196 char *endp;
197 int num = simple_strtol(val, &endp, 0);
198 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
199 return -EINVAL;
200 *((int *)kp->arg) = num;
201 return 0;
202}
203
204module_param_call(callback_tcpport, param_set_port, param_get_int,
205 &nfs_callback_set_tcpport, 0644);
206#endif
207
208#ifdef CONFIG_NFS_V4
209static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
210{
211 char *endp;
212 int num = simple_strtol(val, &endp, 0);
213 int jif = num * HZ;
214 if (endp == val || *endp || num < 0 || jif < num)
215 return -EINVAL;
216 *((int *)kp->arg) = jif;
217 return 0;
218}
219
220module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
221 &nfs_idmap_cache_timeout, 0644);
222#endif
223 142
224/* 143/*
225 * Register the NFS filesystems 144 * Register the NFS filesystems
@@ -240,6 +159,7 @@ int __init register_nfs_fs(void)
240 if (ret < 0) 159 if (ret < 0)
241 goto error_2; 160 goto error_2;
242#endif 161#endif
162 acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
243 return 0; 163 return 0;
244 164
245#ifdef CONFIG_NFS_V4 165#ifdef CONFIG_NFS_V4
@@ -257,6 +177,8 @@ error_0:
257 */ 177 */
258void __exit unregister_nfs_fs(void) 178void __exit unregister_nfs_fs(void)
259{ 179{
180 if (acl_shrinker != NULL)
181 remove_shrinker(acl_shrinker);
260#ifdef CONFIG_NFS_V4 182#ifdef CONFIG_NFS_V4
261 unregister_filesystem(&nfs4_fs_type); 183 unregister_filesystem(&nfs4_fs_type);
262 nfs_unregister_sysctl(); 184 nfs_unregister_sysctl();
@@ -269,11 +191,10 @@ void __exit unregister_nfs_fs(void)
269 */ 191 */
270static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) 192static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
271{ 193{
272 struct super_block *sb = dentry->d_sb; 194 struct nfs_server *server = NFS_SB(dentry->d_sb);
273 struct nfs_server *server = NFS_SB(sb);
274 unsigned char blockbits; 195 unsigned char blockbits;
275 unsigned long blockres; 196 unsigned long blockres;
276 struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); 197 struct nfs_fh *fh = NFS_FH(dentry->d_inode);
277 struct nfs_fattr fattr; 198 struct nfs_fattr fattr;
278 struct nfs_fsstat res = { 199 struct nfs_fsstat res = {
279 .fattr = &fattr, 200 .fattr = &fattr,
@@ -282,7 +203,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
282 203
283 lock_kernel(); 204 lock_kernel();
284 205
285 error = server->rpc_ops->statfs(server, rootfh, &res); 206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
286 buf->f_type = NFS_SUPER_MAGIC; 207 buf->f_type = NFS_SUPER_MAGIC;
287 if (error < 0) 208 if (error < 0)
288 goto out_err; 209 goto out_err;
@@ -292,7 +213,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
292 * case where f_frsize != f_bsize. Eventually we want to 213 * case where f_frsize != f_bsize. Eventually we want to
293 * report the value of wtmult in this field. 214 * report the value of wtmult in this field.
294 */ 215 */
295 buf->f_frsize = sb->s_blocksize; 216 buf->f_frsize = dentry->d_sb->s_blocksize;
296 217
297 /* 218 /*
298 * On most *nix systems, f_blocks, f_bfree, and f_bavail 219 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -301,8 +222,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
301 * thus historically Linux's sys_statfs reports these 222 * thus historically Linux's sys_statfs reports these
302 * fields in units of f_bsize. 223 * fields in units of f_bsize.
303 */ 224 */
304 buf->f_bsize = sb->s_blocksize; 225 buf->f_bsize = dentry->d_sb->s_blocksize;
305 blockbits = sb->s_blocksize_bits; 226 blockbits = dentry->d_sb->s_blocksize_bits;
306 blockres = (1 << blockbits) - 1; 227 blockres = (1 << blockbits) - 1;
307 buf->f_blocks = (res.tbytes + blockres) >> blockbits; 228 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
308 buf->f_bfree = (res.fbytes + blockres) >> blockbits; 229 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
@@ -323,9 +244,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
323 244
324} 245}
325 246
247/*
248 * Map the security flavour number to a name
249 */
326static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) 250static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
327{ 251{
328 static struct { 252 static const struct {
329 rpc_authflavor_t flavour; 253 rpc_authflavor_t flavour;
330 const char *str; 254 const char *str;
331 } sec_flavours[] = { 255 } sec_flavours[] = {
@@ -356,10 +280,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
356 */ 280 */
357static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) 281static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
358{ 282{
359 static struct proc_nfs_info { 283 static const struct proc_nfs_info {
360 int flag; 284 int flag;
361 char *str; 285 const char *str;
362 char *nostr; 286 const char *nostr;
363 } nfs_info[] = { 287 } nfs_info[] = {
364 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 288 { NFS_MOUNT_SOFT, ",soft", ",hard" },
365 { NFS_MOUNT_INTR, ",intr", "" }, 289 { NFS_MOUNT_INTR, ",intr", "" },
@@ -369,11 +293,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
369 { NFS_MOUNT_NOACL, ",noacl", "" }, 293 { NFS_MOUNT_NOACL, ",noacl", "" },
370 { 0, NULL, NULL } 294 { 0, NULL, NULL }
371 }; 295 };
372 struct proc_nfs_info *nfs_infop; 296 const struct proc_nfs_info *nfs_infop;
297 struct nfs_client *clp = nfss->nfs_client;
373 char buf[12]; 298 char buf[12];
374 char *proto; 299 const char *proto;
375 300
376 seq_printf(m, ",vers=%d", nfss->rpc_ops->version); 301 seq_printf(m, ",vers=%d", clp->rpc_ops->version);
377 seq_printf(m, ",rsize=%d", nfss->rsize); 302 seq_printf(m, ",rsize=%d", nfss->rsize);
378 seq_printf(m, ",wsize=%d", nfss->wsize); 303 seq_printf(m, ",wsize=%d", nfss->wsize);
379 if (nfss->acregmin != 3*HZ || showdefaults) 304 if (nfss->acregmin != 3*HZ || showdefaults)
@@ -402,8 +327,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
402 proto = buf; 327 proto = buf;
403 } 328 }
404 seq_printf(m, ",proto=%s", proto); 329 seq_printf(m, ",proto=%s", proto);
405 seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); 330 seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
406 seq_printf(m, ",retrans=%u", nfss->retrans_count); 331 seq_printf(m, ",retrans=%u", clp->retrans_count);
407 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); 332 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
408} 333}
409 334
@@ -417,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
417 nfs_show_mount_options(m, nfss, 0); 342 nfs_show_mount_options(m, nfss, 0);
418 343
419 seq_puts(m, ",addr="); 344 seq_puts(m, ",addr=");
420 seq_escape(m, nfss->hostname, " \t\n\\"); 345 seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
421 346
422 return 0; 347 return 0;
423} 348}
@@ -454,7 +379,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
454 seq_printf(m, ",namelen=%d", nfss->namelen); 379 seq_printf(m, ",namelen=%d", nfss->namelen);
455 380
456#ifdef CONFIG_NFS_V4 381#ifdef CONFIG_NFS_V4
457 if (nfss->rpc_ops->version == 4) { 382 if (nfss->nfs_client->cl_nfsversion == 4) {
458 seq_printf(m, "\n\tnfsv4:\t"); 383 seq_printf(m, "\n\tnfsv4:\t");
459 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); 384 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
460 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); 385 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -501,782 +426,353 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
501 426
502/* 427/*
503 * Begin unmount by attempting to remove all automounted mountpoints we added 428 * Begin unmount by attempting to remove all automounted mountpoints we added
504 * in response to traversals 429 * in response to xdev traversals and referrals
505 */ 430 */
506static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 431static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
507{ 432{
508 struct nfs_server *server;
509 struct rpc_clnt *rpc;
510
511 shrink_submounts(vfsmnt, &nfs_automount_list); 433 shrink_submounts(vfsmnt, &nfs_automount_list);
512 if (!(flags & MNT_FORCE))
513 return;
514 /* -EIO all pending I/O */
515 server = NFS_SB(vfsmnt->mnt_sb);
516 rpc = server->client;
517 if (!IS_ERR(rpc))
518 rpc_killall_tasks(rpc);
519 rpc = server->client_acl;
520 if (!IS_ERR(rpc))
521 rpc_killall_tasks(rpc);
522} 434}
523 435
524/* 436/*
525 * Obtain the root inode of the file system. 437 * Validate the NFS2/NFS3 mount data
438 * - fills in the mount root filehandle
526 */ 439 */
527static struct inode * 440static int nfs_validate_mount_data(struct nfs_mount_data *data,
528nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) 441 struct nfs_fh *mntfh)
529{ 442{
530 struct nfs_server *server = NFS_SB(sb); 443 if (data == NULL) {
531 int error; 444 dprintk("%s: missing data argument\n", __FUNCTION__);
532 445 return -EINVAL;
533 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
534 if (error < 0) {
535 dprintk("nfs_get_root: getattr error = %d\n", -error);
536 return ERR_PTR(error);
537 } 446 }
538 447
539 server->fsid = fsinfo->fattr->fsid; 448 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
540 return nfs_fhget(sb, rootfh, fsinfo->fattr); 449 dprintk("%s: bad mount version\n", __FUNCTION__);
541} 450 return -EINVAL;
542 451 }
543/*
544 * Do NFS version-independent mount processing, and sanity checking
545 */
546static int
547nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
548{
549 struct nfs_server *server;
550 struct inode *root_inode;
551 struct nfs_fattr fattr;
552 struct nfs_fsinfo fsinfo = {
553 .fattr = &fattr,
554 };
555 struct nfs_pathconf pathinfo = {
556 .fattr = &fattr,
557 };
558 int no_root_error = 0;
559 unsigned long max_rpc_payload;
560
561 /* We probably want something more informative here */
562 snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
563
564 server = NFS_SB(sb);
565 452
566 sb->s_magic = NFS_SUPER_MAGIC; 453 switch (data->version) {
454 case 1:
455 data->namlen = 0;
456 case 2:
457 data->bsize = 0;
458 case 3:
459 if (data->flags & NFS_MOUNT_VER3) {
460 dprintk("%s: mount structure version %d does not support NFSv3\n",
461 __FUNCTION__,
462 data->version);
463 return -EINVAL;
464 }
465 data->root.size = NFS2_FHSIZE;
466 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
467 case 4:
468 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
469 dprintk("%s: mount structure version %d does not support strong security\n",
470 __FUNCTION__,
471 data->version);
472 return -EINVAL;
473 }
474 case 5:
475 memset(data->context, 0, sizeof(data->context));
476 }
567 477
568 server->io_stats = nfs_alloc_iostats(); 478 /* Set the pseudoflavor */
569 if (server->io_stats == NULL) 479 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
570 return -ENOMEM; 480 data->pseudoflavor = RPC_AUTH_UNIX;
571 481
572 root_inode = nfs_get_root(sb, &server->fh, &fsinfo); 482#ifndef CONFIG_NFS_V3
573 /* Did getting the root inode fail? */ 483 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
574 if (IS_ERR(root_inode)) { 484 if (data->flags & NFS_MOUNT_VER3) {
575 no_root_error = PTR_ERR(root_inode); 485 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
576 goto out_no_root; 486 return -EPROTONOSUPPORT;
577 }
578 sb->s_root = d_alloc_root(root_inode);
579 if (!sb->s_root) {
580 no_root_error = -ENOMEM;
581 goto out_no_root;
582 } 487 }
583 sb->s_root->d_op = server->rpc_ops->dentry_ops; 488#endif /* CONFIG_NFS_V3 */
584
585 /* mount time stamp, in seconds */
586 server->mount_time = jiffies;
587
588 /* Get some general file system info */
589 if (server->namelen == 0 &&
590 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
591 server->namelen = pathinfo.max_namelen;
592 /* Work out a lot of parameters */
593 if (server->rsize == 0)
594 server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
595 if (server->wsize == 0)
596 server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
597
598 if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
599 server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
600 if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
601 server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
602
603 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
604 if (server->rsize > max_rpc_payload)
605 server->rsize = max_rpc_payload;
606 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
607 server->rsize = NFS_MAX_FILE_IO_SIZE;
608 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
609
610 if (server->wsize > max_rpc_payload)
611 server->wsize = max_rpc_payload;
612 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
613 server->wsize = NFS_MAX_FILE_IO_SIZE;
614 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
615 489
616 if (sb->s_blocksize == 0) 490 /* We now require that the mount process passes the remote address */
617 sb->s_blocksize = nfs_block_bits(server->wsize, 491 if (data->addr.sin_addr.s_addr == INADDR_ANY) {
618 &sb->s_blocksize_bits); 492 dprintk("%s: mount program didn't pass remote address!\n",
619 server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); 493 __FUNCTION__);
620 494 return -EINVAL;
621 server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
622 if (server->dtsize > PAGE_CACHE_SIZE)
623 server->dtsize = PAGE_CACHE_SIZE;
624 if (server->dtsize > server->rsize)
625 server->dtsize = server->rsize;
626
627 if (server->flags & NFS_MOUNT_NOAC) {
628 server->acregmin = server->acregmax = 0;
629 server->acdirmin = server->acdirmax = 0;
630 sb->s_flags |= MS_SYNCHRONOUS;
631 } 495 }
632 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
633 496
634 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); 497 /* Prepare the root filehandle */
498 if (data->flags & NFS_MOUNT_VER3)
499 mntfh->size = data->root.size;
500 else
501 mntfh->size = NFS2_FHSIZE;
502
503 if (mntfh->size > sizeof(mntfh->data)) {
504 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
505 return -EINVAL;
506 }
635 507
636 server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; 508 memcpy(mntfh->data, data->root.data, mntfh->size);
637 server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; 509 if (mntfh->size < sizeof(mntfh->data))
510 memset(mntfh->data + mntfh->size, 0,
511 sizeof(mntfh->data) - mntfh->size);
638 512
639 /* We're airborne Set socket buffersize */
640 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
641 return 0; 513 return 0;
642 /* Yargs. It didn't work out. */
643out_no_root:
644 dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
645 if (!IS_ERR(root_inode))
646 iput(root_inode);
647 return no_root_error;
648} 514}
649 515
650/* 516/*
651 * Initialise the timeout values for a connection 517 * Initialise the common bits of the superblock
652 */ 518 */
653static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) 519static inline void nfs_initialise_sb(struct super_block *sb)
654{ 520{
655 to->to_initval = timeo * HZ / 10; 521 struct nfs_server *server = NFS_SB(sb);
656 to->to_retries = retrans;
657 if (!to->to_retries)
658 to->to_retries = 2;
659
660 switch (proto) {
661 case IPPROTO_TCP:
662 if (!to->to_initval)
663 to->to_initval = 60 * HZ;
664 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
665 to->to_initval = NFS_MAX_TCP_TIMEOUT;
666 to->to_increment = to->to_initval;
667 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
668 to->to_exponential = 0;
669 break;
670 case IPPROTO_UDP:
671 default:
672 if (!to->to_initval)
673 to->to_initval = 11 * HZ / 10;
674 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
675 to->to_initval = NFS_MAX_UDP_TIMEOUT;
676 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
677 to->to_exponential = 1;
678 break;
679 }
680}
681 522
682/* 523 sb->s_magic = NFS_SUPER_MAGIC;
683 * Create an RPC client handle.
684 */
685static struct rpc_clnt *
686nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
687{
688 struct rpc_timeout timeparms;
689 struct rpc_xprt *xprt = NULL;
690 struct rpc_clnt *clnt = NULL;
691 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
692
693 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
694
695 server->retrans_timeo = timeparms.to_initval;
696 server->retrans_count = timeparms.to_retries;
697
698 /* create transport and client */
699 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
700 if (IS_ERR(xprt)) {
701 dprintk("%s: cannot create RPC transport. Error = %ld\n",
702 __FUNCTION__, PTR_ERR(xprt));
703 return (struct rpc_clnt *)xprt;
704 }
705 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
706 server->rpc_ops->version, data->pseudoflavor);
707 if (IS_ERR(clnt)) {
708 dprintk("%s: cannot create RPC client. Error = %ld\n",
709 __FUNCTION__, PTR_ERR(xprt));
710 goto out_fail;
711 }
712 524
713 clnt->cl_intr = 1; 525 /* We probably want something more informative here */
714 clnt->cl_softrtry = 1; 526 snprintf(sb->s_id, sizeof(sb->s_id),
527 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
528
529 if (sb->s_blocksize == 0)
530 sb->s_blocksize = nfs_block_bits(server->wsize,
531 &sb->s_blocksize_bits);
715 532
716 return clnt; 533 if (server->flags & NFS_MOUNT_NOAC)
534 sb->s_flags |= MS_SYNCHRONOUS;
717 535
718out_fail: 536 nfs_super_set_maxbytes(sb, server->maxfilesize);
719 return clnt;
720} 537}
721 538
722/* 539/*
723 * Clone a server record 540 * Finish setting up an NFS2/3 superblock
724 */ 541 */
725static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) 542static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
726{ 543{
727 struct nfs_server *server = NFS_SB(sb); 544 struct nfs_server *server = NFS_SB(sb);
728 struct nfs_server *parent = NFS_SB(data->sb);
729 struct inode *root_inode;
730 struct nfs_fsinfo fsinfo;
731 void *err = ERR_PTR(-ENOMEM);
732
733 sb->s_op = data->sb->s_op;
734 sb->s_blocksize = data->sb->s_blocksize;
735 sb->s_blocksize_bits = data->sb->s_blocksize_bits;
736 sb->s_maxbytes = data->sb->s_maxbytes;
737
738 server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
739 server->io_stats = nfs_alloc_iostats();
740 if (server->io_stats == NULL)
741 goto out;
742
743 server->client = rpc_clone_client(parent->client);
744 if (IS_ERR((err = server->client)))
745 goto out;
746
747 if (!IS_ERR(parent->client_sys)) {
748 server->client_sys = rpc_clone_client(parent->client_sys);
749 if (IS_ERR((err = server->client_sys)))
750 goto out;
751 }
752 if (!IS_ERR(parent->client_acl)) {
753 server->client_acl = rpc_clone_client(parent->client_acl);
754 if (IS_ERR((err = server->client_acl)))
755 goto out;
756 }
757 root_inode = nfs_fhget(sb, data->fh, data->fattr);
758 if (!root_inode)
759 goto out;
760 sb->s_root = d_alloc_root(root_inode);
761 if (!sb->s_root)
762 goto out_put_root;
763 fsinfo.fattr = data->fattr;
764 if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
765 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
766 sb->s_root->d_op = server->rpc_ops->dentry_ops;
767 sb->s_flags |= MS_ACTIVE;
768 return server;
769out_put_root:
770 iput(root_inode);
771out:
772 return err;
773}
774 545
775/* 546 sb->s_blocksize_bits = 0;
776 * Copy an existing superblock and attach revised data 547 sb->s_blocksize = 0;
777 */ 548 if (data->bsize)
778static int nfs_clone_generic_sb(struct nfs_clone_mount *data, 549 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
779 struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
780 struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
781 struct vfsmount *mnt)
782{
783 struct nfs_server *server;
784 struct nfs_server *parent = NFS_SB(data->sb);
785 struct super_block *sb = ERR_PTR(-EINVAL);
786 char *hostname;
787 int error = -ENOMEM;
788 int len;
789
790 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
791 if (server == NULL)
792 goto out_err;
793 memcpy(server, parent, sizeof(*server));
794 hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
795 len = strlen(hostname) + 1;
796 server->hostname = kmalloc(len, GFP_KERNEL);
797 if (server->hostname == NULL)
798 goto free_server;
799 memcpy(server->hostname, hostname, len);
800 error = rpciod_up();
801 if (error != 0)
802 goto free_hostname;
803
804 sb = fill_sb(server, data);
805 if (IS_ERR(sb)) {
806 error = PTR_ERR(sb);
807 goto kill_rpciod;
808 }
809
810 if (sb->s_root)
811 goto out_rpciod_down;
812 550
813 server = fill_server(sb, data); 551 if (server->flags & NFS_MOUNT_VER3) {
814 if (IS_ERR(server)) { 552 /* The VFS shouldn't apply the umask to mode bits. We will do
815 error = PTR_ERR(server); 553 * so ourselves when necessary.
816 goto out_deactivate; 554 */
555 sb->s_flags |= MS_POSIXACL;
556 sb->s_time_gran = 1;
817 } 557 }
818 return simple_set_mnt(mnt, sb); 558
819out_deactivate: 559 sb->s_op = &nfs_sops;
820 up_write(&sb->s_umount); 560 nfs_initialise_sb(sb);
821 deactivate_super(sb);
822 return error;
823out_rpciod_down:
824 rpciod_down();
825 kfree(server->hostname);
826 kfree(server);
827 return simple_set_mnt(mnt, sb);
828kill_rpciod:
829 rpciod_down();
830free_hostname:
831 kfree(server->hostname);
832free_server:
833 kfree(server);
834out_err:
835 return error;
836} 561}
837 562
838/* 563/*
839 * Set up an NFS2/3 superblock 564 * Finish setting up a cloned NFS2/3 superblock
840 *
841 * The way this works is that the mount process passes a structure
842 * in the data argument which contains the server's IP address
843 * and the root file handle obtained from the server's mount
844 * daemon. We stash these away in the private superblock fields.
845 */ 565 */
846static int 566static void nfs_clone_super(struct super_block *sb,
847nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) 567 const struct super_block *old_sb)
848{ 568{
849 struct nfs_server *server; 569 struct nfs_server *server = NFS_SB(sb);
850 rpc_authflavor_t authflavor;
851 570
852 server = NFS_SB(sb); 571 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
853 sb->s_blocksize_bits = 0; 572 sb->s_blocksize = old_sb->s_blocksize;
854 sb->s_blocksize = 0; 573 sb->s_maxbytes = old_sb->s_maxbytes;
855 if (data->bsize)
856 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
857 if (data->rsize)
858 server->rsize = nfs_block_size(data->rsize, NULL);
859 if (data->wsize)
860 server->wsize = nfs_block_size(data->wsize, NULL);
861 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
862
863 server->acregmin = data->acregmin*HZ;
864 server->acregmax = data->acregmax*HZ;
865 server->acdirmin = data->acdirmin*HZ;
866 server->acdirmax = data->acdirmax*HZ;
867
868 /* Start lockd here, before we might error out */
869 if (!(server->flags & NFS_MOUNT_NONLM))
870 lockd_up();
871
872 server->namelen = data->namlen;
873 server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
874 if (!server->hostname)
875 return -ENOMEM;
876 strcpy(server->hostname, data->hostname);
877
878 /* Check NFS protocol revision and initialize RPC op vector
879 * and file handle pool. */
880#ifdef CONFIG_NFS_V3
881 if (server->flags & NFS_MOUNT_VER3) {
882 server->rpc_ops = &nfs_v3_clientops;
883 server->caps |= NFS_CAP_READDIRPLUS;
884 } else {
885 server->rpc_ops = &nfs_v2_clientops;
886 }
887#else
888 server->rpc_ops = &nfs_v2_clientops;
889#endif
890 574
891 /* Fill in pseudoflavor for mount version < 5 */
892 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
893 data->pseudoflavor = RPC_AUTH_UNIX;
894 authflavor = data->pseudoflavor; /* save for sb_init() */
895 /* XXX maybe we want to add a server->pseudoflavor field */
896
897 /* Create RPC client handles */
898 server->client = nfs_create_client(server, data);
899 if (IS_ERR(server->client))
900 return PTR_ERR(server->client);
901 /* RFC 2623, sec 2.3.2 */
902 if (authflavor != RPC_AUTH_UNIX) {
903 struct rpc_auth *auth;
904
905 server->client_sys = rpc_clone_client(server->client);
906 if (IS_ERR(server->client_sys))
907 return PTR_ERR(server->client_sys);
908 auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
909 if (IS_ERR(auth))
910 return PTR_ERR(auth);
911 } else {
912 atomic_inc(&server->client->cl_count);
913 server->client_sys = server->client;
914 }
915 if (server->flags & NFS_MOUNT_VER3) { 575 if (server->flags & NFS_MOUNT_VER3) {
916#ifdef CONFIG_NFS_V3_ACL 576 /* The VFS shouldn't apply the umask to mode bits. We will do
917 if (!(server->flags & NFS_MOUNT_NOACL)) { 577 * so ourselves when necessary.
918 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
919 /* No errors! Assume that Sun nfsacls are supported */
920 if (!IS_ERR(server->client_acl))
921 server->caps |= NFS_CAP_ACLS;
922 }
923#else
924 server->flags &= ~NFS_MOUNT_NOACL;
925#endif /* CONFIG_NFS_V3_ACL */
926 /*
927 * The VFS shouldn't apply the umask to mode bits. We will
928 * do so ourselves when necessary.
929 */ 578 */
930 sb->s_flags |= MS_POSIXACL; 579 sb->s_flags |= MS_POSIXACL;
931 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
932 server->namelen = NFS3_MAXNAMLEN;
933 sb->s_time_gran = 1; 580 sb->s_time_gran = 1;
934 } else {
935 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
936 server->namelen = NFS2_MAXNAMLEN;
937 } 581 }
938 582
939 sb->s_op = &nfs_sops; 583 sb->s_op = old_sb->s_op;
940 return nfs_sb_init(sb, authflavor); 584 nfs_initialise_sb(sb);
941} 585}
942 586
943static int nfs_set_super(struct super_block *s, void *data) 587static int nfs_set_super(struct super_block *s, void *_server)
944{ 588{
945 s->s_fs_info = data; 589 struct nfs_server *server = _server;
946 return set_anon_super(s, data); 590 int ret;
591
592 s->s_fs_info = server;
593 ret = set_anon_super(s, server);
594 if (ret == 0)
595 server->s_dev = s->s_dev;
596 return ret;
947} 597}
948 598
949static int nfs_compare_super(struct super_block *sb, void *data) 599static int nfs_compare_super(struct super_block *sb, void *data)
950{ 600{
951 struct nfs_server *server = data; 601 struct nfs_server *server = data, *old = NFS_SB(sb);
952 struct nfs_server *old = NFS_SB(sb);
953 602
954 if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) 603 if (old->nfs_client != server->nfs_client)
955 return 0; 604 return 0;
956 if (old->addr.sin_port != server->addr.sin_port) 605 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
957 return 0; 606 return 0;
958 return !nfs_compare_fh(&old->fh, &server->fh); 607 return 1;
959} 608}
960 609
961static int nfs_get_sb(struct file_system_type *fs_type, 610static int nfs_get_sb(struct file_system_type *fs_type,
962 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 611 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
963{ 612{
964 int error;
965 struct nfs_server *server = NULL; 613 struct nfs_server *server = NULL;
966 struct super_block *s; 614 struct super_block *s;
967 struct nfs_fh *root; 615 struct nfs_fh mntfh;
968 struct nfs_mount_data *data = raw_data; 616 struct nfs_mount_data *data = raw_data;
617 struct dentry *mntroot;
618 int error;
969 619
970 error = -EINVAL; 620 /* Validate the mount data */
971 if (data == NULL) { 621 error = nfs_validate_mount_data(data, &mntfh);
972 dprintk("%s: missing data argument\n", __FUNCTION__); 622 if (error < 0)
973 goto out_err_noserver; 623 return error;
974 }
975 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
976 dprintk("%s: bad mount version\n", __FUNCTION__);
977 goto out_err_noserver;
978 }
979 switch (data->version) {
980 case 1:
981 data->namlen = 0;
982 case 2:
983 data->bsize = 0;
984 case 3:
985 if (data->flags & NFS_MOUNT_VER3) {
986 dprintk("%s: mount structure version %d does not support NFSv3\n",
987 __FUNCTION__,
988 data->version);
989 goto out_err_noserver;
990 }
991 data->root.size = NFS2_FHSIZE;
992 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
993 case 4:
994 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
995 dprintk("%s: mount structure version %d does not support strong security\n",
996 __FUNCTION__,
997 data->version);
998 goto out_err_noserver;
999 }
1000 case 5:
1001 memset(data->context, 0, sizeof(data->context));
1002 }
1003#ifndef CONFIG_NFS_V3
1004 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
1005 error = -EPROTONOSUPPORT;
1006 if (data->flags & NFS_MOUNT_VER3) {
1007 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
1008 goto out_err_noserver;
1009 }
1010#endif /* CONFIG_NFS_V3 */
1011 624
1012 error = -ENOMEM; 625 /* Get a volume representation */
1013 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); 626 server = nfs_create_server(data, &mntfh);
1014 if (!server) 627 if (IS_ERR(server)) {
628 error = PTR_ERR(server);
1015 goto out_err_noserver; 629 goto out_err_noserver;
1016 /* Zero out the NFS state stuff */
1017 init_nfsv4_state(server);
1018 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
1019
1020 root = &server->fh;
1021 if (data->flags & NFS_MOUNT_VER3)
1022 root->size = data->root.size;
1023 else
1024 root->size = NFS2_FHSIZE;
1025 error = -EINVAL;
1026 if (root->size > sizeof(root->data)) {
1027 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
1028 goto out_err;
1029 }
1030 memcpy(root->data, data->root.data, root->size);
1031
1032 /* We now require that the mount process passes the remote address */
1033 memcpy(&server->addr, &data->addr, sizeof(server->addr));
1034 if (server->addr.sin_addr.s_addr == INADDR_ANY) {
1035 dprintk("%s: mount program didn't pass remote address!\n",
1036 __FUNCTION__);
1037 goto out_err;
1038 }
1039
1040 /* Fire up rpciod if not yet running */
1041 error = rpciod_up();
1042 if (error < 0) {
1043 dprintk("%s: couldn't start rpciod! Error = %d\n",
1044 __FUNCTION__, error);
1045 goto out_err;
1046 } 630 }
1047 631
632 /* Get a superblock - note that we may end up sharing one that already exists */
1048 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 633 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1049 if (IS_ERR(s)) { 634 if (IS_ERR(s)) {
1050 error = PTR_ERR(s); 635 error = PTR_ERR(s);
1051 goto out_err_rpciod; 636 goto out_err_nosb;
1052 } 637 }
1053 638
1054 if (s->s_root) 639 if (s->s_fs_info != server) {
1055 goto out_rpciod_down; 640 nfs_free_server(server);
641 server = NULL;
642 }
1056 643
1057 s->s_flags = flags; 644 if (!s->s_root) {
645 /* initial superblock/root creation */
646 s->s_flags = flags;
647 nfs_fill_super(s, data);
648 }
1058 649
1059 error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); 650 mntroot = nfs_get_root(s, &mntfh);
1060 if (error) { 651 if (IS_ERR(mntroot)) {
1061 up_write(&s->s_umount); 652 error = PTR_ERR(mntroot);
1062 deactivate_super(s); 653 goto error_splat_super;
1063 return error;
1064 } 654 }
1065 s->s_flags |= MS_ACTIVE;
1066 return simple_set_mnt(mnt, s);
1067 655
1068out_rpciod_down: 656 s->s_flags |= MS_ACTIVE;
1069 rpciod_down(); 657 mnt->mnt_sb = s;
1070 kfree(server); 658 mnt->mnt_root = mntroot;
1071 return simple_set_mnt(mnt, s); 659 return 0;
1072 660
1073out_err_rpciod: 661out_err_nosb:
1074 rpciod_down(); 662 nfs_free_server(server);
1075out_err:
1076 kfree(server);
1077out_err_noserver: 663out_err_noserver:
1078 return error; 664 return error;
665
666error_splat_super:
667 up_write(&s->s_umount);
668 deactivate_super(s);
669 return error;
1079} 670}
1080 671
672/*
673 * Destroy an NFS2/3 superblock
674 */
1081static void nfs_kill_super(struct super_block *s) 675static void nfs_kill_super(struct super_block *s)
1082{ 676{
1083 struct nfs_server *server = NFS_SB(s); 677 struct nfs_server *server = NFS_SB(s);
1084 678
1085 kill_anon_super(s); 679 kill_anon_super(s);
1086 680 nfs_free_server(server);
1087 if (!IS_ERR(server->client))
1088 rpc_shutdown_client(server->client);
1089 if (!IS_ERR(server->client_sys))
1090 rpc_shutdown_client(server->client_sys);
1091 if (!IS_ERR(server->client_acl))
1092 rpc_shutdown_client(server->client_acl);
1093
1094 if (!(server->flags & NFS_MOUNT_NONLM))
1095 lockd_down(); /* release rpc.lockd */
1096
1097 rpciod_down(); /* release rpciod */
1098
1099 nfs_free_iostats(server->io_stats);
1100 kfree(server->hostname);
1101 kfree(server);
1102 nfs_release_automount_timer();
1103}
1104
1105static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
1106{
1107 struct super_block *sb;
1108
1109 server->fsid = data->fattr->fsid;
1110 nfs_copy_fh(&server->fh, data->fh);
1111 sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1112 if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
1113 lockd_up();
1114 return sb;
1115} 681}
1116 682
1117static int nfs_clone_nfs_sb(struct file_system_type *fs_type, 683/*
1118 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 684 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
685 */
686static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
687 const char *dev_name, void *raw_data,
688 struct vfsmount *mnt)
1119{ 689{
1120 struct nfs_clone_mount *data = raw_data; 690 struct nfs_clone_mount *data = raw_data;
1121 return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); 691 struct super_block *s;
1122} 692 struct nfs_server *server;
693 struct dentry *mntroot;
694 int error;
1123 695
1124#ifdef CONFIG_NFS_V4 696 dprintk("--> nfs_xdev_get_sb()\n");
1125static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
1126 struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
1127{
1128 struct nfs4_client *clp;
1129 struct rpc_xprt *xprt = NULL;
1130 struct rpc_clnt *clnt = NULL;
1131 int err = -EIO;
1132
1133 clp = nfs4_get_client(&server->addr.sin_addr);
1134 if (!clp) {
1135 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
1136 return ERR_PTR(err);
1137 }
1138 697
1139 /* Now create transport and client */ 698 /* create a new volume representation */
1140 down_write(&clp->cl_sem); 699 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
1141 if (IS_ERR(clp->cl_rpcclient)) { 700 if (IS_ERR(server)) {
1142 xprt = xprt_create_proto(proto, &server->addr, timeparms); 701 error = PTR_ERR(server);
1143 if (IS_ERR(xprt)) { 702 goto out_err_noserver;
1144 up_write(&clp->cl_sem);
1145 err = PTR_ERR(xprt);
1146 dprintk("%s: cannot create RPC transport. Error = %d\n",
1147 __FUNCTION__, err);
1148 goto out_fail;
1149 }
1150 /* Bind to a reserved port! */
1151 xprt->resvport = 1;
1152 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
1153 server->rpc_ops->version, flavor);
1154 if (IS_ERR(clnt)) {
1155 up_write(&clp->cl_sem);
1156 err = PTR_ERR(clnt);
1157 dprintk("%s: cannot create RPC client. Error = %d\n",
1158 __FUNCTION__, err);
1159 goto out_fail;
1160 }
1161 clnt->cl_intr = 1;
1162 clnt->cl_softrtry = 1;
1163 clp->cl_rpcclient = clnt;
1164 memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
1165 nfs_idmap_new(clp);
1166 }
1167 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1168 clnt = rpc_clone_client(clp->cl_rpcclient);
1169 if (!IS_ERR(clnt))
1170 server->nfs4_state = clp;
1171 up_write(&clp->cl_sem);
1172 clp = NULL;
1173
1174 if (IS_ERR(clnt)) {
1175 dprintk("%s: cannot create RPC client. Error = %d\n",
1176 __FUNCTION__, err);
1177 return clnt;
1178 } 703 }
1179 704
1180 if (server->nfs4_state->cl_idmap == NULL) { 705 /* Get a superblock - note that we may end up sharing one that already exists */
1181 dprintk("%s: failed to create idmapper.\n", __FUNCTION__); 706 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1182 return ERR_PTR(-ENOMEM); 707 if (IS_ERR(s)) {
708 error = PTR_ERR(s);
709 goto out_err_nosb;
1183 } 710 }
1184 711
1185 if (clnt->cl_auth->au_flavor != flavor) { 712 if (s->s_fs_info != server) {
1186 struct rpc_auth *auth; 713 nfs_free_server(server);
1187 714 server = NULL;
1188 auth = rpcauth_create(flavor, clnt);
1189 if (IS_ERR(auth)) {
1190 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
1191 return (struct rpc_clnt *)auth;
1192 }
1193 } 715 }
1194 return clnt;
1195
1196 out_fail:
1197 if (clp)
1198 nfs4_put_client(clp);
1199 return ERR_PTR(err);
1200}
1201
1202/*
1203 * Set up an NFS4 superblock
1204 */
1205static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
1206{
1207 struct nfs_server *server;
1208 struct rpc_timeout timeparms;
1209 rpc_authflavor_t authflavour;
1210 int err = -EIO;
1211 716
1212 sb->s_blocksize_bits = 0; 717 if (!s->s_root) {
1213 sb->s_blocksize = 0; 718 /* initial superblock/root creation */
1214 server = NFS_SB(sb); 719 s->s_flags = flags;
1215 if (data->rsize != 0) 720 nfs_clone_super(s, data->sb);
1216 server->rsize = nfs_block_size(data->rsize, NULL); 721 }
1217 if (data->wsize != 0)
1218 server->wsize = nfs_block_size(data->wsize, NULL);
1219 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
1220 server->caps = NFS_CAP_ATOMIC_OPEN;
1221 722
1222 server->acregmin = data->acregmin*HZ; 723 mntroot = nfs_get_root(s, data->fh);
1223 server->acregmax = data->acregmax*HZ; 724 if (IS_ERR(mntroot)) {
1224 server->acdirmin = data->acdirmin*HZ; 725 error = PTR_ERR(mntroot);
1225 server->acdirmax = data->acdirmax*HZ; 726 goto error_splat_super;
727 }
1226 728
1227 server->rpc_ops = &nfs_v4_clientops; 729 s->s_flags |= MS_ACTIVE;
730 mnt->mnt_sb = s;
731 mnt->mnt_root = mntroot;
1228 732
1229 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); 733 dprintk("<-- nfs_xdev_get_sb() = 0\n");
734 return 0;
1230 735
1231 server->retrans_timeo = timeparms.to_initval; 736out_err_nosb:
1232 server->retrans_count = timeparms.to_retries; 737 nfs_free_server(server);
738out_err_noserver:
739 dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
740 return error;
1233 741
1234 /* Now create transport and client */ 742error_splat_super:
1235 authflavour = RPC_AUTH_UNIX; 743 up_write(&s->s_umount);
1236 if (data->auth_flavourlen != 0) { 744 deactivate_super(s);
1237 if (data->auth_flavourlen != 1) { 745 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
1238 dprintk("%s: Invalid number of RPC auth flavours %d.\n", 746 return error;
1239 __FUNCTION__, data->auth_flavourlen); 747}
1240 err = -EINVAL;
1241 goto out_fail;
1242 }
1243 if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
1244 err = -EFAULT;
1245 goto out_fail;
1246 }
1247 }
1248 748
1249 server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); 749#ifdef CONFIG_NFS_V4
1250 if (IS_ERR(server->client)) {
1251 err = PTR_ERR(server->client);
1252 dprintk("%s: cannot create RPC client. Error = %d\n",
1253 __FUNCTION__, err);
1254 goto out_fail;
1255 }
1256 750
751/*
752 * Finish setting up a cloned NFS4 superblock
753 */
754static void nfs4_clone_super(struct super_block *sb,
755 const struct super_block *old_sb)
756{
757 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
758 sb->s_blocksize = old_sb->s_blocksize;
759 sb->s_maxbytes = old_sb->s_maxbytes;
1257 sb->s_time_gran = 1; 760 sb->s_time_gran = 1;
1258 761 sb->s_op = old_sb->s_op;
1259 sb->s_op = &nfs4_sops; 762 nfs_initialise_sb(sb);
1260 err = nfs_sb_init(sb, authflavour);
1261
1262 out_fail:
1263 return err;
1264} 763}
1265 764
1266static int nfs4_compare_super(struct super_block *sb, void *data) 765/*
766 * Set up an NFS4 superblock
767 */
768static void nfs4_fill_super(struct super_block *sb)
1267{ 769{
1268 struct nfs_server *server = data; 770 sb->s_time_gran = 1;
1269 struct nfs_server *old = NFS_SB(sb); 771 sb->s_op = &nfs4_sops;
1270 772 nfs_initialise_sb(sb);
1271 if (strcmp(server->hostname, old->hostname) != 0)
1272 return 0;
1273 if (strcmp(server->mnt_path, old->mnt_path) != 0)
1274 return 0;
1275 return 1;
1276} 773}
1277 774
1278static void * 775static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1279nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1280{ 776{
1281 void *p = NULL; 777 void *p = NULL;
1282 778
@@ -1297,14 +793,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1297 return dst; 793 return dst;
1298} 794}
1299 795
796/*
797 * Get the superblock for an NFS4 mountpoint
798 */
1300static int nfs4_get_sb(struct file_system_type *fs_type, 799static int nfs4_get_sb(struct file_system_type *fs_type,
1301 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 800 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1302{ 801{
1303 int error;
1304 struct nfs_server *server;
1305 struct super_block *s;
1306 struct nfs4_mount_data *data = raw_data; 802 struct nfs4_mount_data *data = raw_data;
803 struct super_block *s;
804 struct nfs_server *server;
805 struct sockaddr_in addr;
806 rpc_authflavor_t authflavour;
807 struct nfs_fh mntfh;
808 struct dentry *mntroot;
809 char *mntpath = NULL, *hostname = NULL, ip_addr[16];
1307 void *p; 810 void *p;
811 int error;
1308 812
1309 if (data == NULL) { 813 if (data == NULL) {
1310 dprintk("%s: missing data argument\n", __FUNCTION__); 814 dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1315,84 +819,112 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1315 return -EINVAL; 819 return -EINVAL;
1316 } 820 }
1317 821
1318 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); 822 /* We now require that the mount process passes the remote address */
1319 if (!server) 823 if (data->host_addrlen != sizeof(addr))
1320 return -ENOMEM; 824 return -EINVAL;
1321 /* Zero out the NFS state stuff */ 825
1322 init_nfsv4_state(server); 826 if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
1323 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); 827 return -EFAULT;
828
829 if (addr.sin_family != AF_INET ||
830 addr.sin_addr.s_addr == INADDR_ANY
831 ) {
832 dprintk("%s: mount program didn't pass remote IP address!\n",
833 __FUNCTION__);
834 return -EINVAL;
835 }
836 /* RFC3530: The default port for NFS is 2049 */
837 if (addr.sin_port == 0)
838 addr.sin_port = NFS_PORT;
839
840 /* Grab the authentication type */
841 authflavour = RPC_AUTH_UNIX;
842 if (data->auth_flavourlen != 0) {
843 if (data->auth_flavourlen != 1) {
844 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
845 __FUNCTION__, data->auth_flavourlen);
846 error = -EINVAL;
847 goto out_err_noserver;
848 }
849
850 if (copy_from_user(&authflavour, data->auth_flavours,
851 sizeof(authflavour))) {
852 error = -EFAULT;
853 goto out_err_noserver;
854 }
855 }
1324 856
1325 p = nfs_copy_user_string(NULL, &data->hostname, 256); 857 p = nfs_copy_user_string(NULL, &data->hostname, 256);
1326 if (IS_ERR(p)) 858 if (IS_ERR(p))
1327 goto out_err; 859 goto out_err;
1328 server->hostname = p; 860 hostname = p;
1329 861
1330 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); 862 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
1331 if (IS_ERR(p)) 863 if (IS_ERR(p))
1332 goto out_err; 864 goto out_err;
1333 server->mnt_path = p; 865 mntpath = p;
866
867 dprintk("MNTPATH: %s\n", mntpath);
1334 868
1335 p = nfs_copy_user_string(server->ip_addr, &data->client_addr, 869 p = nfs_copy_user_string(ip_addr, &data->client_addr,
1336 sizeof(server->ip_addr) - 1); 870 sizeof(ip_addr) - 1);
1337 if (IS_ERR(p)) 871 if (IS_ERR(p))
1338 goto out_err; 872 goto out_err;
1339 873
1340 /* We now require that the mount process passes the remote address */ 874 /* Get a volume representation */
1341 if (data->host_addrlen != sizeof(server->addr)) { 875 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
1342 error = -EINVAL; 876 authflavour, &mntfh);
1343 goto out_free; 877 if (IS_ERR(server)) {
1344 } 878 error = PTR_ERR(server);
1345 if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { 879 goto out_err_noserver;
1346 error = -EFAULT;
1347 goto out_free;
1348 }
1349 if (server->addr.sin_family != AF_INET ||
1350 server->addr.sin_addr.s_addr == INADDR_ANY) {
1351 dprintk("%s: mount program didn't pass remote IP address!\n",
1352 __FUNCTION__);
1353 error = -EINVAL;
1354 goto out_free;
1355 }
1356
1357 /* Fire up rpciod if not yet running */
1358 error = rpciod_up();
1359 if (error < 0) {
1360 dprintk("%s: couldn't start rpciod! Error = %d\n",
1361 __FUNCTION__, error);
1362 goto out_free;
1363 } 880 }
1364 881
1365 s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); 882 /* Get a superblock - note that we may end up sharing one that already exists */
1366 883 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1367 if (IS_ERR(s)) { 884 if (IS_ERR(s)) {
1368 error = PTR_ERR(s); 885 error = PTR_ERR(s);
1369 goto out_free; 886 goto out_free;
1370 } 887 }
1371 888
1372 if (s->s_root) { 889 if (s->s_fs_info != server) {
1373 kfree(server->mnt_path); 890 nfs_free_server(server);
1374 kfree(server->hostname); 891 server = NULL;
1375 kfree(server);
1376 return simple_set_mnt(mnt, s);
1377 } 892 }
1378 893
1379 s->s_flags = flags; 894 if (!s->s_root) {
895 /* initial superblock/root creation */
896 s->s_flags = flags;
897 nfs4_fill_super(s);
898 }
1380 899
1381 error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); 900 mntroot = nfs4_get_root(s, &mntfh);
1382 if (error) { 901 if (IS_ERR(mntroot)) {
1383 up_write(&s->s_umount); 902 error = PTR_ERR(mntroot);
1384 deactivate_super(s); 903 goto error_splat_super;
1385 return error;
1386 } 904 }
905
1387 s->s_flags |= MS_ACTIVE; 906 s->s_flags |= MS_ACTIVE;
1388 return simple_set_mnt(mnt, s); 907 mnt->mnt_sb = s;
908 mnt->mnt_root = mntroot;
909 kfree(mntpath);
910 kfree(hostname);
911 return 0;
912
1389out_err: 913out_err:
1390 error = PTR_ERR(p); 914 error = PTR_ERR(p);
915 goto out_err_noserver;
916
1391out_free: 917out_free:
1392 kfree(server->mnt_path); 918 nfs_free_server(server);
1393 kfree(server->hostname); 919out_err_noserver:
1394 kfree(server); 920 kfree(mntpath);
921 kfree(hostname);
1395 return error; 922 return error;
923
924error_splat_super:
925 up_write(&s->s_umount);
926 deactivate_super(s);
927 goto out_err_noserver;
1396} 928}
1397 929
1398static void nfs4_kill_super(struct super_block *sb) 930static void nfs4_kill_super(struct super_block *sb)
@@ -1403,135 +935,140 @@ static void nfs4_kill_super(struct super_block *sb)
1403 kill_anon_super(sb); 935 kill_anon_super(sb);
1404 936
1405 nfs4_renewd_prepare_shutdown(server); 937 nfs4_renewd_prepare_shutdown(server);
938 nfs_free_server(server);
939}
940
941/*
942 * Clone an NFS4 server record on xdev traversal (FSID-change)
943 */
944static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
945 const char *dev_name, void *raw_data,
946 struct vfsmount *mnt)
947{
948 struct nfs_clone_mount *data = raw_data;
949 struct super_block *s;
950 struct nfs_server *server;
951 struct dentry *mntroot;
952 int error;
953
954 dprintk("--> nfs4_xdev_get_sb()\n");
955
956 /* create a new volume representation */
957 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
958 if (IS_ERR(server)) {
959 error = PTR_ERR(server);
960 goto out_err_noserver;
961 }
962
963 /* Get a superblock - note that we may end up sharing one that already exists */
964 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
965 if (IS_ERR(s)) {
966 error = PTR_ERR(s);
967 goto out_err_nosb;
968 }
1406 969
1407 if (server->client != NULL && !IS_ERR(server->client)) 970 if (s->s_fs_info != server) {
1408 rpc_shutdown_client(server->client); 971 nfs_free_server(server);
972 server = NULL;
973 }
1409 974
1410 destroy_nfsv4_state(server); 975 if (!s->s_root) {
976 /* initial superblock/root creation */
977 s->s_flags = flags;
978 nfs4_clone_super(s, data->sb);
979 }
980
981 mntroot = nfs4_get_root(s, data->fh);
982 if (IS_ERR(mntroot)) {
983 error = PTR_ERR(mntroot);
984 goto error_splat_super;
985 }
1411 986
1412 rpciod_down(); 987 s->s_flags |= MS_ACTIVE;
988 mnt->mnt_sb = s;
989 mnt->mnt_root = mntroot;
990
991 dprintk("<-- nfs4_xdev_get_sb() = 0\n");
992 return 0;
993
994out_err_nosb:
995 nfs_free_server(server);
996out_err_noserver:
997 dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
998 return error;
1413 999
1414 nfs_free_iostats(server->io_stats); 1000error_splat_super:
1415 kfree(server->hostname); 1001 up_write(&s->s_umount);
1416 kfree(server); 1002 deactivate_super(s);
1417 nfs_release_automount_timer(); 1003 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
1004 return error;
1418} 1005}
1419 1006
1420/* 1007/*
1421 * Constructs the SERVER-side path 1008 * Create an NFS4 server record on referral traversal
1422 */ 1009 */
1423static inline char *nfs4_dup_path(const struct dentry *dentry) 1010static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1011 const char *dev_name, void *raw_data,
1012 struct vfsmount *mnt)
1424{ 1013{
1425 char *page = (char *) __get_free_page(GFP_USER); 1014 struct nfs_clone_mount *data = raw_data;
1426 char *path; 1015 struct super_block *s;
1016 struct nfs_server *server;
1017 struct dentry *mntroot;
1018 struct nfs_fh mntfh;
1019 int error;
1427 1020
1428 path = nfs4_path(dentry, page, PAGE_SIZE); 1021 dprintk("--> nfs4_referral_get_sb()\n");
1429 if (!IS_ERR(path)) {
1430 int len = PAGE_SIZE + page - path;
1431 char *tmp = path;
1432 1022
1433 path = kmalloc(len, GFP_KERNEL); 1023 /* create a new volume representation */
1434 if (path) 1024 server = nfs4_create_referral_server(data, &mntfh);
1435 memcpy(path, tmp, len); 1025 if (IS_ERR(server)) {
1436 else 1026 error = PTR_ERR(server);
1437 path = ERR_PTR(-ENOMEM); 1027 goto out_err_noserver;
1438 } 1028 }
1439 free_page((unsigned long)page);
1440 return path;
1441}
1442 1029
1443static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) 1030 /* Get a superblock - note that we may end up sharing one that already exists */
1444{ 1031 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1445 const struct dentry *dentry = data->dentry; 1032 if (IS_ERR(s)) {
1446 struct nfs4_client *clp = server->nfs4_state; 1033 error = PTR_ERR(s);
1447 struct super_block *sb; 1034 goto out_err_nosb;
1448
1449 server->fsid = data->fattr->fsid;
1450 nfs_copy_fh(&server->fh, data->fh);
1451 server->mnt_path = nfs4_dup_path(dentry);
1452 if (IS_ERR(server->mnt_path)) {
1453 sb = (struct super_block *)server->mnt_path;
1454 goto err;
1455 } 1035 }
1456 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1457 if (IS_ERR(sb) || sb->s_root)
1458 goto free_path;
1459 nfs4_server_capabilities(server, &server->fh);
1460
1461 down_write(&clp->cl_sem);
1462 atomic_inc(&clp->cl_count);
1463 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1464 up_write(&clp->cl_sem);
1465 return sb;
1466free_path:
1467 kfree(server->mnt_path);
1468err:
1469 server->mnt_path = NULL;
1470 return sb;
1471}
1472 1036
1473static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, 1037 if (s->s_fs_info != server) {
1474 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1038 nfs_free_server(server);
1475{ 1039 server = NULL;
1476 struct nfs_clone_mount *data = raw_data; 1040 }
1477 return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
1478}
1479 1041
1480static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) 1042 if (!s->s_root) {
1481{ 1043 /* initial superblock/root creation */
1482 struct super_block *sb = ERR_PTR(-ENOMEM); 1044 s->s_flags = flags;
1483 int len; 1045 nfs4_fill_super(s);
1484 1046 }
1485 len = strlen(data->mnt_path) + 1;
1486 server->mnt_path = kmalloc(len, GFP_KERNEL);
1487 if (server->mnt_path == NULL)
1488 goto err;
1489 memcpy(server->mnt_path, data->mnt_path, len);
1490 memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
1491
1492 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1493 if (IS_ERR(sb) || sb->s_root)
1494 goto free_path;
1495 return sb;
1496free_path:
1497 kfree(server->mnt_path);
1498err:
1499 server->mnt_path = NULL;
1500 return sb;
1501}
1502 1047
1503static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) 1048 mntroot = nfs4_get_root(s, data->fh);
1504{ 1049 if (IS_ERR(mntroot)) {
1505 struct nfs_server *server = NFS_SB(sb); 1050 error = PTR_ERR(mntroot);
1506 struct rpc_timeout timeparms; 1051 goto error_splat_super;
1507 int proto, timeo, retrans; 1052 }
1508 void *err;
1509
1510 proto = IPPROTO_TCP;
1511 /* Since we are following a referral and there may be alternatives,
1512 set the timeouts and retries to low values */
1513 timeo = 2;
1514 retrans = 1;
1515 nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
1516
1517 server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
1518 if (IS_ERR((err = server->client)))
1519 goto out_err;
1520 1053
1521 sb->s_time_gran = 1; 1054 s->s_flags |= MS_ACTIVE;
1522 sb->s_op = &nfs4_sops; 1055 mnt->mnt_sb = s;
1523 err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); 1056 mnt->mnt_root = mntroot;
1524 if (!IS_ERR(err))
1525 return server;
1526out_err:
1527 return (struct nfs_server *)err;
1528}
1529 1057
1530static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, 1058 dprintk("<-- nfs4_referral_get_sb() = 0\n");
1531 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1059 return 0;
1532{ 1060
1533 struct nfs_clone_mount *data = raw_data; 1061out_err_nosb:
1534 return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt); 1062 nfs_free_server(server);
1063out_err_noserver:
1064 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
1065 return error;
1066
1067error_splat_super:
1068 up_write(&s->s_umount);
1069 deactivate_super(s);
1070 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
1071 return error;
1535} 1072}
1536 1073
1537#endif 1074#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7084ac9a6455..b674462793d3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
396out: 396out:
397 clear_bit(BDI_write_congested, &bdi->state); 397 clear_bit(BDI_write_congested, &bdi->state);
398 wake_up_all(&nfs_write_congestion); 398 wake_up_all(&nfs_write_congestion);
399 writeback_congestion_end();
399 return err; 400 return err;
400} 401}
401 402
@@ -1252,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1252 dprintk("NFS: %4d nfs_writeback_done (status %d)\n", 1253 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1253 task->tk_pid, task->tk_status); 1254 task->tk_pid, task->tk_status);
1254 1255
1255 /* Call the NFS version-specific code */ 1256 /*
1257 * ->write_done will attempt to use post-op attributes to detect
1258 * conflicting writes by other clients. A strict interpretation
1259 * of close-to-open would allow us to continue caching even if
1260 * another writer had changed the file, but some applications
1261 * depend on tighter cache coherency when writing.
1262 */
1256 status = NFS_PROTO(data->inode)->write_done(task, data); 1263 status = NFS_PROTO(data->inode)->write_done(task, data);
1257 if (status != 0) 1264 if (status != 0)
1258 return status; 1265 return status;
@@ -1273,7 +1280,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1273 if (time_before(complain, jiffies)) { 1280 if (time_before(complain, jiffies)) {
1274 dprintk("NFS: faulty NFS server %s:" 1281 dprintk("NFS: faulty NFS server %s:"
1275 " (committed = %d) != (stable = %d)\n", 1282 " (committed = %d) != (stable = %d)\n",
1276 NFS_SERVER(data->inode)->hostname, 1283 NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1277 resp->verf->committed, argp->stable); 1284 resp->verf->committed, argp->stable);
1278 complain = jiffies + 300 * HZ; 1285 complain = jiffies + 300 * HZ;
1279 } 1286 }
@@ -1558,7 +1565,6 @@ void nfs_destroy_writepagecache(void)
1558{ 1565{
1559 mempool_destroy(nfs_commit_mempool); 1566 mempool_destroy(nfs_commit_mempool);
1560 mempool_destroy(nfs_wdata_mempool); 1567 mempool_destroy(nfs_wdata_mempool);
1561 if (kmem_cache_destroy(nfs_wdata_cachep)) 1568 kmem_cache_destroy(nfs_wdata_cachep);
1562 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
1563} 1569}
1564 1570
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1d2e3a..8583d99ee740 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
375{ 375{
376 struct sockaddr_in addr; 376 struct sockaddr_in addr;
377 struct nfs4_callback *cb = &clp->cl_callback; 377 struct nfs4_callback *cb = &clp->cl_callback;
378 struct rpc_timeout timeparms; 378 struct rpc_timeout timeparms = {
379 struct rpc_xprt * xprt; 379 .to_initval = (NFSD_LEASE_TIME/4) * HZ,
380 .to_retries = 5,
381 .to_maxval = (NFSD_LEASE_TIME/2) * HZ,
382 .to_exponential = 1,
383 };
380 struct rpc_program * program = &cb->cb_program; 384 struct rpc_program * program = &cb->cb_program;
381 struct rpc_stat * stat = &cb->cb_stat; 385 struct rpc_create_args args = {
382 struct rpc_clnt * clnt; 386 .protocol = IPPROTO_TCP,
387 .address = (struct sockaddr *)&addr,
388 .addrsize = sizeof(addr),
389 .timeout = &timeparms,
390 .servername = clp->cl_name.data,
391 .program = program,
392 .version = nfs_cb_version[1]->number,
393 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
394 .flags = (RPC_CLNT_CREATE_NOPING),
395 };
383 struct rpc_message msg = { 396 struct rpc_message msg = {
384 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 397 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
385 .rpc_argp = clp, 398 .rpc_argp = clp,
386 }; 399 };
387 char hostname[32];
388 int status; 400 int status;
389 401
390 if (atomic_read(&cb->cb_set)) 402 if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
396 addr.sin_port = htons(cb->cb_port); 408 addr.sin_port = htons(cb->cb_port);
397 addr.sin_addr.s_addr = htonl(cb->cb_addr); 409 addr.sin_addr.s_addr = htonl(cb->cb_addr);
398 410
399 /* Initialize timeout */
400 timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
401 timeparms.to_retries = 0;
402 timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
403 timeparms.to_exponential = 1;
404
405 /* Create RPC transport */
406 xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
407 if (IS_ERR(xprt)) {
408 dprintk("NFSD: couldn't create callback transport!\n");
409 goto out_err;
410 }
411
412 /* Initialize rpc_program */ 411 /* Initialize rpc_program */
413 program->name = "nfs4_cb"; 412 program->name = "nfs4_cb";
414 program->number = cb->cb_prog; 413 program->number = cb->cb_prog;
415 program->nrvers = ARRAY_SIZE(nfs_cb_version); 414 program->nrvers = ARRAY_SIZE(nfs_cb_version);
416 program->version = nfs_cb_version; 415 program->version = nfs_cb_version;
417 program->stats = stat; 416 program->stats = &cb->cb_stat;
418 417
419 /* Initialize rpc_stat */ 418 /* Initialize rpc_stat */
420 memset(stat, 0, sizeof(struct rpc_stat)); 419 memset(program->stats, 0, sizeof(cb->cb_stat));
421 stat->program = program; 420 program->stats->program = program;
422 421
423 /* Create RPC client 422 /* Create RPC client */
424 * 423 cb->cb_client = rpc_create(&args);
425 * XXX AUTH_UNIX only - need AUTH_GSS.... 424 if (!cb->cb_client) {
426 */
427 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
428 clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
429 if (IS_ERR(clnt)) {
430 dprintk("NFSD: couldn't create callback client\n"); 425 dprintk("NFSD: couldn't create callback client\n");
431 goto out_err; 426 goto out_err;
432 } 427 }
433 clnt->cl_intr = 0;
434 clnt->cl_softrtry = 1;
435 428
436 /* Kick rpciod, put the call on the wire. */ 429 /* Kick rpciod, put the call on the wire. */
437 430 if (rpciod_up() != 0)
438 if (rpciod_up() != 0) {
439 dprintk("nfsd: couldn't start rpciod for callbacks!\n");
440 goto out_clnt; 431 goto out_clnt;
441 }
442
443 cb->cb_client = clnt;
444 432
445 /* the task holds a reference to the nfs4_client struct */ 433 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 434 atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 436 msg.rpc_cred = nfsd4_lookupcred(clp,0);
449 if (IS_ERR(msg.rpc_cred)) 437 if (IS_ERR(msg.rpc_cred))
450 goto out_rpciod; 438 goto out_rpciod;
451 status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); 439 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
452 put_rpccred(msg.rpc_cred); 440 put_rpccred(msg.rpc_cred);
453 441
454 if (status != 0) { 442 if (status != 0) {
@@ -462,7 +450,7 @@ out_rpciod:
462 rpciod_down(); 450 rpciod_down();
463 cb->cb_client = NULL; 451 cb->cb_client = NULL;
464out_clnt: 452out_clnt:
465 rpc_shutdown_client(clnt); 453 rpc_shutdown_client(cb->cb_client);
466out_err: 454out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 455 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 456 (int)clp->cl_name.len, clp->cl_name.data);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index bea6b9478114..b1902ebaab41 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -573,10 +573,9 @@ idmap_lookup(struct svc_rqst *rqstp,
573 struct idmap_defer_req *mdr; 573 struct idmap_defer_req *mdr;
574 int ret; 574 int ret;
575 575
576 mdr = kmalloc(sizeof(*mdr), GFP_KERNEL); 576 mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
577 if (!mdr) 577 if (!mdr)
578 return -ENOMEM; 578 return -ENOMEM;
579 memset(mdr, 0, sizeof(*mdr));
580 atomic_set(&mdr->count, 1); 579 atomic_set(&mdr->count, 1);
581 init_waitqueue_head(&mdr->waitq); 580 init_waitqueue_head(&mdr->waitq);
582 mdr->req.defer = idmap_defer; 581 mdr->req.defer = idmap_defer;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9daa0b9feb8d..ebcf226a9e4a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -339,8 +339,7 @@ alloc_client(struct xdr_netobj name)
339{ 339{
340 struct nfs4_client *clp; 340 struct nfs4_client *clp;
341 341
342 if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { 342 if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
343 memset(clp, 0, sizeof(*clp));
344 if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { 343 if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
345 memcpy(clp->cl_name.data, name.data, name.len); 344 memcpy(clp->cl_name.data, name.data, name.len);
346 clp->cl_name.len = name.len; 345 clp->cl_name.len = name.len;
@@ -1006,13 +1005,10 @@ alloc_init_file(struct inode *ino)
1006static void 1005static void
1007nfsd4_free_slab(kmem_cache_t **slab) 1006nfsd4_free_slab(kmem_cache_t **slab)
1008{ 1007{
1009 int status;
1010
1011 if (*slab == NULL) 1008 if (*slab == NULL)
1012 return; 1009 return;
1013 status = kmem_cache_destroy(*slab); 1010 kmem_cache_destroy(*slab);
1014 *slab = NULL; 1011 *slab = NULL;
1015 WARN_ON(status);
1016} 1012}
1017 1013
1018static void 1014static void
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index d1e2c6f9f05e..85c36b8ca452 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1149,8 +1149,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1149 * Allocate a buffer to store the current name being processed 1149 * Allocate a buffer to store the current name being processed
1150 * converted to format determined by current NLS. 1150 * converted to format determined by current NLS.
1151 */ 1151 */
1152 name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, 1152 name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
1153 GFP_NOFS);
1154 if (unlikely(!name)) { 1153 if (unlikely(!name)) {
1155 err = -ENOMEM; 1154 err = -ENOMEM;
1156 goto err_out; 1155 goto err_out;
@@ -1191,7 +1190,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1191 * map the mft record without deadlocking. 1190 * map the mft record without deadlocking.
1192 */ 1191 */
1193 rc = le32_to_cpu(ctx->attr->data.resident.value_length); 1192 rc = le32_to_cpu(ctx->attr->data.resident.value_length);
1194 ir = (INDEX_ROOT*)kmalloc(rc, GFP_NOFS); 1193 ir = kmalloc(rc, GFP_NOFS);
1195 if (unlikely(!ir)) { 1194 if (unlikely(!ir)) {
1196 err = -ENOMEM; 1195 err = -ENOMEM;
1197 goto err_out; 1196 goto err_out;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d313f356e66a..933dbd89c2a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -137,7 +137,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
137 137
138 BUG_ON(!na->name); 138 BUG_ON(!na->name);
139 i = na->name_len * sizeof(ntfschar); 139 i = na->name_len * sizeof(ntfschar);
140 ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC); 140 ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
141 if (!ni->name) 141 if (!ni->name)
142 return -ENOMEM; 142 return -ENOMEM;
143 memcpy(ni->name, na->name, i); 143 memcpy(ni->name, na->name, i);
@@ -556,8 +556,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
556 556
557 /* Setup the generic vfs inode parts now. */ 557 /* Setup the generic vfs inode parts now. */
558 558
559 /* This is the optimal IO size (for stat), not the fs block size. */
560 vi->i_blksize = PAGE_CACHE_SIZE;
561 /* 559 /*
562 * This is for checking whether an inode has changed w.r.t. a file so 560 * This is for checking whether an inode has changed w.r.t. a file so
563 * that the file can be updated if necessary (compare with f_version). 561 * that the file can be updated if necessary (compare with f_version).
@@ -1234,7 +1232,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1234 base_ni = NTFS_I(base_vi); 1232 base_ni = NTFS_I(base_vi);
1235 1233
1236 /* Just mirror the values from the base inode. */ 1234 /* Just mirror the values from the base inode. */
1237 vi->i_blksize = base_vi->i_blksize;
1238 vi->i_version = base_vi->i_version; 1235 vi->i_version = base_vi->i_version;
1239 vi->i_uid = base_vi->i_uid; 1236 vi->i_uid = base_vi->i_uid;
1240 vi->i_gid = base_vi->i_gid; 1237 vi->i_gid = base_vi->i_gid;
@@ -1504,7 +1501,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1504 ni = NTFS_I(vi); 1501 ni = NTFS_I(vi);
1505 base_ni = NTFS_I(base_vi); 1502 base_ni = NTFS_I(base_vi);
1506 /* Just mirror the values from the base inode. */ 1503 /* Just mirror the values from the base inode. */
1507 vi->i_blksize = base_vi->i_blksize;
1508 vi->i_version = base_vi->i_version; 1504 vi->i_version = base_vi->i_version;
1509 vi->i_uid = base_vi->i_uid; 1505 vi->i_uid = base_vi->i_uid;
1510 vi->i_gid = base_vi->i_gid; 1506 vi->i_gid = base_vi->i_gid;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2438c00ec0ce..584260fd6848 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -331,7 +331,7 @@ map_err_out:
331 ntfs_inode **tmp; 331 ntfs_inode **tmp;
332 int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); 332 int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
333 333
334 tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS); 334 tmp = kmalloc(new_size, GFP_NOFS);
335 if (unlikely(!tmp)) { 335 if (unlikely(!tmp)) {
336 ntfs_error(base_ni->vol->sb, "Failed to allocate " 336 ntfs_error(base_ni->vol->sb, "Failed to allocate "
337 "internal buffer."); 337 "internal buffer.");
@@ -2638,11 +2638,6 @@ mft_rec_already_initialized:
2638 } 2638 }
2639 vi->i_ino = bit; 2639 vi->i_ino = bit;
2640 /* 2640 /*
2641 * This is the optimal IO size (for stat), not the fs block
2642 * size.
2643 */
2644 vi->i_blksize = PAGE_CACHE_SIZE;
2645 /*
2646 * This is for checking whether an inode has changed w.r.t. a 2641 * This is for checking whether an inode has changed w.r.t. a
2647 * file so that the file can be updated if necessary (compare 2642 * file so that the file can be updated if necessary (compare
2648 * with f_version). 2643 * with f_version).
@@ -2893,7 +2888,7 @@ rollback:
2893 if (!(base_ni->nr_extents & 3)) { 2888 if (!(base_ni->nr_extents & 3)) {
2894 int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); 2889 int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
2895 2890
2896 extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS); 2891 extent_nis = kmalloc(new_size, GFP_NOFS);
2897 if (unlikely(!extent_nis)) { 2892 if (unlikely(!extent_nis)) {
2898 ntfs_error(vol->sb, "Failed to allocate internal " 2893 ntfs_error(vol->sb, "Failed to allocate internal "
2899 "buffer during rollback.%s", es); 2894 "buffer during rollback.%s", es);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 74e0ee8fce72..6b2712f10dd2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3248,32 +3248,14 @@ ictx_err_out:
3248 3248
3249static void __exit exit_ntfs_fs(void) 3249static void __exit exit_ntfs_fs(void)
3250{ 3250{
3251 int err = 0;
3252
3253 ntfs_debug("Unregistering NTFS driver."); 3251 ntfs_debug("Unregistering NTFS driver.");
3254 3252
3255 unregister_filesystem(&ntfs_fs_type); 3253 unregister_filesystem(&ntfs_fs_type);
3256 3254 kmem_cache_destroy(ntfs_big_inode_cache);
3257 if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1)) 3255 kmem_cache_destroy(ntfs_inode_cache);
3258 printk(KERN_CRIT "NTFS: Failed to destory %s.\n", 3256 kmem_cache_destroy(ntfs_name_cache);
3259 ntfs_big_inode_cache_name); 3257 kmem_cache_destroy(ntfs_attr_ctx_cache);
3260 if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1)) 3258 kmem_cache_destroy(ntfs_index_ctx_cache);
3261 printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
3262 ntfs_inode_cache_name);
3263 if (kmem_cache_destroy(ntfs_name_cache) && (err = 1))
3264 printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
3265 ntfs_name_cache_name);
3266 if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1))
3267 printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
3268 ntfs_attr_ctx_cache_name);
3269 if (kmem_cache_destroy(ntfs_index_ctx_cache) && (err = 1))
3270 printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
3271 ntfs_index_ctx_cache_name);
3272 if (err)
3273 printk(KERN_CRIT "NTFS: This causes memory to leak! There is "
3274 "probably a BUG in the driver! Please report "
3275 "you saw this message to "
3276 "linux-ntfs-dev@lists.sourceforge.net\n");
3277 /* Unregister the ntfs sysctls. */ 3259 /* Unregister the ntfs sysctls. */
3278 ntfs_sysctl(0); 3260 ntfs_sysctl(0);
3279} 3261}
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index b123c0fa6bf6..a1b572196fe4 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -350,7 +350,7 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
350 } 350 }
351 if (!ns) { 351 if (!ns) {
352 ns_len = ins_len * NLS_MAX_CHARSET_SIZE; 352 ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
353 ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS); 353 ns = kmalloc(ns_len + 1, GFP_NOFS);
354 if (!ns) 354 if (!ns)
355 goto mem_err_out; 355 goto mem_err_out;
356 } 356 }
@@ -365,7 +365,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
365 else if (wc == -ENAMETOOLONG && ns != *outs) { 365 else if (wc == -ENAMETOOLONG && ns != *outs) {
366 unsigned char *tc; 366 unsigned char *tc;
367 /* Grow in multiples of 64 bytes. */ 367 /* Grow in multiples of 64 bytes. */
368 tc = (unsigned char*)kmalloc((ns_len + 64) & 368 tc = kmalloc((ns_len + 64) &
369 ~63, GFP_NOFS); 369 ~63, GFP_NOFS);
370 if (tc) { 370 if (tc) {
371 memcpy(tc, ns, ns_len); 371 memcpy(tc, ns, ns_len);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index ff9e2e2104c2..4b46aac7d243 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,17 @@
44 * locking semantics of the file system using the protocol. It should 44 * locking semantics of the file system using the protocol. It should
45 * be somewhere else, I'm sure, but right now it isn't. 45 * be somewhere else, I'm sure, but right now it isn't.
46 * 46 *
47 * New in version 4:
48 * - Remove i_generation from lock names for better stat performance.
49 *
50 * New in version 3:
51 * - Replace dentry votes with a cluster lock
52 *
47 * New in version 2: 53 * New in version 2:
48 * - full 64 bit i_size in the metadata lock lvbs 54 * - full 64 bit i_size in the metadata lock lvbs
49 * - introduction of "rw" lock and pushing meta/data locking down 55 * - introduction of "rw" lock and pushing meta/data locking down
50 */ 56 */
51#define O2NET_PROTOCOL_VERSION 2ULL 57#define O2NET_PROTOCOL_VERSION 4ULL
52struct o2net_handshake { 58struct o2net_handshake {
53 __be64 protocol_version; 59 __be64 protocol_version;
54 __be64 connector_id; 60 __be64 connector_id;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1a01380e3878..014e73978dac 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
35 35
36#include "alloc.h" 36#include "alloc.h"
37#include "dcache.h" 37#include "dcache.h"
38#include "dlmglue.h"
38#include "file.h" 39#include "file.h"
39#include "inode.h" 40#include "inode.h"
40 41
42
41static int ocfs2_dentry_revalidate(struct dentry *dentry, 43static int ocfs2_dentry_revalidate(struct dentry *dentry,
42 struct nameidata *nd) 44 struct nameidata *nd)
43{ 45{
44 struct inode *inode = dentry->d_inode; 46 struct inode *inode = dentry->d_inode;
45 int ret = 0; /* if all else fails, just return false */ 47 int ret = 0; /* if all else fails, just return false */
46 struct ocfs2_super *osb; 48 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
47 49
48 mlog_entry("(0x%p, '%.*s')\n", dentry, 50 mlog_entry("(0x%p, '%.*s')\n", dentry,
49 dentry->d_name.len, dentry->d_name.name); 51 dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
55 goto bail; 57 goto bail;
56 } 58 }
57 59
58 osb = OCFS2_SB(inode->i_sb);
59
60 BUG_ON(!osb); 60 BUG_ON(!osb);
61 61
62 if (inode != osb->root_inode) { 62 if (inode == osb->root_inode || is_bad_inode(inode))
63 spin_lock(&OCFS2_I(inode)->ip_lock); 63 goto bail;
64 /* did we or someone else delete this inode? */ 64
65 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 65 spin_lock(&OCFS2_I(inode)->ip_lock);
66 spin_unlock(&OCFS2_I(inode)->ip_lock); 66 /* did we or someone else delete this inode? */
67 mlog(0, "inode (%llu) deleted, returning false\n", 67 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
68 (unsigned long long)OCFS2_I(inode)->ip_blkno);
69 goto bail;
70 }
71 spin_unlock(&OCFS2_I(inode)->ip_lock); 68 spin_unlock(&OCFS2_I(inode)->ip_lock);
69 mlog(0, "inode (%llu) deleted, returning false\n",
70 (unsigned long long)OCFS2_I(inode)->ip_blkno);
71 goto bail;
72 }
73 spin_unlock(&OCFS2_I(inode)->ip_lock);
72 74
73 if (!inode->i_nlink) { 75 /*
74 mlog(0, "Inode %llu orphaned, returning false " 76 * We don't need a cluster lock to test this because once an
75 "dir = %d\n", 77 * inode nlink hits zero, it never goes back.
76 (unsigned long long)OCFS2_I(inode)->ip_blkno, 78 */
77 S_ISDIR(inode->i_mode)); 79 if (inode->i_nlink == 0) {
78 goto bail; 80 mlog(0, "Inode %llu orphaned, returning false "
79 } 81 "dir = %d\n",
82 (unsigned long long)OCFS2_I(inode)->ip_blkno,
83 S_ISDIR(inode->i_mode));
84 goto bail;
80 } 85 }
81 86
82 ret = 1; 87 ret = 1;
@@ -87,6 +92,322 @@ bail:
87 return ret; 92 return ret;
88} 93}
89 94
95static int ocfs2_match_dentry(struct dentry *dentry,
96 u64 parent_blkno,
97 int skip_unhashed)
98{
99 struct inode *parent;
100
101 /*
102 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
103 * to the lock data, so we skip those here, otherwise
104 * ocfs2_dentry_attach_lock() will get its original dentry
105 * back.
106 */
107 if (!dentry->d_fsdata)
108 return 0;
109
110 if (!dentry->d_parent)
111 return 0;
112
113 if (skip_unhashed && d_unhashed(dentry))
114 return 0;
115
116 parent = dentry->d_parent->d_inode;
117 /* Negative parent dentry? */
118 if (!parent)
119 return 0;
120
121 /* Name is in a different directory. */
122 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
123 return 0;
124
125 return 1;
126}
127
128/*
129 * Walk the inode alias list, and find a dentry which has a given
130 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
131 * is looking for a dentry_lock reference. The vote thread is looking
132 * to unhash aliases, so we allow it to skip any that already have
133 * that property.
134 */
135struct dentry *ocfs2_find_local_alias(struct inode *inode,
136 u64 parent_blkno,
137 int skip_unhashed)
138{
139 struct list_head *p;
140 struct dentry *dentry = NULL;
141
142 spin_lock(&dcache_lock);
143
144 list_for_each(p, &inode->i_dentry) {
145 dentry = list_entry(p, struct dentry, d_alias);
146
147 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
148 mlog(0, "dentry found: %.*s\n",
149 dentry->d_name.len, dentry->d_name.name);
150
151 dget_locked(dentry);
152 break;
153 }
154
155 dentry = NULL;
156 }
157
158 spin_unlock(&dcache_lock);
159
160 return dentry;
161}
162
163DEFINE_SPINLOCK(dentry_attach_lock);
164
165/*
166 * Attach this dentry to a cluster lock.
167 *
168 * Dentry locks cover all links in a given directory to a particular
169 * inode. We do this so that ocfs2 can build a lock name which all
170 * nodes in the cluster can agree on at all times. Shoving full names
171 * in the cluster lock won't work due to size restrictions. Covering
172 * links inside of a directory is a good compromise because it still
173 * allows us to use the parent directory lock to synchronize
174 * operations.
175 *
176 * Call this function with the parent dir semaphore and the parent dir
177 * cluster lock held.
178 *
179 * The dir semaphore will protect us from having to worry about
180 * concurrent processes on our node trying to attach a lock at the
181 * same time.
182 *
183 * The dir cluster lock (held at either PR or EX mode) protects us
184 * from unlink and rename on other nodes.
185 *
186 * A dput() can happen asynchronously due to pruning, so we cover
187 * attaching and detaching the dentry lock with a
188 * dentry_attach_lock.
189 *
190 * A node which has done lookup on a name retains a protected read
191 * lock until final dput. If the user requests and unlink or rename,
192 * the protected read is upgraded to an exclusive lock. Other nodes
193 * who have seen the dentry will then be informed that they need to
194 * downgrade their lock, which will involve d_delete on the
195 * dentry. This happens in ocfs2_dentry_convert_worker().
196 */
197int ocfs2_dentry_attach_lock(struct dentry *dentry,
198 struct inode *inode,
199 u64 parent_blkno)
200{
201 int ret;
202 struct dentry *alias;
203 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
204
205 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
206 dentry->d_name.len, dentry->d_name.name,
207 (unsigned long long)parent_blkno, dl);
208
209 /*
210 * Negative dentry. We ignore these for now.
211 *
212 * XXX: Could we can improve ocfs2_dentry_revalidate() by
213 * tracking these?
214 */
215 if (!inode)
216 return 0;
217
218 if (dl) {
219 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
220 " \"%.*s\": old parent: %llu, new: %llu\n",
221 dentry->d_name.len, dentry->d_name.name,
222 (unsigned long long)parent_blkno,
223 (unsigned long long)dl->dl_parent_blkno);
224 return 0;
225 }
226
227 alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
228 if (alias) {
229 /*
230 * Great, an alias exists, which means we must have a
231 * dentry lock already. We can just grab the lock off
232 * the alias and add it to the list.
233 *
234 * We're depending here on the fact that this dentry
235 * was found and exists in the dcache and so must have
236 * a reference to the dentry_lock because we can't
237 * race creates. Final dput() cannot happen on it
238 * since we have it pinned, so our reference is safe.
239 */
240 dl = alias->d_fsdata;
241 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
242 (unsigned long long)parent_blkno,
243 (unsigned long long)OCFS2_I(inode)->ip_blkno);
244
245 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
246 " \"%.*s\": old parent: %llu, new: %llu\n",
247 dentry->d_name.len, dentry->d_name.name,
248 (unsigned long long)parent_blkno,
249 (unsigned long long)dl->dl_parent_blkno);
250
251 mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
252
253 goto out_attach;
254 }
255
256 /*
257 * There are no other aliases
258 */
259 dl = kmalloc(sizeof(*dl), GFP_NOFS);
260 if (!dl) {
261 ret = -ENOMEM;
262 mlog_errno(ret);
263 return ret;
264 }
265
266 dl->dl_count = 0;
267 /*
268 * Does this have to happen below, for all attaches, in case
269 * the struct inode gets blown away by votes?
270 */
271 dl->dl_inode = igrab(inode);
272 dl->dl_parent_blkno = parent_blkno;
273 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
274
275out_attach:
276 spin_lock(&dentry_attach_lock);
277 dentry->d_fsdata = dl;
278 dl->dl_count++;
279 spin_unlock(&dentry_attach_lock);
280
281 /*
282 * This actually gets us our PRMODE level lock. From now on,
283 * we'll have a notification if one of these names is
284 * destroyed on another node.
285 */
286 ret = ocfs2_dentry_lock(dentry, 0);
287 if (!ret)
288 ocfs2_dentry_unlock(dentry, 0);
289 else
290 mlog_errno(ret);
291
292 dput(alias);
293
294 return ret;
295}
296
297/*
298 * ocfs2_dentry_iput() and friends.
299 *
300 * At this point, our particular dentry is detached from the inodes
301 * alias list, so there's no way that the locking code can find it.
302 *
303 * The interesting stuff happens when we determine that our lock needs
304 * to go away because this is the last subdir alias in the
305 * system. This function needs to handle a couple things:
306 *
307 * 1) Synchronizing lock shutdown with the downconvert threads. This
308 * is already handled for us via the lockres release drop function
309 * called in ocfs2_release_dentry_lock()
310 *
311 * 2) A race may occur when we're doing our lock shutdown and
312 * another process wants to create a new dentry lock. Right now we
313 * let them race, which means that for a very short while, this
314 * node might have two locks on a lock resource. This should be a
315 * problem though because one of them is in the process of being
316 * thrown out.
317 */
318static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
319 struct ocfs2_dentry_lock *dl)
320{
321 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
322 ocfs2_lock_res_free(&dl->dl_lockres);
323 iput(dl->dl_inode);
324 kfree(dl);
325}
326
327void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
328 struct ocfs2_dentry_lock *dl)
329{
330 int unlock = 0;
331
332 BUG_ON(dl->dl_count == 0);
333
334 spin_lock(&dentry_attach_lock);
335 dl->dl_count--;
336 unlock = !dl->dl_count;
337 spin_unlock(&dentry_attach_lock);
338
339 if (unlock)
340 ocfs2_drop_dentry_lock(osb, dl);
341}
342
343static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
344{
345 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
346
347 mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
348 "dentry: %.*s\n", dentry->d_name.len,
349 dentry->d_name.name);
350
351 if (!dl)
352 goto out;
353
354 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
355 dentry->d_name.len, dentry->d_name.name,
356 dl->dl_count);
357
358 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
359
360out:
361 iput(inode);
362}
363
364/*
365 * d_move(), but keep the locks in sync.
366 *
367 * When we are done, "dentry" will have the parent dir and name of
368 * "target", which will be thrown away.
369 *
370 * We manually update the lock of "dentry" if need be.
371 *
372 * "target" doesn't have it's dentry lock touched - we allow the later
373 * dput() to handle this for us.
374 *
375 * This is called during ocfs2_rename(), while holding parent
376 * directory locks. The dentries have already been deleted on other
377 * nodes via ocfs2_remote_dentry_delete().
378 *
379 * Normally, the VFS handles the d_move() for the file sytem, after
380 * the ->rename() callback. OCFS2 wants to handle this internally, so
381 * the new lock can be created atomically with respect to the cluster.
382 */
383void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
384 struct inode *old_dir, struct inode *new_dir)
385{
386 int ret;
387 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
388 struct inode *inode = dentry->d_inode;
389
390 /*
391 * Move within the same directory, so the actual lock info won't
392 * change.
393 *
394 * XXX: Is there any advantage to dropping the lock here?
395 */
396 if (old_dir == new_dir)
397 goto out_move;
398
399 ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
400
401 dentry->d_fsdata = NULL;
402 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
403 if (ret)
404 mlog_errno(ret);
405
406out_move:
407 d_move(dentry, target);
408}
409
90struct dentry_operations ocfs2_dentry_ops = { 410struct dentry_operations ocfs2_dentry_ops = {
91 .d_revalidate = ocfs2_dentry_revalidate, 411 .d_revalidate = ocfs2_dentry_revalidate,
412 .d_iput = ocfs2_dentry_iput,
92}; 413};
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index 90072771114b..c091c34d9883 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
28 28
29extern struct dentry_operations ocfs2_dentry_ops; 29extern struct dentry_operations ocfs2_dentry_ops;
30 30
31struct ocfs2_dentry_lock {
32 unsigned int dl_count;
33 u64 dl_parent_blkno;
34
35 /*
36 * The ocfs2_dentry_lock keeps an inode reference until
37 * dl_lockres has been destroyed. This is usually done in
38 * ->d_iput() anyway, so there should be minimal impact.
39 */
40 struct inode *dl_inode;
41 struct ocfs2_lock_res dl_lockres;
42};
43
44int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
45 u64 parent_blkno);
46
47void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48 struct ocfs2_dentry_lock *dl);
49
50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51 int skip_unhashed);
52
53void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
54 struct inode *old_dir, struct inode *new_dir);
55
56extern spinlock_t dentry_attach_lock;
57
31#endif /* OCFS2_DCACHE_H */ 58#endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index 53652f51c0e1..cfd5cb65cab0 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
182 struct dlm_lockstatus *lksb, 182 struct dlm_lockstatus *lksb,
183 int flags, 183 int flags,
184 const char *name, 184 const char *name,
185 int namelen,
185 dlm_astlockfunc_t *ast, 186 dlm_astlockfunc_t *ast,
186 void *data, 187 void *data,
187 dlm_bastlockfunc_t *bast); 188 dlm_bastlockfunc_t *bast);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f13a4bac41f0..681046d51393 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
320 320
321 res = dlm_lookup_lockres(dlm, name, locklen); 321 res = dlm_lookup_lockres(dlm, name, locklen);
322 if (!res) { 322 if (!res) {
323 mlog(ML_ERROR, "got %sast for unknown lockres! " 323 mlog(0, "got %sast for unknown lockres! "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 324 "cookie=%u:%llu, name=%.*s, namelen=%u\n",
325 past->type == DLM_AST ? "" : "b", 325 past->type == DLM_AST ? "" : "b",
326 dlm_get_lock_cookie_node(cookie), 326 dlm_get_lock_cookie_node(cookie),
327 dlm_get_lock_cookie_seq(cookie), 327 dlm_get_lock_cookie_seq(cookie),
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
462 mlog(ML_ERROR, "sent AST to node %u, it returned " 462 mlog(ML_ERROR, "sent AST to node %u, it returned "
463 "DLM_MIGRATING!\n", lock->ml.node); 463 "DLM_MIGRATING!\n", lock->ml.node);
464 BUG(); 464 BUG();
465 } else if (status != DLM_NORMAL) { 465 } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
466 mlog(ML_ERROR, "AST to node %u returned %d!\n", 466 mlog(ML_ERROR, "AST to node %u returned %d!\n",
467 lock->ml.node, status); 467 lock->ml.node, status);
468 /* ignore it */ 468 /* ignore it */
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 14530ee7e11d..fa968180b072 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
747 u8 owner); 747 u8 owner);
748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
749 const char *lockid, 749 const char *lockid,
750 int namelen,
750 int flags); 751 int flags);
751struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, 752struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
752 const char *name, 753 const char *name,
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 033ad1701232..0368c6402182 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -335,7 +335,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
335 inode->i_mode = mode; 335 inode->i_mode = mode;
336 inode->i_uid = current->fsuid; 336 inode->i_uid = current->fsuid;
337 inode->i_gid = current->fsgid; 337 inode->i_gid = current->fsgid;
338 inode->i_blksize = PAGE_CACHE_SIZE;
339 inode->i_blocks = 0; 338 inode->i_blocks = 0;
340 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 339 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
341 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 340 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -362,7 +361,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
362 inode->i_mode = mode; 361 inode->i_mode = mode;
363 inode->i_uid = current->fsuid; 362 inode->i_uid = current->fsuid;
364 inode->i_gid = current->fsgid; 363 inode->i_gid = current->fsgid;
365 inode->i_blksize = PAGE_CACHE_SIZE;
366 inode->i_blocks = 0; 364 inode->i_blocks = 0;
367 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 365 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
368 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 366 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -629,9 +627,7 @@ static void __exit exit_dlmfs_fs(void)
629 flush_workqueue(user_dlm_worker); 627 flush_workqueue(user_dlm_worker);
630 destroy_workqueue(user_dlm_worker); 628 destroy_workqueue(user_dlm_worker);
631 629
632 if (kmem_cache_destroy(dlmfs_inode_cache)) 630 kmem_cache_destroy(dlmfs_inode_cache);
633 printk(KERN_INFO "dlmfs_inode_cache: not all structures "
634 "were freed\n");
635} 631}
636 632
637MODULE_AUTHOR("Oracle"); 633MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5ca57ec650c7..42a1b91979b5 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
540 540
541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, 541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
542 struct dlm_lockstatus *lksb, int flags, 542 struct dlm_lockstatus *lksb, int flags,
543 const char *name, dlm_astlockfunc_t *ast, void *data, 543 const char *name, int namelen, dlm_astlockfunc_t *ast,
544 dlm_bastlockfunc_t *bast) 544 void *data, dlm_bastlockfunc_t *bast)
545{ 545{
546 enum dlm_status status; 546 enum dlm_status status;
547 struct dlm_lock_resource *res = NULL; 547 struct dlm_lock_resource *res = NULL;
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
571 recovery = (flags & LKM_RECOVERY); 571 recovery = (flags & LKM_RECOVERY);
572 572
573 if (recovery && 573 if (recovery &&
574 (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) { 574 (!dlm_is_recovery_lock(name, namelen) || convert) ) {
575 dlm_error(status); 575 dlm_error(status);
576 goto error; 576 goto error;
577 } 577 }
@@ -643,7 +643,7 @@ retry_convert:
643 } 643 }
644 644
645 status = DLM_IVBUFLEN; 645 status = DLM_IVBUFLEN;
646 if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) { 646 if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
647 dlm_error(status); 647 dlm_error(status);
648 goto error; 648 goto error;
649 } 649 }
@@ -659,7 +659,7 @@ retry_convert:
659 dlm_wait_for_recovery(dlm); 659 dlm_wait_for_recovery(dlm);
660 660
661 /* find or create the lock resource */ 661 /* find or create the lock resource */
662 res = dlm_get_lock_resource(dlm, name, flags); 662 res = dlm_get_lock_resource(dlm, name, namelen, flags);
663 if (!res) { 663 if (!res) {
664 status = DLM_IVLOCKID; 664 status = DLM_IVLOCKID;
665 dlm_error(status); 665 dlm_error(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9503240ef0e5..f784177b6241 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
740 */ 740 */
741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
742 const char *lockid, 742 const char *lockid,
743 int namelen,
743 int flags) 744 int flags)
744{ 745{
745 struct dlm_lock_resource *tmpres=NULL, *res=NULL; 746 struct dlm_lock_resource *tmpres=NULL, *res=NULL;
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
748 int blocked = 0; 749 int blocked = 0;
749 int ret, nodenum; 750 int ret, nodenum;
750 struct dlm_node_iter iter; 751 struct dlm_node_iter iter;
751 unsigned int namelen, hash; 752 unsigned int hash;
752 int tries = 0; 753 int tries = 0;
753 int bit, wait_on_recovery = 0; 754 int bit, wait_on_recovery = 0;
754 755
755 BUG_ON(!lockid); 756 BUG_ON(!lockid);
756 757
757 namelen = strlen(lockid);
758 hash = dlm_lockid_hash(lockid, namelen); 758 hash = dlm_lockid_hash(lockid, namelen);
759 759
760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen); 760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 594745fab0b5..9d950d7cea38 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
2285 memset(&lksb, 0, sizeof(lksb)); 2285 memset(&lksb, 0, sizeof(lksb));
2286 2286
2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
2288 DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); 2288 DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
2289 dlm_reco_ast, dlm, dlm_reco_bast);
2289 2290
2290 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", 2291 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
2291 dlm->name, ret, lksb.status); 2292 dlm->name, ret, lksb.status);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e641b084b343..eead48bbfac6 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
102 spin_unlock(&lockres->l_lock); 102 spin_unlock(&lockres->l_lock);
103} 103}
104 104
105#define user_log_dlm_error(_func, _stat, _lockres) do { \ 105#define user_log_dlm_error(_func, _stat, _lockres) do { \
106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ 106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
107 "resource %s: %s\n", dlm_errname(_stat), _func, \ 107 "resource %.*s: %s\n", dlm_errname(_stat), _func, \
108 _lockres->l_name, dlm_errmsg(_stat)); \ 108 _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
109} while (0) 109} while (0)
110 110
111/* WARNING: This function lives in a world where the only three lock 111/* WARNING: This function lives in a world where the only three lock
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
127 struct user_lock_res *lockres = opaque; 127 struct user_lock_res *lockres = opaque;
128 struct dlm_lockstatus *lksb; 128 struct dlm_lockstatus *lksb;
129 129
130 mlog(0, "AST fired for lockres %s\n", lockres->l_name); 130 mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
131 lockres->l_name);
131 132
132 spin_lock(&lockres->l_lock); 133 spin_lock(&lockres->l_lock);
133 134
134 lksb = &(lockres->l_lksb); 135 lksb = &(lockres->l_lksb);
135 if (lksb->status != DLM_NORMAL) { 136 if (lksb->status != DLM_NORMAL) {
136 mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", 137 mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
137 lksb->status, lockres->l_name); 138 lksb->status, lockres->l_namelen, lockres->l_name);
138 spin_unlock(&lockres->l_lock); 139 spin_unlock(&lockres->l_lock);
139 return; 140 return;
140 } 141 }
141 142
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, 143 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n", 144 "Lockres %.*s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags); 145 lockres->l_namelen, lockres->l_name, lockres->l_flags);
145 146
146 /* we're downconverting. */ 147 /* we're downconverting. */
147 if (lockres->l_requested < lockres->l_level) { 148 if (lockres->l_requested < lockres->l_level) {
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
213{ 214{
214 struct user_lock_res *lockres = opaque; 215 struct user_lock_res *lockres = opaque;
215 216
216 mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", 217 mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
217 lockres->l_name, level); 218 lockres->l_namelen, lockres->l_name, level);
218 219
219 spin_lock(&lockres->l_lock); 220 spin_lock(&lockres->l_lock);
220 lockres->l_flags |= USER_LOCK_BLOCKED; 221 lockres->l_flags |= USER_LOCK_BLOCKED;
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
231{ 232{
232 struct user_lock_res *lockres = opaque; 233 struct user_lock_res *lockres = opaque;
233 234
234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 235 mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
236 lockres->l_name);
235 237
236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT) 238 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
237 mlog(ML_ERROR, "Dlm returns status %d\n", status); 239 mlog(ML_ERROR, "Dlm returns status %d\n", status);
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { 246 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
245 lockres->l_level = LKM_IVMODE; 247 lockres->l_level = LKM_IVMODE;
246 } else if (status == DLM_CANCELGRANT) { 248 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was 249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the 250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */ 251 * ast should've done this already. */
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
255 } else { 255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); 256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */ 257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
260 lockres->l_requested = LKM_IVMODE; /* cancel an 258 lockres->l_requested = LKM_IVMODE; /* cancel an
261 * upconvert 259 * upconvert
262 * request. */ 260 * request. */
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
287 struct user_lock_res *lockres = (struct user_lock_res *) opaque; 285 struct user_lock_res *lockres = (struct user_lock_res *) opaque;
288 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 286 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
289 287
290 mlog(0, "processing lockres %s\n", lockres->l_name); 288 mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
289 lockres->l_name);
291 290
292 spin_lock(&lockres->l_lock); 291 spin_lock(&lockres->l_lock);
293 292
294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), 293 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
295 "Lockres %s, flags 0x%x\n", 294 "Lockres %.*s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags); 295 lockres->l_namelen, lockres->l_name, lockres->l_flags);
297 296
298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's 297 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
299 * set, we want user_ast clear it. */ 298 * set, we want user_ast clear it. */
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
305 * flag, and finally we might get another bast which re-queues 304 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */ 305 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { 306 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock); 307 spin_unlock(&lockres->l_lock);
311 goto drop_ref; 308 goto drop_ref;
312 } 309 }
313 310
314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 311 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
315 mlog(0, "lock is in teardown so we do nothing\n");
316 spin_unlock(&lockres->l_lock); 312 spin_unlock(&lockres->l_lock);
317 goto drop_ref; 313 goto drop_ref;
318 } 314 }
319 315
320 if (lockres->l_flags & USER_LOCK_BUSY) { 316 if (lockres->l_flags & USER_LOCK_BUSY) {
321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 317 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
325 spin_unlock(&lockres->l_lock); 318 spin_unlock(&lockres->l_lock);
326 goto drop_ref; 319 goto drop_ref;
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
372 &lockres->l_lksb, 365 &lockres->l_lksb,
373 LKM_CONVERT|LKM_VALBLK, 366 LKM_CONVERT|LKM_VALBLK,
374 lockres->l_name, 367 lockres->l_name,
368 lockres->l_namelen,
375 user_ast, 369 user_ast,
376 lockres, 370 lockres,
377 user_bast); 371 user_bast);
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
420 414
421 if (level != LKM_EXMODE && 415 if (level != LKM_EXMODE &&
422 level != LKM_PRMODE) { 416 level != LKM_PRMODE) {
423 mlog(ML_ERROR, "lockres %s: invalid request!\n", 417 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
424 lockres->l_name); 418 lockres->l_namelen, lockres->l_name);
425 status = -EINVAL; 419 status = -EINVAL;
426 goto bail; 420 goto bail;
427 } 421 }
428 422
429 mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", 423 mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
430 lockres->l_name, 424 lockres->l_namelen, lockres->l_name,
431 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", 425 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
432 lkm_flags); 426 lkm_flags);
433 427
434again: 428again:
435 if (signal_pending(current)) { 429 if (signal_pending(current)) {
@@ -474,15 +468,13 @@ again:
474 BUG_ON(level == LKM_IVMODE); 468 BUG_ON(level == LKM_IVMODE);
475 BUG_ON(level == LKM_NLMODE); 469 BUG_ON(level == LKM_NLMODE);
476 470
477 mlog(0, "lock %s, get lock from %d to level = %d\n",
478 lockres->l_name, lockres->l_level, level);
479
480 /* call dlm_lock to upgrade lock now */ 471 /* call dlm_lock to upgrade lock now */
481 status = dlmlock(dlm, 472 status = dlmlock(dlm,
482 level, 473 level,
483 &lockres->l_lksb, 474 &lockres->l_lksb,
484 local_flags, 475 local_flags,
485 lockres->l_name, 476 lockres->l_name,
477 lockres->l_namelen,
486 user_ast, 478 user_ast,
487 lockres, 479 lockres,
488 user_bast); 480 user_bast);
@@ -498,9 +490,6 @@ again:
498 goto bail; 490 goto bail;
499 } 491 }
500 492
501 mlog(0, "lock %s, successfull return from dlmlock\n",
502 lockres->l_name);
503
504 user_wait_on_busy_lock(lockres); 493 user_wait_on_busy_lock(lockres);
505 goto again; 494 goto again;
506 } 495 }
@@ -508,9 +497,6 @@ again:
508 user_dlm_inc_holders(lockres, level); 497 user_dlm_inc_holders(lockres, level);
509 spin_unlock(&lockres->l_lock); 498 spin_unlock(&lockres->l_lock);
510 499
511 mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
512 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
513
514 status = 0; 500 status = 0;
515bail: 501bail:
516 return status; 502 return status;
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
538{ 524{
539 if (level != LKM_EXMODE && 525 if (level != LKM_EXMODE &&
540 level != LKM_PRMODE) { 526 level != LKM_PRMODE) {
541 mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); 527 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
528 lockres->l_namelen, lockres->l_name);
542 return; 529 return;
543 } 530 }
544 531
545 mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
546 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
547
548 spin_lock(&lockres->l_lock); 532 spin_lock(&lockres->l_lock);
549 user_dlm_dec_holders(lockres, level); 533 user_dlm_dec_holders(lockres, level);
550 __user_dlm_cond_queue_lockres(lockres); 534 __user_dlm_cond_queue_lockres(lockres);
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
602 memcpy(lockres->l_name, 586 memcpy(lockres->l_name,
603 dentry->d_name.name, 587 dentry->d_name.name,
604 dentry->d_name.len); 588 dentry->d_name.len);
589 lockres->l_namelen = dentry->d_name.len;
605} 590}
606 591
607int user_dlm_destroy_lock(struct user_lock_res *lockres) 592int user_dlm_destroy_lock(struct user_lock_res *lockres)
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
609 int status = -EBUSY; 594 int status = -EBUSY;
610 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 595 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
611 596
612 mlog(0, "asked to destroy %s\n", lockres->l_name); 597 mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
613 598
614 spin_lock(&lockres->l_lock); 599 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 600 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock); 601 spin_unlock(&lockres->l_lock);
618 return 0; 602 return 0;
619 } 603 }
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
623 while (lockres->l_flags & USER_LOCK_BUSY) { 607 while (lockres->l_flags & USER_LOCK_BUSY) {
624 spin_unlock(&lockres->l_lock); 608 spin_unlock(&lockres->l_lock);
625 609
626 mlog(0, "lock %s is busy\n", lockres->l_name);
627
628 user_wait_on_busy_lock(lockres); 610 user_wait_on_busy_lock(lockres);
629 611
630 spin_lock(&lockres->l_lock); 612 spin_lock(&lockres->l_lock);
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
632 614
633 if (lockres->l_ro_holders || lockres->l_ex_holders) { 615 if (lockres->l_ro_holders || lockres->l_ex_holders) {
634 spin_unlock(&lockres->l_lock); 616 spin_unlock(&lockres->l_lock);
635 mlog(0, "lock %s has holders\n", lockres->l_name);
636 goto bail; 617 goto bail;
637 } 618 }
638 619
639 status = 0; 620 status = 0;
640 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { 621 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
641 spin_unlock(&lockres->l_lock); 622 spin_unlock(&lockres->l_lock);
642 mlog(0, "lock %s is not attached\n", lockres->l_name);
643 goto bail; 623 goto bail;
644 } 624 }
645 625
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
647 lockres->l_flags |= USER_LOCK_BUSY; 627 lockres->l_flags |= USER_LOCK_BUSY;
648 spin_unlock(&lockres->l_lock); 628 spin_unlock(&lockres->l_lock);
649 629
650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
651 status = dlmunlock(dlm, 630 status = dlmunlock(dlm,
652 &lockres->l_lksb, 631 &lockres->l_lksb,
653 LKM_VALBLK, 632 LKM_VALBLK,
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 04178bc40b76..c400e93bbf79 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
53 53
54#define USER_DLM_LOCK_ID_MAX_LEN 32 54#define USER_DLM_LOCK_ID_MAX_LEN 32
55 char l_name[USER_DLM_LOCK_ID_MAX_LEN]; 55 char l_name[USER_DLM_LOCK_ID_MAX_LEN];
56 int l_namelen;
56 int l_level; 57 int l_level;
57 unsigned int l_ro_holders; 58 unsigned int l_ro_holders;
58 unsigned int l_ex_holders; 59 unsigned int l_ex_holders;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 151b41781eab..8801e41afe80 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -46,6 +46,7 @@
46#include "ocfs2.h" 46#include "ocfs2.h"
47 47
48#include "alloc.h" 48#include "alloc.h"
49#include "dcache.h"
49#include "dlmglue.h" 50#include "dlmglue.h"
50#include "extent_map.h" 51#include "extent_map.h"
51#include "heartbeat.h" 52#include "heartbeat.h"
@@ -66,78 +67,161 @@ struct ocfs2_mask_waiter {
66 unsigned long mw_goal; 67 unsigned long mw_goal;
67}; 68};
68 69
69static void ocfs2_inode_ast_func(void *opaque); 70static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
70static void ocfs2_inode_bast_func(void *opaque, 71static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
71 int level);
72static void ocfs2_super_ast_func(void *opaque);
73static void ocfs2_super_bast_func(void *opaque,
74 int level);
75static void ocfs2_rename_ast_func(void *opaque);
76static void ocfs2_rename_bast_func(void *opaque,
77 int level);
78
79/* so far, all locks have gotten along with the same unlock ast */
80static void ocfs2_unlock_ast_func(void *opaque,
81 enum dlm_status status);
82static int ocfs2_do_unblock_meta(struct inode *inode,
83 int *requeue);
84static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
85 int *requeue);
86static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
87 int *requeue);
88static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
89 int *requeue);
90static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
91 int *requeue);
92typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
93static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
94 struct ocfs2_lock_res *lockres,
95 int *requeue,
96 ocfs2_convert_worker_t *worker);
97 72
73/*
74 * Return value from ->downconvert_worker functions.
75 *
76 * These control the precise actions of ocfs2_unblock_lock()
77 * and ocfs2_process_blocked_lock()
78 *
79 */
80enum ocfs2_unblock_action {
81 UNBLOCK_CONTINUE = 0, /* Continue downconvert */
82 UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
83 * ->post_unlock callback */
84 UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
85 * ->post_unlock() callback. */
86};
87
88struct ocfs2_unblock_ctl {
89 int requeue;
90 enum ocfs2_unblock_action unblock_action;
91};
92
93static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
94 int new_level);
95static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
96
97static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
98 int blocking);
99
100static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
101 int blocking);
102
103static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
104 struct ocfs2_lock_res *lockres);
105
106/*
107 * OCFS2 Lock Resource Operations
108 *
109 * These fine tune the behavior of the generic dlmglue locking infrastructure.
110 *
111 * The most basic of lock types can point ->l_priv to their respective
112 * struct ocfs2_super and allow the default actions to manage things.
113 *
114 * Right now, each lock type also needs to implement an init function,
115 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
116 * should be called when the lock is no longer needed (i.e., object
117 * destruction time).
118 */
98struct ocfs2_lock_res_ops { 119struct ocfs2_lock_res_ops {
99 void (*ast)(void *); 120 /*
100 void (*bast)(void *, int); 121 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
101 void (*unlock_ast)(void *, enum dlm_status); 122 * this callback if ->l_priv is not an ocfs2_super pointer
102 int (*unblock)(struct ocfs2_lock_res *, int *); 123 */
124 struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
125
126 /*
127 * Optionally called in the downconvert (or "vote") thread
128 * after a successful downconvert. The lockres will not be
129 * referenced after this callback is called, so it is safe to
130 * free memory, etc.
131 *
132 * The exact semantics of when this is called are controlled
133 * by ->downconvert_worker()
134 */
135 void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
136
137 /*
138 * Allow a lock type to add checks to determine whether it is
139 * safe to downconvert a lock. Return 0 to re-queue the
140 * downconvert at a later time, nonzero to continue.
141 *
142 * For most locks, the default checks that there are no
143 * incompatible holders are sufficient.
144 *
145 * Called with the lockres spinlock held.
146 */
147 int (*check_downconvert)(struct ocfs2_lock_res *, int);
148
149 /*
150 * Allows a lock type to populate the lock value block. This
151 * is called on downconvert, and when we drop a lock.
152 *
153 * Locks that want to use this should set LOCK_TYPE_USES_LVB
154 * in the flags field.
155 *
156 * Called with the lockres spinlock held.
157 */
158 void (*set_lvb)(struct ocfs2_lock_res *);
159
160 /*
161 * Called from the downconvert thread when it is determined
162 * that a lock will be downconverted. This is called without
163 * any locks held so the function can do work that might
164 * schedule (syncing out data, etc).
165 *
166 * This should return any one of the ocfs2_unblock_action
167 * values, depending on what it wants the thread to do.
168 */
169 int (*downconvert_worker)(struct ocfs2_lock_res *, int);
170
171 /*
172 * LOCK_TYPE_* flags which describe the specific requirements
173 * of a lock type. Descriptions of each individual flag follow.
174 */
175 int flags;
103}; 176};
104 177
178/*
179 * Some locks want to "refresh" potentially stale data when a
180 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
181 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
182 * individual lockres l_flags member from the ast function. It is
183 * expected that the locking wrapper will clear the
184 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
185 */
186#define LOCK_TYPE_REQUIRES_REFRESH 0x1
187
188/*
189 * Indicate that a lock type makes use of the lock value block. The
190 * ->set_lvb lock type callback must be defined.
191 */
192#define LOCK_TYPE_USES_LVB 0x2
193
105static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 194static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
106 .ast = ocfs2_inode_ast_func, 195 .get_osb = ocfs2_get_inode_osb,
107 .bast = ocfs2_inode_bast_func, 196 .flags = 0,
108 .unlock_ast = ocfs2_unlock_ast_func,
109 .unblock = ocfs2_unblock_inode_lock,
110}; 197};
111 198
112static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { 199static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
113 .ast = ocfs2_inode_ast_func, 200 .get_osb = ocfs2_get_inode_osb,
114 .bast = ocfs2_inode_bast_func, 201 .check_downconvert = ocfs2_check_meta_downconvert,
115 .unlock_ast = ocfs2_unlock_ast_func, 202 .set_lvb = ocfs2_set_meta_lvb,
116 .unblock = ocfs2_unblock_meta, 203 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
117}; 204};
118 205
119static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
120 int blocking);
121
122static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { 206static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
123 .ast = ocfs2_inode_ast_func, 207 .get_osb = ocfs2_get_inode_osb,
124 .bast = ocfs2_inode_bast_func, 208 .downconvert_worker = ocfs2_data_convert_worker,
125 .unlock_ast = ocfs2_unlock_ast_func, 209 .flags = 0,
126 .unblock = ocfs2_unblock_data,
127}; 210};
128 211
129static struct ocfs2_lock_res_ops ocfs2_super_lops = { 212static struct ocfs2_lock_res_ops ocfs2_super_lops = {
130 .ast = ocfs2_super_ast_func, 213 .flags = LOCK_TYPE_REQUIRES_REFRESH,
131 .bast = ocfs2_super_bast_func,
132 .unlock_ast = ocfs2_unlock_ast_func,
133 .unblock = ocfs2_unblock_osb_lock,
134}; 214};
135 215
136static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 216static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
137 .ast = ocfs2_rename_ast_func, 217 .flags = 0,
138 .bast = ocfs2_rename_bast_func, 218};
139 .unlock_ast = ocfs2_unlock_ast_func, 219
140 .unblock = ocfs2_unblock_osb_lock, 220static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
221 .get_osb = ocfs2_get_dentry_osb,
222 .post_unlock = ocfs2_dentry_post_unlock,
223 .downconvert_worker = ocfs2_dentry_convert_worker,
224 .flags = 0,
141}; 225};
142 226
143static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 227static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
@@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
147 lockres->l_type == OCFS2_LOCK_TYPE_RW; 231 lockres->l_type == OCFS2_LOCK_TYPE_RW;
148} 232}
149 233
150static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) 234static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
151{ 235{
152 return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; 236 BUG_ON(!ocfs2_is_inode_lock(lockres));
153}
154 237
155static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) 238 return (struct inode *) lockres->l_priv;
156{
157 return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
158} 239}
159 240
160static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) 241static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
161{ 242{
162 BUG_ON(!ocfs2_is_super_lock(lockres) 243 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
163 && !ocfs2_is_rename_lock(lockres));
164 244
165 return (struct ocfs2_super *) lockres->l_priv; 245 return (struct ocfs2_dentry_lock *)lockres->l_priv;
166} 246}
167 247
168static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 248static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
169{ 249{
170 BUG_ON(!ocfs2_is_inode_lock(lockres)); 250 if (lockres->l_ops->get_osb)
251 return lockres->l_ops->get_osb(lockres);
171 252
172 return (struct inode *) lockres->l_priv; 253 return (struct ocfs2_super *)lockres->l_priv;
173} 254}
174 255
175static int ocfs2_lock_create(struct ocfs2_super *osb, 256static int ocfs2_lock_create(struct ocfs2_super *osb,
@@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode,
200 struct buffer_head **bh); 281 struct buffer_head **bh);
201static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 282static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
202static inline int ocfs2_highest_compat_lock_level(int level); 283static inline int ocfs2_highest_compat_lock_level(int level);
203static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
204 struct ocfs2_lock_res *lockres,
205 int new_level);
206
207static char *ocfs2_lock_type_strings[] = {
208 [OCFS2_LOCK_TYPE_META] = "Meta",
209 [OCFS2_LOCK_TYPE_DATA] = "Data",
210 [OCFS2_LOCK_TYPE_SUPER] = "Super",
211 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
212 /* Need to differntiate from [R]ename.. serializing writes is the
213 * important job it does, anyway. */
214 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
215};
216
217static char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
218{
219 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
220 return ocfs2_lock_type_strings[type];
221}
222 284
223static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 285static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
224 u64 blkno, 286 u64 blkno,
@@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
265static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 327static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
266 struct ocfs2_lock_res *res, 328 struct ocfs2_lock_res *res,
267 enum ocfs2_lock_type type, 329 enum ocfs2_lock_type type,
268 u64 blkno,
269 u32 generation,
270 struct ocfs2_lock_res_ops *ops, 330 struct ocfs2_lock_res_ops *ops,
271 void *priv) 331 void *priv)
272{ 332{
273 ocfs2_build_lock_name(type, blkno, generation, res->l_name);
274
275 res->l_type = type; 333 res->l_type = type;
276 res->l_ops = ops; 334 res->l_ops = ops;
277 res->l_priv = priv; 335 res->l_priv = priv;
@@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
299 357
300void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 358void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
301 enum ocfs2_lock_type type, 359 enum ocfs2_lock_type type,
360 unsigned int generation,
302 struct inode *inode) 361 struct inode *inode)
303{ 362{
304 struct ocfs2_lock_res_ops *ops; 363 struct ocfs2_lock_res_ops *ops;
@@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
319 break; 378 break;
320 }; 379 };
321 380
322 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, 381 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
323 OCFS2_I(inode)->ip_blkno, 382 generation, res->l_name);
324 inode->i_generation, ops, inode); 383 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
384}
385
386static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
387{
388 struct inode *inode = ocfs2_lock_res_inode(lockres);
389
390 return OCFS2_SB(inode->i_sb);
391}
392
393static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
394{
395 __be64 inode_blkno_be;
396
397 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
398 sizeof(__be64));
399
400 return be64_to_cpu(inode_blkno_be);
401}
402
403static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
404{
405 struct ocfs2_dentry_lock *dl = lockres->l_priv;
406
407 return OCFS2_SB(dl->dl_inode->i_sb);
408}
409
410void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
411 u64 parent, struct inode *inode)
412{
413 int len;
414 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
415 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
416 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
417
418 ocfs2_lock_res_init_once(lockres);
419
420 /*
421 * Unfortunately, the standard lock naming scheme won't work
422 * here because we have two 16 byte values to use. Instead,
423 * we'll stuff the inode number as a binary value. We still
424 * want error prints to show something without garbling the
425 * display, so drop a null byte in there before the inode
426 * number. A future version of OCFS2 will likely use all
427 * binary lock names. The stringified names have been a
428 * tremendous aid in debugging, but now that the debugfs
429 * interface exists, we can mangle things there if need be.
430 *
431 * NOTE: We also drop the standard "pad" value (the total lock
432 * name size stays the same though - the last part is all
433 * zeros due to the memset in ocfs2_lock_res_init_once()
434 */
435 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
436 "%c%016llx",
437 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
438 (long long)parent);
439
440 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
441
442 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
443 sizeof(__be64));
444
445 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
446 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
447 dl);
325} 448}
326 449
327static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 450static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
@@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
330 /* Superblock lockres doesn't come from a slab so we call init 453 /* Superblock lockres doesn't come from a slab so we call init
331 * once on it manually. */ 454 * once on it manually. */
332 ocfs2_lock_res_init_once(res); 455 ocfs2_lock_res_init_once(res);
456 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
457 0, res->l_name);
333 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 458 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
334 OCFS2_SUPER_BLOCK_BLKNO, 0,
335 &ocfs2_super_lops, osb); 459 &ocfs2_super_lops, osb);
336} 460}
337 461
@@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
341 /* Rename lockres doesn't come from a slab so we call init 465 /* Rename lockres doesn't come from a slab so we call init
342 * once on it manually. */ 466 * once on it manually. */
343 ocfs2_lock_res_init_once(res); 467 ocfs2_lock_res_init_once(res);
344 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, 468 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
469 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
345 &ocfs2_rename_lops, osb); 470 &ocfs2_rename_lops, osb);
346} 471}
347 472
@@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
495 * information is already up to data. Convert from NL to 620 * information is already up to data. Convert from NL to
496 * *anything* however should mark ourselves as needing an 621 * *anything* however should mark ourselves as needing an
497 * update */ 622 * update */
498 if (lockres->l_level == LKM_NLMODE) 623 if (lockres->l_level == LKM_NLMODE &&
624 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
499 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 625 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
500 626
501 lockres->l_level = lockres->l_requested; 627 lockres->l_level = lockres->l_requested;
@@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
512 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 638 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
513 639
514 if (lockres->l_requested > LKM_NLMODE && 640 if (lockres->l_requested > LKM_NLMODE &&
515 !(lockres->l_flags & OCFS2_LOCK_LOCAL)) 641 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
642 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
516 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 643 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
517 644
518 lockres->l_level = lockres->l_requested; 645 lockres->l_level = lockres->l_requested;
@@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
522 mlog_exit_void(); 649 mlog_exit_void();
523} 650}
524 651
525static void ocfs2_inode_ast_func(void *opaque)
526{
527 struct ocfs2_lock_res *lockres = opaque;
528 struct inode *inode;
529 struct dlm_lockstatus *lksb;
530 unsigned long flags;
531
532 mlog_entry_void();
533
534 inode = ocfs2_lock_res_inode(lockres);
535
536 mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
537 (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
538 ocfs2_lock_type_string(lockres->l_type));
539
540 BUG_ON(!ocfs2_is_inode_lock(lockres));
541
542 spin_lock_irqsave(&lockres->l_lock, flags);
543
544 lksb = &(lockres->l_lksb);
545 if (lksb->status != DLM_NORMAL) {
546 mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
547 "on inode %llu\n", lksb->status,
548 (unsigned long long)OCFS2_I(inode)->ip_blkno);
549 spin_unlock_irqrestore(&lockres->l_lock, flags);
550 mlog_exit_void();
551 return;
552 }
553
554 switch(lockres->l_action) {
555 case OCFS2_AST_ATTACH:
556 ocfs2_generic_handle_attach_action(lockres);
557 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
558 break;
559 case OCFS2_AST_CONVERT:
560 ocfs2_generic_handle_convert_action(lockres);
561 break;
562 case OCFS2_AST_DOWNCONVERT:
563 ocfs2_generic_handle_downconvert_action(lockres);
564 break;
565 default:
566 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
567 "lockres flags = 0x%lx, unlock action: %u\n",
568 lockres->l_name, lockres->l_action, lockres->l_flags,
569 lockres->l_unlock_action);
570
571 BUG();
572 }
573
574 /* data and rw locking ignores refresh flag for now. */
575 if (lockres->l_type != OCFS2_LOCK_TYPE_META)
576 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
577
578 /* set it to something invalid so if we get called again we
579 * can catch it. */
580 lockres->l_action = OCFS2_AST_INVALID;
581 spin_unlock_irqrestore(&lockres->l_lock, flags);
582 wake_up(&lockres->l_event);
583
584 mlog_exit_void();
585}
586
587static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 652static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
588 int level) 653 int level)
589{ 654{
@@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
610 return needs_downconvert; 675 return needs_downconvert;
611} 676}
612 677
613static void ocfs2_generic_bast_func(struct ocfs2_super *osb, 678static void ocfs2_blocking_ast(void *opaque, int level)
614 struct ocfs2_lock_res *lockres,
615 int level)
616{ 679{
680 struct ocfs2_lock_res *lockres = opaque;
681 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
617 int needs_downconvert; 682 int needs_downconvert;
618 unsigned long flags; 683 unsigned long flags;
619 684
620 mlog_entry_void();
621
622 BUG_ON(level <= LKM_NLMODE); 685 BUG_ON(level <= LKM_NLMODE);
623 686
687 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
688 lockres->l_name, level, lockres->l_level,
689 ocfs2_lock_type_string(lockres->l_type));
690
624 spin_lock_irqsave(&lockres->l_lock, flags); 691 spin_lock_irqsave(&lockres->l_lock, flags);
625 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 692 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
626 if (needs_downconvert) 693 if (needs_downconvert)
627 ocfs2_schedule_blocked_lock(osb, lockres); 694 ocfs2_schedule_blocked_lock(osb, lockres);
628 spin_unlock_irqrestore(&lockres->l_lock, flags); 695 spin_unlock_irqrestore(&lockres->l_lock, flags);
629 696
630 ocfs2_kick_vote_thread(osb);
631
632 wake_up(&lockres->l_event); 697 wake_up(&lockres->l_event);
633 mlog_exit_void();
634}
635
636static void ocfs2_inode_bast_func(void *opaque, int level)
637{
638 struct ocfs2_lock_res *lockres = opaque;
639 struct inode *inode;
640 struct ocfs2_super *osb;
641 698
642 mlog_entry_void(); 699 ocfs2_kick_vote_thread(osb);
643
644 BUG_ON(!ocfs2_is_inode_lock(lockres));
645
646 inode = ocfs2_lock_res_inode(lockres);
647 osb = OCFS2_SB(inode->i_sb);
648
649 mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
650 (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
651 lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
652
653 ocfs2_generic_bast_func(osb, lockres, level);
654
655 mlog_exit_void();
656} 700}
657 701
658static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, 702static void ocfs2_locking_ast(void *opaque)
659 int ignore_refresh)
660{ 703{
704 struct ocfs2_lock_res *lockres = opaque;
661 struct dlm_lockstatus *lksb = &lockres->l_lksb; 705 struct dlm_lockstatus *lksb = &lockres->l_lksb;
662 unsigned long flags; 706 unsigned long flags;
663 707
@@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
673 switch(lockres->l_action) { 717 switch(lockres->l_action) {
674 case OCFS2_AST_ATTACH: 718 case OCFS2_AST_ATTACH:
675 ocfs2_generic_handle_attach_action(lockres); 719 ocfs2_generic_handle_attach_action(lockres);
720 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
676 break; 721 break;
677 case OCFS2_AST_CONVERT: 722 case OCFS2_AST_CONVERT:
678 ocfs2_generic_handle_convert_action(lockres); 723 ocfs2_generic_handle_convert_action(lockres);
@@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
681 ocfs2_generic_handle_downconvert_action(lockres); 726 ocfs2_generic_handle_downconvert_action(lockres);
682 break; 727 break;
683 default: 728 default:
729 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
730 "lockres flags = 0x%lx, unlock action: %u\n",
731 lockres->l_name, lockres->l_action, lockres->l_flags,
732 lockres->l_unlock_action);
684 BUG(); 733 BUG();
685 } 734 }
686 735
687 if (ignore_refresh)
688 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
689
690 /* set it to something invalid so if we get called again we 736 /* set it to something invalid so if we get called again we
691 * can catch it. */ 737 * can catch it. */
692 lockres->l_action = OCFS2_AST_INVALID; 738 lockres->l_action = OCFS2_AST_INVALID;
693 spin_unlock_irqrestore(&lockres->l_lock, flags);
694 739
695 wake_up(&lockres->l_event); 740 wake_up(&lockres->l_event);
696} 741 spin_unlock_irqrestore(&lockres->l_lock, flags);
697
698static void ocfs2_super_ast_func(void *opaque)
699{
700 struct ocfs2_lock_res *lockres = opaque;
701
702 mlog_entry_void();
703 mlog(0, "Superblock AST fired\n");
704
705 BUG_ON(!ocfs2_is_super_lock(lockres));
706 ocfs2_generic_ast_func(lockres, 0);
707
708 mlog_exit_void();
709}
710
711static void ocfs2_super_bast_func(void *opaque,
712 int level)
713{
714 struct ocfs2_lock_res *lockres = opaque;
715 struct ocfs2_super *osb;
716
717 mlog_entry_void();
718 mlog(0, "Superblock BAST fired\n");
719
720 BUG_ON(!ocfs2_is_super_lock(lockres));
721 osb = ocfs2_lock_res_super(lockres);
722 ocfs2_generic_bast_func(osb, lockres, level);
723
724 mlog_exit_void();
725}
726
727static void ocfs2_rename_ast_func(void *opaque)
728{
729 struct ocfs2_lock_res *lockres = opaque;
730
731 mlog_entry_void();
732
733 mlog(0, "Rename AST fired\n");
734
735 BUG_ON(!ocfs2_is_rename_lock(lockres));
736
737 ocfs2_generic_ast_func(lockres, 1);
738
739 mlog_exit_void();
740}
741
742static void ocfs2_rename_bast_func(void *opaque,
743 int level)
744{
745 struct ocfs2_lock_res *lockres = opaque;
746 struct ocfs2_super *osb;
747
748 mlog_entry_void();
749
750 mlog(0, "Rename BAST fired\n");
751
752 BUG_ON(!ocfs2_is_rename_lock(lockres));
753
754 osb = ocfs2_lock_res_super(lockres);
755 ocfs2_generic_bast_func(osb, lockres, level);
756
757 mlog_exit_void();
758} 742}
759 743
760static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 744static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
@@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
810 &lockres->l_lksb, 794 &lockres->l_lksb,
811 dlm_flags, 795 dlm_flags,
812 lockres->l_name, 796 lockres->l_name,
813 lockres->l_ops->ast, 797 OCFS2_LOCK_ID_MAX_LEN - 1,
798 ocfs2_locking_ast,
814 lockres, 799 lockres,
815 lockres->l_ops->bast); 800 ocfs2_blocking_ast);
816 if (status != DLM_NORMAL) { 801 if (status != DLM_NORMAL) {
817 ocfs2_log_dlm_error("dlmlock", status, lockres); 802 ocfs2_log_dlm_error("dlmlock", status, lockres);
818 ret = -EINVAL; 803 ret = -EINVAL;
@@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb,
930 915
931 ocfs2_init_mask_waiter(&mw); 916 ocfs2_init_mask_waiter(&mw);
932 917
918 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
919 lkm_flags |= LKM_VALBLK;
920
933again: 921again:
934 wait = 0; 922 wait = 0;
935 923
@@ -997,11 +985,12 @@ again:
997 status = dlmlock(osb->dlm, 985 status = dlmlock(osb->dlm,
998 level, 986 level,
999 &lockres->l_lksb, 987 &lockres->l_lksb,
1000 lkm_flags|LKM_CONVERT|LKM_VALBLK, 988 lkm_flags|LKM_CONVERT,
1001 lockres->l_name, 989 lockres->l_name,
1002 lockres->l_ops->ast, 990 OCFS2_LOCK_ID_MAX_LEN - 1,
991 ocfs2_locking_ast,
1003 lockres, 992 lockres,
1004 lockres->l_ops->bast); 993 ocfs2_blocking_ast);
1005 if (status != DLM_NORMAL) { 994 if (status != DLM_NORMAL) {
1006 if ((lkm_flags & LKM_NOQUEUE) && 995 if ((lkm_flags & LKM_NOQUEUE) &&
1007 (status == DLM_NOTQUEUED)) 996 (status == DLM_NOTQUEUED))
@@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1074 mlog_exit_void(); 1063 mlog_exit_void();
1075} 1064}
1076 1065
1077static int ocfs2_create_new_inode_lock(struct inode *inode, 1066int ocfs2_create_new_lock(struct ocfs2_super *osb,
1078 struct ocfs2_lock_res *lockres) 1067 struct ocfs2_lock_res *lockres,
1068 int ex,
1069 int local)
1079{ 1070{
1080 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1071 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1081 unsigned long flags; 1072 unsigned long flags;
1073 int lkm_flags = local ? LKM_LOCAL : 0;
1082 1074
1083 spin_lock_irqsave(&lockres->l_lock, flags); 1075 spin_lock_irqsave(&lockres->l_lock, flags);
1084 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1076 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1085 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1077 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1086 spin_unlock_irqrestore(&lockres->l_lock, flags); 1078 spin_unlock_irqrestore(&lockres->l_lock, flags);
1087 1079
1088 return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); 1080 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1089} 1081}
1090 1082
1091/* Grants us an EX lock on the data and metadata resources, skipping 1083/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
1097int ocfs2_create_new_inode_locks(struct inode *inode) 1089int ocfs2_create_new_inode_locks(struct inode *inode)
1098{ 1090{
1099 int ret; 1091 int ret;
1092 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1100 1093
1101 BUG_ON(!inode); 1094 BUG_ON(!inode);
1102 BUG_ON(!ocfs2_inode_is_new(inode)); 1095 BUG_ON(!ocfs2_inode_is_new(inode));
@@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1113 * on a resource which has an invalid one -- we'll set it 1106 * on a resource which has an invalid one -- we'll set it
1114 * valid when we release the EX. */ 1107 * valid when we release the EX. */
1115 1108
1116 ret = ocfs2_create_new_inode_lock(inode, 1109 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1117 &OCFS2_I(inode)->ip_rw_lockres);
1118 if (ret) { 1110 if (ret) {
1119 mlog_errno(ret); 1111 mlog_errno(ret);
1120 goto bail; 1112 goto bail;
1121 } 1113 }
1122 1114
1123 ret = ocfs2_create_new_inode_lock(inode, 1115 /*
1124 &OCFS2_I(inode)->ip_meta_lockres); 1116 * We don't want to use LKM_LOCAL on a meta data lock as they
1117 * don't use a generation in their lock names.
1118 */
1119 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
1125 if (ret) { 1120 if (ret) {
1126 mlog_errno(ret); 1121 mlog_errno(ret);
1127 goto bail; 1122 goto bail;
1128 } 1123 }
1129 1124
1130 ret = ocfs2_create_new_inode_lock(inode, 1125 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
1131 &OCFS2_I(inode)->ip_data_lockres);
1132 if (ret) { 1126 if (ret) {
1133 mlog_errno(ret); 1127 mlog_errno(ret);
1134 goto bail; 1128 goto bail;
@@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1317 1311
1318 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1312 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1319 1313
1320 lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); 1314 /*
1315 * Invalidate the LVB of a deleted inode - this way other
1316 * nodes are forced to go to disk and discover the new inode
1317 * status.
1318 */
1319 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1320 lvb->lvb_version = 0;
1321 goto out;
1322 }
1323
1324 lvb->lvb_version = OCFS2_LVB_VERSION;
1321 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1325 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1322 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1326 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1323 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1327 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
@@ -1331,7 +1335,9 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1331 lvb->lvb_imtime_packed = 1335 lvb->lvb_imtime_packed =
1332 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1336 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1333 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1337 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1338 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1334 1339
1340out:
1335 mlog_meta_lvb(0, lockres); 1341 mlog_meta_lvb(0, lockres);
1336 1342
1337 mlog_exit_void(); 1343 mlog_exit_void();
@@ -1386,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1386 mlog_exit_void(); 1392 mlog_exit_void();
1387} 1393}
1388 1394
1389static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) 1395static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1396 struct ocfs2_lock_res *lockres)
1390{ 1397{
1391 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1398 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1392 1399
1393 if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) 1400 if (lvb->lvb_version == OCFS2_LVB_VERSION
1401 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1394 return 1; 1402 return 1;
1395 return 0; 1403 return 0;
1396} 1404}
@@ -1487,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1487 * map (directories, bitmap files, etc) */ 1495 * map (directories, bitmap files, etc) */
1488 ocfs2_extent_map_trunc(inode, 0); 1496 ocfs2_extent_map_trunc(inode, 0);
1489 1497
1490 if (ocfs2_meta_lvb_is_trustable(lockres)) { 1498 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1491 mlog(0, "Trusting LVB on inode %llu\n", 1499 mlog(0, "Trusting LVB on inode %llu\n",
1492 (unsigned long long)oi->ip_blkno); 1500 (unsigned long long)oi->ip_blkno);
1493 ocfs2_refresh_inode_from_lvb(inode); 1501 ocfs2_refresh_inode_from_lvb(inode);
@@ -1628,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
1628 wait_event(osb->recovery_event, 1636 wait_event(osb->recovery_event,
1629 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1637 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1630 1638
1639 /*
1640 * We only see this flag if we're being called from
1641 * ocfs2_read_locked_inode(). It means we're locking an inode
1642 * which hasn't been populated yet, so clear the refresh flag
1643 * and let the caller handle it.
1644 */
1645 if (inode->i_state & I_NEW) {
1646 status = 0;
1647 ocfs2_complete_lock_res_refresh(lockres, 0);
1648 goto bail;
1649 }
1650
1631 /* This is fun. The caller may want a bh back, or it may 1651 /* This is fun. The caller may want a bh back, or it may
1632 * not. ocfs2_meta_lock_update definitely wants one in, but 1652 * not. ocfs2_meta_lock_update definitely wants one in, but
1633 * may or may not read one, depending on what's in the 1653 * may or may not read one, depending on what's in the
@@ -1807,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
1807 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1827 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
1808} 1828}
1809 1829
1830int ocfs2_dentry_lock(struct dentry *dentry, int ex)
1831{
1832 int ret;
1833 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1834 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1835 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1836
1837 BUG_ON(!dl);
1838
1839 if (ocfs2_is_hard_readonly(osb))
1840 return -EROFS;
1841
1842 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
1843 if (ret < 0)
1844 mlog_errno(ret);
1845
1846 return ret;
1847}
1848
1849void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
1850{
1851 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1852 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1853 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1854
1855 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
1856}
1857
1810/* Reference counting of the dlm debug structure. We want this because 1858/* Reference counting of the dlm debug structure. We want this because
1811 * open references on the debug inodes can live on after a mount, so 1859 * open references on the debug inodes can live on after a mount, so
1812 * we can't rely on the ocfs2_super to always exist. */ 1860 * we can't rely on the ocfs2_super to always exist. */
@@ -1937,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1937 if (!lockres) 1985 if (!lockres)
1938 return -EINVAL; 1986 return -EINVAL;
1939 1987
1940 seq_printf(m, "0x%x\t" 1988 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
1941 "%.*s\t" 1989
1942 "%d\t" 1990 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
1991 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
1992 lockres->l_name,
1993 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
1994 else
1995 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
1996
1997 seq_printf(m, "%d\t"
1943 "0x%lx\t" 1998 "0x%lx\t"
1944 "0x%x\t" 1999 "0x%x\t"
1945 "0x%x\t" 2000 "0x%x\t"
@@ -1947,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1947 "%u\t" 2002 "%u\t"
1948 "%d\t" 2003 "%d\t"
1949 "%d\t", 2004 "%d\t",
1950 OCFS2_DLM_DEBUG_STR_VERSION,
1951 OCFS2_LOCK_ID_MAX_LEN, lockres->l_name,
1952 lockres->l_level, 2005 lockres->l_level,
1953 lockres->l_flags, 2006 lockres->l_flags,
1954 lockres->l_action, 2007 lockres->l_action,
@@ -1999,7 +2052,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
1999 mlog_errno(ret); 2052 mlog_errno(ret);
2000 goto out; 2053 goto out;
2001 } 2054 }
2002 osb = (struct ocfs2_super *) inode->u.generic_ip; 2055 osb = inode->i_private;
2003 ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2056 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2004 priv->p_dlm_debug = osb->osb_dlm_debug; 2057 priv->p_dlm_debug = osb->osb_dlm_debug;
2005 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2058 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
@@ -2138,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
2138 mlog_exit_void(); 2191 mlog_exit_void();
2139} 2192}
2140 2193
2141static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) 2194static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
2142{ 2195{
2143 struct ocfs2_lock_res *lockres = opaque; 2196 struct ocfs2_lock_res *lockres = opaque;
2144 unsigned long flags; 2197 unsigned long flags;
@@ -2194,24 +2247,20 @@ complete_unlock:
2194 mlog_exit_void(); 2247 mlog_exit_void();
2195} 2248}
2196 2249
2197typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);
2198
2199struct drop_lock_cb {
2200 ocfs2_pre_drop_cb_t *drop_func;
2201 void *drop_data;
2202};
2203
2204static int ocfs2_drop_lock(struct ocfs2_super *osb, 2250static int ocfs2_drop_lock(struct ocfs2_super *osb,
2205 struct ocfs2_lock_res *lockres, 2251 struct ocfs2_lock_res *lockres)
2206 struct drop_lock_cb *dcb)
2207{ 2252{
2208 enum dlm_status status; 2253 enum dlm_status status;
2209 unsigned long flags; 2254 unsigned long flags;
2255 int lkm_flags = 0;
2210 2256
2211 /* We didn't get anywhere near actually using this lockres. */ 2257 /* We didn't get anywhere near actually using this lockres. */
2212 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2258 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2213 goto out; 2259 goto out;
2214 2260
2261 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2262 lkm_flags |= LKM_VALBLK;
2263
2215 spin_lock_irqsave(&lockres->l_lock, flags); 2264 spin_lock_irqsave(&lockres->l_lock, flags);
2216 2265
2217 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2266 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
@@ -2234,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2234 spin_lock_irqsave(&lockres->l_lock, flags); 2283 spin_lock_irqsave(&lockres->l_lock, flags);
2235 } 2284 }
2236 2285
2237 if (dcb) 2286 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2238 dcb->drop_func(lockres, dcb->drop_data); 2287 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2288 lockres->l_level == LKM_EXMODE &&
2289 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2290 lockres->l_ops->set_lvb(lockres);
2291 }
2239 2292
2240 if (lockres->l_flags & OCFS2_LOCK_BUSY) 2293 if (lockres->l_flags & OCFS2_LOCK_BUSY)
2241 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2294 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
@@ -2261,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2261 2314
2262 mlog(0, "lock %s\n", lockres->l_name); 2315 mlog(0, "lock %s\n", lockres->l_name);
2263 2316
2264 status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, 2317 status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags,
2265 lockres->l_ops->unlock_ast, lockres); 2318 ocfs2_unlock_ast, lockres);
2266 if (status != DLM_NORMAL) { 2319 if (status != DLM_NORMAL) {
2267 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2320 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2268 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2321 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
@@ -2309,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
2309 spin_unlock_irqrestore(&lockres->l_lock, flags); 2362 spin_unlock_irqrestore(&lockres->l_lock, flags);
2310} 2363}
2311 2364
2312static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2365void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
2366 struct ocfs2_lock_res *lockres)
2313{ 2367{
2314 int status; 2368 int ret;
2315
2316 mlog_entry_void();
2317
2318 ocfs2_mark_lockres_freeing(&osb->osb_super_lockres);
2319
2320 status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
2321 if (status < 0)
2322 mlog_errno(status);
2323
2324 ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres);
2325
2326 status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
2327 if (status < 0)
2328 mlog_errno(status);
2329 2369
2330 mlog_exit(status); 2370 ocfs2_mark_lockres_freeing(lockres);
2371 ret = ocfs2_drop_lock(osb, lockres);
2372 if (ret)
2373 mlog_errno(ret);
2331} 2374}
2332 2375
2333static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) 2376static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
2334{ 2377{
2335 struct inode *inode = data; 2378 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
2336 2379 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
2337 /* the metadata lock requires a bit more work as we have an
2338 * LVB to worry about. */
2339 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2340 lockres->l_level == LKM_EXMODE &&
2341 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2342 __ocfs2_stuff_meta_lvb(inode);
2343} 2380}
2344 2381
2345int ocfs2_drop_inode_locks(struct inode *inode) 2382int ocfs2_drop_inode_locks(struct inode *inode)
2346{ 2383{
2347 int status, err; 2384 int status, err;
2348 struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
2349 2385
2350 mlog_entry_void(); 2386 mlog_entry_void();
2351 2387
@@ -2353,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode)
2353 * ocfs2_clear_inode has done it for us. */ 2389 * ocfs2_clear_inode has done it for us. */
2354 2390
2355 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2391 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2356 &OCFS2_I(inode)->ip_data_lockres, 2392 &OCFS2_I(inode)->ip_data_lockres);
2357 NULL);
2358 if (err < 0) 2393 if (err < 0)
2359 mlog_errno(err); 2394 mlog_errno(err);
2360 2395
2361 status = err; 2396 status = err;
2362 2397
2363 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2398 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2364 &OCFS2_I(inode)->ip_meta_lockres, 2399 &OCFS2_I(inode)->ip_meta_lockres);
2365 &meta_dcb);
2366 if (err < 0) 2400 if (err < 0)
2367 mlog_errno(err); 2401 mlog_errno(err);
2368 if (err < 0 && !status) 2402 if (err < 0 && !status)
2369 status = err; 2403 status = err;
2370 2404
2371 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2405 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2372 &OCFS2_I(inode)->ip_rw_lockres, 2406 &OCFS2_I(inode)->ip_rw_lockres);
2373 NULL);
2374 if (err < 0) 2407 if (err < 0)
2375 mlog_errno(err); 2408 mlog_errno(err);
2376 if (err < 0 && !status) 2409 if (err < 0 && !status)
@@ -2419,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
2419 &lockres->l_lksb, 2452 &lockres->l_lksb,
2420 dlm_flags, 2453 dlm_flags,
2421 lockres->l_name, 2454 lockres->l_name,
2422 lockres->l_ops->ast, 2455 OCFS2_LOCK_ID_MAX_LEN - 1,
2456 ocfs2_locking_ast,
2423 lockres, 2457 lockres,
2424 lockres->l_ops->bast); 2458 ocfs2_blocking_ast);
2425 if (status != DLM_NORMAL) { 2459 if (status != DLM_NORMAL) {
2426 ocfs2_log_dlm_error("dlmlock", status, lockres); 2460 ocfs2_log_dlm_error("dlmlock", status, lockres);
2427 ret = -EINVAL; 2461 ret = -EINVAL;
@@ -2480,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2480 status = dlmunlock(osb->dlm, 2514 status = dlmunlock(osb->dlm,
2481 &lockres->l_lksb, 2515 &lockres->l_lksb,
2482 LKM_CANCEL, 2516 LKM_CANCEL,
2483 lockres->l_ops->unlock_ast, 2517 ocfs2_unlock_ast,
2484 lockres); 2518 lockres);
2485 if (status != DLM_NORMAL) { 2519 if (status != DLM_NORMAL) {
2486 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2520 ocfs2_log_dlm_error("dlmunlock", status, lockres);
@@ -2494,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2494 return ret; 2528 return ret;
2495} 2529}
2496 2530
2497static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, 2531static int ocfs2_unblock_lock(struct ocfs2_super *osb,
2498 struct ocfs2_lock_res *lockres, 2532 struct ocfs2_lock_res *lockres,
2499 int new_level) 2533 struct ocfs2_unblock_ctl *ctl)
2500{
2501 int ret;
2502
2503 mlog_entry_void();
2504
2505 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2506
2507 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2508 ret = 0;
2509 mlog(0, "lockres %s currently being refreshed -- backing "
2510 "off!\n", lockres->l_name);
2511 } else if (new_level == LKM_PRMODE)
2512 ret = !lockres->l_ex_holders &&
2513 ocfs2_inode_fully_checkpointed(inode);
2514 else /* Must be NLMODE we're converting to. */
2515 ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
2516 ocfs2_inode_fully_checkpointed(inode);
2517
2518 mlog_exit(ret);
2519 return ret;
2520}
2521
2522static int ocfs2_do_unblock_meta(struct inode *inode,
2523 int *requeue)
2524{
2525 int new_level;
2526 int set_lvb = 0;
2527 int ret = 0;
2528 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
2529 unsigned long flags;
2530
2531 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2532
2533 mlog_entry_void();
2534
2535 spin_lock_irqsave(&lockres->l_lock, flags);
2536
2537 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
2538
2539 mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
2540 lockres->l_blocking);
2541
2542 BUG_ON(lockres->l_level != LKM_EXMODE &&
2543 lockres->l_level != LKM_PRMODE);
2544
2545 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2546 *requeue = 1;
2547 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2548 spin_unlock_irqrestore(&lockres->l_lock, flags);
2549 if (ret) {
2550 ret = ocfs2_cancel_convert(osb, lockres);
2551 if (ret < 0)
2552 mlog_errno(ret);
2553 }
2554 goto leave;
2555 }
2556
2557 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2558
2559 mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
2560 lockres->l_level, lockres->l_blocking, new_level);
2561
2562 if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
2563 if (lockres->l_level == LKM_EXMODE)
2564 set_lvb = 1;
2565
2566 /* If the lock hasn't been refreshed yet (rare), then
2567 * our memory inode values are old and we skip
2568 * stuffing the lvb. There's no need to actually clear
2569 * out the lvb here as it's value is still valid. */
2570 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2571 if (set_lvb)
2572 __ocfs2_stuff_meta_lvb(inode);
2573 } else
2574 mlog(0, "lockres %s: downconverting stale lock!\n",
2575 lockres->l_name);
2576
2577 mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
2578 "l_blocking=%d, new_level=%d\n",
2579 lockres->l_level, lockres->l_blocking, new_level);
2580
2581 ocfs2_prepare_downconvert(lockres, new_level);
2582 spin_unlock_irqrestore(&lockres->l_lock, flags);
2583 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2584 goto leave;
2585 }
2586 if (!ocfs2_inode_fully_checkpointed(inode))
2587 ocfs2_start_checkpoint(osb);
2588
2589 *requeue = 1;
2590 spin_unlock_irqrestore(&lockres->l_lock, flags);
2591 ret = 0;
2592leave:
2593 mlog_exit(ret);
2594 return ret;
2595}
2596
2597static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2598 struct ocfs2_lock_res *lockres,
2599 int *requeue,
2600 ocfs2_convert_worker_t *worker)
2601{ 2534{
2602 unsigned long flags; 2535 unsigned long flags;
2603 int blocking; 2536 int blocking;
2604 int new_level; 2537 int new_level;
2605 int ret = 0; 2538 int ret = 0;
2539 int set_lvb = 0;
2606 2540
2607 mlog_entry_void(); 2541 mlog_entry_void();
2608 2542
@@ -2612,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2612 2546
2613recheck: 2547recheck:
2614 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 2548 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2615 *requeue = 1; 2549 ctl->requeue = 1;
2616 ret = ocfs2_prepare_cancel_convert(osb, lockres); 2550 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2617 spin_unlock_irqrestore(&lockres->l_lock, flags); 2551 spin_unlock_irqrestore(&lockres->l_lock, flags);
2618 if (ret) { 2552 if (ret) {
@@ -2626,27 +2560,33 @@ recheck:
2626 /* if we're blocking an exclusive and we have *any* holders, 2560 /* if we're blocking an exclusive and we have *any* holders,
2627 * then requeue. */ 2561 * then requeue. */
2628 if ((lockres->l_blocking == LKM_EXMODE) 2562 if ((lockres->l_blocking == LKM_EXMODE)
2629 && (lockres->l_ex_holders || lockres->l_ro_holders)) { 2563 && (lockres->l_ex_holders || lockres->l_ro_holders))
2630 spin_unlock_irqrestore(&lockres->l_lock, flags); 2564 goto leave_requeue;
2631 *requeue = 1;
2632 ret = 0;
2633 goto leave;
2634 }
2635 2565
2636 /* If it's a PR we're blocking, then only 2566 /* If it's a PR we're blocking, then only
2637 * requeue if we've got any EX holders */ 2567 * requeue if we've got any EX holders */
2638 if (lockres->l_blocking == LKM_PRMODE && 2568 if (lockres->l_blocking == LKM_PRMODE &&
2639 lockres->l_ex_holders) { 2569 lockres->l_ex_holders)
2640 spin_unlock_irqrestore(&lockres->l_lock, flags); 2570 goto leave_requeue;
2641 *requeue = 1; 2571
2642 ret = 0; 2572 /*
2643 goto leave; 2573 * Can we get a lock in this state if the holder counts are
2644 } 2574 * zero? The meta data unblock code used to check this.
2575 */
2576 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
2577 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
2578 goto leave_requeue;
2579
2580 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2581
2582 if (lockres->l_ops->check_downconvert
2583 && !lockres->l_ops->check_downconvert(lockres, new_level))
2584 goto leave_requeue;
2645 2585
2646 /* If we get here, then we know that there are no more 2586 /* If we get here, then we know that there are no more
2647 * incompatible holders (and anyone asking for an incompatible 2587 * incompatible holders (and anyone asking for an incompatible
2648 * lock is blocked). We can now downconvert the lock */ 2588 * lock is blocked). We can now downconvert the lock */
2649 if (!worker) 2589 if (!lockres->l_ops->downconvert_worker)
2650 goto downconvert; 2590 goto downconvert;
2651 2591
2652 /* Some lockres types want to do a bit of work before 2592 /* Some lockres types want to do a bit of work before
@@ -2656,7 +2596,10 @@ recheck:
2656 blocking = lockres->l_blocking; 2596 blocking = lockres->l_blocking;
2657 spin_unlock_irqrestore(&lockres->l_lock, flags); 2597 spin_unlock_irqrestore(&lockres->l_lock, flags);
2658 2598
2659 worker(lockres, blocking); 2599 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
2600
2601 if (ctl->unblock_action == UNBLOCK_STOP_POST)
2602 goto leave;
2660 2603
2661 spin_lock_irqsave(&lockres->l_lock, flags); 2604 spin_lock_irqsave(&lockres->l_lock, flags);
2662 if (blocking != lockres->l_blocking) { 2605 if (blocking != lockres->l_blocking) {
@@ -2666,25 +2609,43 @@ recheck:
2666 } 2609 }
2667 2610
2668downconvert: 2611downconvert:
2669 *requeue = 0; 2612 ctl->requeue = 0;
2670 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 2613
2614 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2615 if (lockres->l_level == LKM_EXMODE)
2616 set_lvb = 1;
2617
2618 /*
2619 * We only set the lvb if the lock has been fully
2620 * refreshed - otherwise we risk setting stale
2621 * data. Otherwise, there's no need to actually clear
2622 * out the lvb here as it's value is still valid.
2623 */
2624 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2625 lockres->l_ops->set_lvb(lockres);
2626 }
2671 2627
2672 ocfs2_prepare_downconvert(lockres, new_level); 2628 ocfs2_prepare_downconvert(lockres, new_level);
2673 spin_unlock_irqrestore(&lockres->l_lock, flags); 2629 spin_unlock_irqrestore(&lockres->l_lock, flags);
2674 ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); 2630 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2675leave: 2631leave:
2676 mlog_exit(ret); 2632 mlog_exit(ret);
2677 return ret; 2633 return ret;
2634
2635leave_requeue:
2636 spin_unlock_irqrestore(&lockres->l_lock, flags);
2637 ctl->requeue = 1;
2638
2639 mlog_exit(0);
2640 return 0;
2678} 2641}
2679 2642
2680static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 2643static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2681 int blocking) 2644 int blocking)
2682{ 2645{
2683 struct inode *inode; 2646 struct inode *inode;
2684 struct address_space *mapping; 2647 struct address_space *mapping;
2685 2648
2686 mlog_entry_void();
2687
2688 inode = ocfs2_lock_res_inode(lockres); 2649 inode = ocfs2_lock_res_inode(lockres);
2689 mapping = inode->i_mapping; 2650 mapping = inode->i_mapping;
2690 2651
@@ -2705,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2705 filemap_fdatawait(mapping); 2666 filemap_fdatawait(mapping);
2706 } 2667 }
2707 2668
2708 mlog_exit_void(); 2669 return UNBLOCK_CONTINUE;
2709} 2670}
2710 2671
2711int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, 2672static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
2712 int *requeue) 2673 int new_level)
2713{ 2674{
2714 int status; 2675 struct inode *inode = ocfs2_lock_res_inode(lockres);
2715 struct inode *inode; 2676 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
2716 struct ocfs2_super *osb;
2717
2718 mlog_entry_void();
2719
2720 inode = ocfs2_lock_res_inode(lockres);
2721 osb = OCFS2_SB(inode->i_sb);
2722
2723 mlog(0, "unblock inode %llu\n",
2724 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2725 2677
2726 status = ocfs2_generic_unblock_lock(osb, 2678 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2727 lockres, 2679 BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed);
2728 requeue,
2729 ocfs2_data_convert_worker);
2730 if (status < 0)
2731 mlog_errno(status);
2732 2680
2733 mlog(0, "inode %llu, requeue = %d\n", 2681 if (checkpointed)
2734 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2682 return 1;
2735 2683
2736 mlog_exit(status); 2684 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
2737 return status; 2685 return 0;
2738} 2686}
2739 2687
2740static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, 2688static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
2741 int *requeue)
2742{ 2689{
2743 int status; 2690 struct inode *inode = ocfs2_lock_res_inode(lockres);
2744 struct inode *inode;
2745
2746 mlog_entry_void();
2747
2748 mlog(0, "Unblock lockres %s\n", lockres->l_name);
2749
2750 inode = ocfs2_lock_res_inode(lockres);
2751 2691
2752 status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), 2692 __ocfs2_stuff_meta_lvb(inode);
2753 lockres,
2754 requeue,
2755 NULL);
2756 if (status < 0)
2757 mlog_errno(status);
2758
2759 mlog_exit(status);
2760 return status;
2761} 2693}
2762 2694
2763 2695/*
2764int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, 2696 * Does the final reference drop on our dentry lock. Right now this
2765 int *requeue) 2697 * happens in the vote thread, but we could choose to simplify the
2698 * dlmglue API and push these off to the ocfs2_wq in the future.
2699 */
2700static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
2701 struct ocfs2_lock_res *lockres)
2766{ 2702{
2767 int status; 2703 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2768 struct inode *inode; 2704 ocfs2_dentry_lock_put(osb, dl);
2769 2705}
2770 mlog_entry_void();
2771 2706
2772 inode = ocfs2_lock_res_inode(lockres); 2707/*
2708 * d_delete() matching dentries before the lock downconvert.
2709 *
2710 * At this point, any process waiting to destroy the
2711 * dentry_lock due to last ref count is stopped by the
2712 * OCFS2_LOCK_QUEUED flag.
2713 *
2714 * We have two potential problems
2715 *
2716 * 1) If we do the last reference drop on our dentry_lock (via dput)
2717 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
2718 * the downconvert to finish. Instead we take an elevated
2719 * reference and push the drop until after we've completed our
2720 * unblock processing.
2721 *
2722 * 2) There might be another process with a final reference,
2723 * waiting on us to finish processing. If this is the case, we
2724 * detect it and exit out - there's no more dentries anyway.
2725 */
2726static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
2727 int blocking)
2728{
2729 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2730 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
2731 struct dentry *dentry;
2732 unsigned long flags;
2733 int extra_ref = 0;
2773 2734
2774 mlog(0, "unblock inode %llu\n", 2735 /*
2775 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2736 * This node is blocking another node from getting a read
2737 * lock. This happens when we've renamed within a
2738 * directory. We've forced the other nodes to d_delete(), but
2739 * we never actually dropped our lock because it's still
2740 * valid. The downconvert code will retain a PR for this node,
2741 * so there's no further work to do.
2742 */
2743 if (blocking == LKM_PRMODE)
2744 return UNBLOCK_CONTINUE;
2776 2745
2777 status = ocfs2_do_unblock_meta(inode, requeue); 2746 /*
2778 if (status < 0) 2747 * Mark this inode as potentially orphaned. The code in
2779 mlog_errno(status); 2748 * ocfs2_delete_inode() will figure out whether it actually
2749 * needs to be freed or not.
2750 */
2751 spin_lock(&oi->ip_lock);
2752 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2753 spin_unlock(&oi->ip_lock);
2780 2754
2781 mlog(0, "inode %llu, requeue = %d\n", 2755 /*
2782 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2756 * Yuck. We need to make sure however that the check of
2757 * OCFS2_LOCK_FREEING and the extra reference are atomic with
2758 * respect to a reference decrement or the setting of that
2759 * flag.
2760 */
2761 spin_lock_irqsave(&lockres->l_lock, flags);
2762 spin_lock(&dentry_attach_lock);
2763 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
2764 && dl->dl_count) {
2765 dl->dl_count++;
2766 extra_ref = 1;
2767 }
2768 spin_unlock(&dentry_attach_lock);
2769 spin_unlock_irqrestore(&lockres->l_lock, flags);
2783 2770
2784 mlog_exit(status); 2771 mlog(0, "extra_ref = %d\n", extra_ref);
2785 return status;
2786}
2787 2772
2788/* Generic unblock function for any lockres whose private data is an 2773 /*
2789 * ocfs2_super pointer. */ 2774 * We have a process waiting on us in ocfs2_dentry_iput(),
2790static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, 2775 * which means we can't have any more outstanding
2791 int *requeue) 2776 * aliases. There's no need to do any more work.
2792{ 2777 */
2793 int status; 2778 if (!extra_ref)
2794 struct ocfs2_super *osb; 2779 return UNBLOCK_CONTINUE;
2780
2781 spin_lock(&dentry_attach_lock);
2782 while (1) {
2783 dentry = ocfs2_find_local_alias(dl->dl_inode,
2784 dl->dl_parent_blkno, 1);
2785 if (!dentry)
2786 break;
2787 spin_unlock(&dentry_attach_lock);
2795 2788
2796 mlog_entry_void(); 2789 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
2790 dentry->d_name.name);
2797 2791
2798 mlog(0, "Unblock lockres %s\n", lockres->l_name); 2792 /*
2793 * The following dcache calls may do an
2794 * iput(). Normally we don't want that from the
2795 * downconverting thread, but in this case it's ok
2796 * because the requesting node already has an
2797 * exclusive lock on the inode, so it can't be queued
2798 * for a downconvert.
2799 */
2800 d_delete(dentry);
2801 dput(dentry);
2799 2802
2800 osb = ocfs2_lock_res_super(lockres); 2803 spin_lock(&dentry_attach_lock);
2804 }
2805 spin_unlock(&dentry_attach_lock);
2801 2806
2802 status = ocfs2_generic_unblock_lock(osb, 2807 /*
2803 lockres, 2808 * If we are the last holder of this dentry lock, there is no
2804 requeue, 2809 * reason to downconvert so skip straight to the unlock.
2805 NULL); 2810 */
2806 if (status < 0) 2811 if (dl->dl_count == 1)
2807 mlog_errno(status); 2812 return UNBLOCK_STOP_POST;
2808 2813
2809 mlog_exit(status); 2814 return UNBLOCK_CONTINUE_POST;
2810 return status;
2811} 2815}
2812 2816
2813void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 2817void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2814 struct ocfs2_lock_res *lockres) 2818 struct ocfs2_lock_res *lockres)
2815{ 2819{
2816 int status; 2820 int status;
2817 int requeue = 0; 2821 struct ocfs2_unblock_ctl ctl = {0, 0,};
2818 unsigned long flags; 2822 unsigned long flags;
2819 2823
2820 /* Our reference to the lockres in this function can be 2824 /* Our reference to the lockres in this function can be
@@ -2825,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2825 2829
2826 BUG_ON(!lockres); 2830 BUG_ON(!lockres);
2827 BUG_ON(!lockres->l_ops); 2831 BUG_ON(!lockres->l_ops);
2828 BUG_ON(!lockres->l_ops->unblock);
2829 2832
2830 mlog(0, "lockres %s blocked.\n", lockres->l_name); 2833 mlog(0, "lockres %s blocked.\n", lockres->l_name);
2831 2834
@@ -2839,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2839 goto unqueue; 2842 goto unqueue;
2840 spin_unlock_irqrestore(&lockres->l_lock, flags); 2843 spin_unlock_irqrestore(&lockres->l_lock, flags);
2841 2844
2842 status = lockres->l_ops->unblock(lockres, &requeue); 2845 status = ocfs2_unblock_lock(osb, lockres, &ctl);
2843 if (status < 0) 2846 if (status < 0)
2844 mlog_errno(status); 2847 mlog_errno(status);
2845 2848
2846 spin_lock_irqsave(&lockres->l_lock, flags); 2849 spin_lock_irqsave(&lockres->l_lock, flags);
2847unqueue: 2850unqueue:
2848 if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { 2851 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
2849 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 2852 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
2850 } else 2853 } else
2851 ocfs2_schedule_blocked_lock(osb, lockres); 2854 ocfs2_schedule_blocked_lock(osb, lockres);
2852 2855
2853 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 2856 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
2854 requeue ? "yes" : "no"); 2857 ctl.requeue ? "yes" : "no");
2855 spin_unlock_irqrestore(&lockres->l_lock, flags); 2858 spin_unlock_irqrestore(&lockres->l_lock, flags);
2856 2859
2860 if (ctl.unblock_action != UNBLOCK_CONTINUE
2861 && lockres->l_ops->post_unlock)
2862 lockres->l_ops->post_unlock(osb, lockres);
2863
2857 mlog_exit_void(); 2864 mlog_exit_void();
2858} 2865}
2859 2866
@@ -2896,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
2896 2903
2897 mlog(level, "LVB information for %s (called from %s:%u):\n", 2904 mlog(level, "LVB information for %s (called from %s:%u):\n",
2898 lockres->l_name, function, line); 2905 lockres->l_name, function, line);
2899 mlog(level, "version: %u, clusters: %u\n", 2906 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
2900 be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); 2907 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
2908 be32_to_cpu(lvb->lvb_igeneration));
2901 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 2909 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
2902 (unsigned long long)be64_to_cpu(lvb->lvb_isize), 2910 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
2903 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 2911 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 243ae862ece5..4a2769387229 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,10 +27,14 @@
27#ifndef DLMGLUE_H 27#ifndef DLMGLUE_H
28#define DLMGLUE_H 28#define DLMGLUE_H
29 29
30#define OCFS2_LVB_VERSION 3 30#include "dcache.h"
31
32#define OCFS2_LVB_VERSION 4
31 33
32struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
33 __be32 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1;
34 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
35 __be32 lvb_iuid; 39 __be32 lvb_iuid;
36 __be32 lvb_igid; 40 __be32 lvb_igid;
@@ -41,7 +45,8 @@ struct ocfs2_meta_lvb {
41 __be16 lvb_imode; 45 __be16 lvb_imode;
42 __be16 lvb_inlink; 46 __be16 lvb_inlink;
43 __be32 lvb_iattr; 47 __be32 lvb_iattr;
44 __be32 lvb_reserved[2]; 48 __be32 lvb_igeneration;
49 __be32 lvb_reserved2;
45}; 50};
46 51
47/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ 52/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
@@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
57void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
58void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
59 enum ocfs2_lock_type type, 64 enum ocfs2_lock_type type,
65 unsigned int generation,
60 struct inode *inode); 66 struct inode *inode);
67void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
68 u64 parent, struct inode *inode);
61void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 69void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
62int ocfs2_create_new_inode_locks(struct inode *inode); 70int ocfs2_create_new_inode_locks(struct inode *inode);
71int ocfs2_create_new_lock(struct ocfs2_super *osb,
72 struct ocfs2_lock_res *lockres, int ex, int local);
63int ocfs2_drop_inode_locks(struct inode *inode); 73int ocfs2_drop_inode_locks(struct inode *inode);
64int ocfs2_data_lock_full(struct inode *inode, 74int ocfs2_data_lock_full(struct inode *inode,
65 int write, 75 int write,
@@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
93 int ex); 103 int ex);
94int ocfs2_rename_lock(struct ocfs2_super *osb); 104int ocfs2_rename_lock(struct ocfs2_super *osb);
95void ocfs2_rename_unlock(struct ocfs2_super *osb); 105void ocfs2_rename_unlock(struct ocfs2_super *osb);
106int ocfs2_dentry_lock(struct dentry *dentry, int ex);
107void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
108
96void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 109void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
110void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
111 struct ocfs2_lock_res *lockres);
97 112
98/* for the vote thread */ 113/* for the vote thread */
99void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 114void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ec55ab3c1214..fb91089a60a7 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
33 33
34#include "dir.h" 34#include "dir.h"
35#include "dlmglue.h" 35#include "dlmglue.h"
36#include "dcache.h"
36#include "export.h" 37#include "export.h"
37#include "inode.h" 38#include "inode.h"
38 39
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
57 return ERR_PTR(-ESTALE); 58 return ERR_PTR(-ESTALE);
58 } 59 }
59 60
60 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); 61 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
61 62
62 if (IS_ERR(inode)) { 63 if (IS_ERR(inode)) {
63 mlog_errno(PTR_ERR(inode)); 64 mlog_errno(PTR_ERR(inode));
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
77 mlog_errno(-ENOMEM); 78 mlog_errno(-ENOMEM);
78 return ERR_PTR(-ENOMEM); 79 return ERR_PTR(-ENOMEM);
79 } 80 }
81 result->d_op = &ocfs2_dentry_ops;
80 82
81 mlog_exit_ptr(result); 83 mlog_exit_ptr(result);
82 return result; 84 return result;
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
113 goto bail_unlock; 115 goto bail_unlock;
114 } 116 }
115 117
116 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 118 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
117 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
118 mlog(ML_ERROR, "Unable to create inode %llu\n", 120 mlog(ML_ERROR, "Unable to create inode %llu\n",
119 (unsigned long long)blkno); 121 (unsigned long long)blkno);
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
127 parent = ERR_PTR(-ENOMEM); 129 parent = ERR_PTR(-ENOMEM);
128 } 130 }
129 131
132 parent->d_op = &ocfs2_dentry_ops;
133
130bail_unlock: 134bail_unlock:
131 ocfs2_meta_unlock(dir, 0); 135 ocfs2_meta_unlock(dir, 0);
132 136
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7bcf69154592..16e8e74dc966 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
54 54
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56 56
57#define OCFS2_FI_FLAG_NOWAIT 0x1
58#define OCFS2_FI_FLAG_DELETE 0x2
59struct ocfs2_find_inode_args 57struct ocfs2_find_inode_args
60{ 58{
61 u64 fi_blkno; 59 u64 fi_blkno;
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
109 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); 107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
110} 108}
111 109
112struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
113{ 111{
114 struct inode *inode = NULL; 112 struct inode *inode = NULL;
115 struct super_block *sb = osb->sb; 113 struct super_block *sb = osb->sb;
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
127 } 125 }
128 126
129 args.fi_blkno = blkno; 127 args.fi_blkno = blkno;
130 args.fi_flags = 0; 128 args.fi_flags = flags;
131 args.fi_ino = ino_from_blkno(sb, blkno); 129 args.fi_ino = ino_from_blkno(sb, blkno);
132 130
133 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 131 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -271,7 +269,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
271 inode->i_mode = le16_to_cpu(fe->i_mode); 269 inode->i_mode = le16_to_cpu(fe->i_mode);
272 inode->i_uid = le32_to_cpu(fe->i_uid); 270 inode->i_uid = le32_to_cpu(fe->i_uid);
273 inode->i_gid = le32_to_cpu(fe->i_gid); 271 inode->i_gid = le32_to_cpu(fe->i_gid);
274 inode->i_blksize = (u32)osb->s_clustersize;
275 272
276 /* Fast symlinks will have i_size but no allocated clusters. */ 273 /* Fast symlinks will have i_size but no allocated clusters. */
277 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) 274 if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
@@ -297,15 +294,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
297 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 294 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
298 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 295 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
299 296
300 if (create_ino)
301 inode->i_ino = ino_from_blkno(inode->i_sb,
302 le64_to_cpu(fe->i_blkno));
303
304 mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
305 (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
306
307 inode->i_nlink = le16_to_cpu(fe->i_links_count); 297 inode->i_nlink = le16_to_cpu(fe->i_links_count);
308 298
299 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
300 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
301
309 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 302 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
310 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 303 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
311 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 304 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -343,12 +336,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
343 break; 336 break;
344 } 337 }
345 338
339 if (create_ino) {
340 inode->i_ino = ino_from_blkno(inode->i_sb,
341 le64_to_cpu(fe->i_blkno));
342
343 /*
344 * If we ever want to create system files from kernel,
345 * the generation argument to
346 * ocfs2_inode_lock_res_init() will have to change.
347 */
348 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
349
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
351 OCFS2_LOCK_TYPE_META, 0, inode);
352 }
353
346 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 354 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
347 OCFS2_LOCK_TYPE_RW, inode); 355 OCFS2_LOCK_TYPE_RW, inode->i_generation,
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 356 inode);
349 OCFS2_LOCK_TYPE_META, inode); 357
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 358 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
351 OCFS2_LOCK_TYPE_DATA, inode); 359 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
360 inode);
352 361
353 ocfs2_set_inode_flags(inode); 362 ocfs2_set_inode_flags(inode);
354 inode->i_flags |= S_NOATIME; 363 inode->i_flags |= S_NOATIME;
@@ -366,15 +375,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
366 struct ocfs2_super *osb; 375 struct ocfs2_super *osb;
367 struct ocfs2_dinode *fe; 376 struct ocfs2_dinode *fe;
368 struct buffer_head *bh = NULL; 377 struct buffer_head *bh = NULL;
369 int status; 378 int status, can_lock;
370 int sysfile = 0; 379 u32 generation = 0;
371 380
372 mlog_entry("(0x%p, 0x%p)\n", inode, args); 381 mlog_entry("(0x%p, 0x%p)\n", inode, args);
373 382
374 status = -EINVAL; 383 status = -EINVAL;
375 if (inode == NULL || inode->i_sb == NULL) { 384 if (inode == NULL || inode->i_sb == NULL) {
376 mlog(ML_ERROR, "bad inode\n"); 385 mlog(ML_ERROR, "bad inode\n");
377 goto bail; 386 return status;
378 } 387 }
379 sb = inode->i_sb; 388 sb = inode->i_sb;
380 osb = OCFS2_SB(sb); 389 osb = OCFS2_SB(sb);
@@ -382,50 +391,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
382 if (!args) { 391 if (!args) {
383 mlog(ML_ERROR, "bad inode args\n"); 392 mlog(ML_ERROR, "bad inode args\n");
384 make_bad_inode(inode); 393 make_bad_inode(inode);
385 goto bail; 394 return status;
395 }
396
397 /*
398 * To improve performance of cold-cache inode stats, we take
399 * the cluster lock here if possible.
400 *
401 * Generally, OCFS2 never trusts the contents of an inode
402 * unless it's holding a cluster lock, so taking it here isn't
403 * a correctness issue as much as it is a performance
404 * improvement.
405 *
406 * There are three times when taking the lock is not a good idea:
407 *
408 * 1) During startup, before we have initialized the DLM.
409 *
410 * 2) If we are reading certain system files which never get
411 * cluster locks (local alloc, truncate log).
412 *
413 * 3) If the process doing the iget() is responsible for
414 * orphan dir recovery. We're holding the orphan dir lock and
415 * can get into a deadlock with another process on another
416 * node in ->delete_inode().
417 *
418 * #1 and #2 can be simply solved by never taking the lock
419 * here for system files (which are the only type we read
420 * during mount). It's a heavier approach, but our main
421 * concern is user-accesible files anyway.
422 *
423 * #3 works itself out because we'll eventually take the
424 * cluster lock before trusting anything anyway.
425 */
426 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
427 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
428
429 /*
430 * To maintain backwards compatibility with older versions of
431 * ocfs2-tools, we still store the generation value for system
432 * files. The only ones that actually matter to userspace are
433 * the journals, but it's easier and inexpensive to just flag
434 * all system files similarly.
435 */
436 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
437 generation = osb->fs_generation;
438
439 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
440 OCFS2_LOCK_TYPE_META,
441 generation, inode);
442
443 if (can_lock) {
444 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
445 if (status) {
446 make_bad_inode(inode);
447 mlog_errno(status);
448 return status;
449 }
386 } 450 }
387 451
388 /* Read the FE off disk. This is safe because the kernel only 452 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
389 * does one read_inode2 for a new inode, and if it doesn't 453 can_lock ? inode : NULL);
390 * exist yet then nobody can be working on it! */
391 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
392 if (status < 0) { 454 if (status < 0) {
393 mlog_errno(status); 455 mlog_errno(status);
394 make_bad_inode(inode);
395 goto bail; 456 goto bail;
396 } 457 }
397 458
459 status = -EINVAL;
398 fe = (struct ocfs2_dinode *) bh->b_data; 460 fe = (struct ocfs2_dinode *) bh->b_data;
399 if (!OCFS2_IS_VALID_DINODE(fe)) { 461 if (!OCFS2_IS_VALID_DINODE(fe)) {
400 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 462 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
401 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 463 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
402 make_bad_inode(inode);
403 goto bail; 464 goto bail;
404 } 465 }
405 466
406 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 467 /*
407 sysfile = 1; 468 * This is a code bug. Right now the caller needs to
469 * understand whether it is asking for a system file inode or
470 * not so the proper lock names can be built.
471 */
472 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
473 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
474 "Inode %llu: system file state is ambigous\n",
475 (unsigned long long)args->fi_blkno);
408 476
409 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 477 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
410 S_ISBLK(le16_to_cpu(fe->i_mode))) 478 S_ISBLK(le16_to_cpu(fe->i_mode)))
411 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 479 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
412 480
413 status = -EINVAL;
414 if (ocfs2_populate_inode(inode, fe, 0) < 0) { 481 if (ocfs2_populate_inode(inode, fe, 0) < 0) {
415 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", 482 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
416 (unsigned long long)fe->i_blkno, inode->i_ino); 483 (unsigned long long)fe->i_blkno, inode->i_ino);
417 make_bad_inode(inode);
418 goto bail; 484 goto bail;
419 } 485 }
420 486
421 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 487 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
422 488
423 if (sysfile)
424 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
425
426 status = 0; 489 status = 0;
427 490
428bail: 491bail:
492 if (can_lock)
493 ocfs2_meta_unlock(inode, 0);
494
495 if (status < 0)
496 make_bad_inode(inode);
497
429 if (args && bh) 498 if (args && bh)
430 brelse(bh); 499 brelse(bh);
431 500
@@ -898,9 +967,15 @@ void ocfs2_delete_inode(struct inode *inode)
898 goto bail_unlock_inode; 967 goto bail_unlock_inode;
899 } 968 }
900 969
901 /* Mark the inode as successfully deleted. This is important 970 /*
902 * for ocfs2_clear_inode as it will check this flag and skip 971 * Mark the inode as successfully deleted.
903 * any checkpointing work */ 972 *
973 * This is important for ocfs2_clear_inode() as it will check
974 * this flag and skip any checkpointing work
975 *
976 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
977 * the LVB for other nodes.
978 */
904 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 979 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
905 980
906bail_unlock_inode: 981bail_unlock_inode:
@@ -1025,12 +1100,10 @@ void ocfs2_drop_inode(struct inode *inode)
1025 /* Testing ip_orphaned_slot here wouldn't work because we may 1100 /* Testing ip_orphaned_slot here wouldn't work because we may
1026 * not have gotten a delete_inode vote from any other nodes 1101 * not have gotten a delete_inode vote from any other nodes
1027 * yet. */ 1102 * yet. */
1028 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { 1103 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1029 mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); 1104 generic_delete_inode(inode);
1030 inode->i_nlink = 0; 1105 else
1031 } 1106 generic_drop_inode(inode);
1032
1033 generic_drop_inode(inode);
1034 1107
1035 mlog_exit_void(); 1108 mlog_exit_void();
1036} 1109}
@@ -1184,8 +1257,6 @@ leave:
1184void ocfs2_refresh_inode(struct inode *inode, 1257void ocfs2_refresh_inode(struct inode *inode,
1185 struct ocfs2_dinode *fe) 1258 struct ocfs2_dinode *fe)
1186{ 1259{
1187 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1188
1189 spin_lock(&OCFS2_I(inode)->ip_lock); 1260 spin_lock(&OCFS2_I(inode)->ip_lock);
1190 1261
1191 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1262 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -1196,7 +1267,6 @@ void ocfs2_refresh_inode(struct inode *inode,
1196 inode->i_uid = le32_to_cpu(fe->i_uid); 1267 inode->i_uid = le32_to_cpu(fe->i_uid);
1197 inode->i_gid = le32_to_cpu(fe->i_gid); 1268 inode->i_gid = le32_to_cpu(fe->i_gid);
1198 inode->i_mode = le16_to_cpu(fe->i_mode); 1269 inode->i_mode = le16_to_cpu(fe->i_mode);
1199 inode->i_blksize = (u32) osb->s_clustersize;
1200 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1270 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1201 inode->i_blocks = 0; 1271 inode->i_blocks = 0;
1202 else 1272 else
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 4d1e53992566..9957810fdf85 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
122void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
123void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
124void ocfs2_drop_inode(struct inode *inode); 124void ocfs2_drop_inode(struct inode *inode);
125struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); 125
126/* Flags for ocfs2_iget() */
127#define OCFS2_FI_FLAG_NOWAIT 0x1
128#define OCFS2_FI_FLAG_DELETE 0x2
129#define OCFS2_FI_FLAG_SYSFILE 0x4
130#define OCFS2_FI_FLAG_NOLOCK 0x8
131struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
126struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 132struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
127 u64 blkno, 133 u64 blkno,
128 int delete_vote); 134 int delete_vote);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1dd379a..fd9734def551 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1493 if (de->name_len == 2 && !strncmp("..", de->name, 2)) 1493 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1494 continue; 1494 continue;
1495 1495
1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); 1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1497 OCFS2_FI_FLAG_NOLOCK);
1497 if (IS_ERR(iter)) 1498 if (IS_ERR(iter))
1498 continue; 1499 continue;
1499 1500
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0d3e939b1f56..849c3b4bb94a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
179 if (status < 0) 179 if (status < 0)
180 goto bail_add; 180 goto bail_add;
181 181
182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
183 if (IS_ERR(inode)) { 183 if (IS_ERR(inode)) {
184 mlog(ML_ERROR, "Unable to create inode %llu\n", 184 mlog(ML_ERROR, "Unable to create inode %llu\n",
185 (unsigned long long)blkno); 185 (unsigned long long)blkno);
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
199 spin_unlock(&oi->ip_lock); 199 spin_unlock(&oi->ip_lock);
200 200
201bail_add: 201bail_add:
202
203 dentry->d_op = &ocfs2_dentry_ops; 202 dentry->d_op = &ocfs2_dentry_ops;
204 ret = d_splice_alias(inode, dentry); 203 ret = d_splice_alias(inode, dentry);
205 204
205 if (inode) {
206 /*
207 * If d_splice_alias() finds a DCACHE_DISCONNECTED
208 * dentry, it will d_move() it on top of ourse. The
209 * return value will indicate this however, so in
210 * those cases, we switch them around for the locking
211 * code.
212 *
213 * NOTE: This dentry already has ->d_op set from
214 * ocfs2_get_parent() and ocfs2_get_dentry()
215 */
216 if (ret)
217 dentry = ret;
218
219 status = ocfs2_dentry_attach_lock(dentry, inode,
220 OCFS2_I(dir)->ip_blkno);
221 if (status) {
222 mlog_errno(status);
223 ret = ERR_PTR(status);
224 goto bail_unlock;
225 }
226 }
227
206bail_unlock: 228bail_unlock:
207 /* Don't drop the cluster lock until *after* the d_add -- 229 /* Don't drop the cluster lock until *after* the d_add --
208 * unlink on another node will message us to remove that 230 * unlink on another node will message us to remove that
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
418 goto leave; 440 goto leave;
419 } 441 }
420 442
443 status = ocfs2_dentry_attach_lock(dentry, inode,
444 OCFS2_I(dir)->ip_blkno);
445 if (status) {
446 mlog_errno(status);
447 goto leave;
448 }
449
421 insert_inode_hash(inode); 450 insert_inode_hash(inode);
422 dentry->d_op = &ocfs2_dentry_ops; 451 dentry->d_op = &ocfs2_dentry_ops;
423 d_instantiate(dentry, inode); 452 d_instantiate(dentry, inode);
@@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry,
725 goto bail; 754 goto bail;
726 } 755 }
727 756
757 err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
758 if (err) {
759 mlog_errno(err);
760 goto bail;
761 }
762
728 atomic_inc(&inode->i_count); 763 atomic_inc(&inode->i_count);
729 dentry->d_op = &ocfs2_dentry_ops; 764 dentry->d_op = &ocfs2_dentry_ops;
730 d_instantiate(dentry, inode); 765 d_instantiate(dentry, inode);
@@ -743,6 +778,23 @@ bail:
743 return err; 778 return err;
744} 779}
745 780
781/*
782 * Takes and drops an exclusive lock on the given dentry. This will
783 * force other nodes to drop it.
784 */
785static int ocfs2_remote_dentry_delete(struct dentry *dentry)
786{
787 int ret;
788
789 ret = ocfs2_dentry_lock(dentry, 1);
790 if (ret)
791 mlog_errno(ret);
792 else
793 ocfs2_dentry_unlock(dentry, 1);
794
795 return ret;
796}
797
746static int ocfs2_unlink(struct inode *dir, 798static int ocfs2_unlink(struct inode *dir,
747 struct dentry *dentry) 799 struct dentry *dentry)
748{ 800{
@@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
832 else 884 else
833 inode->i_nlink--; 885 inode->i_nlink--;
834 886
835 status = ocfs2_request_unlink_vote(inode, dentry, 887 status = ocfs2_remote_dentry_delete(dentry);
836 (unsigned int) inode->i_nlink);
837 if (status < 0) { 888 if (status < 0) {
838 /* This vote should succeed under all normal 889 /* This vote should succeed under all normal
839 * circumstances. */ 890 * circumstances. */
@@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir,
1019 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, 1070 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1020 // this is the 1st dirent bh 1071 // this is the 1st dirent bh
1021 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; 1072 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1022 unsigned int links_count;
1023 1073
1024 /* At some point it might be nice to break this function up a 1074 /* At some point it might be nice to break this function up a
1025 * bit. */ 1075 * bit. */
@@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir,
1093 } 1143 }
1094 } 1144 }
1095 1145
1096 if (S_ISDIR(old_inode->i_mode)) { 1146 /*
1097 /* Directories actually require metadata updates to 1147 * Though we don't require an inode meta data update if
1098 * the directory info so we can't get away with not 1148 * old_inode is not a directory, we lock anyway here to ensure
1099 * doing node locking on it. */ 1149 * the vote thread on other nodes won't have to concurrently
1100 status = ocfs2_meta_lock(old_inode, handle, NULL, 1); 1150 * downconvert the inode and the dentry locks.
1101 if (status < 0) { 1151 */
1102 if (status != -ENOENT) 1152 status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
1103 mlog_errno(status); 1153 if (status < 0) {
1104 goto bail; 1154 if (status != -ENOENT)
1105 }
1106
1107 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1108 if (status < 0) {
1109 mlog_errno(status); 1155 mlog_errno(status);
1110 goto bail; 1156 goto bail;
1111 } 1157 }
1158
1159 status = ocfs2_remote_dentry_delete(old_dentry);
1160 if (status < 0) {
1161 mlog_errno(status);
1162 goto bail;
1163 }
1112 1164
1165 if (S_ISDIR(old_inode->i_mode)) {
1113 status = -EIO; 1166 status = -EIO;
1114 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); 1167 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1115 if (!old_inode_de_bh) 1168 if (!old_inode_de_bh)
@@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir,
1123 if (!new_inode && new_dir!=old_dir && 1176 if (!new_inode && new_dir!=old_dir &&
1124 new_dir->i_nlink >= OCFS2_LINK_MAX) 1177 new_dir->i_nlink >= OCFS2_LINK_MAX)
1125 goto bail; 1178 goto bail;
1126 } else {
1127 /* Ah, the simple case - we're a file so just send a
1128 * message. */
1129 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1130 if (status < 0) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 } 1179 }
1135 1180
1136 status = -ENOENT; 1181 status = -ENOENT;
@@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir,
1202 goto bail; 1247 goto bail;
1203 } 1248 }
1204 1249
1205 if (S_ISDIR(new_inode->i_mode)) 1250 status = ocfs2_remote_dentry_delete(new_dentry);
1206 links_count = 0;
1207 else
1208 links_count = (unsigned int) (new_inode->i_nlink - 1);
1209
1210 status = ocfs2_request_unlink_vote(new_inode, new_dentry,
1211 links_count);
1212 if (status < 0) { 1251 if (status < 0) {
1213 mlog_errno(status); 1252 mlog_errno(status);
1214 goto bail; 1253 goto bail;
@@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir,
1387 } 1426 }
1388 } 1427 }
1389 1428
1429 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1390 status = 0; 1430 status = 0;
1391bail: 1431bail:
1392 if (rename_lock) 1432 if (rename_lock)
@@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir,
1675 goto bail; 1715 goto bail;
1676 } 1716 }
1677 1717
1718 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1719 if (status) {
1720 mlog_errno(status);
1721 goto bail;
1722 }
1723
1678 insert_inode_hash(inode); 1724 insert_inode_hash(inode);
1679 dentry->d_op = &ocfs2_dentry_ops; 1725 dentry->d_op = &ocfs2_dentry_ops;
1680 d_instantiate(dentry, inode); 1726 d_instantiate(dentry, inode);
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 7dd9e1e705b0..4d5d5655c185 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
35#define OCFS2_LOCK_ID_MAX_LEN 32 35#define OCFS2_LOCK_ID_MAX_LEN 32
36#define OCFS2_LOCK_ID_PAD "000000" 36#define OCFS2_LOCK_ID_PAD "000000"
37 37
38#define OCFS2_DENTRY_LOCK_INO_START 18
39
38enum ocfs2_lock_type { 40enum ocfs2_lock_type {
39 OCFS2_LOCK_TYPE_META = 0, 41 OCFS2_LOCK_TYPE_META = 0,
40 OCFS2_LOCK_TYPE_DATA, 42 OCFS2_LOCK_TYPE_DATA,
41 OCFS2_LOCK_TYPE_SUPER, 43 OCFS2_LOCK_TYPE_SUPER,
42 OCFS2_LOCK_TYPE_RENAME, 44 OCFS2_LOCK_TYPE_RENAME,
43 OCFS2_LOCK_TYPE_RW, 45 OCFS2_LOCK_TYPE_RW,
46 OCFS2_LOCK_TYPE_DENTRY,
44 OCFS2_NUM_LOCK_TYPES 47 OCFS2_NUM_LOCK_TYPES
45}; 48};
46 49
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
63 case OCFS2_LOCK_TYPE_RW: 66 case OCFS2_LOCK_TYPE_RW:
64 c = 'W'; 67 c = 'W';
65 break; 68 break;
69 case OCFS2_LOCK_TYPE_DENTRY:
70 c = 'N';
71 break;
66 default: 72 default:
67 c = '\0'; 73 c = '\0';
68 } 74 }
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
70 return c; 76 return c;
71} 77}
72 78
79static char *ocfs2_lock_type_strings[] = {
80 [OCFS2_LOCK_TYPE_META] = "Meta",
81 [OCFS2_LOCK_TYPE_DATA] = "Data",
82 [OCFS2_LOCK_TYPE_SUPER] = "Super",
83 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
84 /* Need to differntiate from [R]ename.. serializing writes is the
85 * important job it does, anyway. */
86 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
87 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
88};
89
90static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
91{
92#ifdef __KERNEL__
93 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
94#endif
95 return ocfs2_lock_type_strings[type];
96}
97
73#endif /* OCFS2_LOCKID_H */ 98#endif /* OCFS2_LOCKID_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d17e33e66a1e..4c29cd7cc8e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
202 202
203 mlog_entry_void(); 203 mlog_entry_void();
204 204
205 new = ocfs2_iget(osb, osb->root_blkno); 205 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
206 if (IS_ERR(new)) { 206 if (IS_ERR(new)) {
207 status = PTR_ERR(new); 207 status = PTR_ERR(new);
208 mlog_errno(status); 208 mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
210 } 210 }
211 osb->root_inode = new; 211 osb->root_inode = new;
212 212
213 new = ocfs2_iget(osb, osb->system_dir_blkno); 213 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
214 if (IS_ERR(new)) { 214 if (IS_ERR(new)) {
215 status = PTR_ERR(new); 215 status = PTR_ERR(new);
216 mlog_errno(status); 216 mlog_errno(status);
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
682 .kill_sb = kill_block_super, /* set to the generic one 682 .kill_sb = kill_block_super, /* set to the generic one
683 * right now, but do we 683 * right now, but do we
684 * need to change that? */ 684 * need to change that? */
685 .fs_flags = FS_REQUIRES_DEV, 685 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
686 .next = NULL 686 .next = NULL
687}; 687};
688 688
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..5df6e35d09b1 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30 30
31#include "ocfs2.h"
32
33#define MLOG_MASK_PREFIX ML_INODE 31#define MLOG_MASK_PREFIX ML_INODE
34#include <cluster/masklog.h> 32#include <cluster/masklog.h>
35 33
34#include "ocfs2.h"
35
36#include "alloc.h" 36#include "alloc.h"
37#include "dir.h" 37#include "dir.h"
38#include "inode.h" 38#include "inode.h"
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
115 goto bail; 115 goto bail;
116 } 116 }
117 117
118 inode = ocfs2_iget(osb, blkno); 118 inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
119 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
120 mlog_errno(PTR_ERR(inode)); 120 mlog_errno(PTR_ERR(inode));
121 inode = NULL; 121 inode = NULL;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index cf70fe2075b8..5b4dca79990b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
74 __be32 v_orphaned_slot; /* Used during delete votes */ 74 __be32 v_orphaned_slot; /* Used during delete votes */
75 __be32 v_nlink; /* Used during unlink votes */ 75 __be32 v_nlink; /* Used during unlink votes */
76 } md1; /* Message type dependant 1 */ 76 } md1; /* Message type dependant 1 */
77 __be32 v_unlink_namelen;
78 __be64 v_unlink_parent;
79 u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
80}; 77};
81 78
82/* Responses are given these values to maintain backwards 79/* Responses are given these values to maintain backwards
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
100enum ocfs2_vote_request { 97enum ocfs2_vote_request {
101 OCFS2_VOTE_REQ_INVALID = 0, 98 OCFS2_VOTE_REQ_INVALID = 0,
102 OCFS2_VOTE_REQ_DELETE, 99 OCFS2_VOTE_REQ_DELETE,
103 OCFS2_VOTE_REQ_UNLINK,
104 OCFS2_VOTE_REQ_RENAME,
105 OCFS2_VOTE_REQ_MOUNT, 100 OCFS2_VOTE_REQ_MOUNT,
106 OCFS2_VOTE_REQ_UMOUNT, 101 OCFS2_VOTE_REQ_UMOUNT,
107 OCFS2_VOTE_REQ_LAST 102 OCFS2_VOTE_REQ_LAST
@@ -261,103 +256,13 @@ done:
261 return response; 256 return response;
262} 257}
263 258
264static int ocfs2_match_dentry(struct dentry *dentry,
265 u64 parent_blkno,
266 unsigned int namelen,
267 const char *name)
268{
269 struct inode *parent;
270
271 if (!dentry->d_parent) {
272 mlog(0, "Detached from parent.\n");
273 return 0;
274 }
275
276 parent = dentry->d_parent->d_inode;
277 /* Negative parent dentry? */
278 if (!parent)
279 return 0;
280
281 /* Name is in a different directory. */
282 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
283 return 0;
284
285 if (dentry->d_name.len != namelen)
286 return 0;
287
288 /* comparison above guarantees this is safe. */
289 if (memcmp(dentry->d_name.name, name, namelen))
290 return 0;
291
292 return 1;
293}
294
295static void ocfs2_process_dentry_request(struct inode *inode,
296 int rename,
297 unsigned int new_nlink,
298 u64 parent_blkno,
299 unsigned int namelen,
300 const char *name)
301{
302 struct dentry *dentry = NULL;
303 struct list_head *p;
304 struct ocfs2_inode_info *oi = OCFS2_I(inode);
305
306 mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
307 (unsigned long long)parent_blkno, namelen, namelen, name);
308
309 spin_lock(&dcache_lock);
310
311 /* Another node is removing this name from the system. It is
312 * up to us to find the corresponding dentry and if it exists,
313 * unhash it from the dcache. */
314 list_for_each(p, &inode->i_dentry) {
315 dentry = list_entry(p, struct dentry, d_alias);
316
317 if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
318 mlog(0, "dentry found: %.*s\n",
319 dentry->d_name.len, dentry->d_name.name);
320
321 dget_locked(dentry);
322 break;
323 }
324
325 dentry = NULL;
326 }
327
328 spin_unlock(&dcache_lock);
329
330 if (dentry) {
331 d_delete(dentry);
332 dput(dentry);
333 }
334
335 /* rename votes don't send link counts */
336 if (!rename) {
337 mlog(0, "new_nlink = %u\n", new_nlink);
338
339 /* We don't have the proper locks here to directly
340 * change i_nlink and besides, the vote is sent
341 * *before* the operation so it may have failed on the
342 * other node. This passes a hint to ocfs2_drop_inode
343 * to force ocfs2_delete_inode, who will take the
344 * proper cluster locks to sort things out. */
345 if (new_nlink == 0) {
346 spin_lock(&oi->ip_lock);
347 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
348 spin_unlock(&OCFS2_I(inode)->ip_lock);
349 }
350 }
351}
352
353static void ocfs2_process_vote(struct ocfs2_super *osb, 259static void ocfs2_process_vote(struct ocfs2_super *osb,
354 struct ocfs2_vote_msg *msg) 260 struct ocfs2_vote_msg *msg)
355{ 261{
356 int net_status, vote_response; 262 int net_status, vote_response;
357 int orphaned_slot = 0; 263 int orphaned_slot = 0;
358 int rename = 0; 264 unsigned int node_num, generation;
359 unsigned int node_num, generation, new_nlink, namelen; 265 u64 blkno;
360 u64 blkno, parent_blkno;
361 enum ocfs2_vote_request request; 266 enum ocfs2_vote_request request;
362 struct inode *inode = NULL; 267 struct inode *inode = NULL;
363 struct ocfs2_msg_hdr *hdr = &msg->v_hdr; 268 struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
437 vote_response = ocfs2_process_delete_request(inode, 342 vote_response = ocfs2_process_delete_request(inode,
438 &orphaned_slot); 343 &orphaned_slot);
439 break; 344 break;
440 case OCFS2_VOTE_REQ_RENAME:
441 rename = 1;
442 /* fall through */
443 case OCFS2_VOTE_REQ_UNLINK:
444 parent_blkno = be64_to_cpu(msg->v_unlink_parent);
445 namelen = be32_to_cpu(msg->v_unlink_namelen);
446 /* new_nlink will be ignored in case of a rename vote */
447 new_nlink = be32_to_cpu(msg->md1.v_nlink);
448 ocfs2_process_dentry_request(inode, rename, new_nlink,
449 parent_blkno, namelen,
450 msg->v_unlink_dirent);
451 break;
452 default: 345 default:
453 mlog(ML_ERROR, "node %u, invalid request: %u\n", 346 mlog(ML_ERROR, "node %u, invalid request: %u\n",
454 node_num, request); 347 node_num, request);
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
889 return status; 782 return status;
890} 783}
891 784
892static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
893 struct dentry *dentry)
894{
895 struct inode *parent = dentry->d_parent->d_inode;
896
897 /* We need some values which will uniquely identify a dentry
898 * on the other nodes so that they can find it and run
899 * d_delete against it. Parent directory block and full name
900 * should suffice. */
901
902 mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
903 (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
904 dentry->d_name.name);
905
906 request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
907 request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
908 memcpy(request->v_unlink_dirent, dentry->d_name.name,
909 dentry->d_name.len);
910}
911
912int ocfs2_request_unlink_vote(struct inode *inode,
913 struct dentry *dentry,
914 unsigned int nlink)
915{
916 int status;
917 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
918 struct ocfs2_vote_msg *request;
919
920 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
921 return -ENAMETOOLONG;
922
923 status = -ENOMEM;
924 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
925 inode->i_generation,
926 OCFS2_VOTE_REQ_UNLINK, nlink);
927 if (request) {
928 ocfs2_setup_unlink_vote(request, dentry);
929
930 status = ocfs2_request_vote(inode, request, NULL);
931
932 kfree(request);
933 }
934 return status;
935}
936
937int ocfs2_request_rename_vote(struct inode *inode,
938 struct dentry *dentry)
939{
940 int status;
941 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
942 struct ocfs2_vote_msg *request;
943
944 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
945 return -ENAMETOOLONG;
946
947 status = -ENOMEM;
948 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
949 inode->i_generation,
950 OCFS2_VOTE_REQ_RENAME, 0);
951 if (request) {
952 ocfs2_setup_unlink_vote(request, dentry);
953
954 status = ocfs2_request_vote(inode, request, NULL);
955
956 kfree(request);
957 }
958 return status;
959}
960
961int ocfs2_request_mount_vote(struct ocfs2_super *osb) 785int ocfs2_request_mount_vote(struct ocfs2_super *osb)
962{ 786{
963 int status; 787 int status;
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h
index 9cce60703466..53ebc1c69e56 100644
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
39} 39}
40 40
41int ocfs2_request_delete_vote(struct inode *inode); 41int ocfs2_request_delete_vote(struct inode *inode);
42int ocfs2_request_unlink_vote(struct inode *inode,
43 struct dentry *dentry,
44 unsigned int nlink);
45int ocfs2_request_rename_vote(struct inode *inode,
46 struct dentry *dentry);
47int ocfs2_request_mount_vote(struct ocfs2_super *osb); 42int ocfs2_request_mount_vote(struct ocfs2_super *osb);
48int ocfs2_request_umount_vote(struct ocfs2_super *osb); 43int ocfs2_request_umount_vote(struct ocfs2_super *osb);
49int ocfs2_register_net_handlers(struct ocfs2_super *osb); 44int ocfs2_register_net_handlers(struct ocfs2_super *osb);
diff --git a/fs/open.c b/fs/open.c
index 303f06d2a7b9..304c1c7814cb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename)
546 struct nameidata nd; 546 struct nameidata nd;
547 int error; 547 int error;
548 548
549 error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); 549 error = __user_walk(filename,
550 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
550 if (error) 551 if (error)
551 goto out; 552 goto out;
552 553
@@ -1172,6 +1173,7 @@ asmlinkage long sys_close(unsigned int fd)
1172 struct file * filp; 1173 struct file * filp;
1173 struct files_struct *files = current->files; 1174 struct files_struct *files = current->files;
1174 struct fdtable *fdt; 1175 struct fdtable *fdt;
1176 int retval;
1175 1177
1176 spin_lock(&files->file_lock); 1178 spin_lock(&files->file_lock);
1177 fdt = files_fdtable(files); 1179 fdt = files_fdtable(files);
@@ -1184,7 +1186,16 @@ asmlinkage long sys_close(unsigned int fd)
1184 FD_CLR(fd, fdt->close_on_exec); 1186 FD_CLR(fd, fdt->close_on_exec);
1185 __put_unused_fd(files, fd); 1187 __put_unused_fd(files, fd);
1186 spin_unlock(&files->file_lock); 1188 spin_unlock(&files->file_lock);
1187 return filp_close(filp, files); 1189 retval = filp_close(filp, files);
1190
1191 /* can't restart close syscall because file table entry was cleared */
1192 if (unlikely(retval == -ERESTARTSYS ||
1193 retval == -ERESTARTNOINTR ||
1194 retval == -ERESTARTNOHAND ||
1195 retval == -ERESTART_RESTARTBLOCK))
1196 retval = -EINTR;
1197
1198 return retval;
1188 1199
1189out_unlock: 1200out_unlock:
1190 spin_unlock(&files->file_lock); 1201 spin_unlock(&files->file_lock);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 93a56bd4a2b7..592a6402e851 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -8,10 +8,10 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/openprom_fs.h>
12#include <linux/init.h> 11#include <linux/init.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/magic.h>
15 15
16#include <asm/openprom.h> 16#include <asm/openprom.h>
17#include <asm/oplib.h> 17#include <asm/oplib.h>
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 63730282ad81..1bea610078b3 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -238,10 +238,9 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
238 le32_to_cpu(gpt->sizeof_partition_entry); 238 le32_to_cpu(gpt->sizeof_partition_entry);
239 if (!count) 239 if (!count)
240 return NULL; 240 return NULL;
241 pte = kmalloc(count, GFP_KERNEL); 241 pte = kzalloc(count, GFP_KERNEL);
242 if (!pte) 242 if (!pte)
243 return NULL; 243 return NULL;
244 memset(pte, 0, count);
245 244
246 if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba), 245 if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba),
247 (u8 *) pte, 246 (u8 *) pte,
@@ -269,10 +268,9 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
269 if (!bdev) 268 if (!bdev)
270 return NULL; 269 return NULL;
271 270
272 gpt = kmalloc(sizeof (gpt_header), GFP_KERNEL); 271 gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL);
273 if (!gpt) 272 if (!gpt)
274 return NULL; 273 return NULL;
275 memset(gpt, 0, sizeof (gpt_header));
276 274
277 if (read_lba(bdev, lba, (u8 *) gpt, 275 if (read_lba(bdev, lba, (u8 *) gpt,
278 sizeof (gpt_header)) < sizeof (gpt_header)) { 276 sizeof (gpt_header)) < sizeof (gpt_header)) {
@@ -526,9 +524,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
526 lastlba = last_lba(bdev); 524 lastlba = last_lba(bdev);
527 if (!force_gpt) { 525 if (!force_gpt) {
528 /* This will be added to the EFI Spec. per Intel after v1.02. */ 526 /* This will be added to the EFI Spec. per Intel after v1.02. */
529 legacymbr = kmalloc(sizeof (*legacymbr), GFP_KERNEL); 527 legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
530 if (legacymbr) { 528 if (legacymbr) {
531 memset(legacymbr, 0, sizeof (*legacymbr));
532 read_lba(bdev, 0, (u8 *) legacymbr, 529 read_lba(bdev, 0, (u8 *) legacymbr,
533 sizeof (*legacymbr)); 530 sizeof (*legacymbr));
534 good_pmbr = is_pmbr_valid(legacymbr, lastlba); 531 good_pmbr = is_pmbr_valid(legacymbr, lastlba);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 8f12587c3129..4f8df71e49d3 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -58,6 +58,31 @@ msdos_magic_present(unsigned char *p)
58 return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); 58 return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2);
59} 59}
60 60
61/* Value is EBCDIC 'IBMA' */
62#define AIX_LABEL_MAGIC1 0xC9
63#define AIX_LABEL_MAGIC2 0xC2
64#define AIX_LABEL_MAGIC3 0xD4
65#define AIX_LABEL_MAGIC4 0xC1
66static int aix_magic_present(unsigned char *p, struct block_device *bdev)
67{
68 Sector sect;
69 unsigned char *d;
70 int ret = 0;
71
72 if (p[0] != AIX_LABEL_MAGIC1 &&
73 p[1] != AIX_LABEL_MAGIC2 &&
74 p[2] != AIX_LABEL_MAGIC3 &&
75 p[3] != AIX_LABEL_MAGIC4)
76 return 0;
77 d = read_dev_sector(bdev, 7, &sect);
78 if (d) {
79 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
80 ret = 1;
81 put_dev_sector(sect);
82 };
83 return ret;
84}
85
61/* 86/*
62 * Create devices for each logical partition in an extended partition. 87 * Create devices for each logical partition in an extended partition.
63 * The logical partitions form a linked list, with each entry being 88 * The logical partitions form a linked list, with each entry being
@@ -393,6 +418,12 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
393 return 0; 418 return 0;
394 } 419 }
395 420
421 if (aix_magic_present(data, bdev)) {
422 put_dev_sector(sect);
423 printk( " [AIX]");
424 return 0;
425 }
426
396 /* 427 /*
397 * Now that the 55aa signature is present, this is probably 428 * Now that the 55aa signature is present, this is probably
398 * either the boot sector of a FAT filesystem or a DOS-type 429 * either the boot sector of a FAT filesystem or a DOS-type
diff --git a/fs/pipe.c b/fs/pipe.c
index 20352573e025..f3b6f71e9d0b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -879,7 +879,6 @@ static struct inode * get_pipe_inode(void)
879 inode->i_uid = current->fsuid; 879 inode->i_uid = current->fsuid;
880 inode->i_gid = current->fsgid; 880 inode->i_gid = current->fsgid;
881 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 881 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
882 inode->i_blksize = PAGE_SIZE;
883 882
884 return inode; 883 return inode;
885 884
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0b615d62a159..c0e554971df0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -347,6 +347,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
347 sigemptyset(&sigign); 347 sigemptyset(&sigign);
348 sigemptyset(&sigcatch); 348 sigemptyset(&sigcatch);
349 cutime = cstime = utime = stime = cputime_zero; 349 cutime = cstime = utime = stime = cputime_zero;
350
351 mutex_lock(&tty_mutex);
350 read_lock(&tasklist_lock); 352 read_lock(&tasklist_lock);
351 if (task->sighand) { 353 if (task->sighand) {
352 spin_lock_irq(&task->sighand->siglock); 354 spin_lock_irq(&task->sighand->siglock);
@@ -388,6 +390,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
388 } 390 }
389 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; 391 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
390 read_unlock(&tasklist_lock); 392 read_unlock(&tasklist_lock);
393 mutex_unlock(&tty_mutex);
391 394
392 if (!whole || num_threads<2) 395 if (!whole || num_threads<2)
393 wchan = get_wchan(task); 396 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fe8d55fb17cc..89c20d9d50bf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -797,7 +797,7 @@ out_no_task:
797static ssize_t mem_write(struct file * file, const char * buf, 797static ssize_t mem_write(struct file * file, const char * buf,
798 size_t count, loff_t *ppos) 798 size_t count, loff_t *ppos)
799{ 799{
800 int copied = 0; 800 int copied;
801 char *page; 801 char *page;
802 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 802 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
803 unsigned long dst = *ppos; 803 unsigned long dst = *ppos;
@@ -814,6 +814,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
814 if (!page) 814 if (!page)
815 goto out; 815 goto out;
816 816
817 copied = 0;
817 while (count > 0) { 818 while (count > 0) {
818 int this_len, retval; 819 int this_len, retval;
819 820
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 146a434ba944..987c773dbb20 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -28,6 +28,7 @@ do { \
28 (vmi)->largest_chunk = 0; \ 28 (vmi)->largest_chunk = 0; \
29} while(0) 29} while(0)
30 30
31extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
31#endif 32#endif
32 33
33extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); 34extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6a984f64edd7..1294eda4acae 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -100,7 +100,7 @@ static int notesize(struct memelfnote *en)
100 int sz; 100 int sz;
101 101
102 sz = sizeof(struct elf_note); 102 sz = sizeof(struct elf_note);
103 sz += roundup(strlen(en->name), 4); 103 sz += roundup((strlen(en->name) + 1), 4);
104 sz += roundup(en->datasz, 4); 104 sz += roundup(en->datasz, 4);
105 105
106 return sz; 106 return sz;
@@ -116,7 +116,7 @@ static char *storenote(struct memelfnote *men, char *bufp)
116 116
117#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) 117#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
118 118
119 en.n_namesz = strlen(men->name); 119 en.n_namesz = strlen(men->name) + 1;
120 en.n_descsz = men->datasz; 120 en.n_descsz = men->datasz;
121 en.n_type = men->type; 121 en.n_type = men->type;
122 122
@@ -279,12 +279,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
279 tsz = elf_buflen - *fpos; 279 tsz = elf_buflen - *fpos;
280 if (buflen < tsz) 280 if (buflen < tsz)
281 tsz = buflen; 281 tsz = buflen;
282 elf_buf = kmalloc(elf_buflen, GFP_ATOMIC); 282 elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
283 if (!elf_buf) { 283 if (!elf_buf) {
284 read_unlock(&kclist_lock); 284 read_unlock(&kclist_lock);
285 return -ENOMEM; 285 return -ENOMEM;
286 } 286 }
287 memset(elf_buf, 0, elf_buflen);
288 elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen); 287 elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
289 read_unlock(&kclist_lock); 288 read_unlock(&kclist_lock);
290 if (copy_to_user(buffer, elf_buf + *fpos, tsz)) { 289 if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
@@ -330,10 +329,9 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
330 unsigned long curstart = start; 329 unsigned long curstart = start;
331 unsigned long cursize = tsz; 330 unsigned long cursize = tsz;
332 331
333 elf_buf = kmalloc(tsz, GFP_KERNEL); 332 elf_buf = kzalloc(tsz, GFP_KERNEL);
334 if (!elf_buf) 333 if (!elf_buf)
335 return -ENOMEM; 334 return -ENOMEM;
336 memset(elf_buf, 0, tsz);
337 335
338 read_lock(&vmlist_lock); 336 read_lock(&vmlist_lock);
339 for (m=vmlist; m && cursize; m=m->next) { 337 for (m=vmlist; m && cursize; m=m->next) {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index cff10ab1af63..d7dbdf9e0f49 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,19 +33,15 @@
33#include "internal.h" 33#include "internal.h"
34 34
35/* 35/*
36 * display a list of all the VMAs the kernel knows about 36 * display a single VMA to a sequenced file
37 * - nommu kernals have a single flat list
38 */ 37 */
39static int nommu_vma_list_show(struct seq_file *m, void *v) 38int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
40{ 39{
41 struct vm_area_struct *vma;
42 unsigned long ino = 0; 40 unsigned long ino = 0;
43 struct file *file; 41 struct file *file;
44 dev_t dev = 0; 42 dev_t dev = 0;
45 int flags, len; 43 int flags, len;
46 44
47 vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
48
49 flags = vma->vm_flags; 45 flags = vma->vm_flags;
50 file = vma->vm_file; 46 file = vma->vm_file;
51 47
@@ -78,6 +74,18 @@ static int nommu_vma_list_show(struct seq_file *m, void *v)
78 return 0; 74 return 0;
79} 75}
80 76
77/*
78 * display a list of all the VMAs the kernel knows about
79 * - nommu kernals have a single flat list
80 */
81static int nommu_vma_list_show(struct seq_file *m, void *v)
82{
83 struct vm_area_struct *vma;
84
85 vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
86 return nommu_vma_show(m, vma);
87}
88
81static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) 89static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
82{ 90{
83 struct rb_node *_rb; 91 struct rb_node *_rb;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 942156225447..5bbd60896050 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
157 "SwapCached: %8lu kB\n" 157 "SwapCached: %8lu kB\n"
158 "Active: %8lu kB\n" 158 "Active: %8lu kB\n"
159 "Inactive: %8lu kB\n" 159 "Inactive: %8lu kB\n"
160#ifdef CONFIG_HIGHMEM
160 "HighTotal: %8lu kB\n" 161 "HighTotal: %8lu kB\n"
161 "HighFree: %8lu kB\n" 162 "HighFree: %8lu kB\n"
162 "LowTotal: %8lu kB\n" 163 "LowTotal: %8lu kB\n"
163 "LowFree: %8lu kB\n" 164 "LowFree: %8lu kB\n"
165#endif
164 "SwapTotal: %8lu kB\n" 166 "SwapTotal: %8lu kB\n"
165 "SwapFree: %8lu kB\n" 167 "SwapFree: %8lu kB\n"
166 "Dirty: %8lu kB\n" 168 "Dirty: %8lu kB\n"
@@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
168 "AnonPages: %8lu kB\n" 170 "AnonPages: %8lu kB\n"
169 "Mapped: %8lu kB\n" 171 "Mapped: %8lu kB\n"
170 "Slab: %8lu kB\n" 172 "Slab: %8lu kB\n"
173 "SReclaimable: %8lu kB\n"
174 "SUnreclaim: %8lu kB\n"
171 "PageTables: %8lu kB\n" 175 "PageTables: %8lu kB\n"
172 "NFS_Unstable: %8lu kB\n" 176 "NFS_Unstable: %8lu kB\n"
173 "Bounce: %8lu kB\n" 177 "Bounce: %8lu kB\n"
@@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
183 K(total_swapcache_pages), 187 K(total_swapcache_pages),
184 K(active), 188 K(active),
185 K(inactive), 189 K(inactive),
190#ifdef CONFIG_HIGHMEM
186 K(i.totalhigh), 191 K(i.totalhigh),
187 K(i.freehigh), 192 K(i.freehigh),
188 K(i.totalram-i.totalhigh), 193 K(i.totalram-i.totalhigh),
189 K(i.freeram-i.freehigh), 194 K(i.freeram-i.freehigh),
195#endif
190 K(i.totalswap), 196 K(i.totalswap),
191 K(i.freeswap), 197 K(i.freeswap),
192 K(global_page_state(NR_FILE_DIRTY)), 198 K(global_page_state(NR_FILE_DIRTY)),
193 K(global_page_state(NR_WRITEBACK)), 199 K(global_page_state(NR_WRITEBACK)),
194 K(global_page_state(NR_ANON_PAGES)), 200 K(global_page_state(NR_ANON_PAGES)),
195 K(global_page_state(NR_FILE_MAPPED)), 201 K(global_page_state(NR_FILE_MAPPED)),
196 K(global_page_state(NR_SLAB)), 202 K(global_page_state(NR_SLAB_RECLAIMABLE) +
203 global_page_state(NR_SLAB_UNRECLAIMABLE)),
204 K(global_page_state(NR_SLAB_RECLAIMABLE)),
205 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
197 K(global_page_state(NR_PAGETABLE)), 206 K(global_page_state(NR_PAGETABLE)),
198 K(global_page_state(NR_UNSTABLE_NFS)), 207 K(global_page_state(NR_UNSTABLE_NFS)),
199 K(global_page_state(NR_BOUNCE)), 208 K(global_page_state(NR_BOUNCE)),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0a163a4f7764..6b769afac55a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,11 +122,6 @@ struct mem_size_stats
122 unsigned long private_dirty; 122 unsigned long private_dirty;
123}; 123};
124 124
125__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
126{
127 return NULL;
128}
129
130static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 125static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
131{ 126{
132 struct proc_maps_private *priv = m->private; 127 struct proc_maps_private *priv = m->private;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4616ed50ffcd..091aa8e48e02 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -138,25 +138,63 @@ out:
138} 138}
139 139
140/* 140/*
141 * Albert D. Cahalan suggested to fake entries for the traditional 141 * display mapping lines for a particular process's /proc/pid/maps
142 * sections here. This might be worth investigating.
143 */ 142 */
144static int show_map(struct seq_file *m, void *v) 143static int show_map(struct seq_file *m, void *_vml)
145{ 144{
146 return 0; 145 struct vm_list_struct *vml = _vml;
146 return nommu_vma_show(m, vml->vma);
147} 147}
148
148static void *m_start(struct seq_file *m, loff_t *pos) 149static void *m_start(struct seq_file *m, loff_t *pos)
149{ 150{
151 struct proc_maps_private *priv = m->private;
152 struct vm_list_struct *vml;
153 struct mm_struct *mm;
154 loff_t n = *pos;
155
156 /* pin the task and mm whilst we play with them */
157 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
158 if (!priv->task)
159 return NULL;
160
161 mm = get_task_mm(priv->task);
162 if (!mm) {
163 put_task_struct(priv->task);
164 priv->task = NULL;
165 return NULL;
166 }
167
168 down_read(&mm->mmap_sem);
169
170 /* start from the Nth VMA */
171 for (vml = mm->context.vmlist; vml; vml = vml->next)
172 if (n-- == 0)
173 return vml;
150 return NULL; 174 return NULL;
151} 175}
152static void m_stop(struct seq_file *m, void *v) 176
177static void m_stop(struct seq_file *m, void *_vml)
153{ 178{
179 struct proc_maps_private *priv = m->private;
180
181 if (priv->task) {
182 struct mm_struct *mm = priv->task->mm;
183 up_read(&mm->mmap_sem);
184 mmput(mm);
185 put_task_struct(priv->task);
186 }
154} 187}
155static void *m_next(struct seq_file *m, void *v, loff_t *pos) 188
189static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
156{ 190{
157 return NULL; 191 struct vm_list_struct *vml = _vml;
192
193 (*pos)++;
194 return vml ? vml->next : NULL;
158} 195}
159static struct seq_operations proc_pid_maps_op = { 196
197static struct seq_operations proc_pid_maps_ops = {
160 .start = m_start, 198 .start = m_start,
161 .next = m_next, 199 .next = m_next,
162 .stop = m_stop, 200 .stop = m_stop,
@@ -165,11 +203,19 @@ static struct seq_operations proc_pid_maps_op = {
165 203
166static int maps_open(struct inode *inode, struct file *file) 204static int maps_open(struct inode *inode, struct file *file)
167{ 205{
168 int ret; 206 struct proc_maps_private *priv;
169 ret = seq_open(file, &proc_pid_maps_op); 207 int ret = -ENOMEM;
170 if (!ret) { 208
171 struct seq_file *m = file->private_data; 209 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
172 m->private = NULL; 210 if (priv) {
211 priv->pid = proc_pid(inode);
212 ret = seq_open(file, &proc_pid_maps_ops);
213 if (!ret) {
214 struct seq_file *m = file->private_data;
215 m->private = priv;
216 } else {
217 kfree(priv);
218 }
173 } 219 }
174 return ret; 220 return ret;
175} 221}
@@ -178,6 +224,6 @@ struct file_operations proc_maps_operations = {
178 .open = maps_open, 224 .open = maps_open,
179 .read = seq_read, 225 .read = seq_read,
180 .llseek = seq_lseek, 226 .llseek = seq_lseek,
181 .release = seq_release, 227 .release = seq_release_private,
182}; 228};
183 229
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5a903491e697..5a41db2a218d 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -358,11 +358,10 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
358 const char *errmsg; 358 const char *errmsg;
359 struct qnx4_sb_info *qs; 359 struct qnx4_sb_info *qs;
360 360
361 qs = kmalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); 361 qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL);
362 if (!qs) 362 if (!qs)
363 return -ENOMEM; 363 return -ENOMEM;
364 s->s_fs_info = qs; 364 s->s_fs_info = qs;
365 memset(qs, 0, sizeof(struct qnx4_sb_info));
366 365
367 sb_set_blocksize(s, QNX4_BLOCK_SIZE); 366 sb_set_blocksize(s, QNX4_BLOCK_SIZE);
368 367
@@ -497,7 +496,6 @@ static void qnx4_read_inode(struct inode *inode)
497 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime); 496 inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime);
498 inode->i_ctime.tv_nsec = 0; 497 inode->i_ctime.tv_nsec = 0;
499 inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size); 498 inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
500 inode->i_blksize = QNX4_DIR_ENTRY_SIZE;
501 499
502 memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); 500 memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
503 if (S_ISREG(inode->i_mode)) { 501 if (S_ISREG(inode->i_mode)) {
@@ -557,9 +555,7 @@ static int init_inodecache(void)
557 555
558static void destroy_inodecache(void) 556static void destroy_inodecache(void)
559{ 557{
560 if (kmem_cache_destroy(qnx4_inode_cachep)) 558 kmem_cache_destroy(qnx4_inode_cachep);
561 printk(KERN_INFO
562 "qnx4_inode_cache: not all structures were freed\n");
563} 559}
564 560
565static int qnx4_get_sb(struct file_system_type *fs_type, 561static int qnx4_get_sb(struct file_system_type *fs_type,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b9677335cc8d..bc0e51662424 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,7 +58,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
58 inode->i_mode = mode; 58 inode->i_mode = mode;
59 inode->i_uid = current->fsuid; 59 inode->i_uid = current->fsuid;
60 inode->i_gid = current->fsgid; 60 inode->i_gid = current->fsgid;
61 inode->i_blksize = PAGE_CACHE_SIZE;
62 inode->i_blocks = 0; 61 inode->i_blocks = 0;
63 inode->i_mapping->a_ops = &ramfs_aops; 62 inode->i_mapping->a_ops = &ramfs_aops;
64 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 63 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 3a59309f3ca9..0eb7ac080484 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -28,7 +28,7 @@ endif
28# will work around it. If any other architecture displays this behavior, 28# will work around it. If any other architecture displays this behavior,
29# add it here. 29# add it here.
30ifeq ($(CONFIG_PPC32),y) 30ifeq ($(CONFIG_PPC32),y)
31EXTRA_CFLAGS := -O1 31EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1)
32endif 32endif
33 33
34TAGS: 34TAGS:
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1627edd50810..1cfbe857ba27 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -130,7 +130,7 @@ static int reiserfs_sync_file(struct file *p_s_filp,
130 reiserfs_write_lock(p_s_inode->i_sb); 130 reiserfs_write_lock(p_s_inode->i_sb);
131 barrier_done = reiserfs_commit_for_inode(p_s_inode); 131 barrier_done = reiserfs_commit_for_inode(p_s_inode);
132 reiserfs_write_unlock(p_s_inode->i_sb); 132 reiserfs_write_unlock(p_s_inode->i_sb);
133 if (barrier_done != 1) 133 if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb))
134 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); 134 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
135 if (barrier_done < 0) 135 if (barrier_done < 0)
136 return barrier_done; 136 return barrier_done;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 52f1e2136546..7e5a2f5ebeb0 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -17,8 +17,6 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/quotaops.h> 18#include <linux/quotaops.h>
19 19
20extern int reiserfs_default_io_size; /* default io size devuned in super.c */
21
22static int reiserfs_commit_write(struct file *f, struct page *page, 20static int reiserfs_commit_write(struct file *f, struct page *page,
23 unsigned from, unsigned to); 21 unsigned from, unsigned to);
24static int reiserfs_prepare_write(struct file *f, struct page *page, 22static int reiserfs_prepare_write(struct file *f, struct page *page,
@@ -1122,7 +1120,6 @@ static void init_inode(struct inode *inode, struct path *path)
1122 ih = PATH_PITEM_HEAD(path); 1120 ih = PATH_PITEM_HEAD(path);
1123 1121
1124 copy_key(INODE_PKEY(inode), &(ih->ih_key)); 1122 copy_key(INODE_PKEY(inode), &(ih->ih_key));
1125 inode->i_blksize = reiserfs_default_io_size;
1126 1123
1127 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); 1124 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1128 REISERFS_I(inode)->i_flags = 0; 1125 REISERFS_I(inode)->i_flags = 0;
@@ -1130,9 +1127,9 @@ static void init_inode(struct inode *inode, struct path *path)
1130 REISERFS_I(inode)->i_prealloc_count = 0; 1127 REISERFS_I(inode)->i_prealloc_count = 0;
1131 REISERFS_I(inode)->i_trans_id = 0; 1128 REISERFS_I(inode)->i_trans_id = 0;
1132 REISERFS_I(inode)->i_jl = NULL; 1129 REISERFS_I(inode)->i_jl = NULL;
1133 REISERFS_I(inode)->i_acl_access = NULL; 1130 reiserfs_init_acl_access(inode);
1134 REISERFS_I(inode)->i_acl_default = NULL; 1131 reiserfs_init_acl_default(inode);
1135 init_rwsem(&REISERFS_I(inode)->xattr_sem); 1132 reiserfs_init_xattr_rwsem(inode);
1136 1133
1137 if (stat_data_v1(ih)) { 1134 if (stat_data_v1(ih)) {
1138 struct stat_data_v1 *sd = 1135 struct stat_data_v1 *sd =
@@ -1837,9 +1834,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1837 REISERFS_I(inode)->i_attrs = 1834 REISERFS_I(inode)->i_attrs =
1838 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1835 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1839 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1836 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1840 REISERFS_I(inode)->i_acl_access = NULL; 1837 reiserfs_init_acl_access(inode);
1841 REISERFS_I(inode)->i_acl_default = NULL; 1838 reiserfs_init_acl_default(inode);
1842 init_rwsem(&REISERFS_I(inode)->xattr_sem); 1839 reiserfs_init_xattr_rwsem(inode);
1843 1840
1844 if (old_format_only(sb)) 1841 if (old_format_only(sb))
1845 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, 1842 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
@@ -1877,7 +1874,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1877 } 1874 }
1878 // these do not go to on-disk stat data 1875 // these do not go to on-disk stat data
1879 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); 1876 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1880 inode->i_blksize = reiserfs_default_io_size;
1881 1877
1882 // store in in-core inode the key of stat data and version all 1878 // store in in-core inode the key of stat data and version all
1883 // object items will have (directory items will have old offset 1879 // object items will have (directory items will have old offset
@@ -1978,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1978 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking 1974 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
1979 * code really needs to be reworked, but this will take care of it 1975 * code really needs to be reworked, but this will take care of it
1980 * for now. -jeffm */ 1976 * for now. -jeffm */
1977#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1981 if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { 1978 if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
1982 reiserfs_write_unlock_xattrs(dir->i_sb); 1979 reiserfs_write_unlock_xattrs(dir->i_sb);
1983 iput(inode); 1980 iput(inode);
1984 reiserfs_write_lock_xattrs(dir->i_sb); 1981 reiserfs_write_lock_xattrs(dir->i_sb);
1985 } else 1982 } else
1983#endif
1986 iput(inode); 1984 iput(inode);
1987 return err; 1985 return err;
1988} 1986}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9b3672d69367..e6b5ccf23f15 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1186 return NULL; 1186 return NULL;
1187} 1187}
1188 1188
1189static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1190{
1191 struct super_block *sb = cn->sb;
1192 b_blocknr_t blocknr = cn->blocknr;
1193
1194 cn = cn->hprev;
1195 while (cn) {
1196 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1197 atomic_read(&cn->jlist->j_commit_left) != 0)
1198 return 0;
1199 cn = cn->hprev;
1200 }
1201 return 1;
1202}
1203
1189static void remove_journal_hash(struct super_block *, 1204static void remove_journal_hash(struct super_block *,
1190 struct reiserfs_journal_cnode **, 1205 struct reiserfs_journal_cnode **,
1191 struct reiserfs_journal_list *, unsigned long, 1206 struct reiserfs_journal_list *, unsigned long,
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s,
1604 return err; 1619 return err;
1605} 1620}
1606 1621
1622static int test_transaction(struct super_block *s,
1623 struct reiserfs_journal_list *jl)
1624{
1625 struct reiserfs_journal_cnode *cn;
1626
1627 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1628 return 1;
1629
1630 cn = jl->j_realblock;
1631 while (cn) {
1632 /* if the blocknr == 0, this has been cleared from the hash,
1633 ** skip it
1634 */
1635 if (cn->blocknr == 0) {
1636 goto next;
1637 }
1638 if (cn->bh && !newer_jl_done(cn))
1639 return 0;
1640 next:
1641 cn = cn->next;
1642 cond_resched();
1643 }
1644 return 0;
1645}
1646
1607static int write_one_transaction(struct super_block *s, 1647static int write_one_transaction(struct super_block *s,
1608 struct reiserfs_journal_list *jl, 1648 struct reiserfs_journal_list *jl,
1609 struct buffer_chunk *chunk) 1649 struct buffer_chunk *chunk)
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p)
3433 flush_commit_list(p_s_sb, jl, 1); 3473 flush_commit_list(p_s_sb, jl, 1);
3434 } 3474 }
3435 unlock_kernel(); 3475 unlock_kernel();
3436 /*
3437 * this is a little racey, but there's no harm in missing
3438 * the filemap_fdata_write
3439 */
3440 if (!atomic_read(&journal->j_async_throttle)
3441 && !reiserfs_is_journal_aborted(journal)) {
3442 atomic_inc(&journal->j_async_throttle);
3443 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
3444 atomic_dec(&journal->j_async_throttle);
3445 }
3446} 3476}
3447 3477
3448/* 3478/*
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s)
3844 entry = journal->j_journal_list.next; 3874 entry = journal->j_journal_list.next;
3845 jl = JOURNAL_LIST_ENTRY(entry); 3875 jl = JOURNAL_LIST_ENTRY(entry);
3846 /* this check should always be run, to send old lists to disk */ 3876 /* this check should always be run, to send old lists to disk */
3847 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { 3877 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3878 atomic_read(&jl->j_commit_left) == 0 &&
3879 test_transaction(s, jl)) {
3848 flush_used_journal_lists(s, jl); 3880 flush_used_journal_lists(s, jl);
3849 } else { 3881 } else {
3850 break; 3882 break;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5567328f1041..80fc3b32802f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
510 SLAB_CTOR_CONSTRUCTOR) { 510 SLAB_CTOR_CONSTRUCTOR) {
511 INIT_LIST_HEAD(&ei->i_prealloc_list); 511 INIT_LIST_HEAD(&ei->i_prealloc_list);
512 inode_init_once(&ei->vfs_inode); 512 inode_init_once(&ei->vfs_inode);
513#ifdef CONFIG_REISERFS_FS_POSIX_ACL
513 ei->i_acl_access = NULL; 514 ei->i_acl_access = NULL;
514 ei->i_acl_default = NULL; 515 ei->i_acl_default = NULL;
516#endif
515 } 517 }
516} 518}
517 519
@@ -530,9 +532,7 @@ static int init_inodecache(void)
530 532
531static void destroy_inodecache(void) 533static void destroy_inodecache(void)
532{ 534{
533 if (kmem_cache_destroy(reiserfs_inode_cachep)) 535 kmem_cache_destroy(reiserfs_inode_cachep);
534 reiserfs_warning(NULL,
535 "reiserfs_inode_cache: not all structures were freed");
536} 536}
537 537
538/* we don't mark inodes dirty, we just log them */ 538/* we don't mark inodes dirty, we just log them */
@@ -562,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode)
562 reiserfs_write_unlock(inode->i_sb); 562 reiserfs_write_unlock(inode->i_sb);
563} 563}
564 564
565#ifdef CONFIG_REISERFS_FS_POSIX_ACL
565static void reiserfs_clear_inode(struct inode *inode) 566static void reiserfs_clear_inode(struct inode *inode)
566{ 567{
567 struct posix_acl *acl; 568 struct posix_acl *acl;
@@ -576,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode)
576 posix_acl_release(acl); 577 posix_acl_release(acl);
577 REISERFS_I(inode)->i_acl_default = NULL; 578 REISERFS_I(inode)->i_acl_default = NULL;
578} 579}
580#else
581#define reiserfs_clear_inode NULL
582#endif
579 583
580#ifdef CONFIG_QUOTA 584#ifdef CONFIG_QUOTA
581static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 585static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
@@ -725,12 +729,6 @@ static const arg_desc_t error_actions[] = {
725 {NULL, 0, 0}, 729 {NULL, 0, 0},
726}; 730};
727 731
728int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k.
729 There might be broken applications that are
730 confused by this. Use nolargeio mount option
731 to get usual i/o size = PAGE_SIZE.
732 */
733
734/* proceed only one option from a list *cur - string containing of mount options 732/* proceed only one option from a list *cur - string containing of mount options
735 opts - array of options which are accepted 733 opts - array of options which are accepted
736 opt_arg - if option is found and requires an argument and if it is specifed 734 opt_arg - if option is found and requires an argument and if it is specifed
@@ -959,19 +957,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
959 } 957 }
960 958
961 if (c == 'w') { 959 if (c == 'w') {
962 char *p = NULL; 960 reiserfs_warning(s, "reiserfs: nolargeio option is no longer supported");
963 int val = simple_strtoul(arg, &p, 0); 961 return 0;
964
965 if (*p != '\0') {
966 reiserfs_warning(s,
967 "reiserfs_parse_options: non-numeric value %s for nolargeio option",
968 arg);
969 return 0;
970 }
971 if (val)
972 reiserfs_default_io_size = PAGE_SIZE;
973 else
974 reiserfs_default_io_size = 128 * 1024;
975 } 962 }
976 963
977 if (c == 'j') { 964 if (c == 'j') {
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 22eed61ebf69..ddcd9e1ef282 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -589,8 +589,7 @@ static int init_inodecache(void)
589 589
590static void destroy_inodecache(void) 590static void destroy_inodecache(void)
591{ 591{
592 if (kmem_cache_destroy(romfs_inode_cachep)) 592 kmem_cache_destroy(romfs_inode_cachep);
593 printk(KERN_INFO "romfs_inode_cache: not all structures were freed\n");
594} 593}
595 594
596static int romfs_remount(struct super_block *sb, int *flags, char *data) 595static int romfs_remount(struct super_block *sb, int *flags, char *data)
diff --git a/fs/select.c b/fs/select.c
index 33b72ba0f86f..dcbc1112b7ec 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -658,8 +658,6 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
658 unsigned int i; 658 unsigned int i;
659 struct poll_list *head; 659 struct poll_list *head;
660 struct poll_list *walk; 660 struct poll_list *walk;
661 struct fdtable *fdt;
662 int max_fdset;
663 /* Allocate small arguments on the stack to save memory and be 661 /* Allocate small arguments on the stack to save memory and be
664 faster - use long to make sure the buffer is aligned properly 662 faster - use long to make sure the buffer is aligned properly
665 on 64 bit archs to avoid unaligned access */ 663 on 64 bit archs to avoid unaligned access */
@@ -667,11 +665,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
667 struct poll_list *stack_pp = NULL; 665 struct poll_list *stack_pp = NULL;
668 666
669 /* Do a sanity check on nfds ... */ 667 /* Do a sanity check on nfds ... */
670 rcu_read_lock(); 668 if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
671 fdt = files_fdtable(current->files);
672 max_fdset = fdt->max_fdset;
673 rcu_read_unlock();
674 if (nfds > max_fdset && nfds > OPEN_MAX)
675 return -EINVAL; 669 return -EINVAL;
676 670
677 poll_initwait(&table); 671 poll_initwait(&table);
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index a1ed657c3c84..2c122ee83adb 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -89,8 +89,7 @@ static int init_inodecache(void)
89 89
90static void destroy_inodecache(void) 90static void destroy_inodecache(void)
91{ 91{
92 if (kmem_cache_destroy(smb_inode_cachep)) 92 kmem_cache_destroy(smb_inode_cachep);
93 printk(KERN_INFO "smb_inode_cache: not all structures were freed\n");
94} 93}
95 94
96static int smb_remount(struct super_block *sb, int *flags, char *data) 95static int smb_remount(struct super_block *sb, int *flags, char *data)
@@ -167,7 +166,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
167 fattr->f_mtime = inode->i_mtime; 166 fattr->f_mtime = inode->i_mtime;
168 fattr->f_ctime = inode->i_ctime; 167 fattr->f_ctime = inode->i_ctime;
169 fattr->f_atime = inode->i_atime; 168 fattr->f_atime = inode->i_atime;
170 fattr->f_blksize= inode->i_blksize;
171 fattr->f_blocks = inode->i_blocks; 169 fattr->f_blocks = inode->i_blocks;
172 170
173 fattr->attr = SMB_I(inode)->attr; 171 fattr->attr = SMB_I(inode)->attr;
@@ -201,7 +199,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
201 inode->i_uid = fattr->f_uid; 199 inode->i_uid = fattr->f_uid;
202 inode->i_gid = fattr->f_gid; 200 inode->i_gid = fattr->f_gid;
203 inode->i_ctime = fattr->f_ctime; 201 inode->i_ctime = fattr->f_ctime;
204 inode->i_blksize= fattr->f_blksize;
205 inode->i_blocks = fattr->f_blocks; 202 inode->i_blocks = fattr->f_blocks;
206 inode->i_size = fattr->f_size; 203 inode->i_size = fattr->f_size;
207 inode->i_mtime = fattr->f_mtime; 204 inode->i_mtime = fattr->f_mtime;
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index c3495059889d..40e174db9872 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -1826,7 +1826,6 @@ smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
1826 fattr->f_nlink = 1; 1826 fattr->f_nlink = 1;
1827 fattr->f_uid = server->mnt->uid; 1827 fattr->f_uid = server->mnt->uid;
1828 fattr->f_gid = server->mnt->gid; 1828 fattr->f_gid = server->mnt->gid;
1829 fattr->f_blksize = SMB_ST_BLKSIZE;
1830 fattr->f_unix = 0; 1829 fattr->f_unix = 0;
1831} 1830}
1832 1831
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c8e96195b96e..0fb74697abc4 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -49,8 +49,7 @@ int smb_init_request_cache(void)
49 49
50void smb_destroy_request_cache(void) 50void smb_destroy_request_cache(void)
51{ 51{
52 if (kmem_cache_destroy(req_cachep)) 52 kmem_cache_destroy(req_cachep);
53 printk(KERN_INFO "smb_destroy_request_cache: not all structures were freed\n");
54} 53}
55 54
56/* 55/*
diff --git a/fs/stat.c b/fs/stat.c
index 3a44dcf97da2..60a31d5e5966 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -14,6 +14,7 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/pagemap.h>
17 18
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include <asm/unistd.h> 20#include <asm/unistd.h>
@@ -32,7 +33,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
32 stat->ctime = inode->i_ctime; 33 stat->ctime = inode->i_ctime;
33 stat->size = i_size_read(inode); 34 stat->size = i_size_read(inode);
34 stat->blocks = inode->i_blocks; 35 stat->blocks = inode->i_blocks;
35 stat->blksize = inode->i_blksize; 36 stat->blksize = (1 << inode->i_blkbits);
36} 37}
37 38
38EXPORT_SYMBOL(generic_fillattr); 39EXPORT_SYMBOL(generic_fillattr);
diff --git a/fs/super.c b/fs/super.c
index 5c4c94d5495e..6987824d0dce 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(deactivate_super);
199 * success, 0 if we had failed (superblock contents was already dead or 199 * success, 0 if we had failed (superblock contents was already dead or
200 * dying when grab_super() had been called). 200 * dying when grab_super() had been called).
201 */ 201 */
202static int grab_super(struct super_block *s) 202static int grab_super(struct super_block *s) __releases(sb_lock)
203{ 203{
204 s->s_count++; 204 s->s_count++;
205 spin_unlock(&sb_lock); 205 spin_unlock(&sb_lock);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index c16a93c353c0..98022e41cda1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/kernel.h>
13#include <linux/kobject.h> 14#include <linux/kobject.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
@@ -176,7 +177,6 @@ const struct file_operations bin_fops = {
176 * sysfs_create_bin_file - create binary file for object. 177 * sysfs_create_bin_file - create binary file for object.
177 * @kobj: object. 178 * @kobj: object.
178 * @attr: attribute descriptor. 179 * @attr: attribute descriptor.
179 *
180 */ 180 */
181 181
182int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 182int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
@@ -191,13 +191,16 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
191 * sysfs_remove_bin_file - remove binary file for object. 191 * sysfs_remove_bin_file - remove binary file for object.
192 * @kobj: object. 192 * @kobj: object.
193 * @attr: attribute descriptor. 193 * @attr: attribute descriptor.
194 *
195 */ 194 */
196 195
197int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 196void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
198{ 197{
199 sysfs_hash_and_remove(kobj->dentry,attr->attr.name); 198 if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
200 return 0; 199 printk(KERN_ERR "%s: "
200 "bad dentry or inode or no such file: \"%s\"\n",
201 __FUNCTION__, attr->attr.name);
202 dump_stack();
203 }
201} 204}
202 205
203EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 206EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 61c42430cba3..5f3d725d1125 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
43 43
44 memset(sd, 0, sizeof(*sd)); 44 memset(sd, 0, sizeof(*sd));
45 atomic_set(&sd->s_count, 1); 45 atomic_set(&sd->s_count, 1);
46 atomic_set(&sd->s_event, 0); 46 atomic_set(&sd->s_event, 1);
47 INIT_LIST_HEAD(&sd->s_children); 47 INIT_LIST_HEAD(&sd->s_children);
48 list_add(&sd->s_sibling, &parent_sd->s_children); 48 list_add(&sd->s_sibling, &parent_sd->s_children);
49 sd->s_element = element; 49 sd->s_element = element;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 9889e54e1f13..e79e38d52c00 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -12,6 +12,7 @@
12#include <linux/namei.h> 12#include <linux/namei.h>
13#include <linux/backing-dev.h> 13#include <linux/backing-dev.h>
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/errno.h>
15#include "sysfs.h" 16#include "sysfs.h"
16 17
17extern struct super_block * sysfs_sb; 18extern struct super_block * sysfs_sb;
@@ -124,7 +125,6 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
124{ 125{
125 struct inode * inode = new_inode(sysfs_sb); 126 struct inode * inode = new_inode(sysfs_sb);
126 if (inode) { 127 if (inode) {
127 inode->i_blksize = PAGE_CACHE_SIZE;
128 inode->i_blocks = 0; 128 inode->i_blocks = 0;
129 inode->i_mapping->a_ops = &sysfs_aops; 129 inode->i_mapping->a_ops = &sysfs_aops;
130 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 130 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
@@ -234,17 +234,18 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
234 } 234 }
235} 235}
236 236
237void sysfs_hash_and_remove(struct dentry * dir, const char * name) 237int sysfs_hash_and_remove(struct dentry * dir, const char * name)
238{ 238{
239 struct sysfs_dirent * sd; 239 struct sysfs_dirent * sd;
240 struct sysfs_dirent * parent_sd; 240 struct sysfs_dirent * parent_sd;
241 int found = 0;
241 242
242 if (!dir) 243 if (!dir)
243 return; 244 return -ENOENT;
244 245
245 if (dir->d_inode == NULL) 246 if (dir->d_inode == NULL)
246 /* no inode means this hasn't been made visible yet */ 247 /* no inode means this hasn't been made visible yet */
247 return; 248 return -ENOENT;
248 249
249 parent_sd = dir->d_fsdata; 250 parent_sd = dir->d_fsdata;
250 mutex_lock(&dir->d_inode->i_mutex); 251 mutex_lock(&dir->d_inode->i_mutex);
@@ -255,8 +256,11 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
255 list_del_init(&sd->s_sibling); 256 list_del_init(&sd->s_sibling);
256 sysfs_drop_dentry(sd, dir); 257 sysfs_drop_dentry(sd, dir);
257 sysfs_put(sd); 258 sysfs_put(sd);
259 found = 1;
258 break; 260 break;
259 } 261 }
260 } 262 }
261 mutex_unlock(&dir->d_inode->i_mutex); 263 mutex_unlock(&dir->d_inode->i_mutex);
264
265 return found ? 0 : -ENOENT;
262} 266}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index d2eac3ceed5f..f50e3cc2ded8 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -3,6 +3,7 @@
3 */ 3 */
4 4
5#include <linux/fs.h> 5#include <linux/fs.h>
6#include <linux/mount.h>
6#include <linux/module.h> 7#include <linux/module.h>
7#include <linux/kobject.h> 8#include <linux/kobject.h>
8#include <linux/namei.h> 9#include <linux/namei.h>
@@ -82,10 +83,19 @@ exit1:
82 */ 83 */
83int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) 84int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
84{ 85{
85 struct dentry * dentry = kobj->dentry; 86 struct dentry *dentry = NULL;
86 int error = -EEXIST; 87 int error = -EEXIST;
87 88
88 BUG_ON(!kobj || !kobj->dentry || !name); 89 BUG_ON(!name);
90
91 if (!kobj) {
92 if (sysfs_mount && sysfs_mount->mnt_sb)
93 dentry = sysfs_mount->mnt_sb->s_root;
94 } else
95 dentry = kobj->dentry;
96
97 if (!dentry)
98 return -EFAULT;
89 99
90 mutex_lock(&dentry->d_inode->i_mutex); 100 mutex_lock(&dentry->d_inode->i_mutex);
91 if (!sysfs_dirent_exist(dentry->d_fsdata, name)) 101 if (!sysfs_dirent_exist(dentry->d_fsdata, name))
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3651ffb5ec09..6f3d6bd52887 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -10,7 +10,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
10 umode_t, int); 10 umode_t, int);
11 11
12extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 12extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); 13extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
14extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); 14extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
15 15
16extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 16extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 9b585d1081c0..115ab0d6f4bc 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -170,7 +170,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
170 inode->i_uid = current->fsuid; 170 inode->i_uid = current->fsuid;
171 inode->i_ino = fs16_to_cpu(sbi, ino); 171 inode->i_ino = fs16_to_cpu(sbi, ino);
172 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 172 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
173 inode->i_blocks = inode->i_blksize = 0; 173 inode->i_blocks = 0;
174 memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data)); 174 memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
175 SYSV_I(inode)->i_dir_start_lookup = 0; 175 SYSV_I(inode)->i_dir_start_lookup = 0;
176 insert_inode_hash(inode); 176 insert_inode_hash(inode);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 58b2d22142ba..d63c5e48b050 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -201,7 +201,7 @@ static void sysv_read_inode(struct inode *inode)
201 inode->i_ctime.tv_nsec = 0; 201 inode->i_ctime.tv_nsec = 0;
202 inode->i_atime.tv_nsec = 0; 202 inode->i_atime.tv_nsec = 0;
203 inode->i_mtime.tv_nsec = 0; 203 inode->i_mtime.tv_nsec = 0;
204 inode->i_blocks = inode->i_blksize = 0; 204 inode->i_blocks = 0;
205 205
206 si = SYSV_I(inode); 206 si = SYSV_I(inode);
207 for (block = 0; block < 10+1+1+1; block++) 207 for (block = 0; block < 10+1+1+1; block++)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 876639b93321..350cba5d6803 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -369,10 +369,9 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent)
369 if (64 != sizeof (struct sysv_inode)) 369 if (64 != sizeof (struct sysv_inode))
370 panic("sysv fs: bad inode size"); 370 panic("sysv fs: bad inode size");
371 371
372 sbi = kmalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); 372 sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
373 if (!sbi) 373 if (!sbi)
374 return -ENOMEM; 374 return -ENOMEM;
375 memset(sbi, 0, sizeof(struct sysv_sb_info));
376 375
377 sbi->s_sb = sb; 376 sbi->s_sb = sb;
378 sbi->s_block_base = 0; 377 sbi->s_block_base = 0;
@@ -453,10 +452,9 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
453 if (64 != sizeof (struct sysv_inode)) 452 if (64 != sizeof (struct sysv_inode))
454 panic("sysv fs: bad i-node size"); 453 panic("sysv fs: bad i-node size");
455 454
456 sbi = kmalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); 455 sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
457 if (!sbi) 456 if (!sbi)
458 return -ENOMEM; 457 return -ENOMEM;
459 memset(sbi, 0, sizeof(struct sysv_sb_info));
460 458
461 sbi->s_sb = sb; 459 sbi->s_sb = sb;
462 sbi->s_block_base = 0; 460 sbi->s_block_base = 0;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 33323473e3c4..8206983f2ebf 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -121,7 +121,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
121 UDF_I_LOCATION(inode).logicalBlockNum = block; 121 UDF_I_LOCATION(inode).logicalBlockNum = block;
122 UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; 122 UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum;
123 inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0); 123 inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0);
124 inode->i_blksize = PAGE_SIZE;
125 inode->i_blocks = 0; 124 inode->i_blocks = 0;
126 UDF_I_LENEATTR(inode) = 0; 125 UDF_I_LENEATTR(inode) = 0;
127 UDF_I_LENALLOC(inode) = 0; 126 UDF_I_LENALLOC(inode) = 0;
@@ -130,14 +129,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
130 { 129 {
131 UDF_I_EFE(inode) = 1; 130 UDF_I_EFE(inode) = 1;
132 UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE); 131 UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE);
133 UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); 132 UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL);
134 memset(UDF_I_DATA(inode), 0x00, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry));
135 } 133 }
136 else 134 else
137 { 135 {
138 UDF_I_EFE(inode) = 0; 136 UDF_I_EFE(inode) = 0;
139 UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); 137 UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
140 memset(UDF_I_DATA(inode), 0x00, inode->i_sb->s_blocksize - sizeof(struct fileEntry));
141 } 138 }
142 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) 139 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
143 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; 140 UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 605f5111b6d8..b223b32db991 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -916,8 +916,6 @@ __udf_read_inode(struct inode *inode)
916 * i_nlink = 1 916 * i_nlink = 1
917 * i_op = NULL; 917 * i_op = NULL;
918 */ 918 */
919 inode->i_blksize = PAGE_SIZE;
920
921 bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); 919 bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident);
922 920
923 if (!bh) 921 if (!bh)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index fcce1a21a51b..1d3b5d2070e5 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -156,8 +156,7 @@ static int init_inodecache(void)
156 156
157static void destroy_inodecache(void) 157static void destroy_inodecache(void)
158{ 158{
159 if (kmem_cache_destroy(udf_inode_cachep)) 159 kmem_cache_destroy(udf_inode_cachep);
160 printk(KERN_INFO "udf_inode_cache: not all structures were freed\n");
161} 160}
162 161
163/* Superblock operations */ 162/* Superblock operations */
@@ -1622,6 +1621,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1622 goto error_out; 1621 goto error_out;
1623 } 1622 }
1624 1623
1624 if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY)
1625 printk("UDF-fs: Partition marked readonly; forcing readonly mount\n");
1626 sb->s_flags |= MS_RDONLY;
1627
1625 if ( udf_find_fileset(sb, &fileset, &rootdir) ) 1628 if ( udf_find_fileset(sb, &fileset, &rootdir) )
1626 { 1629 {
1627 printk("UDF-fs: No fileset found\n"); 1630 printk("UDF-fs: No fileset found\n");
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 9501dcd3b213..2ad1259c6eca 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -255,7 +255,6 @@ cg_found:
255 inode->i_gid = current->fsgid; 255 inode->i_gid = current->fsgid;
256 256
257 inode->i_ino = cg * uspi->s_ipg + bit; 257 inode->i_ino = cg * uspi->s_ipg + bit;
258 inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
259 inode->i_blocks = 0; 258 inode->i_blocks = 0;
260 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 259 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
261 ufsi->i_flags = UFS_I(dir)->i_flags; 260 ufsi->i_flags = UFS_I(dir)->i_flags;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 30c6e8a9446c..ee1eaa6f4ec2 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -741,7 +741,6 @@ void ufs_read_inode(struct inode * inode)
741 ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); 741 ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
742 } 742 }
743 743
744 inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
745 inode->i_version++; 744 inode->i_version++;
746 ufsi->i_lastfrag = 745 ufsi->i_lastfrag =
747 (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; 746 (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 992ee0b87cc3..ec79e3091d1b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -611,11 +611,10 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
611 611
612 UFSD("ENTER\n"); 612 UFSD("ENTER\n");
613 613
614 sbi = kmalloc(sizeof(struct ufs_sb_info), GFP_KERNEL); 614 sbi = kzalloc(sizeof(struct ufs_sb_info), GFP_KERNEL);
615 if (!sbi) 615 if (!sbi)
616 goto failed_nomem; 616 goto failed_nomem;
617 sb->s_fs_info = sbi; 617 sb->s_fs_info = sbi;
618 memset(sbi, 0, sizeof(struct ufs_sb_info));
619 618
620 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); 619 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
621 620
@@ -1245,8 +1244,7 @@ static int init_inodecache(void)
1245 1244
1246static void destroy_inodecache(void) 1245static void destroy_inodecache(void)
1247{ 1246{
1248 if (kmem_cache_destroy(ufs_inode_cachep)) 1247 kmem_cache_destroy(ufs_inode_cachep);
1249 printk(KERN_INFO "ufs_inode_cache: not all structures were freed\n");
1250} 1248}
1251 1249
1252#ifdef CONFIG_QUOTA 1250#ifdef CONFIG_QUOTA
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 9e7f85986d0d..291948d5085a 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -30,7 +30,6 @@ ifeq ($(CONFIG_XFS_TRACE),y)
30 EXTRA_CFLAGS += -DXFS_BLI_TRACE 30 EXTRA_CFLAGS += -DXFS_BLI_TRACE
31 EXTRA_CFLAGS += -DXFS_BMAP_TRACE 31 EXTRA_CFLAGS += -DXFS_BMAP_TRACE
32 EXTRA_CFLAGS += -DXFS_BMBT_TRACE 32 EXTRA_CFLAGS += -DXFS_BMBT_TRACE
33 EXTRA_CFLAGS += -DXFS_DIR_TRACE
34 EXTRA_CFLAGS += -DXFS_DIR2_TRACE 33 EXTRA_CFLAGS += -DXFS_DIR2_TRACE
35 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE 34 EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
36 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE 35 EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index aba7fcf881a2..d59737589815 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -34,6 +34,14 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
34 gfp_t lflags = kmem_flags_convert(flags); 34 gfp_t lflags = kmem_flags_convert(flags);
35 void *ptr; 35 void *ptr;
36 36
37#ifdef DEBUG
38 if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
39 printk(KERN_WARNING "Large %s attempt, size=%ld\n",
40 __FUNCTION__, (long)size);
41 dump_stack();
42 }
43#endif
44
37 do { 45 do {
38 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) 46 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
39 ptr = kmalloc(size, lflags); 47 ptr = kmalloc(size, lflags);
@@ -60,6 +68,27 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
60 return ptr; 68 return ptr;
61} 69}
62 70
71void *
72kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
73 unsigned int __nocast flags)
74{
75 void *ptr;
76 size_t kmsize = maxsize;
77 unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
78
79 while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
80 if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
81 break;
82 if ((kmsize >>= 1) <= minsize) {
83 kmsize = minsize;
84 kmflags = flags;
85 }
86 }
87 if (ptr)
88 *size = kmsize;
89 return ptr;
90}
91
63void 92void
64kmem_free(void *ptr, size_t size) 93kmem_free(void *ptr, size_t size)
65{ 94{
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 939bd84bc7ee..9ebabdf7829c 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -30,6 +30,7 @@
30#define KM_NOSLEEP 0x0002u 30#define KM_NOSLEEP 0x0002u
31#define KM_NOFS 0x0004u 31#define KM_NOFS 0x0004u
32#define KM_MAYFAIL 0x0008u 32#define KM_MAYFAIL 0x0008u
33#define KM_LARGE 0x0010u
33 34
34/* 35/*
35 * We use a special process flag to avoid recursive callbacks into 36 * We use a special process flag to avoid recursive callbacks into
@@ -41,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
41{ 42{
42 gfp_t lflags; 43 gfp_t lflags;
43 44
44 BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); 45 BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
45 46
46 if (flags & KM_NOSLEEP) { 47 if (flags & KM_NOSLEEP) {
47 lflags = GFP_ATOMIC | __GFP_NOWARN; 48 lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -54,8 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags)
54} 55}
55 56
56extern void *kmem_alloc(size_t, unsigned int __nocast); 57extern void *kmem_alloc(size_t, unsigned int __nocast);
57extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
58extern void *kmem_zalloc(size_t, unsigned int __nocast); 58extern void *kmem_zalloc(size_t, unsigned int __nocast);
59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
60extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
59extern void kmem_free(void *, size_t); 61extern void kmem_free(void *, size_t);
60 62
61/* 63/*
@@ -91,8 +93,8 @@ kmem_zone_free(kmem_zone_t *zone, void *ptr)
91static inline void 93static inline void
92kmem_zone_destroy(kmem_zone_t *zone) 94kmem_zone_destroy(kmem_zone_t *zone)
93{ 95{
94 if (zone && kmem_cache_destroy(zone)) 96 if (zone)
95 BUG(); 97 kmem_cache_destroy(zone);
96} 98}
97 99
98extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); 100extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index b25090094cca..2009e6d922ce 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -29,8 +29,6 @@
29 29
30typedef struct semaphore sema_t; 30typedef struct semaphore sema_t;
31 31
32#define init_sema(sp, val, c, d) sema_init(sp, val)
33#define initsema(sp, val) sema_init(sp, val)
34#define initnsema(sp, val, name) sema_init(sp, val) 32#define initnsema(sp, val, name) sema_init(sp, val)
35#define psema(sp, b) down(sp) 33#define psema(sp, b) down(sp)
36#define vsema(sp) up(sp) 34#define vsema(sp) up(sp)
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 9a8ad481b008..351a8f454bd1 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -53,8 +53,6 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
53 remove_wait_queue(&sv->waiters, &wait); 53 remove_wait_queue(&sv->waiters, &wait);
54} 54}
55 55
56#define init_sv(sv,type,name,flag) \
57 init_waitqueue_head(&(sv)->waiters)
58#define sv_init(sv,flag,name) \ 56#define sv_init(sv,flag,name) \
59 init_waitqueue_head(&(sv)->waiters) 57 init_waitqueue_head(&(sv)->waiters)
60#define sv_destroy(sv) \ 58#define sv_destroy(sv) \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34dcb43a7837..09360cf1e1f2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -71,7 +71,7 @@ xfs_page_trace(
71 int tag, 71 int tag,
72 struct inode *inode, 72 struct inode *inode,
73 struct page *page, 73 struct page *page,
74 int mask) 74 unsigned long pgoff)
75{ 75{
76 xfs_inode_t *ip; 76 xfs_inode_t *ip;
77 bhv_vnode_t *vp = vn_from_inode(inode); 77 bhv_vnode_t *vp = vn_from_inode(inode);
@@ -91,7 +91,7 @@ xfs_page_trace(
91 (void *)ip, 91 (void *)ip,
92 (void *)inode, 92 (void *)inode,
93 (void *)page, 93 (void *)page,
94 (void *)((unsigned long)mask), 94 (void *)pgoff,
95 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), 95 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
96 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), 96 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
97 (void *)((unsigned long)((isize >> 32) & 0xffffffff)), 97 (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
@@ -105,7 +105,7 @@ xfs_page_trace(
105 (void *)NULL); 105 (void *)NULL);
106} 106}
107#else 107#else
108#define xfs_page_trace(tag, inode, page, mask) 108#define xfs_page_trace(tag, inode, page, pgoff)
109#endif 109#endif
110 110
111/* 111/*
@@ -1197,7 +1197,7 @@ xfs_vm_releasepage(
1197 .nr_to_write = 1, 1197 .nr_to_write = 1,
1198 }; 1198 };
1199 1199
1200 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); 1200 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);
1201 1201
1202 if (!page_has_buffers(page)) 1202 if (!page_has_buffers(page))
1203 return 0; 1203 return 0;
@@ -1356,7 +1356,6 @@ xfs_end_io_direct(
1356 ioend->io_size = size; 1356 ioend->io_size = size;
1357 xfs_finish_ioend(ioend); 1357 xfs_finish_ioend(ioend);
1358 } else { 1358 } else {
1359 ASSERT(size >= 0);
1360 xfs_destroy_ioend(ioend); 1359 xfs_destroy_ioend(ioend);
1361 } 1360 }
1362 1361
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2af528dcfb04..9bbadafdcb00 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
@@ -318,8 +318,12 @@ xfs_buf_free(
318 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 318 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
319 free_address(bp->b_addr - bp->b_offset); 319 free_address(bp->b_addr - bp->b_offset);
320 320
321 for (i = 0; i < bp->b_page_count; i++) 321 for (i = 0; i < bp->b_page_count; i++) {
322 page_cache_release(bp->b_pages[i]); 322 struct page *page = bp->b_pages[i];
323
324 ASSERT(!PagePrivate(page));
325 page_cache_release(page);
326 }
323 _xfs_buf_free_pages(bp); 327 _xfs_buf_free_pages(bp);
324 } else if (bp->b_flags & _XBF_KMEM_ALLOC) { 328 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
325 /* 329 /*
@@ -400,6 +404,7 @@ _xfs_buf_lookup_pages(
400 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 404 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
401 size -= nbytes; 405 size -= nbytes;
402 406
407 ASSERT(!PagePrivate(page));
403 if (!PageUptodate(page)) { 408 if (!PageUptodate(page)) {
404 page_count--; 409 page_count--;
405 if (blocksize >= PAGE_CACHE_SIZE) { 410 if (blocksize >= PAGE_CACHE_SIZE) {
@@ -768,7 +773,7 @@ xfs_buf_get_noaddr(
768 _xfs_buf_initialize(bp, target, 0, len, 0); 773 _xfs_buf_initialize(bp, target, 0, len, 0);
769 774
770 try_again: 775 try_again:
771 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); 776 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
772 if (unlikely(data == NULL)) 777 if (unlikely(data == NULL))
773 goto fail_free_buf; 778 goto fail_free_buf;
774 779
@@ -1117,10 +1122,10 @@ xfs_buf_bio_end_io(
1117 do { 1122 do {
1118 struct page *page = bvec->bv_page; 1123 struct page *page = bvec->bv_page;
1119 1124
1125 ASSERT(!PagePrivate(page));
1120 if (unlikely(bp->b_error)) { 1126 if (unlikely(bp->b_error)) {
1121 if (bp->b_flags & XBF_READ) 1127 if (bp->b_flags & XBF_READ)
1122 ClearPageUptodate(page); 1128 ClearPageUptodate(page);
1123 SetPageError(page);
1124 } else if (blocksize >= PAGE_CACHE_SIZE) { 1129 } else if (blocksize >= PAGE_CACHE_SIZE) {
1125 SetPageUptodate(page); 1130 SetPageUptodate(page);
1126 } else if (!PagePrivate(page) && 1131 } else if (!PagePrivate(page) &&
@@ -1156,16 +1161,16 @@ _xfs_buf_ioapply(
1156 total_nr_pages = bp->b_page_count; 1161 total_nr_pages = bp->b_page_count;
1157 map_i = 0; 1162 map_i = 0;
1158 1163
1159 if (bp->b_flags & _XBF_RUN_QUEUES) {
1160 bp->b_flags &= ~_XBF_RUN_QUEUES;
1161 rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
1162 } else {
1163 rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
1164 }
1165
1166 if (bp->b_flags & XBF_ORDERED) { 1164 if (bp->b_flags & XBF_ORDERED) {
1167 ASSERT(!(bp->b_flags & XBF_READ)); 1165 ASSERT(!(bp->b_flags & XBF_READ));
1168 rw = WRITE_BARRIER; 1166 rw = WRITE_BARRIER;
1167 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1168 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1169 bp->b_flags &= ~_XBF_RUN_QUEUES;
1170 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1171 } else {
1172 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1173 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1169 } 1174 }
1170 1175
1171 /* Special code path for reading a sub page size buffer in -- 1176 /* Special code path for reading a sub page size buffer in --
@@ -1681,6 +1686,7 @@ xfsbufd(
1681 xfs_buf_t *bp, *n; 1686 xfs_buf_t *bp, *n;
1682 struct list_head *dwq = &target->bt_delwrite_queue; 1687 struct list_head *dwq = &target->bt_delwrite_queue;
1683 spinlock_t *dwlk = &target->bt_delwrite_lock; 1688 spinlock_t *dwlk = &target->bt_delwrite_lock;
1689 int count;
1684 1690
1685 current->flags |= PF_MEMALLOC; 1691 current->flags |= PF_MEMALLOC;
1686 1692
@@ -1696,6 +1702,7 @@ xfsbufd(
1696 schedule_timeout_interruptible( 1702 schedule_timeout_interruptible(
1697 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1703 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1698 1704
1705 count = 0;
1699 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1706 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1700 spin_lock(dwlk); 1707 spin_lock(dwlk);
1701 list_for_each_entry_safe(bp, n, dwq, b_list) { 1708 list_for_each_entry_safe(bp, n, dwq, b_list) {
@@ -1711,9 +1718,11 @@ xfsbufd(
1711 break; 1718 break;
1712 } 1719 }
1713 1720
1714 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); 1721 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
1722 _XBF_RUN_QUEUES);
1715 bp->b_flags |= XBF_WRITE; 1723 bp->b_flags |= XBF_WRITE;
1716 list_move(&bp->b_list, &tmp); 1724 list_move_tail(&bp->b_list, &tmp);
1725 count++;
1717 } 1726 }
1718 } 1727 }
1719 spin_unlock(dwlk); 1728 spin_unlock(dwlk);
@@ -1724,12 +1733,12 @@ xfsbufd(
1724 1733
1725 list_del_init(&bp->b_list); 1734 list_del_init(&bp->b_list);
1726 xfs_buf_iostrategy(bp); 1735 xfs_buf_iostrategy(bp);
1727
1728 blk_run_address_space(target->bt_mapping);
1729 } 1736 }
1730 1737
1731 if (as_list_len > 0) 1738 if (as_list_len > 0)
1732 purge_addresses(); 1739 purge_addresses();
1740 if (count)
1741 blk_run_address_space(target->bt_mapping);
1733 1742
1734 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1743 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1735 } while (!kthread_should_stop()); 1744 } while (!kthread_should_stop());
@@ -1767,7 +1776,7 @@ xfs_flush_buftarg(
1767 continue; 1776 continue;
1768 } 1777 }
1769 1778
1770 list_move(&bp->b_list, &tmp); 1779 list_move_tail(&bp->b_list, &tmp);
1771 } 1780 }
1772 spin_unlock(dwlk); 1781 spin_unlock(dwlk);
1773 1782
@@ -1776,7 +1785,7 @@ xfs_flush_buftarg(
1776 */ 1785 */
1777 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1786 list_for_each_entry_safe(bp, n, &tmp, b_list) {
1778 xfs_buf_lock(bp); 1787 xfs_buf_lock(bp);
1779 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); 1788 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
1780 bp->b_flags |= XBF_WRITE; 1789 bp->b_flags |= XBF_WRITE;
1781 if (wait) 1790 if (wait)
1782 bp->b_flags &= ~XBF_ASYNC; 1791 bp->b_flags &= ~XBF_ASYNC;
@@ -1786,6 +1795,9 @@ xfs_flush_buftarg(
1786 xfs_buf_iostrategy(bp); 1795 xfs_buf_iostrategy(bp);
1787 } 1796 }
1788 1797
1798 if (wait)
1799 blk_run_address_space(target->bt_mapping);
1800
1789 /* 1801 /*
1790 * Remaining list items must be flushed before returning 1802 * Remaining list items must be flushed before returning
1791 */ 1803 */
@@ -1797,9 +1809,6 @@ xfs_flush_buftarg(
1797 xfs_buf_relse(bp); 1809 xfs_buf_relse(bp);
1798 } 1810 }
1799 1811
1800 if (wait)
1801 blk_run_address_space(target->bt_mapping);
1802
1803 return pincount; 1812 return pincount;
1804} 1813}
1805 1814
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 7858703ed84c..9dd235cb0107 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -298,11 +298,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
298#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) 298#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
299#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) 299#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
300 300
301#define XFS_BUF_ISUNINITIAL(bp) (0)
302#define XFS_BUF_UNUNINITIAL(bp) (0)
303
304#define XFS_BUF_BP_ISMAPPED(bp) (1)
305
306#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) 301#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
307#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) 302#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
308#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) 303#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
@@ -393,8 +388,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
393 return error; 388 return error;
394} 389}
395 390
396#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
397
398static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) 391static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
399{ 392{
400 bp->b_strat = xfs_bdstrat_cb; 393 bp->b_strat = xfs_bdstrat_cb;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3d4f6dff2113..41cfcba7ce49 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -370,7 +370,7 @@ xfs_file_readdir(
370 370
371 /* Try fairly hard to get memory */ 371 /* Try fairly hard to get memory */
372 do { 372 do {
373 if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) 373 if ((read_buf = kmalloc(rlen, GFP_KERNEL)))
374 break; 374 break;
375 rlen >>= 1; 375 rlen >>= 1;
376 } while (rlen >= 1024); 376 } while (rlen >= 1024);
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 6c162c3dde7e..ed3a5e1b4b67 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -34,7 +34,7 @@ xfs_param_t xfs_params = {
34 .restrict_chown = { 0, 1, 1 }, 34 .restrict_chown = { 0, 1, 1 },
35 .sgid_inherit = { 0, 0, 1 }, 35 .sgid_inherit = { 0, 0, 1 },
36 .symlink_mode = { 0, 0, 1 }, 36 .symlink_mode = { 0, 0, 1 },
37 .panic_mask = { 0, 0, 127 }, 37 .panic_mask = { 0, 0, 255 },
38 .error_level = { 0, 3, 11 }, 38 .error_level = { 0, 3, 11 },
39 .syncd_timer = { 1*100, 30*100, 7200*100}, 39 .syncd_timer = { 1*100, 30*100, 7200*100},
40 .stats_clear = { 0, 0, 1 }, 40 .stats_clear = { 0, 0, 1 },
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 6e52a5dd38d8..a74f854d91e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -653,7 +653,7 @@ xfs_attrmulti_by_handle(
653STATIC int 653STATIC int
654xfs_ioc_space( 654xfs_ioc_space(
655 bhv_desc_t *bdp, 655 bhv_desc_t *bdp,
656 bhv_vnode_t *vp, 656 struct inode *inode,
657 struct file *filp, 657 struct file *filp,
658 int flags, 658 int flags,
659 unsigned int cmd, 659 unsigned int cmd,
@@ -735,7 +735,7 @@ xfs_ioctl(
735 !capable(CAP_SYS_ADMIN)) 735 !capable(CAP_SYS_ADMIN))
736 return -EPERM; 736 return -EPERM;
737 737
738 return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); 738 return xfs_ioc_space(bdp, inode, filp, ioflags, cmd, arg);
739 739
740 case XFS_IOC_DIOINFO: { 740 case XFS_IOC_DIOINFO: {
741 struct dioattr da; 741 struct dioattr da;
@@ -763,6 +763,8 @@ xfs_ioctl(
763 return xfs_ioc_fsgeometry(mp, arg); 763 return xfs_ioc_fsgeometry(mp, arg);
764 764
765 case XFS_IOC_GETVERSION: 765 case XFS_IOC_GETVERSION:
766 return put_user(inode->i_generation, (int __user *)arg);
767
766 case XFS_IOC_GETXFLAGS: 768 case XFS_IOC_GETXFLAGS:
767 case XFS_IOC_SETXFLAGS: 769 case XFS_IOC_SETXFLAGS:
768 case XFS_IOC_FSGETXATTR: 770 case XFS_IOC_FSGETXATTR:
@@ -957,7 +959,7 @@ xfs_ioctl(
957STATIC int 959STATIC int
958xfs_ioc_space( 960xfs_ioc_space(
959 bhv_desc_t *bdp, 961 bhv_desc_t *bdp,
960 bhv_vnode_t *vp, 962 struct inode *inode,
961 struct file *filp, 963 struct file *filp,
962 int ioflags, 964 int ioflags,
963 unsigned int cmd, 965 unsigned int cmd,
@@ -967,13 +969,13 @@ xfs_ioc_space(
967 int attr_flags = 0; 969 int attr_flags = 0;
968 int error; 970 int error;
969 971
970 if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) 972 if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
971 return -XFS_ERROR(EPERM); 973 return -XFS_ERROR(EPERM);
972 974
973 if (!(filp->f_mode & FMODE_WRITE)) 975 if (!(filp->f_mode & FMODE_WRITE))
974 return -XFS_ERROR(EBADF); 976 return -XFS_ERROR(EBADF);
975 977
976 if (!VN_ISREG(vp)) 978 if (!S_ISREG(inode->i_mode))
977 return -XFS_ERROR(EINVAL); 979 return -XFS_ERROR(EINVAL);
978 980
979 if (copy_from_user(&bf, arg, sizeof(bf))) 981 if (copy_from_user(&bf, arg, sizeof(bf)))
@@ -1264,13 +1266,6 @@ xfs_ioc_xattr(
1264 break; 1266 break;
1265 } 1267 }
1266 1268
1267 case XFS_IOC_GETVERSION: {
1268 flags = vn_to_inode(vp)->i_generation;
1269 if (copy_to_user(arg, &flags, sizeof(flags)))
1270 error = -EFAULT;
1271 break;
1272 }
1273
1274 default: 1269 default:
1275 error = -ENOTTY; 1270 error = -ENOTTY;
1276 break; 1271 break;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d9180020de63..3ba814ae3bba 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -553,13 +553,13 @@ xfs_vn_follow_link(
553 ASSERT(dentry); 553 ASSERT(dentry);
554 ASSERT(nd); 554 ASSERT(nd);
555 555
556 link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL); 556 link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
557 if (!link) { 557 if (!link) {
558 nd_set_link(nd, ERR_PTR(-ENOMEM)); 558 nd_set_link(nd, ERR_PTR(-ENOMEM));
559 return NULL; 559 return NULL;
560 } 560 }
561 561
562 uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); 562 uio = kmalloc(sizeof(uio_t), GFP_KERNEL);
563 if (!uio) { 563 if (!uio) {
564 kfree(link); 564 kfree(link);
565 nd_set_link(nd, ERR_PTR(-ENOMEM)); 565 nd_set_link(nd, ERR_PTR(-ENOMEM));
@@ -623,12 +623,27 @@ xfs_vn_getattr(
623{ 623{
624 struct inode *inode = dentry->d_inode; 624 struct inode *inode = dentry->d_inode;
625 bhv_vnode_t *vp = vn_from_inode(inode); 625 bhv_vnode_t *vp = vn_from_inode(inode);
626 int error = 0; 626 bhv_vattr_t vattr = { .va_mask = XFS_AT_STAT };
627 int error;
627 628
628 if (unlikely(vp->v_flag & VMODIFIED)) 629 error = bhv_vop_getattr(vp, &vattr, ATTR_LAZY, NULL);
629 error = vn_revalidate(vp); 630 if (likely(!error)) {
630 if (!error) 631 stat->size = i_size_read(inode);
631 generic_fillattr(inode, stat); 632 stat->dev = inode->i_sb->s_dev;
633 stat->rdev = (vattr.va_rdev == 0) ? 0 :
634 MKDEV(sysv_major(vattr.va_rdev) & 0x1ff,
635 sysv_minor(vattr.va_rdev));
636 stat->mode = vattr.va_mode;
637 stat->nlink = vattr.va_nlink;
638 stat->uid = vattr.va_uid;
639 stat->gid = vattr.va_gid;
640 stat->ino = vattr.va_nodeid;
641 stat->atime = vattr.va_atime;
642 stat->mtime = vattr.va_mtime;
643 stat->ctime = vattr.va_ctime;
644 stat->blocks = vattr.va_nblocks;
645 stat->blksize = vattr.va_blocksize;
646 }
632 return -error; 647 return -error;
633} 648}
634 649
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index a13f75c1a936..2b0e0018738a 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -148,11 +148,7 @@ BUFFER_FNS(PrivateStart, unwritten);
148 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) 148 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
149 149
150#define NBPP PAGE_SIZE 150#define NBPP PAGE_SIZE
151#define DPPSHFT (PAGE_SHIFT - 9)
152#define NDPP (1 << (PAGE_SHIFT - 9)) 151#define NDPP (1 << (PAGE_SHIFT - 9))
153#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT)
154#define dtopt(DD) ((DD) >> DPPSHFT)
155#define dpoff(DD) ((DD) & (NDPP-1))
156 152
157#define NBBY 8 /* number of bits per byte */ 153#define NBBY 8 /* number of bits per byte */
158#define NBPC PAGE_SIZE /* Number of bytes per click */ 154#define NBPC PAGE_SIZE /* Number of bytes per click */
@@ -172,8 +168,6 @@ BUFFER_FNS(PrivateStart, unwritten);
172#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) 168#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
173#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) 169#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
174#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) 170#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT)
175#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
176#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT)
177 171
178/* off_t bytes to clicks */ 172/* off_t bytes to clicks */
179#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) 173#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
@@ -186,7 +180,6 @@ BUFFER_FNS(PrivateStart, unwritten);
186#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) 180#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT)
187#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) 181#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
188#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) 182#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT)
189#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT)
190 183
191/* bytes to clicks */ 184/* bytes to clicks */
192#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) 185#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
@@ -339,4 +332,11 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
339 return(x * y); 332 return(x * y);
340} 333}
341 334
335static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
336{
337 x += y - 1;
338 do_div(x, y);
339 return x;
340}
341
342#endif /* __XFS_LINUX__ */ 342#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ee788b1cb364..55992b40353c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -270,12 +270,12 @@ xfs_read(
270 } 270 }
271 } 271 }
272 272
273 if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) 273 if (unlikely(ioflags & IO_ISDIRECT)) {
274 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), 274 if (VN_CACHED(vp))
275 -1, FI_REMAPF_LOCKED); 275 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
276 276 -1, FI_REMAPF_LOCKED);
277 if (unlikely(ioflags & IO_ISDIRECT))
278 mutex_unlock(&inode->i_mutex); 277 mutex_unlock(&inode->i_mutex);
278 }
279 279
280 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, 280 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
281 (void *)iovp, segs, *offset, ioflags); 281 (void *)iovp, segs, *offset, ioflags);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4754f342a5d3..38c4d128a8c0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -171,7 +171,6 @@ xfs_revalidate_inode(
171 break; 171 break;
172 } 172 }
173 173
174 inode->i_blksize = xfs_preferred_iosize(mp);
175 inode->i_generation = ip->i_d.di_gen; 174 inode->i_generation = ip->i_d.di_gen;
176 i_size_write(inode, ip->i_d.di_size); 175 i_size_write(inode, ip->i_d.di_size);
177 inode->i_blocks = 176 inode->i_blocks =
@@ -228,7 +227,9 @@ xfs_initialize_vnode(
228 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); 227 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
229 xfs_set_inodeops(inode); 228 xfs_set_inodeops(inode);
230 229
230 spin_lock(&ip->i_flags_lock);
231 ip->i_flags &= ~XFS_INEW; 231 ip->i_flags &= ~XFS_INEW;
232 spin_unlock(&ip->i_flags_lock);
232 barrier(); 233 barrier();
233 234
234 unlock_new_inode(inode); 235 unlock_new_inode(inode);
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 91fc2c4b3353..da255bdf5260 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -79,7 +79,7 @@ typedef enum {
79#define VFS_RDONLY 0x0001 /* read-only vfs */ 79#define VFS_RDONLY 0x0001 /* read-only vfs */
80#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ 80#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
81#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ 81#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
82#define VFS_UMOUNT 0x0008 /* unmount in progress */ 82/* ---- VFS_UMOUNT ---- 0x0008 -- unneeded, fixed via kthread APIs */
83#define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */ 83#define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */
84#define VFS_END 0x0010 /* max flag */ 84#define VFS_END 0x0010 /* max flag */
85 85
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 6628d96b6fd6..553fa731ade5 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -122,7 +122,6 @@ vn_revalidate_core(
122 inode->i_blocks = vap->va_nblocks; 122 inode->i_blocks = vap->va_nblocks;
123 inode->i_mtime = vap->va_mtime; 123 inode->i_mtime = vap->va_mtime;
124 inode->i_ctime = vap->va_ctime; 124 inode->i_ctime = vap->va_ctime;
125 inode->i_blksize = vap->va_blocksize;
126 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 125 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
127 inode->i_flags |= S_IMMUTABLE; 126 inode->i_flags |= S_IMMUTABLE;
128 else 127 else
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index c42b3221b20c..515f5fdea57a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -85,8 +85,6 @@ typedef enum {
85#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) 85#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh)))
86#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) 86#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name)
87#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) 87#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp)
88#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops)
89#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)
90 88
91/* 89/*
92 * Vnode to Linux inode mapping. 90 * Vnode to Linux inode mapping.
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 5b2dcc58b244..33ad5af386e0 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -382,18 +382,6 @@ xfs_qm_dquot_logitem_unlock(
382 382
383 383
384/* 384/*
385 * The transaction with the dquot locked has aborted. The dquot
386 * must not be dirty within the transaction. We simply unlock just
387 * as if the transaction had been cancelled.
388 */
389STATIC void
390xfs_qm_dquot_logitem_abort(
391 xfs_dq_logitem_t *ql)
392{
393 xfs_qm_dquot_logitem_unlock(ql);
394}
395
396/*
397 * this needs to stamp an lsn into the dquot, I think. 385 * this needs to stamp an lsn into the dquot, I think.
398 * rpc's that look at user dquot's would then have to 386 * rpc's that look at user dquot's would then have to
399 * push on the dependency recorded in the dquot 387 * push on the dependency recorded in the dquot
@@ -426,7 +414,6 @@ STATIC struct xfs_item_ops xfs_dquot_item_ops = {
426 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 414 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
427 xfs_qm_dquot_logitem_committed, 415 xfs_qm_dquot_logitem_committed,
428 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, 416 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
429 .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
430 .iop_pushbuf = (void(*)(xfs_log_item_t*)) 417 .iop_pushbuf = (void(*)(xfs_log_item_t*))
431 xfs_qm_dquot_logitem_pushbuf, 418 xfs_qm_dquot_logitem_pushbuf,
432 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 419 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
@@ -559,17 +546,6 @@ xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
559} 546}
560 547
561/* 548/*
562 * The transaction of which this QUOTAOFF is a part has been aborted.
563 * Just clean up after ourselves.
564 * Shouldn't this never happen in the case of qoffend logitems? XXX
565 */
566STATIC void
567xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
568{
569 kmem_free(qf, sizeof(xfs_qoff_logitem_t));
570}
571
572/*
573 * There isn't much you can do to push on an quotaoff item. It is simply 549 * There isn't much you can do to push on an quotaoff item. It is simply
574 * stuck waiting for the log to be flushed to disk. 550 * stuck waiting for the log to be flushed to disk.
575 */ 551 */
@@ -644,7 +620,6 @@ STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
644 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 620 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
645 xfs_qm_qoffend_logitem_committed, 621 xfs_qm_qoffend_logitem_committed,
646 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, 622 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
647 .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
648 .iop_pushbuf = NULL, 623 .iop_pushbuf = NULL,
649 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 624 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
650 xfs_qm_qoffend_logitem_committing 625 xfs_qm_qoffend_logitem_committing
@@ -667,7 +642,6 @@ STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
667 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 642 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
668 xfs_qm_qoff_logitem_committed, 643 xfs_qm_qoff_logitem_committed,
669 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, 644 .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
670 .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
671 .iop_pushbuf = NULL, 645 .iop_pushbuf = NULL,
672 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 646 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
673 xfs_qm_qoff_logitem_committing 647 xfs_qm_qoff_logitem_committing
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index e23e45535c48..7c6a3a50379e 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -112,17 +112,17 @@ xfs_Gqm_init(void)
112{ 112{
113 xfs_dqhash_t *udqhash, *gdqhash; 113 xfs_dqhash_t *udqhash, *gdqhash;
114 xfs_qm_t *xqm; 114 xfs_qm_t *xqm;
115 uint i, hsize, flags = KM_SLEEP | KM_MAYFAIL; 115 size_t hsize;
116 uint i;
116 117
117 /* 118 /*
118 * Initialize the dquot hash tables. 119 * Initialize the dquot hash tables.
119 */ 120 */
120 hsize = XFS_QM_HASHSIZE_HIGH; 121 udqhash = kmem_zalloc_greedy(&hsize,
121 while (!(udqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), flags))) { 122 XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH,
122 if ((hsize >>= 1) <= XFS_QM_HASHSIZE_LOW) 123 KM_SLEEP | KM_MAYFAIL | KM_LARGE);
123 flags = KM_SLEEP; 124 gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
124 } 125 hsize /= sizeof(xfs_dqhash_t);
125 gdqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), KM_SLEEP);
126 ndquot = hsize << 8; 126 ndquot = hsize << 8;
127 127
128 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); 128 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 4568deb6da86..689407de0a20 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -56,12 +56,6 @@ extern kmem_zone_t *qm_dqtrxzone;
56#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t)) 56#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t))
57 57
58/* 58/*
59 * We output a cmn_err when quotachecking a quota file with more than
60 * this many fsbs.
61 */
62#define XFS_QM_BIG_QCHECK_NBLKS 500
63
64/*
65 * This defines the unit of allocation of dquots. 59 * This defines the unit of allocation of dquots.
66 * Currently, it is just one file system block, and a 4K blk contains 30 60 * Currently, it is just one file system block, and a 4K blk contains 30
67 * (136 * 30 = 4080) dquots. It's probably not worth trying to make 61 * (136 * 30 = 4080) dquots. It's probably not worth trying to make
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index b7ddd04aae32..a8b85e2be9d5 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -75,7 +75,6 @@ static inline int XQMISLCKD(struct xfs_dqhash *h)
75 75
76#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist)) 76#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist))
77#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist)) 77#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist))
78#define XFS_QM_IS_FREELIST_LOCKED(qm) XQMISLCKD(&((qm)->qm_dqfreelist))
79 78
80/* 79/*
81 * Hash into a bucket in the dquot hash table, based on <mp, id>. 80 * Hash into a bucket in the dquot hash table, based on <mp, id>.
@@ -170,6 +169,5 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
170#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 169#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
171 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 170 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
172 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 171 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
173#define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
174 172
175#endif /* __XFS_QUOTA_PRIV_H__ */ 173#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index addf5a7ea06c..5cf2e86caa71 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -75,7 +75,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
75 sleep); 75 sleep);
76 } else { 76 } else {
77 ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), 77 ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
78 sleep); 78 sleep | KM_LARGE);
79 } 79 }
80 80
81 if (ktep == NULL) { 81 if (ktep == NULL) {
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index dc2361dd740a..9ece7f87ec5b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -150,7 +150,7 @@ typedef struct xfs_agi {
150#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) 150#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp))
151 151
152typedef struct xfs_agfl { 152typedef struct xfs_agfl {
153 xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ 153 __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
154} xfs_agfl_t; 154} xfs_agfl_t;
155 155
156/* 156/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index d2bbcd882a69..e80dda3437d1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1477,8 +1477,10 @@ xfs_alloc_ag_vextent_small(
1477 /* 1477 /*
1478 * Can't allocate from the freelist for some reason. 1478 * Can't allocate from the freelist for some reason.
1479 */ 1479 */
1480 else 1480 else {
1481 fbno = NULLAGBLOCK;
1481 flen = 0; 1482 flen = 0;
1483 }
1482 /* 1484 /*
1483 * Can't do the allocation, give up. 1485 * Can't do the allocation, give up.
1484 */ 1486 */
@@ -2021,7 +2023,7 @@ xfs_alloc_get_freelist(
2021 /* 2023 /*
2022 * Get the block number and update the data structures. 2024 * Get the block number and update the data structures.
2023 */ 2025 */
2024 bno = INT_GET(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)], ARCH_CONVERT); 2026 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2025 be32_add(&agf->agf_flfirst, 1); 2027 be32_add(&agf->agf_flfirst, 1);
2026 xfs_trans_brelse(tp, agflbp); 2028 xfs_trans_brelse(tp, agflbp);
2027 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 2029 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
@@ -2108,7 +2110,7 @@ xfs_alloc_put_freelist(
2108{ 2110{
2109 xfs_agf_t *agf; /* a.g. freespace structure */ 2111 xfs_agf_t *agf; /* a.g. freespace structure */
2110 xfs_agfl_t *agfl; /* a.g. free block array */ 2112 xfs_agfl_t *agfl; /* a.g. free block array */
2111 xfs_agblock_t *blockp;/* pointer to array entry */ 2113 __be32 *blockp;/* pointer to array entry */
2112 int error; 2114 int error;
2113#ifdef XFS_ALLOC_TRACE 2115#ifdef XFS_ALLOC_TRACE
2114 static char fname[] = "xfs_alloc_put_freelist"; 2116 static char fname[] = "xfs_alloc_put_freelist";
@@ -2132,7 +2134,7 @@ xfs_alloc_put_freelist(
2132 pag->pagf_flcount++; 2134 pag->pagf_flcount++;
2133 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2134 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2135 INT_SET(*blockp, ARCH_CONVERT, bno); 2137 *blockp = cpu_to_be32(bno);
2136 TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2138 TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2137 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); 2139 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2138 xfs_trans_log_buf(tp, agflbp, 2140 xfs_trans_log_buf(tp, agflbp,
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 7446556e8021..74cadf95d4e8 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -92,6 +92,7 @@ xfs_alloc_delrec(
92 xfs_alloc_key_t *rkp; /* right block key pointer */ 92 xfs_alloc_key_t *rkp; /* right block key pointer */
93 xfs_alloc_ptr_t *rpp; /* right block address pointer */ 93 xfs_alloc_ptr_t *rpp; /* right block address pointer */
94 int rrecs=0; /* number of records in right block */ 94 int rrecs=0; /* number of records in right block */
95 int numrecs;
95 xfs_alloc_rec_t *rrp; /* right block record pointer */ 96 xfs_alloc_rec_t *rrp; /* right block record pointer */
96 xfs_btree_cur_t *tcur; /* temporary btree cursor */ 97 xfs_btree_cur_t *tcur; /* temporary btree cursor */
97 98
@@ -115,7 +116,8 @@ xfs_alloc_delrec(
115 /* 116 /*
116 * Fail if we're off the end of the block. 117 * Fail if we're off the end of the block.
117 */ 118 */
118 if (ptr > be16_to_cpu(block->bb_numrecs)) { 119 numrecs = be16_to_cpu(block->bb_numrecs);
120 if (ptr > numrecs) {
119 *stat = 0; 121 *stat = 0;
120 return 0; 122 return 0;
121 } 123 }
@@ -129,18 +131,18 @@ xfs_alloc_delrec(
129 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); 131 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
130 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); 132 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
131#ifdef DEBUG 133#ifdef DEBUG
132 for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) { 134 for (i = ptr; i < numrecs; i++) {
133 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) 135 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
134 return error; 136 return error;
135 } 137 }
136#endif 138#endif
137 if (ptr < be16_to_cpu(block->bb_numrecs)) { 139 if (ptr < numrecs) {
138 memmove(&lkp[ptr - 1], &lkp[ptr], 140 memmove(&lkp[ptr - 1], &lkp[ptr],
139 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp)); 141 (numrecs - ptr) * sizeof(*lkp));
140 memmove(&lpp[ptr - 1], &lpp[ptr], 142 memmove(&lpp[ptr - 1], &lpp[ptr],
141 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp)); 143 (numrecs - ptr) * sizeof(*lpp));
142 xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); 144 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
143 xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); 145 xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
144 } 146 }
145 } 147 }
146 /* 148 /*
@@ -149,10 +151,10 @@ xfs_alloc_delrec(
149 */ 151 */
150 else { 152 else {
151 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); 153 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
152 if (ptr < be16_to_cpu(block->bb_numrecs)) { 154 if (ptr < numrecs) {
153 memmove(&lrp[ptr - 1], &lrp[ptr], 155 memmove(&lrp[ptr - 1], &lrp[ptr],
154 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp)); 156 (numrecs - ptr) * sizeof(*lrp));
155 xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1); 157 xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
156 } 158 }
157 /* 159 /*
158 * If it's the first record in the block, we'll need a key 160 * If it's the first record in the block, we'll need a key
@@ -167,7 +169,8 @@ xfs_alloc_delrec(
167 /* 169 /*
168 * Decrement and log the number of entries in the block. 170 * Decrement and log the number of entries in the block.
169 */ 171 */
170 be16_add(&block->bb_numrecs, -1); 172 numrecs--;
173 block->bb_numrecs = cpu_to_be16(numrecs);
171 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); 174 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
172 /* 175 /*
173 * See if the longest free extent in the allocation group was 176 * See if the longest free extent in the allocation group was
@@ -181,14 +184,14 @@ xfs_alloc_delrec(
181 if (level == 0 && 184 if (level == 0 &&
182 cur->bc_btnum == XFS_BTNUM_CNT && 185 cur->bc_btnum == XFS_BTNUM_CNT &&
183 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && 186 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
184 ptr > be16_to_cpu(block->bb_numrecs)) { 187 ptr > numrecs) {
185 ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1); 188 ASSERT(ptr == numrecs + 1);
186 /* 189 /*
187 * There are still records in the block. Grab the size 190 * There are still records in the block. Grab the size
188 * from the last one. 191 * from the last one.
189 */ 192 */
190 if (be16_to_cpu(block->bb_numrecs)) { 193 if (numrecs) {
191 rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur); 194 rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
192 agf->agf_longest = rrp->ar_blockcount; 195 agf->agf_longest = rrp->ar_blockcount;
193 } 196 }
194 /* 197 /*
@@ -211,7 +214,7 @@ xfs_alloc_delrec(
211 * and it's NOT the leaf level, 214 * and it's NOT the leaf level,
212 * then we can get rid of this level. 215 * then we can get rid of this level.
213 */ 216 */
214 if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) { 217 if (numrecs == 1 && level > 0) {
215 /* 218 /*
216 * lpp is still set to the first pointer in the block. 219 * lpp is still set to the first pointer in the block.
217 * Make it the new root of the btree. 220 * Make it the new root of the btree.
@@ -267,7 +270,7 @@ xfs_alloc_delrec(
267 * If the number of records remaining in the block is at least 270 * If the number of records remaining in the block is at least
268 * the minimum, we're done. 271 * the minimum, we're done.
269 */ 272 */
270 if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { 273 if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
271 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) 274 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
272 return error; 275 return error;
273 *stat = 1; 276 *stat = 1;
@@ -419,19 +422,21 @@ xfs_alloc_delrec(
419 * See if we can join with the left neighbor block. 422 * See if we can join with the left neighbor block.
420 */ 423 */
421 if (lbno != NULLAGBLOCK && 424 if (lbno != NULLAGBLOCK &&
422 lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { 425 lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
423 /* 426 /*
424 * Set "right" to be the starting block, 427 * Set "right" to be the starting block,
425 * "left" to be the left neighbor. 428 * "left" to be the left neighbor.
426 */ 429 */
427 rbno = bno; 430 rbno = bno;
428 right = block; 431 right = block;
432 rrecs = be16_to_cpu(right->bb_numrecs);
429 rbp = bp; 433 rbp = bp;
430 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 434 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
431 cur->bc_private.a.agno, lbno, 0, &lbp, 435 cur->bc_private.a.agno, lbno, 0, &lbp,
432 XFS_ALLOC_BTREE_REF))) 436 XFS_ALLOC_BTREE_REF)))
433 return error; 437 return error;
434 left = XFS_BUF_TO_ALLOC_BLOCK(lbp); 438 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
439 lrecs = be16_to_cpu(left->bb_numrecs);
435 if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) 440 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
436 return error; 441 return error;
437 } 442 }
@@ -439,20 +444,21 @@ xfs_alloc_delrec(
439 * If that won't work, see if we can join with the right neighbor block. 444 * If that won't work, see if we can join with the right neighbor block.
440 */ 445 */
441 else if (rbno != NULLAGBLOCK && 446 else if (rbno != NULLAGBLOCK &&
442 rrecs + be16_to_cpu(block->bb_numrecs) <= 447 rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
443 XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
444 /* 448 /*
445 * Set "left" to be the starting block, 449 * Set "left" to be the starting block,
446 * "right" to be the right neighbor. 450 * "right" to be the right neighbor.
447 */ 451 */
448 lbno = bno; 452 lbno = bno;
449 left = block; 453 left = block;
454 lrecs = be16_to_cpu(left->bb_numrecs);
450 lbp = bp; 455 lbp = bp;
451 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 456 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
452 cur->bc_private.a.agno, rbno, 0, &rbp, 457 cur->bc_private.a.agno, rbno, 0, &rbp,
453 XFS_ALLOC_BTREE_REF))) 458 XFS_ALLOC_BTREE_REF)))
454 return error; 459 return error;
455 right = XFS_BUF_TO_ALLOC_BLOCK(rbp); 460 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
461 rrecs = be16_to_cpu(right->bb_numrecs);
456 if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) 462 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
457 return error; 463 return error;
458 } 464 }
@@ -474,34 +480,28 @@ xfs_alloc_delrec(
474 /* 480 /*
475 * It's a non-leaf. Move keys and pointers. 481 * It's a non-leaf. Move keys and pointers.
476 */ 482 */
477 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); 483 lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
478 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); 484 lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
479 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); 485 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
480 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); 486 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
481#ifdef DEBUG 487#ifdef DEBUG
482 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { 488 for (i = 0; i < rrecs; i++) {
483 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) 489 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
484 return error; 490 return error;
485 } 491 }
486#endif 492#endif
487 memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp)); 493 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
488 memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp)); 494 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
489 xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1, 495 xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
490 be16_to_cpu(left->bb_numrecs) + 496 xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
491 be16_to_cpu(right->bb_numrecs));
492 xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
493 be16_to_cpu(left->bb_numrecs) +
494 be16_to_cpu(right->bb_numrecs));
495 } else { 497 } else {
496 /* 498 /*
497 * It's a leaf. Move records. 499 * It's a leaf. Move records.
498 */ 500 */
499 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur); 501 lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
500 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); 502 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
501 memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp)); 503 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
502 xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1, 504 xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
503 be16_to_cpu(left->bb_numrecs) +
504 be16_to_cpu(right->bb_numrecs));
505 } 505 }
506 /* 506 /*
507 * If we joined with the left neighbor, set the buffer in the 507 * If we joined with the left neighbor, set the buffer in the
@@ -509,7 +509,7 @@ xfs_alloc_delrec(
509 */ 509 */
510 if (bp != lbp) { 510 if (bp != lbp) {
511 xfs_btree_setbuf(cur, level, lbp); 511 xfs_btree_setbuf(cur, level, lbp);
512 cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs); 512 cur->bc_ptrs[level] += lrecs;
513 } 513 }
514 /* 514 /*
515 * If we joined with the right neighbor and there's a level above 515 * If we joined with the right neighbor and there's a level above
@@ -521,7 +521,8 @@ xfs_alloc_delrec(
521 /* 521 /*
522 * Fix up the number of records in the surviving block. 522 * Fix up the number of records in the surviving block.
523 */ 523 */
524 be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs)); 524 lrecs += rrecs;
525 left->bb_numrecs = cpu_to_be16(lrecs);
525 /* 526 /*
526 * Fix up the right block pointer in the surviving block, and log it. 527 * Fix up the right block pointer in the surviving block, and log it.
527 */ 528 */
@@ -608,6 +609,7 @@ xfs_alloc_insrec(
608 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ 609 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
609 xfs_alloc_key_t nkey; /* new key value, from split */ 610 xfs_alloc_key_t nkey; /* new key value, from split */
610 xfs_alloc_rec_t nrec; /* new record value, for caller */ 611 xfs_alloc_rec_t nrec; /* new record value, for caller */
612 int numrecs;
611 int optr; /* old ptr value */ 613 int optr; /* old ptr value */
612 xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ 614 xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
613 int ptr; /* index in btree block for this rec */ 615 int ptr; /* index in btree block for this rec */
@@ -653,13 +655,14 @@ xfs_alloc_insrec(
653 */ 655 */
654 bp = cur->bc_bufs[level]; 656 bp = cur->bc_bufs[level];
655 block = XFS_BUF_TO_ALLOC_BLOCK(bp); 657 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
658 numrecs = be16_to_cpu(block->bb_numrecs);
656#ifdef DEBUG 659#ifdef DEBUG
657 if ((error = xfs_btree_check_sblock(cur, block, level, bp))) 660 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
658 return error; 661 return error;
659 /* 662 /*
660 * Check that the new entry is being inserted in the right place. 663 * Check that the new entry is being inserted in the right place.
661 */ 664 */
662 if (ptr <= be16_to_cpu(block->bb_numrecs)) { 665 if (ptr <= numrecs) {
663 if (level == 0) { 666 if (level == 0) {
664 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); 667 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
665 xfs_btree_check_rec(cur->bc_btnum, recp, rp); 668 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
@@ -670,12 +673,12 @@ xfs_alloc_insrec(
670 } 673 }
671#endif 674#endif
672 nbno = NULLAGBLOCK; 675 nbno = NULLAGBLOCK;
673 ncur = (xfs_btree_cur_t *)0; 676 ncur = NULL;
674 /* 677 /*
675 * If the block is full, we can't insert the new entry until we 678 * If the block is full, we can't insert the new entry until we
676 * make the block un-full. 679 * make the block un-full.
677 */ 680 */
678 if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { 681 if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
679 /* 682 /*
680 * First, try shifting an entry to the right neighbor. 683 * First, try shifting an entry to the right neighbor.
681 */ 684 */
@@ -729,6 +732,7 @@ xfs_alloc_insrec(
729 * At this point we know there's room for our new entry in the block 732 * At this point we know there's room for our new entry in the block
730 * we're pointing at. 733 * we're pointing at.
731 */ 734 */
735 numrecs = be16_to_cpu(block->bb_numrecs);
732 if (level > 0) { 736 if (level > 0) {
733 /* 737 /*
734 * It's a non-leaf entry. Make a hole for the new data 738 * It's a non-leaf entry. Make a hole for the new data
@@ -737,15 +741,15 @@ xfs_alloc_insrec(
737 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); 741 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
738 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); 742 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
739#ifdef DEBUG 743#ifdef DEBUG
740 for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) { 744 for (i = numrecs; i >= ptr; i--) {
741 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) 745 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
742 return error; 746 return error;
743 } 747 }
744#endif 748#endif
745 memmove(&kp[ptr], &kp[ptr - 1], 749 memmove(&kp[ptr], &kp[ptr - 1],
746 (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp)); 750 (numrecs - ptr + 1) * sizeof(*kp));
747 memmove(&pp[ptr], &pp[ptr - 1], 751 memmove(&pp[ptr], &pp[ptr - 1],
748 (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp)); 752 (numrecs - ptr + 1) * sizeof(*pp));
749#ifdef DEBUG 753#ifdef DEBUG
750 if ((error = xfs_btree_check_sptr(cur, *bnop, level))) 754 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
751 return error; 755 return error;
@@ -755,11 +759,12 @@ xfs_alloc_insrec(
755 */ 759 */
756 kp[ptr - 1] = key; 760 kp[ptr - 1] = key;
757 pp[ptr - 1] = cpu_to_be32(*bnop); 761 pp[ptr - 1] = cpu_to_be32(*bnop);
758 be16_add(&block->bb_numrecs, 1); 762 numrecs++;
759 xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); 763 block->bb_numrecs = cpu_to_be16(numrecs);
760 xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); 764 xfs_alloc_log_keys(cur, bp, ptr, numrecs);
765 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
761#ifdef DEBUG 766#ifdef DEBUG
762 if (ptr < be16_to_cpu(block->bb_numrecs)) 767 if (ptr < numrecs)
763 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, 768 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
764 kp + ptr); 769 kp + ptr);
765#endif 770#endif
@@ -769,16 +774,17 @@ xfs_alloc_insrec(
769 */ 774 */
770 rp = XFS_ALLOC_REC_ADDR(block, 1, cur); 775 rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
771 memmove(&rp[ptr], &rp[ptr - 1], 776 memmove(&rp[ptr], &rp[ptr - 1],
772 (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp)); 777 (numrecs - ptr + 1) * sizeof(*rp));
773 /* 778 /*
774 * Now stuff the new record in, bump numrecs 779 * Now stuff the new record in, bump numrecs
775 * and log the new data. 780 * and log the new data.
776 */ 781 */
777 rp[ptr - 1] = *recp; /* INT_: struct copy */ 782 rp[ptr - 1] = *recp;
778 be16_add(&block->bb_numrecs, 1); 783 numrecs++;
779 xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs)); 784 block->bb_numrecs = cpu_to_be16(numrecs);
785 xfs_alloc_log_recs(cur, bp, ptr, numrecs);
780#ifdef DEBUG 786#ifdef DEBUG
781 if (ptr < be16_to_cpu(block->bb_numrecs)) 787 if (ptr < numrecs)
782 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, 788 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
783 rp + ptr); 789 rp + ptr);
784#endif 790#endif
@@ -819,8 +825,8 @@ xfs_alloc_insrec(
819 */ 825 */
820 *bnop = nbno; 826 *bnop = nbno;
821 if (nbno != NULLAGBLOCK) { 827 if (nbno != NULLAGBLOCK) {
822 *recp = nrec; /* INT_: struct copy */ 828 *recp = nrec;
823 *curp = ncur; /* INT_: struct copy */ 829 *curp = ncur;
824 } 830 }
825 *stat = 1; 831 *stat = 1;
826 return 0; 832 return 0;
@@ -981,7 +987,7 @@ xfs_alloc_lookup(
981 */ 987 */
982 bp = cur->bc_bufs[level]; 988 bp = cur->bc_bufs[level];
983 if (bp && XFS_BUF_ADDR(bp) != d) 989 if (bp && XFS_BUF_ADDR(bp) != d)
984 bp = (xfs_buf_t *)0; 990 bp = NULL;
985 if (!bp) { 991 if (!bp) {
986 /* 992 /*
987 * Need to get a new buffer. Read it, then 993 * Need to get a new buffer. Read it, then
@@ -1229,7 +1235,7 @@ xfs_alloc_lshift(
1229 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) 1235 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1230 return error; 1236 return error;
1231#endif 1237#endif
1232 *lpp = *rpp; /* INT_: copy */ 1238 *lpp = *rpp;
1233 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec); 1239 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1234 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); 1240 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1235 } 1241 }
@@ -1406,8 +1412,8 @@ xfs_alloc_newroot(
1406 1412
1407 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); 1413 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
1408 if (be16_to_cpu(left->bb_level) > 0) { 1414 if (be16_to_cpu(left->bb_level) > 0) {
1409 kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */ 1415 kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
1410 kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */ 1416 kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
1411 } else { 1417 } else {
1412 xfs_alloc_rec_t *rp; /* btree record pointer */ 1418 xfs_alloc_rec_t *rp; /* btree record pointer */
1413 1419
@@ -1527,8 +1533,8 @@ xfs_alloc_rshift(
1527 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) 1533 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1528 return error; 1534 return error;
1529#endif 1535#endif
1530 *rkp = *lkp; /* INT_: copy */ 1536 *rkp = *lkp;
1531 *rpp = *lpp; /* INT_: copy */ 1537 *rpp = *lpp;
1532 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1538 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1533 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1539 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1534 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); 1540 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
@@ -2044,7 +2050,7 @@ xfs_alloc_insert(
2044 nbno = NULLAGBLOCK; 2050 nbno = NULLAGBLOCK;
2045 nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); 2051 nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
2046 nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); 2052 nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
2047 ncur = (xfs_btree_cur_t *)0; 2053 ncur = NULL;
2048 pcur = cur; 2054 pcur = cur;
2049 /* 2055 /*
2050 * Loop going up the tree, starting at the leaf level. 2056 * Loop going up the tree, starting at the leaf level.
@@ -2076,7 +2082,7 @@ xfs_alloc_insert(
2076 */ 2082 */
2077 if (ncur) { 2083 if (ncur) {
2078 pcur = ncur; 2084 pcur = ncur;
2079 ncur = (xfs_btree_cur_t *)0; 2085 ncur = NULL;
2080 } 2086 }
2081 } while (nbno != NULLAGBLOCK); 2087 } while (nbno != NULLAGBLOCK);
2082 *stat = i; 2088 *stat = i;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 1a2101043275..9ada7bdbae52 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -91,7 +91,6 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
91/* 91/*
92 * Routines to manipulate out-of-line attribute values. 92 * Routines to manipulate out-of-line attribute values.
93 */ 93 */
94STATIC int xfs_attr_rmtval_get(xfs_da_args_t *args);
95STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args); 94STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
96STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); 95STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
97 96
@@ -180,7 +179,7 @@ xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
180 return(error); 179 return(error);
181} 180}
182 181
183STATIC int 182int
184xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, 183xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
185 char *value, int valuelen, int flags) 184 char *value, int valuelen, int flags)
186{ 185{
@@ -440,7 +439,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
440 * Generic handler routine to remove a name from an attribute list. 439 * Generic handler routine to remove a name from an attribute list.
441 * Transitions attribute list from Btree to shortform as necessary. 440 * Transitions attribute list from Btree to shortform as necessary.
442 */ 441 */
443STATIC int 442int
444xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) 443xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
445{ 444{
446 xfs_da_args_t args; 445 xfs_da_args_t args;
@@ -591,6 +590,110 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
591 return xfs_attr_remove_int(dp, name, namelen, flags); 590 return xfs_attr_remove_int(dp, name, namelen, flags);
592} 591}
593 592
593int /* error */
594xfs_attr_list_int(xfs_attr_list_context_t *context)
595{
596 int error;
597 xfs_inode_t *dp = context->dp;
598
599 /*
600 * Decide on what work routines to call based on the inode size.
601 */
602 if (XFS_IFORK_Q(dp) == 0 ||
603 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
604 dp->i_d.di_anextents == 0)) {
605 error = 0;
606 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
607 error = xfs_attr_shortform_list(context);
608 } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
609 error = xfs_attr_leaf_list(context);
610 } else {
611 error = xfs_attr_node_list(context);
612 }
613 return error;
614}
615
616#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \
617 (((struct attrlist_ent *) 0)->a_name - (char *) 0)
618#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \
619 ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
620 & ~(sizeof(u_int32_t)-1))
621
622/*
623 * Format an attribute and copy it out to the user's buffer.
624 * Take care to check values and protect against them changing later,
625 * we may be reading them directly out of a user buffer.
626 */
627/*ARGSUSED*/
628STATIC int
629xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
630 char *name, int namelen,
631 int valuelen, char *value)
632{
633 attrlist_ent_t *aep;
634 int arraytop;
635
636 ASSERT(!(context->flags & ATTR_KERNOVAL));
637 ASSERT(context->count >= 0);
638 ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
639 ASSERT(context->firstu >= sizeof(*context->alist));
640 ASSERT(context->firstu <= context->bufsize);
641
642 arraytop = sizeof(*context->alist) +
643 context->count * sizeof(context->alist->al_offset[0]);
644 context->firstu -= ATTR_ENTSIZE(namelen);
645 if (context->firstu < arraytop) {
646 xfs_attr_trace_l_c("buffer full", context);
647 context->alist->al_more = 1;
648 context->seen_enough = 1;
649 return 1;
650 }
651
652 aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
653 aep->a_valuelen = valuelen;
654 memcpy(aep->a_name, name, namelen);
655 aep->a_name[ namelen ] = 0;
656 context->alist->al_offset[ context->count++ ] = context->firstu;
657 context->alist->al_count = context->count;
658 xfs_attr_trace_l_c("add", context);
659 return 0;
660}
661
662STATIC int
663xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
664 char *name, int namelen,
665 int valuelen, char *value)
666{
667 char *offset;
668 int arraytop;
669
670 ASSERT(context->count >= 0);
671
672 arraytop = context->count + namesp->attr_namelen + namelen + 1;
673 if (arraytop > context->firstu) {
674 context->count = -1; /* insufficient space */
675 return 1;
676 }
677 offset = (char *)context->alist + context->count;
678 strncpy(offset, namesp->attr_name, namesp->attr_namelen);
679 offset += namesp->attr_namelen;
680 strncpy(offset, name, namelen); /* real name */
681 offset += namelen;
682 *offset = '\0';
683 context->count += namesp->attr_namelen + namelen + 1;
684 return 0;
685}
686
687/*ARGSUSED*/
688STATIC int
689xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
690 char *name, int namelen,
691 int valuelen, char *value)
692{
693 context->count += namesp->attr_namelen + namelen + 1;
694 return 0;
695}
696
594/* 697/*
595 * Generate a list of extended attribute names and optionally 698 * Generate a list of extended attribute names and optionally
596 * also value lengths. Positive return value follows the XFS 699 * also value lengths. Positive return value follows the XFS
@@ -615,13 +718,13 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
615 return(XFS_ERROR(EINVAL)); 718 return(XFS_ERROR(EINVAL));
616 if ((cursor->initted == 0) && 719 if ((cursor->initted == 0) &&
617 (cursor->hashval || cursor->blkno || cursor->offset)) 720 (cursor->hashval || cursor->blkno || cursor->offset))
618 return(XFS_ERROR(EINVAL)); 721 return XFS_ERROR(EINVAL);
619 722
620 /* 723 /*
621 * Check for a properly aligned buffer. 724 * Check for a properly aligned buffer.
622 */ 725 */
623 if (((long)buffer) & (sizeof(int)-1)) 726 if (((long)buffer) & (sizeof(int)-1))
624 return(XFS_ERROR(EFAULT)); 727 return XFS_ERROR(EFAULT);
625 if (flags & ATTR_KERNOVAL) 728 if (flags & ATTR_KERNOVAL)
626 bufsize = 0; 729 bufsize = 0;
627 730
@@ -634,53 +737,47 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
634 context.dupcnt = 0; 737 context.dupcnt = 0;
635 context.resynch = 1; 738 context.resynch = 1;
636 context.flags = flags; 739 context.flags = flags;
637 if (!(flags & ATTR_KERNAMELS)) { 740 context.seen_enough = 0;
741 context.alist = (attrlist_t *)buffer;
742 context.put_value = 0;
743
744 if (flags & ATTR_KERNAMELS) {
745 context.bufsize = bufsize;
746 context.firstu = context.bufsize;
747 if (flags & ATTR_KERNOVAL)
748 context.put_listent = xfs_attr_kern_list_sizes;
749 else
750 context.put_listent = xfs_attr_kern_list;
751 } else {
638 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */ 752 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
639 context.firstu = context.bufsize; 753 context.firstu = context.bufsize;
640 context.alist = (attrlist_t *)buffer;
641 context.alist->al_count = 0; 754 context.alist->al_count = 0;
642 context.alist->al_more = 0; 755 context.alist->al_more = 0;
643 context.alist->al_offset[0] = context.bufsize; 756 context.alist->al_offset[0] = context.bufsize;
644 } 757 context.put_listent = xfs_attr_put_listent;
645 else {
646 context.bufsize = bufsize;
647 context.firstu = context.bufsize;
648 context.alist = (attrlist_t *)buffer;
649 } 758 }
650 759
651 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 760 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
652 return (EIO); 761 return EIO;
653 762
654 xfs_ilock(dp, XFS_ILOCK_SHARED); 763 xfs_ilock(dp, XFS_ILOCK_SHARED);
655 /*
656 * Decide on what work routines to call based on the inode size.
657 */
658 xfs_attr_trace_l_c("syscall start", &context); 764 xfs_attr_trace_l_c("syscall start", &context);
659 if (XFS_IFORK_Q(dp) == 0 || 765
660 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 766 error = xfs_attr_list_int(&context);
661 dp->i_d.di_anextents == 0)) { 767
662 error = 0;
663 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
664 error = xfs_attr_shortform_list(&context);
665 } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
666 error = xfs_attr_leaf_list(&context);
667 } else {
668 error = xfs_attr_node_list(&context);
669 }
670 xfs_iunlock(dp, XFS_ILOCK_SHARED); 768 xfs_iunlock(dp, XFS_ILOCK_SHARED);
671 xfs_attr_trace_l_c("syscall end", &context); 769 xfs_attr_trace_l_c("syscall end", &context);
672 770
673 if (!(context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS))) { 771 if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
674 ASSERT(error >= 0); 772 /* must return negated buffer size or the error */
675 }
676 else { /* must return negated buffer size or the error */
677 if (context.count < 0) 773 if (context.count < 0)
678 error = XFS_ERROR(ERANGE); 774 error = XFS_ERROR(ERANGE);
679 else 775 else
680 error = -context.count; 776 error = -context.count;
681 } 777 } else
778 ASSERT(error >= 0);
682 779
683 return(error); 780 return error;
684} 781}
685 782
686int /* error */ 783int /* error */
@@ -1122,19 +1219,19 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1122 context->cursor->blkno = 0; 1219 context->cursor->blkno = 0;
1123 error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK); 1220 error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1124 if (error) 1221 if (error)
1125 return(error); 1222 return XFS_ERROR(error);
1126 ASSERT(bp != NULL); 1223 ASSERT(bp != NULL);
1127 leaf = bp->data; 1224 leaf = bp->data;
1128 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { 1225 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1129 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, 1226 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1130 context->dp->i_mount, leaf); 1227 context->dp->i_mount, leaf);
1131 xfs_da_brelse(NULL, bp); 1228 xfs_da_brelse(NULL, bp);
1132 return(XFS_ERROR(EFSCORRUPTED)); 1229 return XFS_ERROR(EFSCORRUPTED);
1133 } 1230 }
1134 1231
1135 (void)xfs_attr_leaf_list_int(bp, context); 1232 error = xfs_attr_leaf_list_int(bp, context);
1136 xfs_da_brelse(NULL, bp); 1233 xfs_da_brelse(NULL, bp);
1137 return(0); 1234 return XFS_ERROR(error);
1138} 1235}
1139 1236
1140 1237
@@ -1858,8 +1955,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1858 return(XFS_ERROR(EFSCORRUPTED)); 1955 return(XFS_ERROR(EFSCORRUPTED));
1859 } 1956 }
1860 error = xfs_attr_leaf_list_int(bp, context); 1957 error = xfs_attr_leaf_list_int(bp, context);
1861 if (error || !leaf->hdr.info.forw) 1958 if (error) {
1862 break; /* not really an error, buffer full or EOF */ 1959 xfs_da_brelse(NULL, bp);
1960 return error;
1961 }
1962 if (context->seen_enough || leaf->hdr.info.forw == 0)
1963 break;
1863 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); 1964 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1864 xfs_da_brelse(NULL, bp); 1965 xfs_da_brelse(NULL, bp);
1865 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, 1966 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
@@ -1886,7 +1987,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1886 * Read the value associated with an attribute from the out-of-line buffer 1987 * Read the value associated with an attribute from the out-of-line buffer
1887 * that we stored it in. 1988 * that we stored it in.
1888 */ 1989 */
1889STATIC int 1990int
1890xfs_attr_rmtval_get(xfs_da_args_t *args) 1991xfs_attr_rmtval_get(xfs_da_args_t *args)
1891{ 1992{
1892 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; 1993 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 981633f6c077..783977d3ea71 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -37,6 +37,7 @@
37 37
38struct cred; 38struct cred;
39struct bhv_vnode; 39struct bhv_vnode;
40struct xfs_attr_list_context;
40 41
41typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int); 42typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int);
42typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int); 43typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int);
@@ -160,13 +161,16 @@ struct xfs_da_args;
160 */ 161 */
161int xfs_attr_get(bhv_desc_t *, const char *, char *, int *, int, struct cred *); 162int xfs_attr_get(bhv_desc_t *, const char *, char *, int *, int, struct cred *);
162int xfs_attr_set(bhv_desc_t *, const char *, char *, int, int, struct cred *); 163int xfs_attr_set(bhv_desc_t *, const char *, char *, int, int, struct cred *);
164int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
163int xfs_attr_remove(bhv_desc_t *, const char *, int, struct cred *); 165int xfs_attr_remove(bhv_desc_t *, const char *, int, struct cred *);
164int xfs_attr_list(bhv_desc_t *, char *, int, int, 166int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
165 struct attrlist_cursor_kern *, struct cred *); 167int xfs_attr_list(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *);
168int xfs_attr_list_int(struct xfs_attr_list_context *);
166int xfs_attr_inactive(struct xfs_inode *dp); 169int xfs_attr_inactive(struct xfs_inode *dp);
167 170
168int xfs_attr_shortform_getvalue(struct xfs_da_args *); 171int xfs_attr_shortform_getvalue(struct xfs_da_args *);
169int xfs_attr_fetch(struct xfs_inode *, const char *, int, 172int xfs_attr_fetch(struct xfs_inode *, const char *, int,
170 char *, int *, int, struct cred *); 173 char *, int *, int, struct cred *);
174int xfs_attr_rmtval_get(struct xfs_da_args *args);
171 175
172#endif /* __XFS_ATTR_H__ */ 176#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9455051f0120..9719bbef122c 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -89,9 +89,46 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
89 int dst_start, int move_count, 89 int dst_start, int move_count,
90 xfs_mount_t *mp); 90 xfs_mount_t *mp);
91STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); 91STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
92STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, 92
93 attrnames_t *, char *name, int namelen, 93/*========================================================================
94 int valuelen); 94 * Namespace helper routines
95 *========================================================================*/
96
97STATIC inline attrnames_t *
98xfs_attr_flags_namesp(int flags)
99{
100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
101 ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
102}
103
104/*
105 * If namespace bits don't match return 0.
106 * If all match then return 1.
107 */
108STATIC inline int
109xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
110{
111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
112}
113
114/*
115 * If namespace bits don't match and we don't have an override for it
116 * then return 0.
117 * If all match or are overridable then return 1.
118 */
119STATIC inline int
120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
121{
122 if (((arg_flags & ATTR_SECURE) == 0) !=
123 ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
124 !(arg_flags & ATTR_KERNORMALS))
125 return 0;
126 if (((arg_flags & ATTR_ROOT) == 0) !=
127 ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
128 !(arg_flags & ATTR_KERNROOTLS))
129 return 0;
130 return 1;
131}
95 132
96 133
97/*======================================================================== 134/*========================================================================
@@ -228,11 +265,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
228 continue; 265 continue;
229 if (memcmp(args->name, sfe->nameval, args->namelen) != 0) 266 if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
230 continue; 267 continue;
231 if (((args->flags & ATTR_SECURE) != 0) != 268 if (!xfs_attr_namesp_match(args->flags, sfe->flags))
232 ((sfe->flags & XFS_ATTR_SECURE) != 0))
233 continue;
234 if (((args->flags & ATTR_ROOT) != 0) !=
235 ((sfe->flags & XFS_ATTR_ROOT) != 0))
236 continue; 269 continue;
237 ASSERT(0); 270 ASSERT(0);
238#endif 271#endif
@@ -246,8 +279,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
246 279
247 sfe->namelen = args->namelen; 280 sfe->namelen = args->namelen;
248 sfe->valuelen = args->valuelen; 281 sfe->valuelen = args->valuelen;
249 sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : 282 sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
250 ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
251 memcpy(sfe->nameval, args->name, args->namelen); 283 memcpy(sfe->nameval, args->name, args->namelen);
252 memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); 284 memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
253 sf->hdr.count++; 285 sf->hdr.count++;
@@ -282,11 +314,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
282 continue; 314 continue;
283 if (memcmp(sfe->nameval, args->name, args->namelen) != 0) 315 if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
284 continue; 316 continue;
285 if (((args->flags & ATTR_SECURE) != 0) != 317 if (!xfs_attr_namesp_match(args->flags, sfe->flags))
286 ((sfe->flags & XFS_ATTR_SECURE) != 0))
287 continue;
288 if (((args->flags & ATTR_ROOT) != 0) !=
289 ((sfe->flags & XFS_ATTR_ROOT) != 0))
290 continue; 318 continue;
291 break; 319 break;
292 } 320 }
@@ -363,11 +391,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
363 continue; 391 continue;
364 if (memcmp(args->name, sfe->nameval, args->namelen) != 0) 392 if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
365 continue; 393 continue;
366 if (((args->flags & ATTR_SECURE) != 0) != 394 if (!xfs_attr_namesp_match(args->flags, sfe->flags))
367 ((sfe->flags & XFS_ATTR_SECURE) != 0))
368 continue;
369 if (((args->flags & ATTR_ROOT) != 0) !=
370 ((sfe->flags & XFS_ATTR_ROOT) != 0))
371 continue; 395 continue;
372 return(XFS_ERROR(EEXIST)); 396 return(XFS_ERROR(EEXIST));
373 } 397 }
@@ -394,11 +418,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
394 continue; 418 continue;
395 if (memcmp(args->name, sfe->nameval, args->namelen) != 0) 419 if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
396 continue; 420 continue;
397 if (((args->flags & ATTR_SECURE) != 0) != 421 if (!xfs_attr_namesp_match(args->flags, sfe->flags))
398 ((sfe->flags & XFS_ATTR_SECURE) != 0))
399 continue;
400 if (((args->flags & ATTR_ROOT) != 0) !=
401 ((sfe->flags & XFS_ATTR_ROOT) != 0))
402 continue; 422 continue;
403 if (args->flags & ATTR_KERNOVAL) { 423 if (args->flags & ATTR_KERNOVAL) {
404 args->valuelen = sfe->valuelen; 424 args->valuelen = sfe->valuelen;
@@ -485,8 +505,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
485 nargs.valuelen = sfe->valuelen; 505 nargs.valuelen = sfe->valuelen;
486 nargs.hashval = xfs_da_hashname((char *)sfe->nameval, 506 nargs.hashval = xfs_da_hashname((char *)sfe->nameval,
487 sfe->namelen); 507 sfe->namelen);
488 nargs.flags = (sfe->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : 508 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
489 ((sfe->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
490 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 509 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */
491 ASSERT(error == ENOATTR); 510 ASSERT(error == ENOATTR);
492 error = xfs_attr_leaf_add(bp, &nargs); 511 error = xfs_attr_leaf_add(bp, &nargs);
@@ -520,6 +539,10 @@ xfs_attr_shortform_compare(const void *a, const void *b)
520 } 539 }
521} 540}
522 541
542
543#define XFS_ISRESET_CURSOR(cursor) \
544 (!((cursor)->initted) && !((cursor)->hashval) && \
545 !((cursor)->blkno) && !((cursor)->offset))
523/* 546/*
524 * Copy out entries of shortform attribute lists for attr_list(). 547 * Copy out entries of shortform attribute lists for attr_list().
525 * Shortform attribute lists are not stored in hashval sorted order. 548 * Shortform attribute lists are not stored in hashval sorted order.
@@ -537,6 +560,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
537 xfs_attr_sf_entry_t *sfe; 560 xfs_attr_sf_entry_t *sfe;
538 xfs_inode_t *dp; 561 xfs_inode_t *dp;
539 int sbsize, nsbuf, count, i; 562 int sbsize, nsbuf, count, i;
563 int error;
540 564
541 ASSERT(context != NULL); 565 ASSERT(context != NULL);
542 dp = context->dp; 566 dp = context->dp;
@@ -552,46 +576,51 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
552 xfs_attr_trace_l_c("sf start", context); 576 xfs_attr_trace_l_c("sf start", context);
553 577
554 /* 578 /*
555 * If the buffer is large enough, do not bother with sorting. 579 * If the buffer is large enough and the cursor is at the start,
580 * do not bother with sorting since we will return everything in
581 * one buffer and another call using the cursor won't need to be
582 * made.
556 * Note the generous fudge factor of 16 overhead bytes per entry. 583 * Note the generous fudge factor of 16 overhead bytes per entry.
584 * If bufsize is zero then put_listent must be a search function
585 * and can just scan through what we have.
557 */ 586 */
558 if ((dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize) { 587 if (context->bufsize == 0 ||
588 (XFS_ISRESET_CURSOR(cursor) &&
589 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
559 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 590 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
560 attrnames_t *namesp; 591 attrnames_t *namesp;
561 592
562 if (((context->flags & ATTR_SECURE) != 0) != 593 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
563 ((sfe->flags & XFS_ATTR_SECURE) != 0) &&
564 !(context->flags & ATTR_KERNORMALS)) {
565 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
566 continue;
567 }
568 if (((context->flags & ATTR_ROOT) != 0) !=
569 ((sfe->flags & XFS_ATTR_ROOT) != 0) &&
570 !(context->flags & ATTR_KERNROOTLS)) {
571 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 594 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
572 continue; 595 continue;
573 } 596 }
574 namesp = (sfe->flags & XFS_ATTR_SECURE) ? &attr_secure: 597 namesp = xfs_attr_flags_namesp(sfe->flags);
575 ((sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : 598 error = context->put_listent(context,
576 &attr_user); 599 namesp,
577 if (context->flags & ATTR_KERNOVAL) { 600 (char *)sfe->nameval,
578 ASSERT(context->flags & ATTR_KERNAMELS); 601 (int)sfe->namelen,
579 context->count += namesp->attr_namelen + 602 (int)sfe->valuelen,
580 sfe->namelen + 1; 603 (char*)&sfe->nameval[sfe->namelen]);
581 } 604
582 else { 605 /*
583 if (xfs_attr_put_listent(context, namesp, 606 * Either search callback finished early or
584 (char *)sfe->nameval, 607 * didn't fit it all in the buffer after all.
585 (int)sfe->namelen, 608 */
586 (int)sfe->valuelen)) 609 if (context->seen_enough)
587 break; 610 break;
588 } 611
612 if (error)
613 return error;
589 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 614 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
590 } 615 }
591 xfs_attr_trace_l_c("sf big-gulp", context); 616 xfs_attr_trace_l_c("sf big-gulp", context);
592 return(0); 617 return(0);
593 } 618 }
594 619
620 /* do no more for a search callback */
621 if (context->bufsize == 0)
622 return 0;
623
595 /* 624 /*
596 * It didn't all fit, so we have to sort everything on hashval. 625 * It didn't all fit, so we have to sort everything on hashval.
597 */ 626 */
@@ -614,15 +643,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
614 kmem_free(sbuf, sbsize); 643 kmem_free(sbuf, sbsize);
615 return XFS_ERROR(EFSCORRUPTED); 644 return XFS_ERROR(EFSCORRUPTED);
616 } 645 }
617 if (((context->flags & ATTR_SECURE) != 0) != 646 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
618 ((sfe->flags & XFS_ATTR_SECURE) != 0) &&
619 !(context->flags & ATTR_KERNORMALS)) {
620 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
621 continue;
622 }
623 if (((context->flags & ATTR_ROOT) != 0) !=
624 ((sfe->flags & XFS_ATTR_ROOT) != 0) &&
625 !(context->flags & ATTR_KERNROOTLS)) {
626 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 647 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
627 continue; 648 continue;
628 } 649 }
@@ -671,24 +692,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
671 for ( ; i < nsbuf; i++, sbp++) { 692 for ( ; i < nsbuf; i++, sbp++) {
672 attrnames_t *namesp; 693 attrnames_t *namesp;
673 694
674 namesp = (sbp->flags & XFS_ATTR_SECURE) ? &attr_secure : 695 namesp = xfs_attr_flags_namesp(sbp->flags);
675 ((sbp->flags & XFS_ATTR_ROOT) ? &attr_trusted :
676 &attr_user);
677 696
678 if (cursor->hashval != sbp->hash) { 697 if (cursor->hashval != sbp->hash) {
679 cursor->hashval = sbp->hash; 698 cursor->hashval = sbp->hash;
680 cursor->offset = 0; 699 cursor->offset = 0;
681 } 700 }
682 if (context->flags & ATTR_KERNOVAL) { 701 error = context->put_listent(context,
683 ASSERT(context->flags & ATTR_KERNAMELS); 702 namesp,
684 context->count += namesp->attr_namelen + 703 sbp->name,
685 sbp->namelen + 1; 704 sbp->namelen,
686 } else { 705 sbp->valuelen,
687 if (xfs_attr_put_listent(context, namesp, 706 &sbp->name[sbp->namelen]);
688 sbp->name, sbp->namelen, 707 if (error)
689 sbp->valuelen)) 708 return error;
690 break; 709 if (context->seen_enough)
691 } 710 break;
692 cursor->offset++; 711 cursor->offset++;
693 } 712 }
694 713
@@ -810,8 +829,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
810 nargs.value = (char *)&name_loc->nameval[nargs.namelen]; 829 nargs.value = (char *)&name_loc->nameval[nargs.namelen];
811 nargs.valuelen = be16_to_cpu(name_loc->valuelen); 830 nargs.valuelen = be16_to_cpu(name_loc->valuelen);
812 nargs.hashval = be32_to_cpu(entry->hashval); 831 nargs.hashval = be32_to_cpu(entry->hashval);
813 nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : 832 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
814 ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
815 xfs_attr_shortform_add(&nargs, forkoff); 833 xfs_attr_shortform_add(&nargs, forkoff);
816 } 834 }
817 error = 0; 835 error = 0;
@@ -1098,8 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1098 be16_to_cpu(map->size)); 1116 be16_to_cpu(map->size));
1099 entry->hashval = cpu_to_be32(args->hashval); 1117 entry->hashval = cpu_to_be32(args->hashval);
1100 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1118 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1101 entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : 1119 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
1102 ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
1103 if (args->rename) { 1120 if (args->rename) {
1104 entry->flags |= XFS_ATTR_INCOMPLETE; 1121 entry->flags |= XFS_ATTR_INCOMPLETE;
1105 if ((args->blkno2 == args->blkno) && 1122 if ((args->blkno2 == args->blkno) &&
@@ -1926,7 +1943,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1926 else 1943 else
1927 break; 1944 break;
1928 } 1945 }
1929 ASSERT((probe >= 0) && 1946 ASSERT((probe >= 0) &&
1930 (!leaf->hdr.count 1947 (!leaf->hdr.count
1931 || (probe < be16_to_cpu(leaf->hdr.count)))); 1948 || (probe < be16_to_cpu(leaf->hdr.count))));
1932 ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval)); 1949 ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
@@ -1971,14 +1988,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1971 name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe); 1988 name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe);
1972 if (name_loc->namelen != args->namelen) 1989 if (name_loc->namelen != args->namelen)
1973 continue; 1990 continue;
1974 if (memcmp(args->name, (char *)name_loc->nameval, 1991 if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0)
1975 args->namelen) != 0)
1976 continue; 1992 continue;
1977 if (((args->flags & ATTR_SECURE) != 0) != 1993 if (!xfs_attr_namesp_match(args->flags, entry->flags))
1978 ((entry->flags & XFS_ATTR_SECURE) != 0))
1979 continue;
1980 if (((args->flags & ATTR_ROOT) != 0) !=
1981 ((entry->flags & XFS_ATTR_ROOT) != 0))
1982 continue; 1994 continue;
1983 args->index = probe; 1995 args->index = probe;
1984 return(XFS_ERROR(EEXIST)); 1996 return(XFS_ERROR(EEXIST));
@@ -1989,11 +2001,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1989 if (memcmp(args->name, (char *)name_rmt->name, 2001 if (memcmp(args->name, (char *)name_rmt->name,
1990 args->namelen) != 0) 2002 args->namelen) != 0)
1991 continue; 2003 continue;
1992 if (((args->flags & ATTR_SECURE) != 0) != 2004 if (!xfs_attr_namesp_match(args->flags, entry->flags))
1993 ((entry->flags & XFS_ATTR_SECURE) != 0))
1994 continue;
1995 if (((args->flags & ATTR_ROOT) != 0) !=
1996 ((entry->flags & XFS_ATTR_ROOT) != 0))
1997 continue; 2005 continue;
1998 args->index = probe; 2006 args->index = probe;
1999 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2007 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
@@ -2312,8 +2320,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2312 attrlist_cursor_kern_t *cursor; 2320 attrlist_cursor_kern_t *cursor;
2313 xfs_attr_leafblock_t *leaf; 2321 xfs_attr_leafblock_t *leaf;
2314 xfs_attr_leaf_entry_t *entry; 2322 xfs_attr_leaf_entry_t *entry;
2315 xfs_attr_leaf_name_local_t *name_loc;
2316 xfs_attr_leaf_name_remote_t *name_rmt;
2317 int retval, i; 2323 int retval, i;
2318 2324
2319 ASSERT(bp != NULL); 2325 ASSERT(bp != NULL);
@@ -2355,9 +2361,8 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2355 * We have found our place, start copying out the new attributes. 2361 * We have found our place, start copying out the new attributes.
2356 */ 2362 */
2357 retval = 0; 2363 retval = 0;
2358 for ( ; (i < be16_to_cpu(leaf->hdr.count)) 2364 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
2359 && (retval == 0); entry++, i++) { 2365 attrnames_t *namesp;
2360 attrnames_t *namesp;
2361 2366
2362 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2367 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2363 cursor->hashval = be32_to_cpu(entry->hashval); 2368 cursor->hashval = be32_to_cpu(entry->hashval);
@@ -2366,115 +2371,69 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2366 2371
2367 if (entry->flags & XFS_ATTR_INCOMPLETE) 2372 if (entry->flags & XFS_ATTR_INCOMPLETE)
2368 continue; /* skip incomplete entries */ 2373 continue; /* skip incomplete entries */
2369 if (((context->flags & ATTR_SECURE) != 0) != 2374 if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
2370 ((entry->flags & XFS_ATTR_SECURE) != 0) && 2375 continue;
2371 !(context->flags & ATTR_KERNORMALS)) 2376
2372 continue; /* skip non-matching entries */ 2377 namesp = xfs_attr_flags_namesp(entry->flags);
2373 if (((context->flags & ATTR_ROOT) != 0) !=
2374 ((entry->flags & XFS_ATTR_ROOT) != 0) &&
2375 !(context->flags & ATTR_KERNROOTLS))
2376 continue; /* skip non-matching entries */
2377
2378 namesp = (entry->flags & XFS_ATTR_SECURE) ? &attr_secure :
2379 ((entry->flags & XFS_ATTR_ROOT) ? &attr_trusted :
2380 &attr_user);
2381 2378
2382 if (entry->flags & XFS_ATTR_LOCAL) { 2379 if (entry->flags & XFS_ATTR_LOCAL) {
2383 name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); 2380 xfs_attr_leaf_name_local_t *name_loc =
2384 if (context->flags & ATTR_KERNOVAL) { 2381 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
2385 ASSERT(context->flags & ATTR_KERNAMELS); 2382
2386 context->count += namesp->attr_namelen + 2383 retval = context->put_listent(context,
2387 (int)name_loc->namelen + 1; 2384 namesp,
2388 } else { 2385 (char *)name_loc->nameval,
2389 retval = xfs_attr_put_listent(context, namesp, 2386 (int)name_loc->namelen,
2390 (char *)name_loc->nameval, 2387 be16_to_cpu(name_loc->valuelen),
2391 (int)name_loc->namelen, 2388 (char *)&name_loc->nameval[name_loc->namelen]);
2392 be16_to_cpu(name_loc->valuelen)); 2389 if (retval)
2393 } 2390 return retval;
2394 } else { 2391 } else {
2395 name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); 2392 xfs_attr_leaf_name_remote_t *name_rmt =
2396 if (context->flags & ATTR_KERNOVAL) { 2393 XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
2397 ASSERT(context->flags & ATTR_KERNAMELS); 2394
2398 context->count += namesp->attr_namelen + 2395 int valuelen = be32_to_cpu(name_rmt->valuelen);
2399 (int)name_rmt->namelen + 1; 2396
2400 } else { 2397 if (context->put_value) {
2401 retval = xfs_attr_put_listent(context, namesp, 2398 xfs_da_args_t args;
2402 (char *)name_rmt->name, 2399
2403 (int)name_rmt->namelen, 2400 memset((char *)&args, 0, sizeof(args));
2404 be32_to_cpu(name_rmt->valuelen)); 2401 args.dp = context->dp;
2402 args.whichfork = XFS_ATTR_FORK;
2403 args.valuelen = valuelen;
2404 args.value = kmem_alloc(valuelen, KM_SLEEP);
2405 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2406 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
2407 retval = xfs_attr_rmtval_get(&args);
2408 if (retval)
2409 return retval;
2410 retval = context->put_listent(context,
2411 namesp,
2412 (char *)name_rmt->name,
2413 (int)name_rmt->namelen,
2414 valuelen,
2415 (char*)args.value);
2416 kmem_free(args.value, valuelen);
2405 } 2417 }
2418 else {
2419 retval = context->put_listent(context,
2420 namesp,
2421 (char *)name_rmt->name,
2422 (int)name_rmt->namelen,
2423 valuelen,
2424 NULL);
2425 }
2426 if (retval)
2427 return retval;
2406 } 2428 }
2407 if (retval == 0) { 2429 if (context->seen_enough)
2408 cursor->offset++; 2430 break;
2409 } 2431 cursor->offset++;
2410 } 2432 }
2411 xfs_attr_trace_l_cl("blk end", context, leaf); 2433 xfs_attr_trace_l_cl("blk end", context, leaf);
2412 return(retval); 2434 return(retval);
2413} 2435}
2414 2436
2415#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \
2416 (((struct attrlist_ent *) 0)->a_name - (char *) 0)
2417#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \
2418 ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
2419 & ~(sizeof(u_int32_t)-1))
2420
2421/*
2422 * Format an attribute and copy it out to the user's buffer.
2423 * Take care to check values and protect against them changing later,
2424 * we may be reading them directly out of a user buffer.
2425 */
2426/*ARGSUSED*/
2427STATIC int
2428xfs_attr_put_listent(xfs_attr_list_context_t *context,
2429 attrnames_t *namesp, char *name, int namelen, int valuelen)
2430{
2431 attrlist_ent_t *aep;
2432 int arraytop;
2433
2434 ASSERT(!(context->flags & ATTR_KERNOVAL));
2435 if (context->flags & ATTR_KERNAMELS) {
2436 char *offset;
2437
2438 ASSERT(context->count >= 0);
2439
2440 arraytop = context->count + namesp->attr_namelen + namelen + 1;
2441 if (arraytop > context->firstu) {
2442 context->count = -1; /* insufficient space */
2443 return(1);
2444 }
2445 offset = (char *)context->alist + context->count;
2446 strncpy(offset, namesp->attr_name, namesp->attr_namelen);
2447 offset += namesp->attr_namelen;
2448 strncpy(offset, name, namelen); /* real name */
2449 offset += namelen;
2450 *offset = '\0';
2451 context->count += namesp->attr_namelen + namelen + 1;
2452 return(0);
2453 }
2454
2455 ASSERT(context->count >= 0);
2456 ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
2457 ASSERT(context->firstu >= sizeof(*context->alist));
2458 ASSERT(context->firstu <= context->bufsize);
2459
2460 arraytop = sizeof(*context->alist) +
2461 context->count * sizeof(context->alist->al_offset[0]);
2462 context->firstu -= ATTR_ENTSIZE(namelen);
2463 if (context->firstu < arraytop) {
2464 xfs_attr_trace_l_c("buffer full", context);
2465 context->alist->al_more = 1;
2466 return(1);
2467 }
2468
2469 aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
2470 aep->a_valuelen = valuelen;
2471 memcpy(aep->a_name, name, namelen);
2472 aep->a_name[ namelen ] = 0;
2473 context->alist->al_offset[ context->count++ ] = context->firstu;
2474 context->alist->al_count = context->count;
2475 xfs_attr_trace_l_c("add", context);
2476 return(0);
2477}
2478 2437
2479/*======================================================================== 2438/*========================================================================
2480 * Manage the INCOMPLETE flag in a leaf entry 2439 * Manage the INCOMPLETE flag in a leaf entry
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 51c3ee156b2f..040f732ce1e2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -130,6 +130,19 @@ typedef struct xfs_attr_leafblock {
130#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) 130#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
131 131
132/* 132/*
133 * Conversion macros for converting namespace bits from argument flags
134 * to ondisk flags.
135 */
136#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE)
137#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
138#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK)
139#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK)
140#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
141 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
142#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
143 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
144
145/*
133 * Alignment for namelist and valuelist entries (since they are mixed 146 * Alignment for namelist and valuelist entries (since they are mixed
134 * there can be only one alignment value) 147 * there can be only one alignment value)
135 */ 148 */
@@ -196,16 +209,26 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
196 * Structure used to pass context around among the routines. 209 * Structure used to pass context around among the routines.
197 *========================================================================*/ 210 *========================================================================*/
198 211
212
213struct xfs_attr_list_context;
214
215typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
216 char *, int, int, char *);
217
199typedef struct xfs_attr_list_context { 218typedef struct xfs_attr_list_context {
200 struct xfs_inode *dp; /* inode */ 219 struct xfs_inode *dp; /* inode */
201 struct attrlist_cursor_kern *cursor;/* position in list */ 220 struct attrlist_cursor_kern *cursor; /* position in list */
202 struct attrlist *alist; /* output buffer */ 221 struct attrlist *alist; /* output buffer */
203 int count; /* num used entries */ 222 int seen_enough; /* T/F: seen enough of list? */
204 int dupcnt; /* count dup hashvals seen */ 223 int count; /* num used entries */
205 int bufsize;/* total buffer size */ 224 int dupcnt; /* count dup hashvals seen */
206 int firstu; /* first used byte in buffer */ 225 int bufsize; /* total buffer size */
207 int flags; /* from VOP call */ 226 int firstu; /* first used byte in buffer */
208 int resynch;/* T/F: resynch with cursor */ 227 int flags; /* from VOP call */
228 int resynch; /* T/F: resynch with cursor */
229 int put_value; /* T/F: need value for listent */
230 put_listent_func_t put_listent; /* list output fmt function */
231 int index; /* index into output buffer */
209} xfs_attr_list_context_t; 232} xfs_attr_list_context_t;
210 233
211/* 234/*
diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c
index f4fe3715a803..0dc17219d412 100644
--- a/fs/xfs/xfs_behavior.c
+++ b/fs/xfs/xfs_behavior.c
@@ -110,26 +110,6 @@ bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp)
110} 110}
111 111
112/* 112/*
113 * Look for a specific ops vector on the specified behavior chain.
114 * Return the associated behavior descriptor. Or NULL, if not found.
115 */
116bhv_desc_t *
117bhv_lookup(bhv_head_t *bhp, void *ops)
118{
119 bhv_desc_t *curdesc;
120
121 for (curdesc = bhp->bh_first;
122 curdesc != NULL;
123 curdesc = curdesc->bd_next) {
124
125 if (curdesc->bd_ops == ops)
126 return curdesc;
127 }
128
129 return NULL;
130}
131
132/*
133 * Looks for the first behavior within a specified range of positions. 113 * Looks for the first behavior within a specified range of positions.
134 * Return the associated behavior descriptor. Or NULL, if none found. 114 * Return the associated behavior descriptor. Or NULL, if none found.
135 */ 115 */
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 6e6e56fb352d..e7ca1fed955a 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -176,12 +176,10 @@ extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *);
176 * Behavior module prototypes. 176 * Behavior module prototypes.
177 */ 177 */
178extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp); 178extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp);
179extern bhv_desc_t * bhv_lookup(bhv_head_t *bhp, void *ops);
180extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high); 179extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high);
181extern bhv_desc_t * bhv_base(bhv_head_t *bhp); 180extern bhv_desc_t * bhv_base(bhv_head_t *bhp);
182 181
183/* No bhv locking on Linux */ 182/* No bhv locking on Linux */
184#define bhv_lookup_unlocked bhv_lookup
185#define bhv_base_unlocked bhv_base 183#define bhv_base_unlocked bhv_base
186 184
187#endif /* __XFS_BEHAVIOR_H__ */ 185#endif /* __XFS_BEHAVIOR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index bf46fae303af..5b050c06795f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2999,7 +2999,7 @@ xfs_bmap_btree_to_extents(
2999 int error; /* error return value */ 2999 int error; /* error return value */
3000 xfs_ifork_t *ifp; /* inode fork data */ 3000 xfs_ifork_t *ifp; /* inode fork data */
3001 xfs_mount_t *mp; /* mount point structure */ 3001 xfs_mount_t *mp; /* mount point structure */
3002 xfs_bmbt_ptr_t *pp; /* ptr to block address */ 3002 __be64 *pp; /* ptr to block address */
3003 xfs_bmbt_block_t *rblock;/* root btree block */ 3003 xfs_bmbt_block_t *rblock;/* root btree block */
3004 3004
3005 ifp = XFS_IFORK_PTR(ip, whichfork); 3005 ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -3011,12 +3011,12 @@ xfs_bmap_btree_to_extents(
3011 ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); 3011 ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
3012 mp = ip->i_mount; 3012 mp = ip->i_mount;
3013 pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes); 3013 pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
3014 cbno = be64_to_cpu(*pp);
3014 *logflagsp = 0; 3015 *logflagsp = 0;
3015#ifdef DEBUG 3016#ifdef DEBUG
3016 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1))) 3017 if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
3017 return error; 3018 return error;
3018#endif 3019#endif
3019 cbno = INT_GET(*pp, ARCH_CONVERT);
3020 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, 3020 if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
3021 XFS_BMAP_BTREE_REF))) 3021 XFS_BMAP_BTREE_REF)))
3022 return error; 3022 return error;
@@ -3512,9 +3512,9 @@ xfs_bmap_extents_to_btree(
3512 */ 3512 */
3513 kp = XFS_BMAP_KEY_IADDR(block, 1, cur); 3513 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
3514 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); 3514 arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
3515 INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp)); 3515 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
3516 pp = XFS_BMAP_PTR_IADDR(block, 1, cur); 3516 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
3517 INT_SET(*pp, ARCH_CONVERT, args.fsbno); 3517 *pp = cpu_to_be64(args.fsbno);
3518 /* 3518 /*
3519 * Do all this logging at the end so that 3519 * Do all this logging at the end so that
3520 * the root is at the right level. 3520 * the root is at the right level.
@@ -3705,7 +3705,7 @@ STATIC xfs_bmbt_rec_t * /* pointer to found extent entry */
3705xfs_bmap_search_extents( 3705xfs_bmap_search_extents(
3706 xfs_inode_t *ip, /* incore inode pointer */ 3706 xfs_inode_t *ip, /* incore inode pointer */
3707 xfs_fileoff_t bno, /* block number searched for */ 3707 xfs_fileoff_t bno, /* block number searched for */
3708 int whichfork, /* data or attr fork */ 3708 int fork, /* data or attr fork */
3709 int *eofp, /* out: end of file found */ 3709 int *eofp, /* out: end of file found */
3710 xfs_extnum_t *lastxp, /* out: last extent index */ 3710 xfs_extnum_t *lastxp, /* out: last extent index */
3711 xfs_bmbt_irec_t *gotp, /* out: extent entry found */ 3711 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
@@ -3713,25 +3713,28 @@ xfs_bmap_search_extents(
3713{ 3713{
3714 xfs_ifork_t *ifp; /* inode fork pointer */ 3714 xfs_ifork_t *ifp; /* inode fork pointer */
3715 xfs_bmbt_rec_t *ep; /* extent record pointer */ 3715 xfs_bmbt_rec_t *ep; /* extent record pointer */
3716 int rt; /* realtime flag */
3717 3716
3718 XFS_STATS_INC(xs_look_exlist); 3717 XFS_STATS_INC(xs_look_exlist);
3719 ifp = XFS_IFORK_PTR(ip, whichfork); 3718 ifp = XFS_IFORK_PTR(ip, fork);
3720 3719
3721 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); 3720 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
3722 3721
3723 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 3722 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3724 if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) { 3723 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3725 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " 3724 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
3726 "start_block : %llx start_off : %llx blkcnt : %llx " 3725 "Access to block zero in inode %llu "
3727 "extent-state : %x \n", 3726 "start_block: %llx start_off: %llx "
3728 (ip->i_mount)->m_fsname, (long long)ip->i_ino, 3727 "blkcnt: %llx extent-state: %x lastx: %x\n",
3728 (unsigned long long)ip->i_ino,
3729 (unsigned long long)gotp->br_startblock, 3729 (unsigned long long)gotp->br_startblock,
3730 (unsigned long long)gotp->br_startoff, 3730 (unsigned long long)gotp->br_startoff,
3731 (unsigned long long)gotp->br_blockcount, 3731 (unsigned long long)gotp->br_blockcount,
3732 gotp->br_state); 3732 gotp->br_state, *lastxp);
3733 } 3733 *lastxp = NULLEXTNUM;
3734 return ep; 3734 *eofp = 1;
3735 return NULL;
3736 }
3737 return ep;
3735} 3738}
3736 3739
3737 3740
@@ -4494,7 +4497,7 @@ xfs_bmap_read_extents(
4494 xfs_ifork_t *ifp; /* fork structure */ 4497 xfs_ifork_t *ifp; /* fork structure */
4495 int level; /* btree level, for checking */ 4498 int level; /* btree level, for checking */
4496 xfs_mount_t *mp; /* file system mount structure */ 4499 xfs_mount_t *mp; /* file system mount structure */
4497 xfs_bmbt_ptr_t *pp; /* pointer to block address */ 4500 __be64 *pp; /* pointer to block address */
4498 /* REFERENCED */ 4501 /* REFERENCED */
4499 xfs_extnum_t room; /* number of entries there's room for */ 4502 xfs_extnum_t room; /* number of entries there's room for */
4500 4503
@@ -4510,10 +4513,10 @@ xfs_bmap_read_extents(
4510 level = be16_to_cpu(block->bb_level); 4513 level = be16_to_cpu(block->bb_level);
4511 ASSERT(level > 0); 4514 ASSERT(level > 0);
4512 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 4515 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
4513 ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); 4516 bno = be64_to_cpu(*pp);
4514 ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); 4517 ASSERT(bno != NULLDFSBNO);
4515 ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); 4518 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
4516 bno = INT_GET(*pp, ARCH_CONVERT); 4519 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
4517 /* 4520 /*
4518 * Go down the tree until leaf level is reached, following the first 4521 * Go down the tree until leaf level is reached, following the first
4519 * pointer (leftmost) at each level. 4522 * pointer (leftmost) at each level.
@@ -4530,10 +4533,8 @@ xfs_bmap_read_extents(
4530 break; 4533 break;
4531 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 4534 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
4532 1, mp->m_bmap_dmxr[1]); 4535 1, mp->m_bmap_dmxr[1]);
4533 XFS_WANT_CORRUPTED_GOTO( 4536 bno = be64_to_cpu(*pp);
4534 XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), 4537 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
4535 error0);
4536 bno = INT_GET(*pp, ARCH_CONVERT);
4537 xfs_trans_brelse(tp, bp); 4538 xfs_trans_brelse(tp, bp);
4538 } 4539 }
4539 /* 4540 /*
@@ -6141,7 +6142,7 @@ xfs_check_block(
6141 short sz) 6142 short sz)
6142{ 6143{
6143 int i, j, dmxr; 6144 int i, j, dmxr;
6144 xfs_bmbt_ptr_t *pp, *thispa; /* pointer to block address */ 6145 __be64 *pp, *thispa; /* pointer to block address */
6145 xfs_bmbt_key_t *prevp, *keyp; 6146 xfs_bmbt_key_t *prevp, *keyp;
6146 6147
6147 ASSERT(be16_to_cpu(block->bb_level) > 0); 6148 ASSERT(be16_to_cpu(block->bb_level) > 0);
@@ -6179,11 +6180,10 @@ xfs_check_block(
6179 thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, 6180 thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
6180 xfs_bmbt, block, j, dmxr); 6181 xfs_bmbt, block, j, dmxr);
6181 } 6182 }
6182 if (INT_GET(*thispa, ARCH_CONVERT) == 6183 if (*thispa == *pp) {
6183 INT_GET(*pp, ARCH_CONVERT)) {
6184 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 6184 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
6185 __FUNCTION__, j, i, 6185 __FUNCTION__, j, i,
6186 INT_GET(*thispa, ARCH_CONVERT)); 6186 (unsigned long long)be64_to_cpu(*thispa));
6187 panic("%s: ptrs are equal in node\n", 6187 panic("%s: ptrs are equal in node\n",
6188 __FUNCTION__); 6188 __FUNCTION__);
6189 } 6189 }
@@ -6210,7 +6210,7 @@ xfs_bmap_check_leaf_extents(
6210 xfs_ifork_t *ifp; /* fork structure */ 6210 xfs_ifork_t *ifp; /* fork structure */
6211 int level; /* btree level, for checking */ 6211 int level; /* btree level, for checking */
6212 xfs_mount_t *mp; /* file system mount structure */ 6212 xfs_mount_t *mp; /* file system mount structure */
6213 xfs_bmbt_ptr_t *pp; /* pointer to block address */ 6213 __be64 *pp; /* pointer to block address */
6214 xfs_bmbt_rec_t *ep; /* pointer to current extent */ 6214 xfs_bmbt_rec_t *ep; /* pointer to current extent */
6215 xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ 6215 xfs_bmbt_rec_t *lastp; /* pointer to previous extent */
6216 xfs_bmbt_rec_t *nextp; /* pointer to next extent */ 6216 xfs_bmbt_rec_t *nextp; /* pointer to next extent */
@@ -6231,10 +6231,12 @@ xfs_bmap_check_leaf_extents(
6231 ASSERT(level > 0); 6231 ASSERT(level > 0);
6232 xfs_check_block(block, mp, 1, ifp->if_broot_bytes); 6232 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
6233 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6233 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
6234 ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); 6234 bno = be64_to_cpu(*pp);
6235 ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); 6235
6236 ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); 6236 ASSERT(bno != NULLDFSBNO);
6237 bno = INT_GET(*pp, ARCH_CONVERT); 6237 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
6238 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
6239
6238 /* 6240 /*
6239 * Go down the tree until leaf level is reached, following the first 6241 * Go down the tree until leaf level is reached, following the first
6240 * pointer (leftmost) at each level. 6242 * pointer (leftmost) at each level.
@@ -6265,8 +6267,8 @@ xfs_bmap_check_leaf_extents(
6265 xfs_check_block(block, mp, 0, 0); 6267 xfs_check_block(block, mp, 0, 0);
6266 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 6268 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
6267 1, mp->m_bmap_dmxr[1]); 6269 1, mp->m_bmap_dmxr[1]);
6268 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0); 6270 bno = be64_to_cpu(*pp);
6269 bno = INT_GET(*pp, ARCH_CONVERT); 6271 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
6270 if (bp_release) { 6272 if (bp_release) {
6271 bp_release = 0; 6273 bp_release = 0;
6272 xfs_trans_brelse(NULL, bp); 6274 xfs_trans_brelse(NULL, bp);
@@ -6372,7 +6374,7 @@ xfs_bmap_count_blocks(
6372 xfs_ifork_t *ifp; /* fork structure */ 6374 xfs_ifork_t *ifp; /* fork structure */
6373 int level; /* btree level, for checking */ 6375 int level; /* btree level, for checking */
6374 xfs_mount_t *mp; /* file system mount structure */ 6376 xfs_mount_t *mp; /* file system mount structure */
6375 xfs_bmbt_ptr_t *pp; /* pointer to block address */ 6377 __be64 *pp; /* pointer to block address */
6376 6378
6377 bno = NULLFSBLOCK; 6379 bno = NULLFSBLOCK;
6378 mp = ip->i_mount; 6380 mp = ip->i_mount;
@@ -6395,10 +6397,10 @@ xfs_bmap_count_blocks(
6395 level = be16_to_cpu(block->bb_level); 6397 level = be16_to_cpu(block->bb_level);
6396 ASSERT(level > 0); 6398 ASSERT(level > 0);
6397 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); 6399 pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
6398 ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO); 6400 bno = be64_to_cpu(*pp);
6399 ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount); 6401 ASSERT(bno != NULLDFSBNO);
6400 ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks); 6402 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
6401 bno = INT_GET(*pp, ARCH_CONVERT); 6403 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
6402 6404
6403 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { 6405 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
6404 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, 6406 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
@@ -6425,7 +6427,7 @@ xfs_bmap_count_tree(
6425 int error; 6427 int error;
6426 xfs_buf_t *bp, *nbp; 6428 xfs_buf_t *bp, *nbp;
6427 int level = levelin; 6429 int level = levelin;
6428 xfs_bmbt_ptr_t *pp; 6430 __be64 *pp;
6429 xfs_fsblock_t bno = blockno; 6431 xfs_fsblock_t bno = blockno;
6430 xfs_fsblock_t nextbno; 6432 xfs_fsblock_t nextbno;
6431 xfs_bmbt_block_t *block, *nextblock; 6433 xfs_bmbt_block_t *block, *nextblock;
@@ -6452,7 +6454,7 @@ xfs_bmap_count_tree(
6452 /* Dive to the next level */ 6454 /* Dive to the next level */
6453 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, 6455 pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
6454 xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); 6456 xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
6455 bno = INT_GET(*pp, ARCH_CONVERT); 6457 bno = be64_to_cpu(*pp);
6456 if (unlikely((error = 6458 if (unlikely((error =
6457 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 6459 xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
6458 xfs_trans_brelse(tp, bp); 6460 xfs_trans_brelse(tp, bp);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 18fb7385d719..a7b835bf870a 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -58,7 +58,7 @@ STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
58STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); 58STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
59STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); 59STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
60STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, 60STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
61 xfs_bmbt_key_t *, xfs_btree_cur_t **, int *); 61 __uint64_t *, xfs_btree_cur_t **, int *);
62STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); 62STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
63 63
64 64
@@ -192,16 +192,11 @@ xfs_bmbt_trace_argifk(
192 xfs_btree_cur_t *cur, 192 xfs_btree_cur_t *cur,
193 int i, 193 int i,
194 xfs_fsblock_t f, 194 xfs_fsblock_t f,
195 xfs_bmbt_key_t *k, 195 xfs_dfiloff_t o,
196 int line) 196 int line)
197{ 197{
198 xfs_dfsbno_t d;
199 xfs_dfiloff_t o;
200
201 d = (xfs_dfsbno_t)f;
202 o = INT_GET(k->br_startoff, ARCH_CONVERT);
203 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, 198 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
204 i, d >> 32, (int)d, o >> 32, 199 i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
205 (int)o, 0, 0, 0, 200 (int)o, 0, 0, 0,
206 0, 0, 0); 201 0, 0, 0);
207} 202}
@@ -248,7 +243,7 @@ xfs_bmbt_trace_argik(
248{ 243{
249 xfs_dfiloff_t o; 244 xfs_dfiloff_t o;
250 245
251 o = INT_GET(k->br_startoff, ARCH_CONVERT); 246 o = be64_to_cpu(k->br_startoff);
252 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, 247 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
253 i, o >> 32, (int)o, 0, 248 i, o >> 32, (int)o, 0,
254 0, 0, 0, 0, 249 0, 0, 0, 0,
@@ -286,8 +281,8 @@ xfs_bmbt_trace_cursor(
286 xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__) 281 xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__)
287#define XFS_BMBT_TRACE_ARGI(c,i) \ 282#define XFS_BMBT_TRACE_ARGI(c,i) \
288 xfs_bmbt_trace_argi(fname, c, i, __LINE__) 283 xfs_bmbt_trace_argi(fname, c, i, __LINE__)
289#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) \ 284#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
290 xfs_bmbt_trace_argifk(fname, c, i, f, k, __LINE__) 285 xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__)
291#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ 286#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
292 xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__) 287 xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__)
293#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ 288#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
@@ -299,7 +294,7 @@ xfs_bmbt_trace_cursor(
299#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) 294#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
300#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) 295#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
301#define XFS_BMBT_TRACE_ARGI(c,i) 296#define XFS_BMBT_TRACE_ARGI(c,i)
302#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) 297#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
303#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) 298#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
304#define XFS_BMBT_TRACE_ARGIK(c,i,k) 299#define XFS_BMBT_TRACE_ARGIK(c,i,k)
305#define XFS_BMBT_TRACE_CURSOR(c,s) 300#define XFS_BMBT_TRACE_CURSOR(c,s)
@@ -357,7 +352,7 @@ xfs_bmbt_delrec(
357 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 352 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
358 XFS_BMBT_TRACE_ARGI(cur, level); 353 XFS_BMBT_TRACE_ARGI(cur, level);
359 ptr = cur->bc_ptrs[level]; 354 ptr = cur->bc_ptrs[level];
360 tcur = (xfs_btree_cur_t *)0; 355 tcur = NULL;
361 if (ptr == 0) { 356 if (ptr == 0) {
362 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 357 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
363 *stat = 0; 358 *stat = 0;
@@ -382,7 +377,7 @@ xfs_bmbt_delrec(
382 pp = XFS_BMAP_PTR_IADDR(block, 1, cur); 377 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
383#ifdef DEBUG 378#ifdef DEBUG
384 for (i = ptr; i < numrecs; i++) { 379 for (i = ptr; i < numrecs; i++) {
385 if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) { 380 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
386 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 381 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
387 goto error0; 382 goto error0;
388 } 383 }
@@ -404,7 +399,8 @@ xfs_bmbt_delrec(
404 xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1); 399 xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
405 } 400 }
406 if (ptr == 1) { 401 if (ptr == 1) {
407 INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(rp)); 402 key.br_startoff =
403 cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
408 kp = &key; 404 kp = &key;
409 } 405 }
410 } 406 }
@@ -621,7 +617,7 @@ xfs_bmbt_delrec(
621 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); 617 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
622#ifdef DEBUG 618#ifdef DEBUG
623 for (i = 0; i < numrrecs; i++) { 619 for (i = 0; i < numrrecs; i++) {
624 if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) { 620 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
625 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 621 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
626 goto error0; 622 goto error0;
627 } 623 }
@@ -748,7 +744,7 @@ xfs_bmbt_insrec(
748 int logflags; /* inode logging flags */ 744 int logflags; /* inode logging flags */
749 xfs_fsblock_t nbno; /* new block number */ 745 xfs_fsblock_t nbno; /* new block number */
750 struct xfs_btree_cur *ncur; /* new btree cursor */ 746 struct xfs_btree_cur *ncur; /* new btree cursor */
751 xfs_bmbt_key_t nkey; /* new btree key value */ 747 __uint64_t startoff; /* new btree key value */
752 xfs_bmbt_rec_t nrec; /* new record count */ 748 xfs_bmbt_rec_t nrec; /* new record count */
753 int optr; /* old key/record index */ 749 int optr; /* old key/record index */
754 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ 750 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
@@ -759,9 +755,8 @@ xfs_bmbt_insrec(
759 ASSERT(level < cur->bc_nlevels); 755 ASSERT(level < cur->bc_nlevels);
760 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 756 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
761 XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp); 757 XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
762 ncur = (xfs_btree_cur_t *)0; 758 ncur = NULL;
763 INT_SET(key.br_startoff, ARCH_CONVERT, 759 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
764 xfs_bmbt_disk_get_startoff(recp));
765 optr = ptr = cur->bc_ptrs[level]; 760 optr = ptr = cur->bc_ptrs[level];
766 if (ptr == 0) { 761 if (ptr == 0) {
767 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 762 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -820,7 +815,7 @@ xfs_bmbt_insrec(
820 optr = ptr = cur->bc_ptrs[level]; 815 optr = ptr = cur->bc_ptrs[level];
821 } else { 816 } else {
822 if ((error = xfs_bmbt_split(cur, level, 817 if ((error = xfs_bmbt_split(cur, level,
823 &nbno, &nkey, &ncur, 818 &nbno, &startoff, &ncur,
824 &i))) { 819 &i))) {
825 XFS_BMBT_TRACE_CURSOR(cur, 820 XFS_BMBT_TRACE_CURSOR(cur,
826 ERROR); 821 ERROR);
@@ -840,7 +835,7 @@ xfs_bmbt_insrec(
840#endif 835#endif
841 ptr = cur->bc_ptrs[level]; 836 ptr = cur->bc_ptrs[level];
842 xfs_bmbt_disk_set_allf(&nrec, 837 xfs_bmbt_disk_set_allf(&nrec,
843 nkey.br_startoff, 0, 0, 838 startoff, 0, 0,
844 XFS_EXT_NORM); 839 XFS_EXT_NORM);
845 } else { 840 } else {
846 XFS_BMBT_TRACE_CURSOR(cur, 841 XFS_BMBT_TRACE_CURSOR(cur,
@@ -858,7 +853,7 @@ xfs_bmbt_insrec(
858 pp = XFS_BMAP_PTR_IADDR(block, 1, cur); 853 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
859#ifdef DEBUG 854#ifdef DEBUG
860 for (i = numrecs; i >= ptr; i--) { 855 for (i = numrecs; i >= ptr; i--) {
861 if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), 856 if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
862 level))) { 857 level))) {
863 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 858 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
864 return error; 859 return error;
@@ -870,14 +865,13 @@ xfs_bmbt_insrec(
870 memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */ 865 memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
871 (numrecs - ptr + 1) * sizeof(*pp)); 866 (numrecs - ptr + 1) * sizeof(*pp));
872#ifdef DEBUG 867#ifdef DEBUG
873 if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop, 868 if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
874 level))) {
875 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 869 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
876 return error; 870 return error;
877 } 871 }
878#endif 872#endif
879 kp[ptr - 1] = key; 873 kp[ptr - 1] = key;
880 INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop); 874 pp[ptr - 1] = cpu_to_be64(*bnop);
881 numrecs++; 875 numrecs++;
882 block->bb_numrecs = cpu_to_be16(numrecs); 876 block->bb_numrecs = cpu_to_be16(numrecs);
883 xfs_bmbt_log_keys(cur, bp, ptr, numrecs); 877 xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
@@ -988,7 +982,7 @@ xfs_bmbt_killroot(
988 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); 982 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
989#ifdef DEBUG 983#ifdef DEBUG
990 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 984 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
991 if ((error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1))) { 985 if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
992 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 986 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
993 return error; 987 return error;
994 } 988 }
@@ -1132,7 +1126,7 @@ xfs_bmbt_lookup(
1132 d = XFS_FSB_TO_DADDR(mp, fsbno); 1126 d = XFS_FSB_TO_DADDR(mp, fsbno);
1133 bp = cur->bc_bufs[level]; 1127 bp = cur->bc_bufs[level];
1134 if (bp && XFS_BUF_ADDR(bp) != d) 1128 if (bp && XFS_BUF_ADDR(bp) != d)
1135 bp = (xfs_buf_t *)0; 1129 bp = NULL;
1136 if (!bp) { 1130 if (!bp) {
1137 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 1131 if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
1138 0, &bp, XFS_BMAP_BTREE_REF))) { 1132 0, &bp, XFS_BMAP_BTREE_REF))) {
@@ -1170,7 +1164,7 @@ xfs_bmbt_lookup(
1170 keyno = (low + high) >> 1; 1164 keyno = (low + high) >> 1;
1171 if (level > 0) { 1165 if (level > 0) {
1172 kkp = kkbase + keyno - 1; 1166 kkp = kkbase + keyno - 1;
1173 startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT); 1167 startoff = be64_to_cpu(kkp->br_startoff);
1174 } else { 1168 } else {
1175 krp = krbase + keyno - 1; 1169 krp = krbase + keyno - 1;
1176 startoff = xfs_bmbt_disk_get_startoff(krp); 1170 startoff = xfs_bmbt_disk_get_startoff(krp);
@@ -1189,13 +1183,13 @@ xfs_bmbt_lookup(
1189 if (diff > 0 && --keyno < 1) 1183 if (diff > 0 && --keyno < 1)
1190 keyno = 1; 1184 keyno = 1;
1191 pp = XFS_BMAP_PTR_IADDR(block, keyno, cur); 1185 pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
1186 fsbno = be64_to_cpu(*pp);
1192#ifdef DEBUG 1187#ifdef DEBUG
1193 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { 1188 if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
1194 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1189 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1195 return error; 1190 return error;
1196 } 1191 }
1197#endif 1192#endif
1198 fsbno = INT_GET(*pp, ARCH_CONVERT);
1199 cur->bc_ptrs[level] = keyno; 1193 cur->bc_ptrs[level] = keyno;
1200 } 1194 }
1201 } 1195 }
@@ -1313,7 +1307,7 @@ xfs_bmbt_lshift(
1313 lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur); 1307 lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
1314 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); 1308 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1315#ifdef DEBUG 1309#ifdef DEBUG
1316 if ((error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))) { 1310 if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
1317 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1311 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1318 return error; 1312 return error;
1319 } 1313 }
@@ -1340,7 +1334,7 @@ xfs_bmbt_lshift(
1340 if (level > 0) { 1334 if (level > 0) {
1341#ifdef DEBUG 1335#ifdef DEBUG
1342 for (i = 0; i < rrecs; i++) { 1336 for (i = 0; i < rrecs; i++) {
1343 if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT), 1337 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
1344 level))) { 1338 level))) {
1345 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1339 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1346 return error; 1340 return error;
@@ -1354,8 +1348,7 @@ xfs_bmbt_lshift(
1354 } else { 1348 } else {
1355 memmove(rrp, rrp + 1, rrecs * sizeof(*rrp)); 1349 memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
1356 xfs_bmbt_log_recs(cur, rbp, 1, rrecs); 1350 xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
1357 INT_SET(key.br_startoff, ARCH_CONVERT, 1351 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1358 xfs_bmbt_disk_get_startoff(rrp));
1359 rkp = &key; 1352 rkp = &key;
1360 } 1353 }
1361 if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) { 1354 if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
@@ -1445,7 +1438,7 @@ xfs_bmbt_rshift(
1445 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); 1438 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1446#ifdef DEBUG 1439#ifdef DEBUG
1447 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { 1440 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1448 if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) { 1441 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
1449 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1442 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1450 return error; 1443 return error;
1451 } 1444 }
@@ -1454,7 +1447,7 @@ xfs_bmbt_rshift(
1454 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); 1447 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1455 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); 1448 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1456#ifdef DEBUG 1449#ifdef DEBUG
1457 if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) { 1450 if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
1458 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1451 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1459 return error; 1452 return error;
1460 } 1453 }
@@ -1469,8 +1462,7 @@ xfs_bmbt_rshift(
1469 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); 1462 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1470 *rrp = *lrp; 1463 *rrp = *lrp;
1471 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1464 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1472 INT_SET(key.br_startoff, ARCH_CONVERT, 1465 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1473 xfs_bmbt_disk_get_startoff(rrp));
1474 rkp = &key; 1466 rkp = &key;
1475 } 1467 }
1476 be16_add(&left->bb_numrecs, -1); 1468 be16_add(&left->bb_numrecs, -1);
@@ -1535,7 +1527,7 @@ xfs_bmbt_split(
1535 xfs_btree_cur_t *cur, 1527 xfs_btree_cur_t *cur,
1536 int level, 1528 int level,
1537 xfs_fsblock_t *bnop, 1529 xfs_fsblock_t *bnop,
1538 xfs_bmbt_key_t *keyp, 1530 __uint64_t *startoff,
1539 xfs_btree_cur_t **curp, 1531 xfs_btree_cur_t **curp,
1540 int *stat) /* success/failure */ 1532 int *stat) /* success/failure */
1541{ 1533{
@@ -1560,7 +1552,7 @@ xfs_bmbt_split(
1560 xfs_bmbt_rec_t *rrp; /* right record pointer */ 1552 xfs_bmbt_rec_t *rrp; /* right record pointer */
1561 1553
1562 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 1554 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1563 XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp); 1555 XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
1564 args.tp = cur->bc_tp; 1556 args.tp = cur->bc_tp;
1565 args.mp = cur->bc_mp; 1557 args.mp = cur->bc_mp;
1566 lbp = cur->bc_bufs[level]; 1558 lbp = cur->bc_bufs[level];
@@ -1619,7 +1611,7 @@ xfs_bmbt_split(
1619 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); 1611 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1620#ifdef DEBUG 1612#ifdef DEBUG
1621 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { 1613 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1622 if ((error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) { 1614 if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
1623 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1615 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1624 return error; 1616 return error;
1625 } 1617 }
@@ -1629,13 +1621,13 @@ xfs_bmbt_split(
1629 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); 1621 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1630 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); 1622 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1631 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); 1623 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1632 keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT); 1624 *startoff = be64_to_cpu(rkp->br_startoff);
1633 } else { 1625 } else {
1634 lrp = XFS_BMAP_REC_IADDR(left, i, cur); 1626 lrp = XFS_BMAP_REC_IADDR(left, i, cur);
1635 rrp = XFS_BMAP_REC_IADDR(right, 1, cur); 1627 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1636 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); 1628 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1637 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); 1629 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1638 keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp); 1630 *startoff = xfs_bmbt_disk_get_startoff(rrp);
1639 } 1631 }
1640 be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); 1632 be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1641 right->bb_rightsib = left->bb_rightsib; 1633 right->bb_rightsib = left->bb_rightsib;
@@ -1728,9 +1720,9 @@ xfs_bmdr_to_bmbt(
1728{ 1720{
1729 int dmxr; 1721 int dmxr;
1730 xfs_bmbt_key_t *fkp; 1722 xfs_bmbt_key_t *fkp;
1731 xfs_bmbt_ptr_t *fpp; 1723 __be64 *fpp;
1732 xfs_bmbt_key_t *tkp; 1724 xfs_bmbt_key_t *tkp;
1733 xfs_bmbt_ptr_t *tpp; 1725 __be64 *tpp;
1734 1726
1735 rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 1727 rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
1736 rblock->bb_level = dblock->bb_level; 1728 rblock->bb_level = dblock->bb_level;
@@ -1745,7 +1737,7 @@ xfs_bmdr_to_bmbt(
1745 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 1737 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
1746 dmxr = be16_to_cpu(dblock->bb_numrecs); 1738 dmxr = be16_to_cpu(dblock->bb_numrecs);
1747 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 1739 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
1748 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */ 1740 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
1749} 1741}
1750 1742
1751/* 1743/*
@@ -1805,7 +1797,7 @@ xfs_bmbt_decrement(
1805 tp = cur->bc_tp; 1797 tp = cur->bc_tp;
1806 mp = cur->bc_mp; 1798 mp = cur->bc_mp;
1807 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { 1799 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
1808 fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); 1800 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
1809 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, 1801 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
1810 XFS_BMAP_BTREE_REF))) { 1802 XFS_BMAP_BTREE_REF))) {
1811 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1803 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -2135,7 +2127,7 @@ xfs_bmbt_increment(
2135 tp = cur->bc_tp; 2127 tp = cur->bc_tp;
2136 mp = cur->bc_mp; 2128 mp = cur->bc_mp;
2137 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { 2129 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
2138 fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); 2130 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
2139 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, 2131 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
2140 XFS_BMAP_BTREE_REF))) { 2132 XFS_BMAP_BTREE_REF))) {
2141 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2133 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -2178,7 +2170,7 @@ xfs_bmbt_insert(
2178 level = 0; 2170 level = 0;
2179 nbno = NULLFSBLOCK; 2171 nbno = NULLFSBLOCK;
2180 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2172 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2181 ncur = (xfs_btree_cur_t *)0; 2173 ncur = NULL;
2182 pcur = cur; 2174 pcur = cur;
2183 do { 2175 do {
2184 if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur, 2176 if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
@@ -2205,7 +2197,7 @@ xfs_bmbt_insert(
2205 } 2197 }
2206 if (ncur) { 2198 if (ncur) {
2207 pcur = ncur; 2199 pcur = ncur;
2208 ncur = (xfs_btree_cur_t *)0; 2200 ncur = NULL;
2209 } 2201 }
2210 } while (nbno != NULLFSBLOCK); 2202 } while (nbno != NULLFSBLOCK);
2211 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2203 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -2356,12 +2348,12 @@ xfs_bmbt_newroot(
2356 args.firstblock = args.fsbno; 2348 args.firstblock = args.fsbno;
2357 if (args.fsbno == NULLFSBLOCK) { 2349 if (args.fsbno == NULLFSBLOCK) {
2358#ifdef DEBUG 2350#ifdef DEBUG
2359 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { 2351 if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
2360 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2352 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2361 return error; 2353 return error;
2362 } 2354 }
2363#endif 2355#endif
2364 args.fsbno = INT_GET(*pp, ARCH_CONVERT); 2356 args.fsbno = be64_to_cpu(*pp);
2365 args.type = XFS_ALLOCTYPE_START_BNO; 2357 args.type = XFS_ALLOCTYPE_START_BNO;
2366 } else 2358 } else
2367 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2359 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -2393,7 +2385,7 @@ xfs_bmbt_newroot(
2393 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); 2385 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
2394#ifdef DEBUG 2386#ifdef DEBUG
2395 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 2387 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2396 if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) { 2388 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
2397 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2389 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2398 return error; 2390 return error;
2399 } 2391 }
@@ -2401,13 +2393,12 @@ xfs_bmbt_newroot(
2401#endif 2393#endif
2402 memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp)); 2394 memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
2403#ifdef DEBUG 2395#ifdef DEBUG
2404 if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno, 2396 if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
2405 level))) {
2406 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2397 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2407 return error; 2398 return error;
2408 } 2399 }
2409#endif 2400#endif
2410 INT_SET(*pp, ARCH_CONVERT, args.fsbno); 2401 *pp = cpu_to_be64(args.fsbno);
2411 xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs), 2402 xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
2412 cur->bc_private.b.whichfork); 2403 cur->bc_private.b.whichfork);
2413 xfs_btree_setbuf(cur, level, bp); 2404 xfs_btree_setbuf(cur, level, bp);
@@ -2681,9 +2672,9 @@ xfs_bmbt_to_bmdr(
2681{ 2672{
2682 int dmxr; 2673 int dmxr;
2683 xfs_bmbt_key_t *fkp; 2674 xfs_bmbt_key_t *fkp;
2684 xfs_bmbt_ptr_t *fpp; 2675 __be64 *fpp;
2685 xfs_bmbt_key_t *tkp; 2676 xfs_bmbt_key_t *tkp;
2686 xfs_bmbt_ptr_t *tpp; 2677 __be64 *tpp;
2687 2678
2688 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 2679 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
2689 ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); 2680 ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO);
@@ -2698,7 +2689,7 @@ xfs_bmbt_to_bmdr(
2698 tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr); 2689 tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
2699 dmxr = be16_to_cpu(dblock->bb_numrecs); 2690 dmxr = be16_to_cpu(dblock->bb_numrecs);
2700 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 2691 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
2701 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */ 2692 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
2702} 2693}
2703 2694
2704/* 2695/*
@@ -2740,7 +2731,7 @@ xfs_bmbt_update(
2740 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2731 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2741 return 0; 2732 return 0;
2742 } 2733 }
2743 INT_SET(key.br_startoff, ARCH_CONVERT, off); 2734 key.br_startoff = cpu_to_be64(off);
2744 if ((error = xfs_bmbt_updkey(cur, &key, 1))) { 2735 if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
2745 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2736 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2746 return error; 2737 return error;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6478cfa0e539..49539de9525b 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -163,13 +163,14 @@ typedef struct xfs_bmbt_irec
163/* 163/*
164 * Key structure for non-leaf levels of the tree. 164 * Key structure for non-leaf levels of the tree.
165 */ 165 */
166typedef struct xfs_bmbt_key 166typedef struct xfs_bmbt_key {
167{ 167 __be64 br_startoff; /* starting file offset */
168 xfs_dfiloff_t br_startoff; /* starting file offset */
169} xfs_bmbt_key_t, xfs_bmdr_key_t; 168} xfs_bmbt_key_t, xfs_bmdr_key_t;
170 169
171typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; /* btree pointer type */ 170/* btree pointer type */
172 /* btree block header type */ 171typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
172
173/* btree block header type */
173typedef struct xfs_btree_lblock xfs_bmbt_block_t; 174typedef struct xfs_btree_lblock xfs_bmbt_block_t;
174 175
175#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) 176#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index ee2255bd6562..aeb87ca69fcc 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -161,7 +161,7 @@ xfs_btree_check_key(
161 161
162 k1 = ak1; 162 k1 = ak1;
163 k2 = ak2; 163 k2 = ak2;
164 ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT)); 164 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
165 break; 165 break;
166 } 166 }
167 case XFS_BTNUM_INO: { 167 case XFS_BTNUM_INO: {
@@ -170,7 +170,7 @@ xfs_btree_check_key(
170 170
171 k1 = ak1; 171 k1 = ak1;
172 k2 = ak2; 172 k2 = ak2;
173 ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT)); 173 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
174 break; 174 break;
175 } 175 }
176 default: 176 default:
@@ -285,8 +285,8 @@ xfs_btree_check_rec(
285 285
286 r1 = ar1; 286 r1 = ar1;
287 r2 = ar2; 287 r2 = ar2;
288 ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <= 288 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
289 INT_GET(r2->ir_startino, ARCH_CONVERT)); 289 be32_to_cpu(r2->ir_startino));
290 break; 290 break;
291 } 291 }
292 default: 292 default:
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 44f1bd98064a..892b06c54263 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -145,7 +145,7 @@ typedef struct xfs_btree_cur
145 union { 145 union {
146 xfs_alloc_rec_incore_t a; 146 xfs_alloc_rec_incore_t a;
147 xfs_bmbt_irec_t b; 147 xfs_bmbt_irec_t b;
148 xfs_inobt_rec_t i; 148 xfs_inobt_rec_incore_t i;
149 } bc_rec; /* current insert/search record value */ 149 } bc_rec; /* current insert/search record value */
150 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ 150 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
151 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ 151 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
@@ -243,6 +243,9 @@ xfs_btree_check_lptr(
243 xfs_dfsbno_t ptr, /* btree block disk address */ 243 xfs_dfsbno_t ptr, /* btree block disk address */
244 int level); /* btree block level */ 244 int level); /* btree block level */
245 245
246#define xfs_btree_check_lptr_disk(cur, ptr, level) \
247 xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
248
246/* 249/*
247 * Checking routine: check that short form block header is ok. 250 * Checking routine: check that short form block header is ok.
248 */ 251 */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a4aa53974f76..7a55c248ea70 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -234,7 +234,6 @@ xfs_buf_item_format(
234 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 234 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
235 (bip->bli_flags & XFS_BLI_STALE)); 235 (bip->bli_flags & XFS_BLI_STALE));
236 bp = bip->bli_buf; 236 bp = bip->bli_buf;
237 ASSERT(XFS_BUF_BP_ISMAPPED(bp));
238 vecp = log_vector; 237 vecp = log_vector;
239 238
240 /* 239 /*
@@ -628,25 +627,6 @@ xfs_buf_item_committed(
628} 627}
629 628
630/* 629/*
631 * This is called when the transaction holding the buffer is aborted.
632 * Just behave as if the transaction had been cancelled. If we're shutting down
633 * and have aborted this transaction, we'll trap this buffer when it tries to
634 * get written out.
635 */
636STATIC void
637xfs_buf_item_abort(
638 xfs_buf_log_item_t *bip)
639{
640 xfs_buf_t *bp;
641
642 bp = bip->bli_buf;
643 xfs_buftrace("XFS_ABORT", bp);
644 XFS_BUF_SUPER_STALE(bp);
645 xfs_buf_item_unlock(bip);
646 return;
647}
648
649/*
650 * This is called to asynchronously write the buffer associated with this 630 * This is called to asynchronously write the buffer associated with this
651 * buf log item out to disk. The buffer will already have been locked by 631 * buf log item out to disk. The buffer will already have been locked by
652 * a successful call to xfs_buf_item_trylock(). If the buffer still has 632 * a successful call to xfs_buf_item_trylock(). If the buffer still has
@@ -693,7 +673,6 @@ STATIC struct xfs_item_ops xfs_buf_item_ops = {
693 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 673 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
694 xfs_buf_item_committed, 674 xfs_buf_item_committed,
695 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, 675 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
696 .iop_abort = (void(*)(xfs_log_item_t*))xfs_buf_item_abort,
697 .iop_pushbuf = NULL, 676 .iop_pushbuf = NULL,
698 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 677 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
699 xfs_buf_item_committing 678 xfs_buf_item_committing
@@ -901,7 +880,6 @@ xfs_buf_item_relse(
901 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 880 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
902 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 881 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
903 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 882 (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
904 ASSERT((XFS_BUF_ISUNINITIAL(bp)) == 0);
905 XFS_BUF_CLR_IODONE_FUNC(bp); 883 XFS_BUF_CLR_IODONE_FUNC(bp);
906 } 884 }
907 885
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 32ab61d17ace..a68bc1f1a313 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1054,7 +1054,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1054 xfs_da_node_entry_t *btree; 1054 xfs_da_node_entry_t *btree;
1055 xfs_dablk_t blkno; 1055 xfs_dablk_t blkno;
1056 int probe, span, max, error, retval; 1056 int probe, span, max, error, retval;
1057 xfs_dahash_t hashval; 1057 xfs_dahash_t hashval, btreehashval;
1058 xfs_da_args_t *args; 1058 xfs_da_args_t *args;
1059 1059
1060 args = state->args; 1060 args = state->args;
@@ -1079,30 +1079,32 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1079 return(error); 1079 return(error);
1080 } 1080 }
1081 curr = blk->bp->data; 1081 curr = blk->bp->data;
1082 ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC || 1082 blk->magic = be16_to_cpu(curr->magic);
1083 be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC || 1083 ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
1084 be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC); 1084 blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1085 blk->magic == XFS_ATTR_LEAF_MAGIC);
1085 1086
1086 /* 1087 /*
1087 * Search an intermediate node for a match. 1088 * Search an intermediate node for a match.
1088 */ 1089 */
1089 blk->magic = be16_to_cpu(curr->magic);
1090 if (blk->magic == XFS_DA_NODE_MAGIC) { 1090 if (blk->magic == XFS_DA_NODE_MAGIC) {
1091 node = blk->bp->data; 1091 node = blk->bp->data;
1092 blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1092 max = be16_to_cpu(node->hdr.count);
1093 btreehashval = node->btree[max-1].hashval;
1094 blk->hashval = be32_to_cpu(btreehashval);
1093 1095
1094 /* 1096 /*
1095 * Binary search. (note: small blocks will skip loop) 1097 * Binary search. (note: small blocks will skip loop)
1096 */ 1098 */
1097 max = be16_to_cpu(node->hdr.count);
1098 probe = span = max / 2; 1099 probe = span = max / 2;
1099 hashval = args->hashval; 1100 hashval = args->hashval;
1100 for (btree = &node->btree[probe]; span > 4; 1101 for (btree = &node->btree[probe]; span > 4;
1101 btree = &node->btree[probe]) { 1102 btree = &node->btree[probe]) {
1102 span /= 2; 1103 span /= 2;
1103 if (be32_to_cpu(btree->hashval) < hashval) 1104 btreehashval = be32_to_cpu(btree->hashval);
1105 if (btreehashval < hashval)
1104 probe += span; 1106 probe += span;
1105 else if (be32_to_cpu(btree->hashval) > hashval) 1107 else if (btreehashval > hashval)
1106 probe -= span; 1108 probe -= span;
1107 else 1109 else
1108 break; 1110 break;
@@ -1133,10 +1135,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1133 blk->index = probe; 1135 blk->index = probe;
1134 blkno = be32_to_cpu(btree->before); 1136 blkno = be32_to_cpu(btree->before);
1135 } 1137 }
1136 } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) { 1138 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1137 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1139 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1138 break; 1140 break;
1139 } else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) { 1141 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1140 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); 1142 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1141 break; 1143 break;
1142 } 1144 }
@@ -1152,11 +1154,13 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1152 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1154 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1153 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1155 retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1154 &blk->index, state); 1156 &blk->index, state);
1155 } 1157 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1156 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1157 retval = xfs_attr_leaf_lookup_int(blk->bp, args); 1158 retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1158 blk->index = args->index; 1159 blk->index = args->index;
1159 args->blkno = blk->blkno; 1160 args->blkno = blk->blkno;
1161 } else {
1162 ASSERT(0);
1163 return XFS_ERROR(EFSCORRUPTED);
1160 } 1164 }
1161 if (((retval == ENOENT) || (retval == ENOATTR)) && 1165 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1162 (blk->hashval == args->hashval)) { 1166 (blk->hashval == args->hashval)) {
@@ -1166,8 +1170,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1166 return(error); 1170 return(error);
1167 if (retval == 0) { 1171 if (retval == 0) {
1168 continue; 1172 continue;
1169 } 1173 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1170 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1171 /* path_shift() gives ENOENT */ 1174 /* path_shift() gives ENOENT */
1172 retval = XFS_ERROR(ENOATTR); 1175 retval = XFS_ERROR(ENOATTR);
1173 } 1176 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index bc43163456ef..0893e16b7d83 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,14 +18,6 @@
18#ifndef __XFS_ERROR_H__ 18#ifndef __XFS_ERROR_H__
19#define __XFS_ERROR_H__ 19#define __XFS_ERROR_H__
20 20
21#define XFS_ERECOVER 1 /* Failure to recover log */
22#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
23#define XFS_ENOLOGSPACE 3 /* Reservation too large */
24#define XFS_ENOTSUP 4 /* Operation not supported */
25#define XFS_ENOLSN 5 /* Can't find the lsn you asked for */
26#define XFS_ENOTFOUND 6
27#define XFS_ENOTXFS 7 /* Not XFS filesystem */
28
29#ifdef DEBUG 21#ifdef DEBUG
30#define XFS_ERROR_NTRAP 10 22#define XFS_ERROR_NTRAP 10
31extern int xfs_etrap[XFS_ERROR_NTRAP]; 23extern int xfs_etrap[XFS_ERROR_NTRAP];
@@ -175,6 +167,7 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
175#define XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010 167#define XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010
176#define XFS_PTAG_SHUTDOWN_IOERROR 0x00000020 168#define XFS_PTAG_SHUTDOWN_IOERROR 0x00000020
177#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 169#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
170#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
178 171
179struct xfs_mount; 172struct xfs_mount;
180/* PRINTFLIKE4 */ 173/* PRINTFLIKE4 */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6cf6d8769b97..6dba78199faf 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -33,9 +33,6 @@ kmem_zone_t *xfs_efi_zone;
33kmem_zone_t *xfs_efd_zone; 33kmem_zone_t *xfs_efd_zone;
34 34
35STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *); 35STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *);
36STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *);
37STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *);
38
39 36
40void 37void
41xfs_efi_item_free(xfs_efi_log_item_t *efip) 38xfs_efi_item_free(xfs_efi_log_item_t *efip)
@@ -184,7 +181,7 @@ STATIC void
184xfs_efi_item_unlock(xfs_efi_log_item_t *efip) 181xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
185{ 182{
186 if (efip->efi_item.li_flags & XFS_LI_ABORTED) 183 if (efip->efi_item.li_flags & XFS_LI_ABORTED)
187 xfs_efi_item_abort(efip); 184 xfs_efi_item_free(efip);
188 return; 185 return;
189} 186}
190 187
@@ -202,18 +199,6 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
202} 199}
203 200
204/* 201/*
205 * This is called when the transaction logging the EFI is aborted.
206 * Free up the EFI and return. No need to clean up the slot for
207 * the item in the transaction. That was done by the unpin code
208 * which is called prior to this routine in the abort/fs-shutdown path.
209 */
210STATIC void
211xfs_efi_item_abort(xfs_efi_log_item_t *efip)
212{
213 xfs_efi_item_free(efip);
214}
215
216/*
217 * There isn't much you can do to push on an efi item. It is simply 202 * There isn't much you can do to push on an efi item. It is simply
218 * stuck waiting for all of its corresponding efd items to be 203 * stuck waiting for all of its corresponding efd items to be
219 * committed to disk. 204 * committed to disk.
@@ -255,7 +240,6 @@ STATIC struct xfs_item_ops xfs_efi_item_ops = {
255 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 240 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
256 xfs_efi_item_committed, 241 xfs_efi_item_committed,
257 .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push, 242 .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
258 .iop_abort = (void(*)(xfs_log_item_t*))xfs_efi_item_abort,
259 .iop_pushbuf = NULL, 243 .iop_pushbuf = NULL,
260 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 244 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
261 xfs_efi_item_committing 245 xfs_efi_item_committing
@@ -386,33 +370,6 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
386 } 370 }
387} 371}
388 372
389/*
390 * This is called when the transaction that should be committing the
391 * EFD corresponding to the given EFI is aborted. The committed and
392 * canceled flags are used to coordinate the freeing of the EFI and
393 * the references by the transaction that committed it.
394 */
395STATIC void
396xfs_efi_cancel(
397 xfs_efi_log_item_t *efip)
398{
399 xfs_mount_t *mp;
400 SPLDECL(s);
401
402 mp = efip->efi_item.li_mountp;
403 AIL_LOCK(mp, s);
404 if (efip->efi_flags & XFS_EFI_COMMITTED) {
405 /*
406 * xfs_trans_delete_ail() drops the AIL lock.
407 */
408 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
409 xfs_efi_item_free(efip);
410 } else {
411 efip->efi_flags |= XFS_EFI_CANCELED;
412 AIL_UNLOCK(mp, s);
413 }
414}
415
416STATIC void 373STATIC void
417xfs_efd_item_free(xfs_efd_log_item_t *efdp) 374xfs_efd_item_free(xfs_efd_log_item_t *efdp)
418{ 375{
@@ -514,7 +471,7 @@ STATIC void
514xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) 471xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
515{ 472{
516 if (efdp->efd_item.li_flags & XFS_LI_ABORTED) 473 if (efdp->efd_item.li_flags & XFS_LI_ABORTED)
517 xfs_efd_item_abort(efdp); 474 xfs_efd_item_free(efdp);
518 return; 475 return;
519} 476}
520 477
@@ -541,27 +498,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
541} 498}
542 499
543/* 500/*
544 * The transaction of which this EFD is a part has been aborted.
545 * Inform its companion EFI of this fact and then clean up after
546 * ourselves. No need to clean up the slot for the item in the
547 * transaction. That was done by the unpin code which is called
548 * prior to this routine in the abort/fs-shutdown path.
549 */
550STATIC void
551xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
552{
553 /*
554 * If we got a log I/O error, it's always the case that the LR with the
555 * EFI got unpinned and freed before the EFD got aborted. So don't
556 * reference the EFI at all in that case.
557 */
558 if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
559 xfs_efi_cancel(efdp->efd_efip);
560
561 xfs_efd_item_free(efdp);
562}
563
564/*
565 * There isn't much you can do to push on an efd item. It is simply 501 * There isn't much you can do to push on an efd item. It is simply
566 * stuck waiting for the log to be flushed to disk. 502 * stuck waiting for the log to be flushed to disk.
567 */ 503 */
@@ -602,7 +538,6 @@ STATIC struct xfs_item_ops xfs_efd_item_ops = {
602 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 538 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
603 xfs_efd_item_committed, 539 xfs_efd_item_committed,
604 .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push, 540 .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
605 .iop_abort = (void(*)(xfs_log_item_t*))xfs_efd_item_abort,
606 .iop_pushbuf = NULL, 541 .iop_pushbuf = NULL,
607 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 542 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
608 xfs_efd_item_committing 543 xfs_efd_item_committing
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0ea45edaab03..2f049f63e85f 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -33,14 +33,16 @@ typedef struct xfs_extent {
33 * conversion routine. 33 * conversion routine.
34 */ 34 */
35 35
36#ifndef HAVE_FORMAT32
36typedef struct xfs_extent_32 { 37typedef struct xfs_extent_32 {
37 xfs_dfsbno_t ext_start; 38 __uint64_t ext_start;
38 xfs_extlen_t ext_len; 39 __uint32_t ext_len;
39} __attribute__((packed)) xfs_extent_32_t; 40} __attribute__((packed)) xfs_extent_32_t;
41#endif
40 42
41typedef struct xfs_extent_64 { 43typedef struct xfs_extent_64 {
42 xfs_dfsbno_t ext_start; 44 __uint64_t ext_start;
43 xfs_extlen_t ext_len; 45 __uint32_t ext_len;
44 __uint32_t ext_pad; 46 __uint32_t ext_pad;
45} xfs_extent_64_t; 47} xfs_extent_64_t;
46 48
@@ -50,25 +52,27 @@ typedef struct xfs_extent_64 {
50 * size is given by efi_nextents. 52 * size is given by efi_nextents.
51 */ 53 */
52typedef struct xfs_efi_log_format { 54typedef struct xfs_efi_log_format {
53 unsigned short efi_type; /* efi log item type */ 55 __uint16_t efi_type; /* efi log item type */
54 unsigned short efi_size; /* size of this item */ 56 __uint16_t efi_size; /* size of this item */
55 uint efi_nextents; /* # extents to free */ 57 __uint32_t efi_nextents; /* # extents to free */
56 __uint64_t efi_id; /* efi identifier */ 58 __uint64_t efi_id; /* efi identifier */
57 xfs_extent_t efi_extents[1]; /* array of extents to free */ 59 xfs_extent_t efi_extents[1]; /* array of extents to free */
58} xfs_efi_log_format_t; 60} xfs_efi_log_format_t;
59 61
62#ifndef HAVE_FORMAT32
60typedef struct xfs_efi_log_format_32 { 63typedef struct xfs_efi_log_format_32 {
61 unsigned short efi_type; /* efi log item type */ 64 __uint16_t efi_type; /* efi log item type */
62 unsigned short efi_size; /* size of this item */ 65 __uint16_t efi_size; /* size of this item */
63 uint efi_nextents; /* # extents to free */ 66 __uint32_t efi_nextents; /* # extents to free */
64 __uint64_t efi_id; /* efi identifier */ 67 __uint64_t efi_id; /* efi identifier */
65 xfs_extent_32_t efi_extents[1]; /* array of extents to free */ 68 xfs_extent_32_t efi_extents[1]; /* array of extents to free */
66} __attribute__((packed)) xfs_efi_log_format_32_t; 69} __attribute__((packed)) xfs_efi_log_format_32_t;
70#endif
67 71
68typedef struct xfs_efi_log_format_64 { 72typedef struct xfs_efi_log_format_64 {
69 unsigned short efi_type; /* efi log item type */ 73 __uint16_t efi_type; /* efi log item type */
70 unsigned short efi_size; /* size of this item */ 74 __uint16_t efi_size; /* size of this item */
71 uint efi_nextents; /* # extents to free */ 75 __uint32_t efi_nextents; /* # extents to free */
72 __uint64_t efi_id; /* efi identifier */ 76 __uint64_t efi_id; /* efi identifier */
73 xfs_extent_64_t efi_extents[1]; /* array of extents to free */ 77 xfs_extent_64_t efi_extents[1]; /* array of extents to free */
74} xfs_efi_log_format_64_t; 78} xfs_efi_log_format_64_t;
@@ -79,25 +83,27 @@ typedef struct xfs_efi_log_format_64 {
79 * size is given by efd_nextents; 83 * size is given by efd_nextents;
80 */ 84 */
81typedef struct xfs_efd_log_format { 85typedef struct xfs_efd_log_format {
82 unsigned short efd_type; /* efd log item type */ 86 __uint16_t efd_type; /* efd log item type */
83 unsigned short efd_size; /* size of this item */ 87 __uint16_t efd_size; /* size of this item */
84 uint efd_nextents; /* # of extents freed */ 88 __uint32_t efd_nextents; /* # of extents freed */
85 __uint64_t efd_efi_id; /* id of corresponding efi */ 89 __uint64_t efd_efi_id; /* id of corresponding efi */
86 xfs_extent_t efd_extents[1]; /* array of extents freed */ 90 xfs_extent_t efd_extents[1]; /* array of extents freed */
87} xfs_efd_log_format_t; 91} xfs_efd_log_format_t;
88 92
93#ifndef HAVE_FORMAT32
89typedef struct xfs_efd_log_format_32 { 94typedef struct xfs_efd_log_format_32 {
90 unsigned short efd_type; /* efd log item type */ 95 __uint16_t efd_type; /* efd log item type */
91 unsigned short efd_size; /* size of this item */ 96 __uint16_t efd_size; /* size of this item */
92 uint efd_nextents; /* # of extents freed */ 97 __uint32_t efd_nextents; /* # of extents freed */
93 __uint64_t efd_efi_id; /* id of corresponding efi */ 98 __uint64_t efd_efi_id; /* id of corresponding efi */
94 xfs_extent_32_t efd_extents[1]; /* array of extents freed */ 99 xfs_extent_32_t efd_extents[1]; /* array of extents freed */
95} __attribute__((packed)) xfs_efd_log_format_32_t; 100} __attribute__((packed)) xfs_efd_log_format_32_t;
101#endif
96 102
97typedef struct xfs_efd_log_format_64 { 103typedef struct xfs_efd_log_format_64 {
98 unsigned short efd_type; /* efd log item type */ 104 __uint16_t efd_type; /* efd log item type */
99 unsigned short efd_size; /* size of this item */ 105 __uint16_t efd_size; /* size of this item */
100 uint efd_nextents; /* # of extents freed */ 106 __uint32_t efd_nextents; /* # of extents freed */
101 __uint64_t efd_efi_id; /* id of corresponding efi */ 107 __uint64_t efd_efi_id; /* id of corresponding efi */
102 xfs_extent_64_t efd_extents[1]; /* array of extents freed */ 108 xfs_extent_64_t efd_extents[1]; /* array of extents freed */
103} xfs_efd_log_format_64_t; 109} xfs_efd_log_format_64_t;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 0f0ad1535951..1335449841cd 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -22,8 +22,6 @@
22 * SGI's XFS filesystem's major stuff (constants, structures) 22 * SGI's XFS filesystem's major stuff (constants, structures)
23 */ 23 */
24 24
25#define XFS_NAME "xfs"
26
27/* 25/*
28 * Direct I/O attribute record used with XFS_IOC_DIOINFO 26 * Direct I/O attribute record used with XFS_IOC_DIOINFO
29 * d_miniosz is the min xfer size, xfer size multiple and file seek offset 27 * d_miniosz is the min xfer size, xfer size multiple and file seek offset
@@ -426,11 +424,7 @@ typedef struct xfs_handle {
426 - (char *) &(handle)) \ 424 - (char *) &(handle)) \
427 + (handle).ha_fid.xfs_fid_len) 425 + (handle).ha_fid.xfs_fid_len)
428 426
429#define XFS_HANDLE_CMP(h1, h2) memcmp(h1, h2, sizeof(xfs_handle_t)) 427/*
430
431#define FSHSIZE sizeof(fsid_t)
432
433/*
434 * Flags for going down operation 428 * Flags for going down operation
435 */ 429 */
436#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ 430#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 33164a85aa9d..a446e5a115c6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -458,7 +458,7 @@ nextag:
458 */ 458 */
459 if (XFS_FORCED_SHUTDOWN(mp)) { 459 if (XFS_FORCED_SHUTDOWN(mp)) {
460 up_read(&mp->m_peraglock); 460 up_read(&mp->m_peraglock);
461 return (xfs_buf_t *)0; 461 return NULL;
462 } 462 }
463 agno++; 463 agno++;
464 if (agno >= agcount) 464 if (agno >= agcount)
@@ -466,7 +466,7 @@ nextag:
466 if (agno == pagno) { 466 if (agno == pagno) {
467 if (flags == 0) { 467 if (flags == 0) {
468 up_read(&mp->m_peraglock); 468 up_read(&mp->m_peraglock);
469 return (xfs_buf_t *)0; 469 return NULL;
470 } 470 }
471 flags = 0; 471 flags = 0;
472 } 472 }
@@ -529,10 +529,10 @@ xfs_dialloc(
529 int offset; /* index of inode in chunk */ 529 int offset; /* index of inode in chunk */
530 xfs_agino_t pagino; /* parent's a.g. relative inode # */ 530 xfs_agino_t pagino; /* parent's a.g. relative inode # */
531 xfs_agnumber_t pagno; /* parent's allocation group number */ 531 xfs_agnumber_t pagno; /* parent's allocation group number */
532 xfs_inobt_rec_t rec; /* inode allocation record */ 532 xfs_inobt_rec_incore_t rec; /* inode allocation record */
533 xfs_agnumber_t tagno; /* testing allocation group number */ 533 xfs_agnumber_t tagno; /* testing allocation group number */
534 xfs_btree_cur_t *tcur; /* temp cursor */ 534 xfs_btree_cur_t *tcur; /* temp cursor */
535 xfs_inobt_rec_t trec; /* temp inode allocation record */ 535 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
536 536
537 537
538 if (*IO_agbp == NULL) { 538 if (*IO_agbp == NULL) {
@@ -945,7 +945,7 @@ xfs_difree(
945 int ilen; /* inodes in an inode cluster */ 945 int ilen; /* inodes in an inode cluster */
946 xfs_mount_t *mp; /* mount structure for filesystem */ 946 xfs_mount_t *mp; /* mount structure for filesystem */
947 int off; /* offset of inode in inode chunk */ 947 int off; /* offset of inode in inode chunk */
948 xfs_inobt_rec_t rec; /* btree record */ 948 xfs_inobt_rec_incore_t rec; /* btree record */
949 949
950 mp = tp->t_mountp; 950 mp = tp->t_mountp;
951 951
@@ -1195,6 +1195,7 @@ xfs_dilocate(
1195 "(0x%llx)", 1195 "(0x%llx)",
1196 ino, XFS_AGINO_TO_INO(mp, agno, agino)); 1196 ino, XFS_AGINO_TO_INO(mp, agno, agino));
1197 } 1197 }
1198 xfs_stack_trace();
1198#endif /* DEBUG */ 1199#endif /* DEBUG */
1199 return XFS_ERROR(EINVAL); 1200 return XFS_ERROR(EINVAL);
1200 } 1201 }
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 616eeeb6953e..8cdeeaf8632b 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -568,7 +568,7 @@ xfs_inobt_insrec(
568 /* 568 /*
569 * Make a key out of the record data to be inserted, and save it. 569 * Make a key out of the record data to be inserted, and save it.
570 */ 570 */
571 key.ir_startino = recp->ir_startino; /* INT_: direct copy */ 571 key.ir_startino = recp->ir_startino;
572 optr = ptr = cur->bc_ptrs[level]; 572 optr = ptr = cur->bc_ptrs[level];
573 /* 573 /*
574 * If we're off the left edge, return failure. 574 * If we're off the left edge, return failure.
@@ -600,7 +600,7 @@ xfs_inobt_insrec(
600 } 600 }
601#endif 601#endif
602 nbno = NULLAGBLOCK; 602 nbno = NULLAGBLOCK;
603 ncur = (xfs_btree_cur_t *)0; 603 ncur = NULL;
604 /* 604 /*
605 * If the block is full, we can't insert the new entry until we 605 * If the block is full, we can't insert the new entry until we
606 * make the block un-full. 606 * make the block un-full.
@@ -641,7 +641,7 @@ xfs_inobt_insrec(
641 return error; 641 return error;
642#endif 642#endif
643 ptr = cur->bc_ptrs[level]; 643 ptr = cur->bc_ptrs[level];
644 nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */ 644 nrec.ir_startino = nkey.ir_startino;
645 } else { 645 } else {
646 /* 646 /*
647 * Otherwise the insert fails. 647 * Otherwise the insert fails.
@@ -681,7 +681,7 @@ xfs_inobt_insrec(
681 if ((error = xfs_btree_check_sptr(cur, *bnop, level))) 681 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
682 return error; 682 return error;
683#endif 683#endif
684 kp[ptr - 1] = key; /* INT_: struct copy */ 684 kp[ptr - 1] = key;
685 pp[ptr - 1] = cpu_to_be32(*bnop); 685 pp[ptr - 1] = cpu_to_be32(*bnop);
686 numrecs++; 686 numrecs++;
687 block->bb_numrecs = cpu_to_be16(numrecs); 687 block->bb_numrecs = cpu_to_be16(numrecs);
@@ -698,7 +698,7 @@ xfs_inobt_insrec(
698 * Now stuff the new record in, bump numrecs 698 * Now stuff the new record in, bump numrecs
699 * and log the new data. 699 * and log the new data.
700 */ 700 */
701 rp[ptr - 1] = *recp; /* INT_: struct copy */ 701 rp[ptr - 1] = *recp;
702 numrecs++; 702 numrecs++;
703 block->bb_numrecs = cpu_to_be16(numrecs); 703 block->bb_numrecs = cpu_to_be16(numrecs);
704 xfs_inobt_log_recs(cur, bp, ptr, numrecs); 704 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
@@ -731,7 +731,7 @@ xfs_inobt_insrec(
731 */ 731 */
732 *bnop = nbno; 732 *bnop = nbno;
733 if (nbno != NULLAGBLOCK) { 733 if (nbno != NULLAGBLOCK) {
734 *recp = nrec; /* INT_: struct copy */ 734 *recp = nrec;
735 *curp = ncur; 735 *curp = ncur;
736 } 736 }
737 *stat = 1; 737 *stat = 1;
@@ -878,7 +878,7 @@ xfs_inobt_lookup(
878 */ 878 */
879 bp = cur->bc_bufs[level]; 879 bp = cur->bc_bufs[level];
880 if (bp && XFS_BUF_ADDR(bp) != d) 880 if (bp && XFS_BUF_ADDR(bp) != d)
881 bp = (xfs_buf_t *)0; 881 bp = NULL;
882 if (!bp) { 882 if (!bp) {
883 /* 883 /*
884 * Need to get a new buffer. Read it, then 884 * Need to get a new buffer. Read it, then
@@ -950,12 +950,12 @@ xfs_inobt_lookup(
950 xfs_inobt_key_t *kkp; 950 xfs_inobt_key_t *kkp;
951 951
952 kkp = kkbase + keyno - 1; 952 kkp = kkbase + keyno - 1;
953 startino = INT_GET(kkp->ir_startino, ARCH_CONVERT); 953 startino = be32_to_cpu(kkp->ir_startino);
954 } else { 954 } else {
955 xfs_inobt_rec_t *krp; 955 xfs_inobt_rec_t *krp;
956 956
957 krp = krbase + keyno - 1; 957 krp = krbase + keyno - 1;
958 startino = INT_GET(krp->ir_startino, ARCH_CONVERT); 958 startino = be32_to_cpu(krp->ir_startino);
959 } 959 }
960 /* 960 /*
961 * Compute difference to get next direction. 961 * Compute difference to get next direction.
@@ -1117,7 +1117,7 @@ xfs_inobt_lshift(
1117 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) 1117 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1118 return error; 1118 return error;
1119#endif 1119#endif
1120 *lpp = *rpp; /* INT_: no-change copy */ 1120 *lpp = *rpp;
1121 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec); 1121 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1122 } 1122 }
1123 /* 1123 /*
@@ -1160,7 +1160,7 @@ xfs_inobt_lshift(
1160 } else { 1160 } else {
1161 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); 1161 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1162 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); 1162 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1163 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */ 1163 key.ir_startino = rrp->ir_startino;
1164 rkp = &key; 1164 rkp = &key;
1165 } 1165 }
1166 /* 1166 /*
@@ -1297,13 +1297,13 @@ xfs_inobt_newroot(
1297 */ 1297 */
1298 kp = XFS_INOBT_KEY_ADDR(new, 1, cur); 1298 kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1299 if (be16_to_cpu(left->bb_level) > 0) { 1299 if (be16_to_cpu(left->bb_level) > 0) {
1300 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */ 1300 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1301 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */ 1301 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1302 } else { 1302 } else {
1303 rp = XFS_INOBT_REC_ADDR(left, 1, cur); 1303 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1304 INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT); 1304 kp[0].ir_startino = rp->ir_startino;
1305 rp = XFS_INOBT_REC_ADDR(right, 1, cur); 1305 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1306 INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT); 1306 kp[1].ir_startino = rp->ir_startino;
1307 } 1307 }
1308 xfs_inobt_log_keys(cur, nbp, 1, 2); 1308 xfs_inobt_log_keys(cur, nbp, 1, 2);
1309 /* 1309 /*
@@ -1410,8 +1410,8 @@ xfs_inobt_rshift(
1410 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) 1410 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1411 return error; 1411 return error;
1412#endif 1412#endif
1413 *rkp = *lkp; /* INT_: no change copy */ 1413 *rkp = *lkp;
1414 *rpp = *lpp; /* INT_: no change copy */ 1414 *rpp = *lpp;
1415 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1415 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1416 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1416 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1417 } else { 1417 } else {
@@ -1420,7 +1420,7 @@ xfs_inobt_rshift(
1420 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); 1420 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1421 *rrp = *lrp; 1421 *rrp = *lrp;
1422 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); 1422 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1423 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */ 1423 key.ir_startino = rrp->ir_startino;
1424 rkp = &key; 1424 rkp = &key;
1425 } 1425 }
1426 /* 1426 /*
@@ -1559,7 +1559,7 @@ xfs_inobt_split(
1559 rrp = XFS_INOBT_REC_ADDR(right, 1, cur); 1559 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1560 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); 1560 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1561 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); 1561 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1562 keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */ 1562 keyp->ir_startino = rrp->ir_startino;
1563 } 1563 }
1564 /* 1564 /*
1565 * Find the left block number by looking in the buffer. 1565 * Find the left block number by looking in the buffer.
@@ -1813,9 +1813,9 @@ xfs_inobt_get_rec(
1813 * Point to the record and extract its data. 1813 * Point to the record and extract its data.
1814 */ 1814 */
1815 rec = XFS_INOBT_REC_ADDR(block, ptr, cur); 1815 rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1816 *ino = INT_GET(rec->ir_startino, ARCH_CONVERT); 1816 *ino = be32_to_cpu(rec->ir_startino);
1817 *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT); 1817 *fcnt = be32_to_cpu(rec->ir_freecount);
1818 *free = INT_GET(rec->ir_free, ARCH_CONVERT); 1818 *free = be64_to_cpu(rec->ir_free);
1819 *stat = 1; 1819 *stat = 1;
1820 return 0; 1820 return 0;
1821} 1821}
@@ -1930,10 +1930,10 @@ xfs_inobt_insert(
1930 1930
1931 level = 0; 1931 level = 0;
1932 nbno = NULLAGBLOCK; 1932 nbno = NULLAGBLOCK;
1933 INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino); 1933 nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1934 INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount); 1934 nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1935 INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free); 1935 nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1936 ncur = (xfs_btree_cur_t *)0; 1936 ncur = NULL;
1937 pcur = cur; 1937 pcur = cur;
1938 /* 1938 /*
1939 * Loop going up the tree, starting at the leaf level. 1939 * Loop going up the tree, starting at the leaf level.
@@ -1965,7 +1965,7 @@ xfs_inobt_insert(
1965 */ 1965 */
1966 if (ncur) { 1966 if (ncur) {
1967 pcur = ncur; 1967 pcur = ncur;
1968 ncur = (xfs_btree_cur_t *)0; 1968 ncur = NULL;
1969 } 1969 }
1970 } while (nbno != NULLAGBLOCK); 1970 } while (nbno != NULLAGBLOCK);
1971 *stat = i; 1971 *stat = i;
@@ -2060,9 +2060,9 @@ xfs_inobt_update(
2060 /* 2060 /*
2061 * Fill in the new contents and log them. 2061 * Fill in the new contents and log them.
2062 */ 2062 */
2063 INT_SET(rp->ir_startino, ARCH_CONVERT, ino); 2063 rp->ir_startino = cpu_to_be32(ino);
2064 INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt); 2064 rp->ir_freecount = cpu_to_be32(fcnt);
2065 INT_SET(rp->ir_free, ARCH_CONVERT, free); 2065 rp->ir_free = cpu_to_be64(free);
2066 xfs_inobt_log_recs(cur, bp, ptr, ptr); 2066 xfs_inobt_log_recs(cur, bp, ptr, ptr);
2067 /* 2067 /*
2068 * Updating first record in leaf. Pass new key value up to our parent. 2068 * Updating first record in leaf. Pass new key value up to our parent.
@@ -2070,7 +2070,7 @@ xfs_inobt_update(
2070 if (ptr == 1) { 2070 if (ptr == 1) {
2071 xfs_inobt_key_t key; /* key containing [ino] */ 2071 xfs_inobt_key_t key; /* key containing [ino] */
2072 2072
2073 INT_SET(key.ir_startino, ARCH_CONVERT, ino); 2073 key.ir_startino = cpu_to_be32(ino);
2074 if ((error = xfs_inobt_updkey(cur, &key, 1))) 2074 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2075 return error; 2075 return error;
2076 } 2076 }
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index ae3904cb1ee8..2c0e49893ff7 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -47,19 +47,24 @@ static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
47/* 47/*
48 * Data record structure 48 * Data record structure
49 */ 49 */
50typedef struct xfs_inobt_rec 50typedef struct xfs_inobt_rec {
51{ 51 __be32 ir_startino; /* starting inode number */
52 __be32 ir_freecount; /* count of free inodes (set bits) */
53 __be64 ir_free; /* free inode mask */
54} xfs_inobt_rec_t;
55
56typedef struct xfs_inobt_rec_incore {
52 xfs_agino_t ir_startino; /* starting inode number */ 57 xfs_agino_t ir_startino; /* starting inode number */
53 __int32_t ir_freecount; /* count of free inodes (set bits) */ 58 __int32_t ir_freecount; /* count of free inodes (set bits) */
54 xfs_inofree_t ir_free; /* free inode mask */ 59 xfs_inofree_t ir_free; /* free inode mask */
55} xfs_inobt_rec_t; 60} xfs_inobt_rec_incore_t;
61
56 62
57/* 63/*
58 * Key structure 64 * Key structure
59 */ 65 */
60typedef struct xfs_inobt_key 66typedef struct xfs_inobt_key {
61{ 67 __be32 ir_startino; /* starting inode number */
62 xfs_agino_t ir_startino; /* starting inode number */
63} xfs_inobt_key_t; 68} xfs_inobt_key_t;
64 69
65/* btree pointer type */ 70/* btree pointer type */
@@ -77,7 +82,7 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
77#define XFS_INOBT_IS_FREE(rp,i) \ 82#define XFS_INOBT_IS_FREE(rp,i) \
78 (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) 83 (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
79#define XFS_INOBT_IS_FREE_DISK(rp,i) \ 84#define XFS_INOBT_IS_FREE_DISK(rp,i) \
80 ((INT_GET((rp)->ir_free,ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0) 85 ((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0)
81#define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) 86#define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i))
82#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) 87#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
83 88
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0724df7fabb7..b73d216ecaf9 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -50,7 +50,7 @@ void
50xfs_ihash_init(xfs_mount_t *mp) 50xfs_ihash_init(xfs_mount_t *mp)
51{ 51{
52 __uint64_t icount; 52 __uint64_t icount;
53 uint i, flags = KM_SLEEP | KM_MAYFAIL; 53 uint i;
54 54
55 if (!mp->m_ihsize) { 55 if (!mp->m_ihsize) {
56 icount = mp->m_maxicount ? mp->m_maxicount : 56 icount = mp->m_maxicount ? mp->m_maxicount :
@@ -61,14 +61,13 @@ xfs_ihash_init(xfs_mount_t *mp)
61 (64 * NBPP) / sizeof(xfs_ihash_t)); 61 (64 * NBPP) / sizeof(xfs_ihash_t));
62 } 62 }
63 63
64 while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize * 64 mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize,
65 sizeof(xfs_ihash_t), flags))) { 65 NBPC * sizeof(xfs_ihash_t),
66 if ((mp->m_ihsize >>= 1) <= NBPP) 66 mp->m_ihsize * sizeof(xfs_ihash_t),
67 flags = KM_SLEEP; 67 KM_SLEEP | KM_MAYFAIL | KM_LARGE);
68 } 68 mp->m_ihsize /= sizeof(xfs_ihash_t);
69 for (i = 0; i < mp->m_ihsize; i++) { 69 for (i = 0; i < mp->m_ihsize; i++)
70 rwlock_init(&(mp->m_ihash[i].ih_lock)); 70 rwlock_init(&(mp->m_ihash[i].ih_lock));
71 }
72} 71}
73 72
74/* 73/*
@@ -77,7 +76,7 @@ xfs_ihash_init(xfs_mount_t *mp)
77void 76void
78xfs_ihash_free(xfs_mount_t *mp) 77xfs_ihash_free(xfs_mount_t *mp)
79{ 78{
80 kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t)); 79 kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t));
81 mp->m_ihash = NULL; 80 mp->m_ihash = NULL;
82} 81}
83 82
@@ -95,7 +94,7 @@ xfs_chash_init(xfs_mount_t *mp)
95 mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); 94 mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
96 mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize 95 mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
97 * sizeof(xfs_chash_t), 96 * sizeof(xfs_chash_t),
98 KM_SLEEP); 97 KM_SLEEP | KM_LARGE);
99 for (i = 0; i < mp->m_chsize; i++) { 98 for (i = 0; i < mp->m_chsize; i++) {
100 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); 99 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
101 } 100 }
@@ -244,7 +243,9 @@ again:
244 243
245 XFS_STATS_INC(xs_ig_found); 244 XFS_STATS_INC(xs_ig_found);
246 245
246 spin_lock(&ip->i_flags_lock);
247 ip->i_flags &= ~XFS_IRECLAIMABLE; 247 ip->i_flags &= ~XFS_IRECLAIMABLE;
248 spin_unlock(&ip->i_flags_lock);
248 version = ih->ih_version; 249 version = ih->ih_version;
249 read_unlock(&ih->ih_lock); 250 read_unlock(&ih->ih_lock);
250 xfs_ihash_promote(ih, ip, version); 251 xfs_ihash_promote(ih, ip, version);
@@ -290,15 +291,17 @@ again:
290 291
291finish_inode: 292finish_inode:
292 if (ip->i_d.di_mode == 0) { 293 if (ip->i_d.di_mode == 0) {
293 if (!(flags & IGET_CREATE)) 294 if (!(flags & XFS_IGET_CREATE))
294 return ENOENT; 295 return ENOENT;
295 xfs_iocore_inode_reinit(ip); 296 xfs_iocore_inode_reinit(ip);
296 } 297 }
297 298
298 if (lock_flags != 0) 299 if (lock_flags != 0)
299 xfs_ilock(ip, lock_flags); 300 xfs_ilock(ip, lock_flags);
300 301
302 spin_lock(&ip->i_flags_lock);
301 ip->i_flags &= ~XFS_ISTALE; 303 ip->i_flags &= ~XFS_ISTALE;
304 spin_unlock(&ip->i_flags_lock);
302 305
303 vn_trace_exit(vp, "xfs_iget.found", 306 vn_trace_exit(vp, "xfs_iget.found",
304 (inst_t *)__return_address); 307 (inst_t *)__return_address);
@@ -320,21 +323,20 @@ finish_inode:
320 * Read the disk inode attributes into a new inode structure and get 323 * Read the disk inode attributes into a new inode structure and get
321 * a new vnode for it. This should also initialize i_ino and i_mount. 324 * a new vnode for it. This should also initialize i_ino and i_mount.
322 */ 325 */
323 error = xfs_iread(mp, tp, ino, &ip, bno); 326 error = xfs_iread(mp, tp, ino, &ip, bno,
324 if (error) { 327 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
328 if (error)
325 return error; 329 return error;
326 }
327 330
328 vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); 331 vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
329 332
330 xfs_inode_lock_init(ip, vp); 333 xfs_inode_lock_init(ip, vp);
331 xfs_iocore_inode_init(ip); 334 xfs_iocore_inode_init(ip);
332 335
333 if (lock_flags != 0) { 336 if (lock_flags)
334 xfs_ilock(ip, lock_flags); 337 xfs_ilock(ip, lock_flags);
335 } 338
336 339 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
337 if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) {
338 xfs_idestroy(ip); 340 xfs_idestroy(ip);
339 return ENOENT; 341 return ENOENT;
340 } 342 }
@@ -369,7 +371,9 @@ finish_inode:
369 ih->ih_next = ip; 371 ih->ih_next = ip;
370 ip->i_udquot = ip->i_gdquot = NULL; 372 ip->i_udquot = ip->i_gdquot = NULL;
371 ih->ih_version++; 373 ih->ih_version++;
374 spin_lock(&ip->i_flags_lock);
372 ip->i_flags |= XFS_INEW; 375 ip->i_flags |= XFS_INEW;
376 spin_unlock(&ip->i_flags_lock);
373 377
374 write_unlock(&ih->ih_lock); 378 write_unlock(&ih->ih_lock);
375 379
@@ -548,7 +552,7 @@ xfs_inode_lock_init(
548 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number); 552 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
549 init_waitqueue_head(&ip->i_ipin_wait); 553 init_waitqueue_head(&ip->i_ipin_wait);
550 atomic_set(&ip->i_pincount, 0); 554 atomic_set(&ip->i_pincount, 0);
551 init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number); 555 initnsema(&ip->i_flock, 1, "xfsfino");
552} 556}
553 557
554/* 558/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f8ecff8553a..c27d7d495aa0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -854,7 +854,8 @@ xfs_iread(
854 xfs_trans_t *tp, 854 xfs_trans_t *tp,
855 xfs_ino_t ino, 855 xfs_ino_t ino,
856 xfs_inode_t **ipp, 856 xfs_inode_t **ipp,
857 xfs_daddr_t bno) 857 xfs_daddr_t bno,
858 uint imap_flags)
858{ 859{
859 xfs_buf_t *bp; 860 xfs_buf_t *bp;
860 xfs_dinode_t *dip; 861 xfs_dinode_t *dip;
@@ -866,6 +867,7 @@ xfs_iread(
866 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); 867 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
867 ip->i_ino = ino; 868 ip->i_ino = ino;
868 ip->i_mount = mp; 869 ip->i_mount = mp;
870 spin_lock_init(&ip->i_flags_lock);
869 871
870 /* 872 /*
871 * Get pointer's to the on-disk inode and the buffer containing it. 873 * Get pointer's to the on-disk inode and the buffer containing it.
@@ -874,7 +876,7 @@ xfs_iread(
874 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 876 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
875 * know that this is a new incore inode. 877 * know that this is a new incore inode.
876 */ 878 */
877 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); 879 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
878 if (error) { 880 if (error) {
879 kmem_zone_free(xfs_inode_zone, ip); 881 kmem_zone_free(xfs_inode_zone, ip);
880 return error; 882 return error;
@@ -1113,7 +1115,7 @@ xfs_ialloc(
1113 * to prevent others from looking at until we're done. 1115 * to prevent others from looking at until we're done.
1114 */ 1116 */
1115 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1117 error = xfs_trans_iget(tp->t_mountp, tp, ino,
1116 IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1118 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
1117 if (error != 0) { 1119 if (error != 0) {
1118 return error; 1120 return error;
1119 } 1121 }
@@ -2213,7 +2215,9 @@ xfs_ifree_cluster(
2213 2215
2214 if (ip == free_ip) { 2216 if (ip == free_ip) {
2215 if (xfs_iflock_nowait(ip)) { 2217 if (xfs_iflock_nowait(ip)) {
2218 spin_lock(&ip->i_flags_lock);
2216 ip->i_flags |= XFS_ISTALE; 2219 ip->i_flags |= XFS_ISTALE;
2220 spin_unlock(&ip->i_flags_lock);
2217 2221
2218 if (xfs_inode_clean(ip)) { 2222 if (xfs_inode_clean(ip)) {
2219 xfs_ifunlock(ip); 2223 xfs_ifunlock(ip);
@@ -2227,7 +2231,9 @@ xfs_ifree_cluster(
2227 2231
2228 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2232 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2229 if (xfs_iflock_nowait(ip)) { 2233 if (xfs_iflock_nowait(ip)) {
2234 spin_lock(&ip->i_flags_lock);
2230 ip->i_flags |= XFS_ISTALE; 2235 ip->i_flags |= XFS_ISTALE;
2236 spin_unlock(&ip->i_flags_lock);
2231 2237
2232 if (xfs_inode_clean(ip)) { 2238 if (xfs_inode_clean(ip)) {
2233 xfs_ifunlock(ip); 2239 xfs_ifunlock(ip);
@@ -2257,7 +2263,9 @@ xfs_ifree_cluster(
2257 AIL_LOCK(mp,s); 2263 AIL_LOCK(mp,s);
2258 iip->ili_flush_lsn = iip->ili_item.li_lsn; 2264 iip->ili_flush_lsn = iip->ili_item.li_lsn;
2259 AIL_UNLOCK(mp, s); 2265 AIL_UNLOCK(mp, s);
2266 spin_lock(&iip->ili_inode->i_flags_lock);
2260 iip->ili_inode->i_flags |= XFS_ISTALE; 2267 iip->ili_inode->i_flags |= XFS_ISTALE;
2268 spin_unlock(&iip->ili_inode->i_flags_lock);
2261 pre_flushed++; 2269 pre_flushed++;
2262 } 2270 }
2263 lip = lip->li_bio_list; 2271 lip = lip->li_bio_list;
@@ -2753,19 +2761,29 @@ xfs_iunpin(
2753 * call as the inode reclaim may be blocked waiting for 2761 * call as the inode reclaim may be blocked waiting for
2754 * the inode to become unpinned. 2762 * the inode to become unpinned.
2755 */ 2763 */
2764 struct inode *inode = NULL;
2765
2766 spin_lock(&ip->i_flags_lock);
2756 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { 2767 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2757 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2768 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
2758 2769
2759 /* make sync come back and flush this inode */ 2770 /* make sync come back and flush this inode */
2760 if (vp) { 2771 if (vp) {
2761 struct inode *inode = vn_to_inode(vp); 2772 inode = vn_to_inode(vp);
2762 2773
2763 if (!(inode->i_state & 2774 if (!(inode->i_state &
2764 (I_NEW|I_FREEING|I_CLEAR))) 2775 (I_NEW|I_FREEING|I_CLEAR))) {
2765 mark_inode_dirty_sync(inode); 2776 inode = igrab(inode);
2777 if (inode)
2778 mark_inode_dirty_sync(inode);
2779 } else
2780 inode = NULL;
2766 } 2781 }
2767 } 2782 }
2783 spin_unlock(&ip->i_flags_lock);
2768 wake_up(&ip->i_ipin_wait); 2784 wake_up(&ip->i_ipin_wait);
2785 if (inode)
2786 iput(inode);
2769 } 2787 }
2770} 2788}
2771 2789
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d10b76ed1e5b..e96eb0835fe6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -267,6 +267,7 @@ typedef struct xfs_inode {
267 sema_t i_flock; /* inode flush lock */ 267 sema_t i_flock; /* inode flush lock */
268 atomic_t i_pincount; /* inode pin count */ 268 atomic_t i_pincount; /* inode pin count */
269 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 269 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
270 spinlock_t i_flags_lock; /* inode i_flags lock */
270#ifdef HAVE_REFCACHE 271#ifdef HAVE_REFCACHE
271 struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ 272 struct xfs_inode **i_refcache; /* ptr to entry in ref cache */
272 struct xfs_inode *i_release; /* inode to unref */ 273 struct xfs_inode *i_release; /* inode to unref */
@@ -389,11 +390,14 @@ typedef struct xfs_inode {
389 (((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID)) 390 (((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID))
390 391
391/* 392/*
392 * xfs_iget.c prototypes. 393 * Flags for xfs_iget()
393 */ 394 */
395#define XFS_IGET_CREATE 0x1
396#define XFS_IGET_BULKSTAT 0x2
394 397
395#define IGET_CREATE 1 398/*
396 399 * xfs_iget.c prototypes.
400 */
397void xfs_ihash_init(struct xfs_mount *); 401void xfs_ihash_init(struct xfs_mount *);
398void xfs_ihash_free(struct xfs_mount *); 402void xfs_ihash_free(struct xfs_mount *);
399void xfs_chash_init(struct xfs_mount *); 403void xfs_chash_init(struct xfs_mount *);
@@ -425,7 +429,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
425 xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, 429 xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
426 xfs_daddr_t, uint); 430 xfs_daddr_t, uint);
427int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 431int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
428 xfs_inode_t **, xfs_daddr_t); 432 xfs_inode_t **, xfs_daddr_t, uint);
429int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); 433int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
430int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 434int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
431 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, 435 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f8e80d8e7237..a7a92251eb56 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -743,21 +743,6 @@ xfs_inode_item_committed(
743} 743}
744 744
745/* 745/*
746 * The transaction with the inode locked has aborted. The inode
747 * must not be dirty within the transaction (unless we're forcibly
748 * shutting down). We simply unlock just as if the transaction
749 * had been cancelled.
750 */
751STATIC void
752xfs_inode_item_abort(
753 xfs_inode_log_item_t *iip)
754{
755 xfs_inode_item_unlock(iip);
756 return;
757}
758
759
760/*
761 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 746 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
762 * failed to get the inode flush lock but did get the inode locked SHARED. 747 * failed to get the inode flush lock but did get the inode locked SHARED.
763 * Here we're trying to see if the inode buffer is incore, and if so whether it's 748 * Here we're trying to see if the inode buffer is incore, and if so whether it's
@@ -915,7 +900,6 @@ STATIC struct xfs_item_ops xfs_inode_item_ops = {
915 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 900 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
916 xfs_inode_item_committed, 901 xfs_inode_item_committed,
917 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push, 902 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
918 .iop_abort = (void(*)(xfs_log_item_t*))xfs_inode_item_abort,
919 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, 903 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
920 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 904 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
921 xfs_inode_item_committing 905 xfs_inode_item_committing
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 5db6cd1b4cf3..bfe92ea17952 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -25,52 +25,54 @@
25 * must be added on to the end. 25 * must be added on to the end.
26 */ 26 */
27typedef struct xfs_inode_log_format { 27typedef struct xfs_inode_log_format {
28 unsigned short ilf_type; /* inode log item type */ 28 __uint16_t ilf_type; /* inode log item type */
29 unsigned short ilf_size; /* size of this item */ 29 __uint16_t ilf_size; /* size of this item */
30 uint ilf_fields; /* flags for fields logged */ 30 __uint32_t ilf_fields; /* flags for fields logged */
31 ushort ilf_asize; /* size of attr d/ext/root */ 31 __uint16_t ilf_asize; /* size of attr d/ext/root */
32 ushort ilf_dsize; /* size of data/ext/root */ 32 __uint16_t ilf_dsize; /* size of data/ext/root */
33 xfs_ino_t ilf_ino; /* inode number */ 33 __uint64_t ilf_ino; /* inode number */
34 union { 34 union {
35 xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ 35 __uint32_t ilfu_rdev; /* rdev value for dev inode*/
36 uuid_t ilfu_uuid; /* mount point value */ 36 uuid_t ilfu_uuid; /* mount point value */
37 } ilf_u; 37 } ilf_u;
38 __int64_t ilf_blkno; /* blkno of inode buffer */ 38 __int64_t ilf_blkno; /* blkno of inode buffer */
39 int ilf_len; /* len of inode buffer */ 39 __int32_t ilf_len; /* len of inode buffer */
40 int ilf_boffset; /* off of inode in buffer */ 40 __int32_t ilf_boffset; /* off of inode in buffer */
41} xfs_inode_log_format_t; 41} xfs_inode_log_format_t;
42 42
43#ifndef HAVE_FORMAT32
43typedef struct xfs_inode_log_format_32 { 44typedef struct xfs_inode_log_format_32 {
44 unsigned short ilf_type; /* 16: inode log item type */ 45 __uint16_t ilf_type; /* inode log item type */
45 unsigned short ilf_size; /* 16: size of this item */ 46 __uint16_t ilf_size; /* size of this item */
46 uint ilf_fields; /* 32: flags for fields logged */ 47 __uint32_t ilf_fields; /* flags for fields logged */
47 ushort ilf_asize; /* 32: size of attr d/ext/root */ 48 __uint16_t ilf_asize; /* size of attr d/ext/root */
48 ushort ilf_dsize; /* 32: size of data/ext/root */ 49 __uint16_t ilf_dsize; /* size of data/ext/root */
49 xfs_ino_t ilf_ino; /* 64: inode number */ 50 __uint64_t ilf_ino; /* inode number */
50 union { 51 union {
51 xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ 52 __uint32_t ilfu_rdev; /* rdev value for dev inode*/
52 uuid_t ilfu_uuid; /* 128: mount point value */ 53 uuid_t ilfu_uuid; /* mount point value */
53 } ilf_u; 54 } ilf_u;
54 __int64_t ilf_blkno; /* 64: blkno of inode buffer */ 55 __int64_t ilf_blkno; /* blkno of inode buffer */
55 int ilf_len; /* 32: len of inode buffer */ 56 __int32_t ilf_len; /* len of inode buffer */
56 int ilf_boffset; /* 32: off of inode in buffer */ 57 __int32_t ilf_boffset; /* off of inode in buffer */
57} __attribute__((packed)) xfs_inode_log_format_32_t; 58} __attribute__((packed)) xfs_inode_log_format_32_t;
59#endif
58 60
59typedef struct xfs_inode_log_format_64 { 61typedef struct xfs_inode_log_format_64 {
60 unsigned short ilf_type; /* 16: inode log item type */ 62 __uint16_t ilf_type; /* inode log item type */
61 unsigned short ilf_size; /* 16: size of this item */ 63 __uint16_t ilf_size; /* size of this item */
62 uint ilf_fields; /* 32: flags for fields logged */ 64 __uint32_t ilf_fields; /* flags for fields logged */
63 ushort ilf_asize; /* 32: size of attr d/ext/root */ 65 __uint16_t ilf_asize; /* size of attr d/ext/root */
64 ushort ilf_dsize; /* 32: size of data/ext/root */ 66 __uint16_t ilf_dsize; /* size of data/ext/root */
65 __uint32_t ilf_pad; /* 32: pad for 64 bit boundary */ 67 __uint32_t ilf_pad; /* pad for 64 bit boundary */
66 xfs_ino_t ilf_ino; /* 64: inode number */ 68 __uint64_t ilf_ino; /* inode number */
67 union { 69 union {
68 xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/ 70 __uint32_t ilfu_rdev; /* rdev value for dev inode*/
69 uuid_t ilfu_uuid; /* 128: mount point value */ 71 uuid_t ilfu_uuid; /* mount point value */
70 } ilf_u; 72 } ilf_u;
71 __int64_t ilf_blkno; /* 64: blkno of inode buffer */ 73 __int64_t ilf_blkno; /* blkno of inode buffer */
72 int ilf_len; /* 32: len of inode buffer */ 74 __int32_t ilf_len; /* len of inode buffer */
73 int ilf_boffset; /* 32: off of inode in buffer */ 75 __int32_t ilf_boffset; /* off of inode in buffer */
74} xfs_inode_log_format_64_t; 76} xfs_inode_log_format_64_t;
75 77
76/* 78/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f1949c16df15..19655124da78 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -398,6 +398,23 @@ xfs_flush_space(
398 return 1; 398 return 1;
399} 399}
400 400
401STATIC int
402xfs_cmn_err_fsblock_zero(
403 xfs_inode_t *ip,
404 xfs_bmbt_irec_t *imap)
405{
406 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
407 "Access to block zero in inode %llu "
408 "start_block: %llx start_off: %llx "
409 "blkcnt: %llx extent-state: %x\n",
410 (unsigned long long)ip->i_ino,
411 (unsigned long long)imap->br_startblock,
412 (unsigned long long)imap->br_startoff,
413 (unsigned long long)imap->br_blockcount,
414 imap->br_state);
415 return EFSCORRUPTED;
416}
417
401int 418int
402xfs_iomap_write_direct( 419xfs_iomap_write_direct(
403 xfs_inode_t *ip, 420 xfs_inode_t *ip,
@@ -536,23 +553,17 @@ xfs_iomap_write_direct(
536 * Copy any maps to caller's array and return any error. 553 * Copy any maps to caller's array and return any error.
537 */ 554 */
538 if (nimaps == 0) { 555 if (nimaps == 0) {
539 error = (ENOSPC); 556 error = ENOSPC;
557 goto error_out;
558 }
559
560 if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
561 error = xfs_cmn_err_fsblock_zero(ip, &imap);
540 goto error_out; 562 goto error_out;
541 } 563 }
542 564
543 *ret_imap = imap; 565 *ret_imap = imap;
544 *nmaps = 1; 566 *nmaps = 1;
545 if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
546 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
547 "start_block : %llx start_off : %llx blkcnt : %llx "
548 "extent-state : %x \n",
549 (ip->i_mount)->m_fsname,
550 (long long)ip->i_ino,
551 (unsigned long long)ret_imap->br_startblock,
552 (unsigned long long)ret_imap->br_startoff,
553 (unsigned long long)ret_imap->br_blockcount,
554 ret_imap->br_state);
555 }
556 return 0; 567 return 0;
557 568
558error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 569error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -715,17 +726,8 @@ retry:
715 goto retry; 726 goto retry;
716 } 727 }
717 728
718 if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) { 729 if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
719 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " 730 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
720 "start_block : %llx start_off : %llx blkcnt : %llx "
721 "extent-state : %x \n",
722 (ip->i_mount)->m_fsname,
723 (long long)ip->i_ino,
724 (unsigned long long)ret_imap->br_startblock,
725 (unsigned long long)ret_imap->br_startoff,
726 (unsigned long long)ret_imap->br_blockcount,
727 ret_imap->br_state);
728 }
729 731
730 *ret_imap = imap[0]; 732 *ret_imap = imap[0];
731 *nmaps = 1; 733 *nmaps = 1;
@@ -853,24 +855,10 @@ xfs_iomap_write_allocate(
853 * See if we were able to allocate an extent that 855 * See if we were able to allocate an extent that
854 * covers at least part of the callers request 856 * covers at least part of the callers request
855 */ 857 */
856
857 for (i = 0; i < nimaps; i++) { 858 for (i = 0; i < nimaps; i++) {
858 if (!(io->io_flags & XFS_IOCORE_RT) && 859 if (unlikely(!imap[i].br_startblock &&
859 !imap[i].br_startblock) { 860 !(io->io_flags & XFS_IOCORE_RT)))
860 cmn_err(CE_PANIC,"Access to block zero: " 861 return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
861 "fs <%s> inode: %lld "
862 "start_block : %llx start_off : %llx "
863 "blkcnt : %llx extent-state : %x \n",
864 (ip->i_mount)->m_fsname,
865 (long long)ip->i_ino,
866 (unsigned long long)
867 imap[i].br_startblock,
868 (unsigned long long)
869 imap[i].br_startoff,
870 (unsigned long long)
871 imap[i].br_blockcount,
872 imap[i].br_state);
873 }
874 if ((offset_fsb >= imap[i].br_startoff) && 862 if ((offset_fsb >= imap[i].br_startoff) &&
875 (offset_fsb < (imap[i].br_startoff + 863 (offset_fsb < (imap[i].br_startoff +
876 imap[i].br_blockcount))) { 864 imap[i].br_blockcount))) {
@@ -941,7 +929,7 @@ xfs_iomap_write_unwritten(
941 XFS_WRITE_LOG_COUNT); 929 XFS_WRITE_LOG_COUNT);
942 if (error) { 930 if (error) {
943 xfs_trans_cancel(tp, 0); 931 xfs_trans_cancel(tp, 0);
944 goto error0; 932 return XFS_ERROR(error);
945 } 933 }
946 934
947 xfs_ilock(ip, XFS_ILOCK_EXCL); 935 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -967,19 +955,11 @@ xfs_iomap_write_unwritten(
967 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 955 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
968 xfs_iunlock(ip, XFS_ILOCK_EXCL); 956 xfs_iunlock(ip, XFS_ILOCK_EXCL);
969 if (error) 957 if (error)
970 goto error0; 958 return XFS_ERROR(error);
971 959
972 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) { 960 if (unlikely(!imap.br_startblock &&
973 cmn_err(CE_PANIC,"Access to block zero: fs <%s> " 961 !(io->io_flags & XFS_IOCORE_RT)))
974 "inode: %lld start_block : %llx start_off : " 962 return xfs_cmn_err_fsblock_zero(ip, &imap);
975 "%llx blkcnt : %llx extent-state : %x \n",
976 (ip->i_mount)->m_fsname,
977 (long long)ip->i_ino,
978 (unsigned long long)imap.br_startblock,
979 (unsigned long long)imap.br_startoff,
980 (unsigned long long)imap.br_blockcount,
981 imap.br_state);
982 }
983 963
984 if ((numblks_fsb = imap.br_blockcount) == 0) { 964 if ((numblks_fsb = imap.br_blockcount) == 0) {
985 /* 965 /*
@@ -999,6 +979,5 @@ error_on_bmapi_transaction:
999 xfs_bmap_cancel(&free_list); 979 xfs_bmap_cancel(&free_list);
1000 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); 980 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
1001 xfs_iunlock(ip, XFS_ILOCK_EXCL); 981 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1002error0:
1003 return XFS_ERROR(error); 982 return XFS_ERROR(error);
1004} 983}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 46249e4d1fea..7775ddc0b3c6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,6 +39,16 @@
39#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_btree.h" 40#include "xfs_btree.h"
41 41
42int
43xfs_internal_inum(
44 xfs_mount_t *mp,
45 xfs_ino_t ino)
46{
47 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
48 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50}
51
42STATIC int 52STATIC int
43xfs_bulkstat_one_iget( 53xfs_bulkstat_one_iget(
44 xfs_mount_t *mp, /* mount point for filesystem */ 54 xfs_mount_t *mp, /* mount point for filesystem */
@@ -52,7 +62,8 @@ xfs_bulkstat_one_iget(
52 bhv_vnode_t *vp; 62 bhv_vnode_t *vp;
53 int error; 63 int error;
54 64
55 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); 65 error = xfs_iget(mp, NULL, ino,
66 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
56 if (error) { 67 if (error) {
57 *stat = BULKSTAT_RV_NOTHING; 68 *stat = BULKSTAT_RV_NOTHING;
58 return error; 69 return error;
@@ -212,17 +223,12 @@ xfs_bulkstat_one(
212 xfs_dinode_t *dip; /* dinode inode pointer */ 223 xfs_dinode_t *dip; /* dinode inode pointer */
213 224
214 dip = (xfs_dinode_t *)dibuff; 225 dip = (xfs_dinode_t *)dibuff;
226 *stat = BULKSTAT_RV_NOTHING;
215 227
216 if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || 228 if (!buffer || xfs_internal_inum(mp, ino))
217 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
218 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
219 *stat = BULKSTAT_RV_NOTHING;
220 return XFS_ERROR(EINVAL); 229 return XFS_ERROR(EINVAL);
221 } 230 if (ubsize < sizeof(*buf))
222 if (ubsize < sizeof(*buf)) {
223 *stat = BULKSTAT_RV_NOTHING;
224 return XFS_ERROR(ENOMEM); 231 return XFS_ERROR(ENOMEM);
225 }
226 232
227 buf = kmem_alloc(sizeof(*buf), KM_SLEEP); 233 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
228 234
@@ -238,8 +244,7 @@ xfs_bulkstat_one(
238 } 244 }
239 245
240 if (copy_to_user(buffer, buf, sizeof(*buf))) { 246 if (copy_to_user(buffer, buf, sizeof(*buf))) {
241 *stat = BULKSTAT_RV_NOTHING; 247 error = EFAULT;
242 error = EFAULT;
243 goto out_free; 248 goto out_free;
244 } 249 }
245 250
@@ -253,6 +258,46 @@ xfs_bulkstat_one(
253} 258}
254 259
255/* 260/*
261 * Test to see whether we can use the ondisk inode directly, based
262 * on the given bulkstat flags, filling in dipp accordingly.
263 * Returns zero if the inode is dodgey.
264 */
265STATIC int
266xfs_bulkstat_use_dinode(
267 xfs_mount_t *mp,
268 int flags,
269 xfs_buf_t *bp,
270 int clustidx,
271 xfs_dinode_t **dipp)
272{
273 xfs_dinode_t *dip;
274 unsigned int aformat;
275
276 *dipp = NULL;
277 if (!bp || (flags & BULKSTAT_FG_IGET))
278 return 1;
279 dip = (xfs_dinode_t *)
280 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
281 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC ||
282 !XFS_DINODE_GOOD_VERSION(
283 INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
284 return 0;
285 if (flags & BULKSTAT_FG_QUICK) {
286 *dipp = dip;
287 return 1;
288 }
289 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
290 aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT);
291 if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
292 (aformat == XFS_DINODE_FMT_LOCAL) ||
293 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
294 *dipp = dip;
295 return 1;
296 }
297 return 1;
298}
299
300/*
256 * Return stat information in bulk (by-inode) for the filesystem. 301 * Return stat information in bulk (by-inode) for the filesystem.
257 */ 302 */
258int /* error status */ 303int /* error status */
@@ -284,10 +329,11 @@ xfs_bulkstat(
284 xfs_agino_t gino; /* current btree rec's start inode */ 329 xfs_agino_t gino; /* current btree rec's start inode */
285 int i; /* loop index */ 330 int i; /* loop index */
286 int icount; /* count of inodes good in irbuf */ 331 int icount; /* count of inodes good in irbuf */
332 size_t irbsize; /* size of irec buffer in bytes */
287 xfs_ino_t ino; /* inode number (filesystem) */ 333 xfs_ino_t ino; /* inode number (filesystem) */
288 xfs_inobt_rec_t *irbp; /* current irec buffer pointer */ 334 xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
289 xfs_inobt_rec_t *irbuf; /* start of irec buffer */ 335 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
290 xfs_inobt_rec_t *irbufend; /* end of good irec buffer entries */ 336 xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
291 xfs_ino_t lastino=0; /* last inode number returned */ 337 xfs_ino_t lastino=0; /* last inode number returned */
292 int nbcluster; /* # of blocks in a cluster */ 338 int nbcluster; /* # of blocks in a cluster */
293 int nicluster; /* # of inodes in a cluster */ 339 int nicluster; /* # of inodes in a cluster */
@@ -328,13 +374,10 @@ xfs_bulkstat(
328 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); 374 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
329 nimask = ~(nicluster - 1); 375 nimask = ~(nicluster - 1);
330 nbcluster = nicluster >> mp->m_sb.sb_inopblog; 376 nbcluster = nicluster >> mp->m_sb.sb_inopblog;
331 /* 377 irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4,
332 * Allocate a page-sized buffer for inode btree records. 378 KM_SLEEP | KM_MAYFAIL | KM_LARGE);
333 * We could try allocating something smaller, but for normal 379 nirbuf = irbsize / sizeof(*irbuf);
334 * calls we'll always (potentially) need the whole page. 380
335 */
336 irbuf = kmem_alloc(NBPC, KM_SLEEP);
337 nirbuf = NBPC / sizeof(*irbuf);
338 /* 381 /*
339 * Loop over the allocation groups, starting from the last 382 * Loop over the allocation groups, starting from the last
340 * inode returned; 0 means start of the allocation group. 383 * inode returned; 0 means start of the allocation group.
@@ -358,7 +401,7 @@ xfs_bulkstat(
358 * Allocate and initialize a btree cursor for ialloc btree. 401 * Allocate and initialize a btree cursor for ialloc btree.
359 */ 402 */
360 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, 403 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
361 (xfs_inode_t *)0, 0); 404 (xfs_inode_t *)0, 0);
362 irbp = irbuf; 405 irbp = irbuf;
363 irbufend = irbuf + nirbuf; 406 irbufend = irbuf + nirbuf;
364 end_of_ag = 0; 407 end_of_ag = 0;
@@ -395,9 +438,9 @@ xfs_bulkstat(
395 gcnt++; 438 gcnt++;
396 } 439 }
397 gfree |= XFS_INOBT_MASKN(0, chunkidx); 440 gfree |= XFS_INOBT_MASKN(0, chunkidx);
398 INT_SET(irbp->ir_startino, ARCH_CONVERT, gino); 441 irbp->ir_startino = gino;
399 INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt); 442 irbp->ir_freecount = gcnt;
400 INT_SET(irbp->ir_free, ARCH_CONVERT, gfree); 443 irbp->ir_free = gfree;
401 irbp++; 444 irbp++;
402 agino = gino + XFS_INODES_PER_CHUNK; 445 agino = gino + XFS_INODES_PER_CHUNK;
403 icount = XFS_INODES_PER_CHUNK - gcnt; 446 icount = XFS_INODES_PER_CHUNK - gcnt;
@@ -451,11 +494,27 @@ xfs_bulkstat(
451 } 494 }
452 /* 495 /*
453 * If this chunk has any allocated inodes, save it. 496 * If this chunk has any allocated inodes, save it.
497 * Also start read-ahead now for this chunk.
454 */ 498 */
455 if (gcnt < XFS_INODES_PER_CHUNK) { 499 if (gcnt < XFS_INODES_PER_CHUNK) {
456 INT_SET(irbp->ir_startino, ARCH_CONVERT, gino); 500 /*
457 INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt); 501 * Loop over all clusters in the next chunk.
458 INT_SET(irbp->ir_free, ARCH_CONVERT, gfree); 502 * Do a readahead if there are any allocated
503 * inodes in that cluster.
504 */
505 for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
506 chunkidx = 0;
507 chunkidx < XFS_INODES_PER_CHUNK;
508 chunkidx += nicluster,
509 agbno += nbcluster) {
510 if (XFS_INOBT_MASKN(chunkidx,
511 nicluster) & ~gfree)
512 xfs_btree_reada_bufs(mp, agno,
513 agbno, nbcluster);
514 }
515 irbp->ir_startino = gino;
516 irbp->ir_freecount = gcnt;
517 irbp->ir_free = gfree;
459 irbp++; 518 irbp++;
460 icount += XFS_INODES_PER_CHUNK - gcnt; 519 icount += XFS_INODES_PER_CHUNK - gcnt;
461 } 520 }
@@ -479,33 +538,11 @@ xfs_bulkstat(
479 for (irbp = irbuf; 538 for (irbp = irbuf;
480 irbp < irbufend && ubleft >= statstruct_size; irbp++) { 539 irbp < irbufend && ubleft >= statstruct_size; irbp++) {
481 /* 540 /*
482 * Read-ahead the next chunk's worth of inodes.
483 */
484 if (&irbp[1] < irbufend) {
485 /*
486 * Loop over all clusters in the next chunk.
487 * Do a readahead if there are any allocated
488 * inodes in that cluster.
489 */
490 for (agbno = XFS_AGINO_TO_AGBNO(mp,
491 INT_GET(irbp[1].ir_startino, ARCH_CONVERT)),
492 chunkidx = 0;
493 chunkidx < XFS_INODES_PER_CHUNK;
494 chunkidx += nicluster,
495 agbno += nbcluster) {
496 if (XFS_INOBT_MASKN(chunkidx,
497 nicluster) &
498 ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT)))
499 xfs_btree_reada_bufs(mp, agno,
500 agbno, nbcluster);
501 }
502 }
503 /*
504 * Now process this chunk of inodes. 541 * Now process this chunk of inodes.
505 */ 542 */
506 for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0; 543 for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
507 ubleft > 0 && 544 ubleft > 0 &&
508 INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK; 545 irbp->ir_freecount < XFS_INODES_PER_CHUNK;
509 chunkidx++, clustidx++, agino++) { 546 chunkidx++, clustidx++, agino++) {
510 ASSERT(chunkidx < XFS_INODES_PER_CHUNK); 547 ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
511 /* 548 /*
@@ -525,11 +562,12 @@ xfs_bulkstat(
525 */ 562 */
526 if ((chunkidx & (nicluster - 1)) == 0) { 563 if ((chunkidx & (nicluster - 1)) == 0) {
527 agbno = XFS_AGINO_TO_AGBNO(mp, 564 agbno = XFS_AGINO_TO_AGBNO(mp,
528 INT_GET(irbp->ir_startino, ARCH_CONVERT)) + 565 irbp->ir_startino) +
529 ((chunkidx & nimask) >> 566 ((chunkidx & nimask) >>
530 mp->m_sb.sb_inopblog); 567 mp->m_sb.sb_inopblog);
531 568
532 if (flags & BULKSTAT_FG_QUICK) { 569 if (flags & (BULKSTAT_FG_QUICK |
570 BULKSTAT_FG_INLINE)) {
533 ino = XFS_AGINO_TO_INO(mp, agno, 571 ino = XFS_AGINO_TO_INO(mp, agno,
534 agino); 572 agino);
535 bno = XFS_AGB_TO_DADDR(mp, agno, 573 bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -543,6 +581,7 @@ xfs_bulkstat(
543 KM_SLEEP); 581 KM_SLEEP);
544 ip->i_ino = ino; 582 ip->i_ino = ino;
545 ip->i_mount = mp; 583 ip->i_mount = mp;
584 spin_lock_init(&ip->i_flags_lock);
546 if (bp) 585 if (bp)
547 xfs_buf_relse(bp); 586 xfs_buf_relse(bp);
548 error = xfs_itobp(mp, NULL, ip, 587 error = xfs_itobp(mp, NULL, ip,
@@ -564,30 +603,34 @@ xfs_bulkstat(
564 /* 603 /*
565 * Skip if this inode is free. 604 * Skip if this inode is free.
566 */ 605 */
567 if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT)) 606 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
568 continue; 607 continue;
569 /* 608 /*
570 * Count used inodes as free so we can tell 609 * Count used inodes as free so we can tell
571 * when the chunk is used up. 610 * when the chunk is used up.
572 */ 611 */
573 INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1); 612 irbp->ir_freecount++;
574 ino = XFS_AGINO_TO_INO(mp, agno, agino); 613 ino = XFS_AGINO_TO_INO(mp, agno, agino);
575 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); 614 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
576 if (flags & BULKSTAT_FG_QUICK) { 615 if (!xfs_bulkstat_use_dinode(mp, flags, bp,
577 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 616 clustidx, &dip))
578 (clustidx << mp->m_sb.sb_inodelog)); 617 continue;
579 618 /*
580 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) 619 * If we need to do an iget, cannot hold bp.
581 != XFS_DINODE_MAGIC 620 * Drop it, until starting the next cluster.
582 || !XFS_DINODE_GOOD_VERSION( 621 */
583 INT_GET(dip->di_core.di_version, ARCH_CONVERT))) 622 if ((flags & BULKSTAT_FG_INLINE) && !dip) {
584 continue; 623 if (bp)
624 xfs_buf_relse(bp);
625 bp = NULL;
585 } 626 }
586 627
587 /* 628 /*
588 * Get the inode and fill in a single buffer. 629 * Get the inode and fill in a single buffer.
589 * BULKSTAT_FG_QUICK uses dip to fill it in. 630 * BULKSTAT_FG_QUICK uses dip to fill it in.
590 * BULKSTAT_FG_IGET uses igets. 631 * BULKSTAT_FG_IGET uses igets.
632 * BULKSTAT_FG_INLINE uses dip if we have an
633 * inline attr fork, else igets.
591 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. 634 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
592 * This is also used to count inodes/blks, etc 635 * This is also used to count inodes/blks, etc
593 * in xfs_qm_quotacheck. 636 * in xfs_qm_quotacheck.
@@ -597,8 +640,15 @@ xfs_bulkstat(
597 ubleft, private_data, 640 ubleft, private_data,
598 bno, &ubused, dip, &fmterror); 641 bno, &ubused, dip, &fmterror);
599 if (fmterror == BULKSTAT_RV_NOTHING) { 642 if (fmterror == BULKSTAT_RV_NOTHING) {
600 if (error == ENOMEM) 643 if (error == EFAULT) {
644 ubleft = 0;
645 rval = error;
646 break;
647 }
648 else if (error == ENOMEM)
601 ubleft = 0; 649 ubleft = 0;
650 else
651 lastino = ino;
602 continue; 652 continue;
603 } 653 }
604 if (fmterror == BULKSTAT_RV_GIVEUP) { 654 if (fmterror == BULKSTAT_RV_GIVEUP) {
@@ -633,7 +683,7 @@ xfs_bulkstat(
633 /* 683 /*
634 * Done, we're either out of filesystem or space to put the data. 684 * Done, we're either out of filesystem or space to put the data.
635 */ 685 */
636 kmem_free(irbuf, NBPC); 686 kmem_free(irbuf, irbsize);
637 *ubcountp = ubelem; 687 *ubcountp = ubelem;
638 if (agno >= mp->m_sb.sb_agcount) { 688 if (agno >= mp->m_sb.sb_agcount) {
639 /* 689 /*
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index be5f12e07d22..f25a28862a17 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -36,15 +36,16 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
36/* 36/*
37 * Values for stat return value. 37 * Values for stat return value.
38 */ 38 */
39#define BULKSTAT_RV_NOTHING 0 39#define BULKSTAT_RV_NOTHING 0
40#define BULKSTAT_RV_DIDONE 1 40#define BULKSTAT_RV_DIDONE 1
41#define BULKSTAT_RV_GIVEUP 2 41#define BULKSTAT_RV_GIVEUP 2
42 42
43/* 43/*
44 * Values for bulkstat flag argument. 44 * Values for bulkstat flag argument.
45 */ 45 */
46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ 46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ 47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
48#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
48 49
49/* 50/*
50 * Return stat information in bulk (by-inode) for the filesystem. 51 * Return stat information in bulk (by-inode) for the filesystem.
@@ -80,6 +81,11 @@ xfs_bulkstat_one(
80 void *dibuff, 81 void *dibuff,
81 int *stat); 82 int *stat);
82 83
84int
85xfs_internal_inum(
86 xfs_mount_t *mp,
87 xfs_ino_t ino);
88
83int /* error status */ 89int /* error status */
84xfs_inumbers( 90xfs_inumbers(
85 xfs_mount_t *mp, /* mount point for filesystem */ 91 xfs_mount_t *mp, /* mount point for filesystem */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 21ac1a67e3e0..c48bf61f17bd 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -617,7 +617,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
617 reg[0].i_len = sizeof(magic); 617 reg[0].i_len = sizeof(magic);
618 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); 618 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
619 619
620 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); 620 error = xfs_log_reserve(mp, 600, 1, &tic,
621 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
621 if (!error) { 622 if (!error) {
622 /* remove inited flag */ 623 /* remove inited flag */
623 ((xlog_ticket_t *)tic)->t_flags = 0; 624 ((xlog_ticket_t *)tic)->t_flags = 0;
@@ -655,8 +656,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
655 } else { 656 } else {
656 LOG_UNLOCK(log, s); 657 LOG_UNLOCK(log, s);
657 } 658 }
658 if (tic) 659 if (tic) {
660 xlog_trace_loggrant(log, tic, "unmount rec");
661 xlog_ungrant_log_space(log, tic);
659 xlog_state_put_ticket(log, tic); 662 xlog_state_put_ticket(log, tic);
663 }
660 } else { 664 } else {
661 /* 665 /*
662 * We're already in forced_shutdown mode, couldn't 666 * We're already in forced_shutdown mode, couldn't
@@ -1196,7 +1200,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1196 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); 1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
1197 iclog = *iclogp; 1201 iclog = *iclogp;
1198 iclog->hic_data = (xlog_in_core_2_t *) 1202 iclog->hic_data = (xlog_in_core_2_t *)
1199 kmem_zalloc(iclogsize, KM_SLEEP); 1203 kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
1200 1204
1201 iclog->ic_prev = prev_iclog; 1205 iclog->ic_prev = prev_iclog;
1202 prev_iclog = iclog; 1206 prev_iclog = iclog;
@@ -2212,9 +2216,13 @@ xlog_state_do_callback(
2212 2216
2213 iclog = iclog->ic_next; 2217 iclog = iclog->ic_next;
2214 } while (first_iclog != iclog); 2218 } while (first_iclog != iclog);
2215 if (repeats && (repeats % 10) == 0) { 2219
2220 if (repeats > 5000) {
2221 flushcnt += repeats;
2222 repeats = 0;
2216 xfs_fs_cmn_err(CE_WARN, log->l_mp, 2223 xfs_fs_cmn_err(CE_WARN, log->l_mp,
2217 "xlog_state_do_callback: looping %d", repeats); 2224 "%s: possible infinite loop (%d iterations)",
2225 __FUNCTION__, flushcnt);
2218 } 2226 }
2219 } while (!ioerrors && loopdidcallbacks); 2227 } while (!ioerrors && loopdidcallbacks);
2220 2228
@@ -2246,6 +2254,7 @@ xlog_state_do_callback(
2246 } 2254 }
2247#endif 2255#endif
2248 2256
2257 flushcnt = 0;
2249 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { 2258 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
2250 flushcnt = log->l_flushcnt; 2259 flushcnt = log->l_flushcnt;
2251 log->l_flushcnt = 0; 2260 log->l_flushcnt = 0;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index eacb3d4987f2..ebbe93f4f97b 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -48,16 +48,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
48 */ 48 */
49 49
50/* 50/*
51 * Flags to xfs_log_mount
52 */
53#define XFS_LOG_RECOVER 0x1
54
55/*
56 * Flags to xfs_log_done() 51 * Flags to xfs_log_done()
57 */ 52 */
58#define XFS_LOG_REL_PERM_RESERV 0x1 53#define XFS_LOG_REL_PERM_RESERV 0x1
59 54
60
61/* 55/*
62 * Flags to xfs_log_reserve() 56 * Flags to xfs_log_reserve()
63 * 57 *
@@ -70,8 +64,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
70#define XFS_LOG_SLEEP 0x0 64#define XFS_LOG_SLEEP 0x0
71#define XFS_LOG_NOSLEEP 0x1 65#define XFS_LOG_NOSLEEP 0x1
72#define XFS_LOG_PERM_RESERV 0x2 66#define XFS_LOG_PERM_RESERV 0x2
73#define XFS_LOG_RESV_ALL (XFS_LOG_NOSLEEP|XFS_LOG_PERM_RESERV)
74
75 67
76/* 68/*
77 * Flags to xfs_log_force() 69 * Flags to xfs_log_force()
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 34bcbf50789c..9bd3cdf11a87 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -32,7 +32,6 @@ struct xfs_mount;
32#define XLOG_MIN_ICLOGS 2 32#define XLOG_MIN_ICLOGS 2
33#define XLOG_MED_ICLOGS 4 33#define XLOG_MED_ICLOGS 4
34#define XLOG_MAX_ICLOGS 8 34#define XLOG_MAX_ICLOGS 8
35#define XLOG_CALLBACK_SIZE 10
36#define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */ 35#define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */
37#define XLOG_VERSION_1 1 36#define XLOG_VERSION_1 1
38#define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */ 37#define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */
@@ -149,9 +148,6 @@ struct xfs_mount;
149#define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */ 148#define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */
150#define XLOG_END_TRANS 0x10 /* End a continued transaction */ 149#define XLOG_END_TRANS 0x10 /* End a continued transaction */
151#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */ 150#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */
152#define XLOG_SKIP_TRANS (XLOG_COMMIT_TRANS | XLOG_CONTINUE_TRANS | \
153 XLOG_WAS_CONT_TRANS | XLOG_END_TRANS | \
154 XLOG_UNMOUNT_TRANS)
155 151
156#ifdef __KERNEL__ 152#ifdef __KERNEL__
157/* 153/*
@@ -506,6 +502,12 @@ extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
506#define XLOG_TRACE_SLEEP_FLUSH 3 502#define XLOG_TRACE_SLEEP_FLUSH 3
507#define XLOG_TRACE_WAKE_FLUSH 4 503#define XLOG_TRACE_WAKE_FLUSH 4
508 504
505/*
506 * Unmount record type is used as a pseudo transaction type for the ticket.
507 * It's value must be outside the range of XFS_TRANS_* values.
508 */
509#define XLOG_UNMOUNT_REC_TYPE (-1U)
510
509#endif /* __KERNEL__ */ 511#endif /* __KERNEL__ */
510 512
511#endif /* __XFS_LOG_PRIV_H__ */ 513#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b2bd4be4200a..e5f396ff9a3d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -331,7 +331,7 @@ typedef struct xfs_mount {
331 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ 331 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
332 lock_t m_agirotor_lock;/* .. and lock protecting it */ 332 lock_t m_agirotor_lock;/* .. and lock protecting it */
333 xfs_agnumber_t m_maxagi; /* highest inode alloc group */ 333 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
334 uint m_ihsize; /* size of next field */ 334 size_t m_ihsize; /* size of next field */
335 struct xfs_ihash *m_ihash; /* fs private inode hash table*/ 335 struct xfs_ihash *m_ihash; /* fs private inode hash table*/
336 struct xfs_inode *m_inodes; /* active inode list */ 336 struct xfs_inode *m_inodes; /* active inode list */
337 struct list_head m_del_inodes; /* inodes to reclaim */ 337 struct list_head m_del_inodes; /* inodes to reclaim */
@@ -541,7 +541,8 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp)
541#define XFS_VFSTOM(vfs) xfs_vfstom(vfs) 541#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
542static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs) 542static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs)
543{ 543{
544 return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops)); 544 return XFS_BHVTOM(bhv_lookup_range(VFS_BHVHEAD(vfs),
545 VFS_POSITION_XFS, VFS_POSITION_XFS));
545} 546}
546 547
547#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) 548#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d)
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index acb853b33ebb..9dcb32aa4e2e 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -281,8 +281,6 @@ typedef struct xfs_qoff_logformat {
281 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ 281 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
282 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ 282 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\
283 XFS_GQUOTA_ACCT) 283 XFS_GQUOTA_ACCT)
284#define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
285 XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
286 284
287 285
288/* 286/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5a0b678956e0..880c73271c05 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1948,7 +1948,7 @@ xfs_growfs_rt(
1948 */ 1948 */
1949 nrextents = nrblocks; 1949 nrextents = nrblocks;
1950 do_div(nrextents, in->extsize); 1950 do_div(nrextents, in->extsize);
1951 nrbmblocks = roundup_64(nrextents, NBBY * sbp->sb_blocksize); 1951 nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
1952 nrextslog = xfs_highbit32(nrextents); 1952 nrextslog = xfs_highbit32(nrextents);
1953 nrsumlevels = nrextslog + 1; 1953 nrsumlevels = nrextslog + 1;
1954 nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; 1954 nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
@@ -1976,7 +1976,10 @@ xfs_growfs_rt(
1976 if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, 1976 if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks,
1977 mp->m_sb.sb_rsumino))) 1977 mp->m_sb.sb_rsumino)))
1978 return error; 1978 return error;
1979 nmp = NULL; 1979 /*
1980 * Allocate a new (fake) mount/sb.
1981 */
1982 nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP);
1980 /* 1983 /*
1981 * Loop over the bitmap blocks. 1984 * Loop over the bitmap blocks.
1982 * We will do everything one bitmap block at a time. 1985 * We will do everything one bitmap block at a time.
@@ -1987,10 +1990,6 @@ xfs_growfs_rt(
1987 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); 1990 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
1988 bmbno < nrbmblocks; 1991 bmbno < nrbmblocks;
1989 bmbno++) { 1992 bmbno++) {
1990 /*
1991 * Allocate a new (fake) mount/sb.
1992 */
1993 nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP);
1994 *nmp = *mp; 1993 *nmp = *mp;
1995 nsbp = &nmp->m_sb; 1994 nsbp = &nmp->m_sb;
1996 /* 1995 /*
@@ -2018,13 +2017,13 @@ xfs_growfs_rt(
2018 cancelflags = 0; 2017 cancelflags = 0;
2019 if ((error = xfs_trans_reserve(tp, 0, 2018 if ((error = xfs_trans_reserve(tp, 0,
2020 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) 2019 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
2021 goto error_exit; 2020 break;
2022 /* 2021 /*
2023 * Lock out other callers by grabbing the bitmap inode lock. 2022 * Lock out other callers by grabbing the bitmap inode lock.
2024 */ 2023 */
2025 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2024 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2026 XFS_ILOCK_EXCL, &ip))) 2025 XFS_ILOCK_EXCL, &ip)))
2027 goto error_exit; 2026 break;
2028 ASSERT(ip == mp->m_rbmip); 2027 ASSERT(ip == mp->m_rbmip);
2029 /* 2028 /*
2030 * Update the bitmap inode's size. 2029 * Update the bitmap inode's size.
@@ -2038,7 +2037,7 @@ xfs_growfs_rt(
2038 */ 2037 */
2039 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 2038 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0,
2040 XFS_ILOCK_EXCL, &ip))) 2039 XFS_ILOCK_EXCL, &ip)))
2041 goto error_exit; 2040 break;
2042 ASSERT(ip == mp->m_rsumip); 2041 ASSERT(ip == mp->m_rsumip);
2043 /* 2042 /*
2044 * Update the summary inode's size. 2043 * Update the summary inode's size.
@@ -2053,7 +2052,7 @@ xfs_growfs_rt(
2053 mp->m_rsumlevels != nmp->m_rsumlevels) { 2052 mp->m_rsumlevels != nmp->m_rsumlevels) {
2054 error = xfs_rtcopy_summary(mp, nmp, tp); 2053 error = xfs_rtcopy_summary(mp, nmp, tp);
2055 if (error) 2054 if (error)
2056 goto error_exit; 2055 break;
2057 } 2056 }
2058 /* 2057 /*
2059 * Update superblock fields. 2058 * Update superblock fields.
@@ -2080,18 +2079,13 @@ xfs_growfs_rt(
2080 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, 2079 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
2081 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); 2080 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
2082 if (error) 2081 if (error)
2083 goto error_exit; 2082 break;
2084 /* 2083 /*
2085 * Mark more blocks free in the superblock. 2084 * Mark more blocks free in the superblock.
2086 */ 2085 */
2087 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, 2086 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS,
2088 nsbp->sb_rextents - sbp->sb_rextents); 2087 nsbp->sb_rextents - sbp->sb_rextents);
2089 /* 2088 /*
2090 * Free the fake mp structure.
2091 */
2092 kmem_free(nmp, sizeof(*nmp));
2093 nmp = NULL;
2094 /*
2095 * Update mp values into the real mp structure. 2089 * Update mp values into the real mp structure.
2096 */ 2090 */
2097 mp->m_rsumlevels = nrsumlevels; 2091 mp->m_rsumlevels = nrsumlevels;
@@ -2101,15 +2095,15 @@ xfs_growfs_rt(
2101 */ 2095 */
2102 xfs_trans_commit(tp, 0, NULL); 2096 xfs_trans_commit(tp, 0, NULL);
2103 } 2097 }
2104 return 0; 2098
2099 if (error)
2100 xfs_trans_cancel(tp, cancelflags);
2105 2101
2106 /* 2102 /*
2107 * Error paths come here. 2103 * Free the fake mp structure.
2108 */ 2104 */
2109error_exit: 2105 kmem_free(nmp, sizeof(*nmp));
2110 if (nmp) 2106
2111 kmem_free(nmp, sizeof(*nmp));
2112 xfs_trans_cancel(tp, cancelflags);
2113 return error; 2107 return error;
2114} 2108}
2115 2109
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index bf168a91ddb8..467854b45c8f 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -60,10 +60,6 @@ struct xfs_mount;
60 XFS_SB_VERSION_LOGV2BIT | \ 60 XFS_SB_VERSION_LOGV2BIT | \
61 XFS_SB_VERSION_SECTORBIT | \ 61 XFS_SB_VERSION_SECTORBIT | \
62 XFS_SB_VERSION_MOREBITSBIT) 62 XFS_SB_VERSION_MOREBITSBIT)
63#define XFS_SB_VERSION_OKSASHBITS \
64 (XFS_SB_VERSION_NUMBITS | \
65 XFS_SB_VERSION_REALFBITS | \
66 XFS_SB_VERSION_OKSASHFBITS)
67#define XFS_SB_VERSION_OKREALBITS \ 63#define XFS_SB_VERSION_OKREALBITS \
68 (XFS_SB_VERSION_NUMBITS | \ 64 (XFS_SB_VERSION_NUMBITS | \
69 XFS_SB_VERSION_OKREALFBITS | \ 65 XFS_SB_VERSION_OKREALFBITS | \
@@ -81,9 +77,6 @@ struct xfs_mount;
81#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002 77#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002
82#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 78#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
83#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 79#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
84#define XFS_SB_VERSION2_SASHFBITS 0xff000000 /* Mask: features that
85 require changing
86 PROM and SASH */
87 80
88#define XFS_SB_VERSION2_OKREALFBITS \ 81#define XFS_SB_VERSION2_OKREALFBITS \
89 (XFS_SB_VERSION2_ATTR2BIT) 82 (XFS_SB_VERSION2_ATTR2BIT)
@@ -238,12 +231,6 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
238} 231}
239#endif /* __KERNEL__ */ 232#endif /* __KERNEL__ */
240 233
241#define XFS_SB_GOOD_SASH_VERSION(sbp) \
242 ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
243 ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
244 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
245 !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS)))
246
247#define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v) 234#define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v)
248static inline unsigned xfs_sb_version_tonew(unsigned v) 235static inline unsigned xfs_sb_version_tonew(unsigned v)
249{ 236{
@@ -461,15 +448,6 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
461 * File system sector to basic block conversions. 448 * File system sector to basic block conversions.
462 */ 449 */
463#define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log) 450#define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log)
464#define XFS_BB_TO_FSS(mp,bb) \
465 (((bb) + (XFS_FSS_TO_BB(mp,1) - 1)) >> (mp)->m_sectbb_log)
466#define XFS_BB_TO_FSST(mp,bb) ((bb) >> (mp)->m_sectbb_log)
467
468/*
469 * File system sector to byte conversions.
470 */
471#define XFS_FSS_TO_B(mp,sectno) ((xfs_fsize_t)(sectno) << (mp)->m_sb.sb_sectlog)
472#define XFS_B_TO_FSST(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_sectlog)
473 451
474/* 452/*
475 * File system block to basic block conversions. 453 * File system block to basic block conversions.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9dc88b380608..c68e00105d23 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -149,7 +149,6 @@ typedef struct xfs_item_ops {
149 void (*iop_unlock)(xfs_log_item_t *); 149 void (*iop_unlock)(xfs_log_item_t *);
150 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 150 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
151 void (*iop_push)(xfs_log_item_t *); 151 void (*iop_push)(xfs_log_item_t *);
152 void (*iop_abort)(xfs_log_item_t *);
153 void (*iop_pushbuf)(xfs_log_item_t *); 152 void (*iop_pushbuf)(xfs_log_item_t *);
154 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 153 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
155} xfs_item_ops_t; 154} xfs_item_ops_t;
@@ -163,7 +162,6 @@ typedef struct xfs_item_ops {
163#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 162#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
164#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) 163#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
165#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) 164#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
166#define IOP_ABORT(ip) (*(ip)->li_ops->iop_abort)(ip)
167#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) 165#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
168#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) 166#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
169 167
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 558c87ff0c41..fc39b166d403 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -276,7 +276,7 @@ xfs_trans_update_ail(
276 xfs_mount_t *mp, 276 xfs_mount_t *mp,
277 xfs_log_item_t *lip, 277 xfs_log_item_t *lip,
278 xfs_lsn_t lsn, 278 xfs_lsn_t lsn,
279 unsigned long s) 279 unsigned long s) __releases(mp->m_ail_lock)
280{ 280{
281 xfs_ail_entry_t *ailp; 281 xfs_ail_entry_t *ailp;
282 xfs_log_item_t *dlip=NULL; 282 xfs_log_item_t *dlip=NULL;
@@ -328,7 +328,7 @@ void
328xfs_trans_delete_ail( 328xfs_trans_delete_ail(
329 xfs_mount_t *mp, 329 xfs_mount_t *mp,
330 xfs_log_item_t *lip, 330 xfs_log_item_t *lip,
331 unsigned long s) 331 unsigned long s) __releases(mp->m_ail_lock)
332{ 332{
333 xfs_ail_entry_t *ailp; 333 xfs_ail_entry_t *ailp;
334 xfs_log_item_t *dlip; 334 xfs_log_item_t *dlip;
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 13edab8a9e94..447ac4308c91 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -46,11 +46,13 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
46/* 46/*
47 * From xfs_trans_ail.c 47 * From xfs_trans_ail.c
48 */ 48 */
49void xfs_trans_update_ail(struct xfs_mount *, 49void xfs_trans_update_ail(struct xfs_mount *mp,
50 struct xfs_log_item *, xfs_lsn_t, 50 struct xfs_log_item *lip, xfs_lsn_t lsn,
51 unsigned long); 51 unsigned long s)
52void xfs_trans_delete_ail(struct xfs_mount *, 52 __releases(mp->m_ail_lock);
53 struct xfs_log_item *, unsigned long); 53void xfs_trans_delete_ail(struct xfs_mount *mp,
54 struct xfs_log_item *lip, unsigned long s)
55 __releases(mp->m_ail_lock);
54struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); 56struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
55struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, 57struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
56 struct xfs_log_item *, int *, int *); 58 struct xfs_log_item *, int *, int *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a34796e57afb..62336a4cc5a4 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1922,7 +1922,7 @@ xfs_showargs(
1922 } 1922 }
1923 1923
1924 if (mp->m_flags & XFS_MOUNT_IHASHSIZE) 1924 if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
1925 seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize); 1925 seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize);
1926 1926
1927 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) 1927 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
1928 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", 1928 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 23cfa5837728..061e2ffdd1de 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2366,10 +2366,15 @@ xfs_remove(
2366 2366
2367 namelen = VNAMELEN(dentry); 2367 namelen = VNAMELEN(dentry);
2368 2368
2369 if (!xfs_get_dir_entry(dentry, &ip)) {
2370 dm_di_mode = ip->i_d.di_mode;
2371 IRELE(ip);
2372 }
2373
2369 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 2374 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
2370 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2375 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
2371 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2376 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2372 name, NULL, 0, 0, 0); 2377 name, NULL, dm_di_mode, 0, 0);
2373 if (error) 2378 if (error)
2374 return error; 2379 return error;
2375 } 2380 }
@@ -2995,7 +3000,7 @@ xfs_rmdir(
2995 int cancel_flags; 3000 int cancel_flags;
2996 int committed; 3001 int committed;
2997 bhv_vnode_t *dir_vp; 3002 bhv_vnode_t *dir_vp;
2998 int dm_di_mode = 0; 3003 int dm_di_mode = S_IFDIR;
2999 int last_cdp_link; 3004 int last_cdp_link;
3000 int namelen; 3005 int namelen;
3001 uint resblks; 3006 uint resblks;
@@ -3010,11 +3015,16 @@ xfs_rmdir(
3010 return XFS_ERROR(EIO); 3015 return XFS_ERROR(EIO);
3011 namelen = VNAMELEN(dentry); 3016 namelen = VNAMELEN(dentry);
3012 3017
3018 if (!xfs_get_dir_entry(dentry, &cdp)) {
3019 dm_di_mode = cdp->i_d.di_mode;
3020 IRELE(cdp);
3021 }
3022
3013 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 3023 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
3014 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 3024 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
3015 dir_vp, DM_RIGHT_NULL, 3025 dir_vp, DM_RIGHT_NULL,
3016 NULL, DM_RIGHT_NULL, 3026 NULL, DM_RIGHT_NULL,
3017 name, NULL, 0, 0, 0); 3027 name, NULL, dm_di_mode, 0, 0);
3018 if (error) 3028 if (error)
3019 return XFS_ERROR(error); 3029 return XFS_ERROR(error);
3020 } 3030 }
@@ -3834,7 +3844,9 @@ xfs_reclaim(
3834 XFS_MOUNT_ILOCK(mp); 3844 XFS_MOUNT_ILOCK(mp);
3835 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3845 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
3836 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3846 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
3847 spin_lock(&ip->i_flags_lock);
3837 ip->i_flags |= XFS_IRECLAIMABLE; 3848 ip->i_flags |= XFS_IRECLAIMABLE;
3849 spin_unlock(&ip->i_flags_lock);
3838 XFS_MOUNT_IUNLOCK(mp); 3850 XFS_MOUNT_IUNLOCK(mp);
3839 } 3851 }
3840 return 0; 3852 return 0;
@@ -3859,8 +3871,10 @@ xfs_finish_reclaim(
3859 * us. 3871 * us.
3860 */ 3872 */
3861 write_lock(&ih->ih_lock); 3873 write_lock(&ih->ih_lock);
3874 spin_lock(&ip->i_flags_lock);
3862 if ((ip->i_flags & XFS_IRECLAIM) || 3875 if ((ip->i_flags & XFS_IRECLAIM) ||
3863 (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { 3876 (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
3877 spin_unlock(&ip->i_flags_lock);
3864 write_unlock(&ih->ih_lock); 3878 write_unlock(&ih->ih_lock);
3865 if (locked) { 3879 if (locked) {
3866 xfs_ifunlock(ip); 3880 xfs_ifunlock(ip);
@@ -3869,6 +3883,7 @@ xfs_finish_reclaim(
3869 return 1; 3883 return 1;
3870 } 3884 }
3871 ip->i_flags |= XFS_IRECLAIM; 3885 ip->i_flags |= XFS_IRECLAIM;
3886 spin_unlock(&ip->i_flags_lock);
3872 write_unlock(&ih->ih_lock); 3887 write_unlock(&ih->ih_lock);
3873 3888
3874 /* 3889 /*
@@ -4272,7 +4287,7 @@ xfs_free_file_space(
4272 xfs_mount_t *mp; 4287 xfs_mount_t *mp;
4273 int nimap; 4288 int nimap;
4274 uint resblks; 4289 uint resblks;
4275 int rounding; 4290 uint rounding;
4276 int rt; 4291 int rt;
4277 xfs_fileoff_t startoffset_fsb; 4292 xfs_fileoff_t startoffset_fsb;
4278 xfs_trans_t *tp; 4293 xfs_trans_t *tp;
@@ -4313,8 +4328,7 @@ xfs_free_file_space(
4313 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 4328 vn_iowait(vp); /* wait for the completion of any pending DIOs */
4314 } 4329 }
4315 4330
4316 rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), 4331 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP);
4317 (__uint8_t)NBPP);
4318 ilen = len + (offset & (rounding - 1)); 4332 ilen = len + (offset & (rounding - 1));
4319 ioffset = offset & ~(rounding - 1); 4333 ioffset = offset & ~(rounding - 1);
4320 if (ilen & (rounding - 1)) 4334 if (ilen & (rounding - 1))