aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jiri Kosina <jkosina@suse.cz> 2008-05-06 10:57:55 -0400
committer Jiri Kosina <jkosina@suse.cz> 2008-05-06 10:57:55 -0400
commit 7022b15e2a9f878fd5184586064c63352c3dd225 (patch)
tree 5365c2f5bc82ae1946636ee8d5cd5d3b7e804f1b /fs
parent aaad2b0c757f3e6e02552cb0bdcd91a5ec0d6305 (diff)
parent a15306365a16380f3bafee9e181ba01231d4acd7 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c7
-rw-r--r--fs/Kconfig182
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/adfs/adfs.h2
-rw-r--r--fs/adfs/dir_f.c4
-rw-r--r--fs/affs/file.c25
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/afs/afs_cm.h3
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/afs/cmservice.c133
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/proc.c33
-rw-r--r--fs/aio.c91
-rw-r--r--fs/anon_inodes.c13
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/expire.c26
-rw-r--r--fs/autofs4/root.c40
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/befs/linuxvfs.c6
-rw-r--r--fs/bfs/bfs.h2
-rw-r--r--fs/binfmt_aout.c12
-rw-r--r--fs/binfmt_elf.c53
-rw-r--r--fs/binfmt_elf_fdpic.c14
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/binfmt_flat.c9
-rw-r--r--fs/binfmt_misc.c24
-rw-r--r--fs/binfmt_script.c2
-rw-r--r--fs/binfmt_som.c10
-rw-r--r--fs/bio.c90
-rw-r--r--fs/buffer.c44
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/CHANGES3
-rw-r--r--fs/cifs/README9
-rw-r--r--fs/cifs/cifs_debug.c4
-rw-r--r--fs/cifs/cifs_dfs_ref.c31
-rw-r--r--fs/cifs/cifsacl.c14
-rw-r--r--fs/cifs/cifsacl.h1
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifspdu.h121
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c32
-rw-r--r--fs/cifs/connect.c1
-rw-r--r--fs/cifs/inode.c15
-rw-r--r--fs/cifs/transport.c18
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/compat.c15
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c114
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/devpts/inode.c43
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/dnotify.c11
-rw-r--r--fs/dquot.c109
-rw-r--r--fs/drop_caches.c14
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c33
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h102
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/keystore.c89
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ecryptfs/messaging.c524
-rw-r--r--fs/ecryptfs/miscdev.c598
-rw-r--r--fs/ecryptfs/mmap.c18
-rw-r--r--fs/ecryptfs/netlink.c33
-rw-r--r--fs/ecryptfs/read_write.c16
-rw-r--r--fs/eventfd.c15
-rw-r--r--fs/eventpoll.c57
-rw-r--r--fs/exec.c52
-rw-r--r--fs/exportfs/expfs.c10
-rw-r--r--fs/ext2/balloc.c29
-rw-r--r--fs/ext2/dir.c20
-rw-r--r--fs/ext2/ialloc.c12
-rw-r--r--fs/ext2/inode.c15
-rw-r--r--fs/ext2/super.c27
-rw-r--r--fs/ext2/xattr.c15
-rw-r--r--fs/ext2/xip.c53
-rw-r--r--fs/ext2/xip.h9
-rw-r--r--fs/ext3/balloc.c30
-rw-r--r--fs/ext3/ext3_jbd.c12
-rw-r--r--fs/ext3/fsync.c3
-rw-r--r--fs/ext3/ialloc.c6
-rw-r--r--fs/ext3/inode.c43
-rw-r--r--fs/ext3/namei.c37
-rw-r--r--fs/ext3/resize.c71
-rw-r--r--fs/ext3/super.c53
-rw-r--r--fs/ext3/xattr.c24
-rw-r--r--fs/ext3/xattr.h7
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/balloc.c33
-rw-r--r--fs/ext4/bitmap.c2
-rw-r--r--fs/ext4/dir.c4
-rw-r--r--fs/ext4/ext4.h1205
-rw-r--r--fs/ext4/ext4_extents.h232
-rw-r--r--fs/ext4/ext4_i.h167
-rw-r--r--fs/ext4/ext4_jbd2.c14
-rw-r--r--fs/ext4/ext4_jbd2.h231
-rw-r--r--fs/ext4/ext4_sb.h148
-rw-r--r--fs/ext4/extents.c354
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/ialloc.c44
-rw-r--r--fs/ext4/inode.c57
-rw-r--r--fs/ext4/ioctl.c16
-rw-r--r--fs/ext4/mballoc.c459
-rw-r--r--fs/ext4/mballoc.h304
-rw-r--r--fs/ext4/migrate.c43
-rw-r--r--fs/ext4/namei.c44
-rw-r--r--fs/ext4/resize.c83
-rw-r--r--fs/ext4/super.c81
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/ext4/xattr_security.c4
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/fat/cache.c6
-rw-r--r--fs/fat/dir.c52
-rw-r--r--fs/fat/fatent.c11
-rw-r--r--fs/fat/file.c206
-rw-r--r--fs/fat/inode.c46
-rw-r--r--fs/fcntl.c41
-rw-r--r--fs/file.c23
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/vxfs_extern.h5
-rw-r--r--fs/freevxfs/vxfs_immed.c1
-rw-r--r--fs/freevxfs/vxfs_inode.c5
-rw-r--r--fs/fs-writeback.c78
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dev.c23
-rw-r--r--fs/fuse/dir.c86
-rw-r--r--fs/fuse/file.c633
-rw-r--r--fs/fuse/fuse_i.h52
-rw-r--r--fs/fuse/inode.c95
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/ops_address.c2
-rw-r--r--fs/gfs2/util.h18
-rw-r--r--fs/hfs/btree.c10
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/btree.c10
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c3
-rw-r--r--fs/hfsplus/options.c3
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c6
-rw-r--r--fs/inotify_user.c2
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/dir.c8
-rw-r--r--fs/isofs/isofs.h12
-rw-r--r--fs/isofs/namei.c7
-rw-r--r--fs/jbd/commit.c19
-rw-r--r--fs/jbd/journal.c18
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd/transaction.c38
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c55
-rw-r--r--fs/jbd2/revoke.c165
-rw-r--r--fs/jbd2/transaction.c41
-rw-r--r--fs/jffs2/README.Locking22
-rw-r--r--fs/jffs2/build.c32
-rw-r--r--fs/jffs2/debug.c164
-rw-r--r--fs/jffs2/debug.h14
-rw-r--r--fs/jffs2/dir.c100
-rw-r--r--fs/jffs2/erase.c89
-rw-r--r--fs/jffs2/file.c16
-rw-r--r--fs/jffs2/fs.c56
-rw-r--r--fs/jffs2/gc.c50
-rw-r--r--fs/jffs2/ioctl.c1
-rw-r--r--fs/jffs2/jffs2_fs_i.h4
-rw-r--r--fs/jffs2/jffs2_fs_sb.h7
-rw-r--r--fs/jffs2/nodelist.h7
-rw-r--r--fs/jffs2/nodemgmt.c26
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/readinode.c54
-rw-r--r--fs/jffs2/scan.c9
-rw-r--r--fs/jffs2/super.c27
-rw-r--r--fs/jffs2/wbuf.c30
-rw-r--r--fs/jffs2/write.c67
-rw-r--r--fs/jffs2/xattr.c8
-rw-r--r--fs/jfs/jfs_debug.c4
-rw-r--r--fs/lockd/clntproc.c186
-rw-r--r--fs/lockd/host.c93
-rw-r--r--fs/lockd/mon.c113
-rw-r--r--fs/lockd/svc.c162
-rw-r--r--fs/lockd/svclock.c10
-rw-r--r--fs/lockd/svcshare.c3
-rw-r--r--fs/lockd/svcsubs.c69
-rw-r--r--fs/locks.c34
-rw-r--r--fs/msdos/namei.c4
-rw-r--r--fs/namei.c9
-rw-r--r--fs/namespace.c293
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/ncpfs/ioctl.c17
-rw-r--r--fs/ncpfs/ncplib_kernel.c39
-rw-r--r--fs/ncpfs/ncpsign_kernel.c2
-rw-r--r--fs/nfs/Makefile3
-rw-r--r--fs/nfs/callback.c93
-rw-r--r--fs/nfs/client.c43
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/direct.c88
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/inode.c45
-rw-r--r--fs/nfs/internal.h13
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs2xdr.c113
-rw-r--r--fs/nfs/nfs3xdr.c71
-rw-r--r--fs/nfs/nfs4proc.c39
-rw-r--r--fs/nfs/nfs4state.c49
-rw-r--r--fs/nfs/nfs4xdr.c147
-rw-r--r--fs/nfs/read.c94
-rw-r--r--fs/nfs/super.c181
-rw-r--r--fs/nfs/symlink.c1
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c207
-rw-r--r--fs/nfsd/auth.c1
-rw-r--r--fs/nfsd/export.c9
-rw-r--r--fs/nfsd/nfs4callback.c32
-rw-r--r--fs/nfsd/nfs4idmap.c2
-rw-r--r--fs/nfsd/nfs4state.c74
-rw-r--r--fs/nfsd/nfs4xdr.c27
-rw-r--r--fs/nfsd/nfsctl.c91
-rw-r--r--fs/nfsd/nfsfh.c228
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c35
-rw-r--r--fs/ntfs/debug.h6
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ocfs2/cluster/sys.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c8
-rw-r--r--fs/ocfs2/dlm/dlmfs.c2
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/open.c3
-rw-r--r--fs/partitions/ldm.c8
-rw-r--r--fs/partitions/msdos.c20
-rw-r--r--fs/pipe.c17
-rw-r--r--fs/pnode.c56
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/array.c7
-rw-r--r--fs/proc/base.c235
-rw-r--r--fs/proc/generic.c149
-rw-r--r--fs/proc/inode.c69
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/proc_misc.c85
-rw-r--r--fs/proc/proc_net.c11
-rw-r--r--fs/proc/proc_sysctl.c52
-rw-r--r--fs/proc/proc_tty.c87
-rw-r--r--fs/proc/root.c14
-rw-r--r--fs/proc/task_mmu.c52
-rw-r--r--fs/proc/task_nommu.c35
-rw-r--r--fs/quota.c5
-rw-r--r--fs/quota_v1.c3
-rw-r--r--fs/quota_v2.c7
-rw-r--r--fs/ramfs/file-mmu.c3
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/ramfs/internal.h1
-rw-r--r--fs/reiserfs/bitmap.c8
-rw-r--r--fs/reiserfs/do_balan.c14
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/journal.c60
-rw-r--r--fs/reiserfs/namei.c8
-rw-r--r--fs/reiserfs/objectid.c7
-rw-r--r--fs/reiserfs/procfs.c9
-rw-r--r--fs/reiserfs/stree.c3
-rw-r--r--fs/reiserfs/super.c38
-rw-r--r--fs/select.c15
-rw-r--r--fs/seq_file.c95
-rw-r--r--fs/signalfd.c17
-rw-r--r--fs/smbfs/smb_debug.h6
-rw-r--r--fs/splice.c2
-rw-r--r--fs/super.c12
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/file.c16
-rw-r--r--fs/sysfs/group.c83
-rw-r--r--fs/sysfs/inode.c4
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/sysv.h8
-rw-r--r--fs/timerfd.c12
-rw-r--r--fs/udf/namei.c8
-rw-r--r--fs/udf/super.c4
-rw-r--r--fs/ufs/balloc.c4
-rw-r--r--fs/ufs/dir.c14
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/swab.h36
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/utimes.c17
-rw-r--r--fs/vfat/namei.c39
-rw-r--r--fs/xattr.c42
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/linux-2.6/mrlock.h60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h24
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c27
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h5
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c53
-rw-r--r--fs/xfs/xfs_attr.c93
-rw-r--r--fs/xfs/xfs_attr.h6
-rw-r--r--fs/xfs/xfs_bmap.c1
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/xfs_iget.c140
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c83
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_rename.c252
-rw-r--r--fs/xfs/xfs_trans_inode.c12
-rw-r--r--fs/xfs/xfs_utils.c45
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vfsops.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c274
-rw-r--r--fs/xfs/xfs_vnodeops.h8
344 files changed, 10172 insertions, 5187 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 678c02f1ae23..a452ac67fc94 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -224,12 +224,11 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
224} 224}
225 225
226static void 226static void
227v9fs_umount_begin(struct vfsmount *vfsmnt, int flags) 227v9fs_umount_begin(struct super_block *sb)
228{ 228{
229 struct v9fs_session_info *v9ses = vfsmnt->mnt_sb->s_fs_info; 229 struct v9fs_session_info *v9ses = sb->s_fs_info;
230 230
231 if (flags & MNT_FORCE) 231 v9fs_session_cancel(v9ses);
232 v9fs_session_cancel(v9ses);
233} 232}
234 233
235static const struct super_operations v9fs_super_ops = { 234static const struct super_operations v9fs_super_ops = {
diff --git a/fs/Kconfig b/fs/Kconfig
index 8b18a8758677..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -411,7 +411,7 @@ config JFS_STATISTICS
411 to be made available to the user in the /proc/fs/jfs/ directory. 411 to be made available to the user in the /proc/fs/jfs/ directory.
412 412
413config FS_POSIX_ACL 413config FS_POSIX_ACL
414# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs) 414# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
415# 415#
416# NOTE: you can implement Posix ACLs without these helpers (XFS does). 416# NOTE: you can implement Posix ACLs without these helpers (XFS does).
417# Never use this symbol for ifdefs. 417# Never use this symbol for ifdefs.
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
1005 1005
1006config HUGETLBFS 1006config HUGETLBFS
1007 bool "HugeTLB file system support" 1007 bool "HugeTLB file system support"
1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN 1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
1009 (S390 && 64BIT) || BROKEN
1009 help 1010 help
1010 hugetlbfs is a filesystem backing for HugeTLB pages, based on 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 ramfs. For architectures that support it, say Y here and read 1012 ramfs. For architectures that support it, say Y here and read
@@ -1664,105 +1665,86 @@ config NFS_V4
1664 1665
1665 If unsure, say N. 1666 If unsure, say N.
1666 1667
1667config NFS_DIRECTIO
1668 bool "Allow direct I/O on NFS files"
1669 depends on NFS_FS
1670 help
1671 This option enables applications to perform uncached I/O on files
1672 in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
1673 is set for a file, its data is not cached in the system's page
1674 cache. Data is moved to and from user-level application buffers
1675 directly. Unlike local disk-based file systems, NFS O_DIRECT has
1676 no alignment restrictions.
1677
1678 Unless your program is designed to use O_DIRECT properly, you are
1679 much better off allowing the NFS client to manage data caching for
1680 you. Misusing O_DIRECT can cause poor server performance or network
1681 storms. This kernel build option defaults OFF to avoid exposing
1682 system administrators unwittingly to a potentially hazardous
1683 feature.
1684
1685 For more details on NFS O_DIRECT, see fs/nfs/direct.c.
1686
1687 If unsure, say N. This reduces the size of the NFS client, and
1688 causes open() to return EINVAL if a file residing in NFS is
1689 opened with the O_DIRECT flag.
1690
1691config NFSD 1668config NFSD
1692 tristate "NFS server support" 1669 tristate "NFS server support"
1693 depends on INET 1670 depends on INET
1694 select LOCKD 1671 select LOCKD
1695 select SUNRPC 1672 select SUNRPC
1696 select EXPORTFS 1673 select EXPORTFS
1697 select NFSD_V2_ACL if NFSD_V3_ACL
1698 select NFS_ACL_SUPPORT if NFSD_V2_ACL 1674 select NFS_ACL_SUPPORT if NFSD_V2_ACL
1699 select NFSD_TCP if NFSD_V4 1675 help
1700 select CRYPTO_MD5 if NFSD_V4 1676 Choose Y here if you want to allow other computers to access
1701 select CRYPTO if NFSD_V4 1677 files residing on this system using Sun's Network File System
1702 select FS_POSIX_ACL if NFSD_V4 1678 protocol. To compile the NFS server support as a module,
1703 select PROC_FS if NFSD_V4 1679 choose M here: the module will be called nfsd.
1704 select PROC_FS if SUNRPC_GSS 1680
1705 help 1681 You may choose to use a user-space NFS server instead, in which
1706 If you want your Linux box to act as an NFS *server*, so that other 1682 case you can choose N here.
1707 computers on your local network which support NFS can access certain 1683
1708 directories on your box transparently, you have two options: you can 1684 To export local file systems using NFS, you also need to install
1709 use the self-contained user space program nfsd, in which case you 1685 user space programs which can be found in the Linux nfs-utils
1710 should say N here, or you can say Y and use the kernel based NFS 1686 package, available from http://linux-nfs.org/. More detail about
1711 server. The advantage of the kernel based solution is that it is 1687 the Linux NFS server implementation is available via the
1712 faster. 1688 exports(5) man page.
1713 1689
1714 In either case, you will need support software; the respective 1690 Below you can choose which versions of the NFS protocol are
1715 locations are given in the file <file:Documentation/Changes> in the 1691 available to clients mounting the NFS server on this system.
1716 NFS section. 1692 Support for NFS version 2 (RFC 1094) is always available when
1717 1693 CONFIG_NFSD is selected.
1718 If you say Y here, you will get support for version 2 of the NFS 1694
1719 protocol (NFSv2). If you also want NFSv3, say Y to the next question 1695 If unsure, say N.
1720 as well.
1721
1722 Please read the NFS-HOWTO, available from
1723 <http://www.tldp.org/docs.html#howto>.
1724
1725 To compile the NFS server support as a module, choose M here: the
1726 module will be called nfsd. If unsure, say N.
1727 1696
1728config NFSD_V2_ACL 1697config NFSD_V2_ACL
1729 bool 1698 bool
1730 depends on NFSD 1699 depends on NFSD
1731 1700
1732config NFSD_V3 1701config NFSD_V3
1733 bool "Provide NFSv3 server support" 1702 bool "NFS server support for NFS version 3"
1734 depends on NFSD 1703 depends on NFSD
1735 help 1704 help
1736 If you would like to include the NFSv3 server as well as the NFSv2 1705 This option enables support in your system's NFS server for
1737 server, say Y here. If unsure, say Y. 1706 version 3 of the NFS protocol (RFC 1813).
1707
1708 If unsure, say Y.
1738 1709
1739config NFSD_V3_ACL 1710config NFSD_V3_ACL
1740 bool "Provide server support for the NFSv3 ACL protocol extension" 1711 bool "NFS server support for the NFSv3 ACL protocol extension"
1741 depends on NFSD_V3 1712 depends on NFSD_V3
1713 select NFSD_V2_ACL
1742 help 1714 help
1743 Implement the NFSv3 ACL protocol extension for manipulating POSIX 1715 Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
1744 Access Control Lists on exported file systems. NFS clients should 1716 never became an official part of the NFS version 3 protocol.
1745 be compiled with the NFSv3 ACL protocol extension; see the 1717 This protocol extension allows applications on NFS clients to
1746 CONFIG_NFS_V3_ACL option. If unsure, say N. 1718 manipulate POSIX Access Control Lists on files residing on NFS
1719 servers. NFS servers enforce POSIX ACLs on local files whether
1720 this protocol is available or not.
1721
1722 This option enables support in your system's NFS server for the
1723 NFSv3 ACL protocol extension allowing NFS clients to manipulate
1724 POSIX ACLs on files exported by your system's NFS server. NFS
1725 clients which support the Solaris NFSv3 ACL protocol can then
1726 access and modify ACLs on your NFS server.
1727
1728 To store ACLs on your NFS server, you also need to enable ACL-
1729 related CONFIG options for your local file systems of choice.
1730
1731 If unsure, say N.
1747 1732
1748config NFSD_V4 1733config NFSD_V4
1749 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1734 bool "NFS server support for NFS version 4 (EXPERIMENTAL)"
1750 depends on NFSD && NFSD_V3 && EXPERIMENTAL 1735 depends on NFSD && PROC_FS && EXPERIMENTAL
1736 select NFSD_V3
1737 select FS_POSIX_ACL
1751 select RPCSEC_GSS_KRB5 1738 select RPCSEC_GSS_KRB5
1752 help 1739 help
1753 If you would like to include the NFSv4 server as well as the NFSv2 1740 This option enables support in your system's NFS server for
1754 and NFSv3 servers, say Y here. This feature is experimental, and 1741 version 4 of the NFS protocol (RFC 3530).
1755 should only be used if you are interested in helping to test NFSv4.
1756 If unsure, say N.
1757 1742
1758config NFSD_TCP 1743 To export files using NFSv4, you need to install additional user
1759 bool "Provide NFS server over TCP support" 1744 space programs which can be found in the Linux nfs-utils package,
1760 depends on NFSD 1745 available from http://linux-nfs.org/.
1761 default y 1746
1762 help 1747 If unsure, say N.
1763 If you want your NFS server to support TCP connections, say Y here.
1764 TCP connections usually perform better than the default UDP when
1765 the network is lossy or congested. If unsure, say Y.
1766 1748
1767config ROOT_NFS 1749config ROOT_NFS
1768 bool "Root file system on NFS" 1750 bool "Root file system on NFS"
@@ -1808,15 +1790,33 @@ config SUNRPC_XPRT_RDMA
1808 tristate 1790 tristate
1809 depends on SUNRPC && INFINIBAND && EXPERIMENTAL 1791 depends on SUNRPC && INFINIBAND && EXPERIMENTAL
1810 default SUNRPC && INFINIBAND 1792 default SUNRPC && INFINIBAND
1793 help
1794 This option enables an RPC client transport capability that
1795 allows the NFS client to mount servers via an RDMA-enabled
1796 transport.
1797
1798 To compile RPC client RDMA transport support as a module,
1799 choose M here: the module will be called xprtrdma.
1800
1801 If unsure, say N.
1811 1802
1812config SUNRPC_BIND34 1803config SUNRPC_BIND34
1813 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" 1804 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
1814 depends on SUNRPC && EXPERIMENTAL 1805 depends on SUNRPC && EXPERIMENTAL
1806 default n
1815 help 1807 help
1816 Provides kernel support for querying rpcbind servers via versions 3 1808 RPC requests over IPv6 networks require support for larger
1817 and 4 of the rpcbind protocol. The kernel automatically falls back 1809 addresses when performing an RPC bind. Sun added support for
1818 to version 2 if a remote rpcbind service does not support versions 1810 IPv6 addressing by creating two new versions of the rpcbind
1819 3 or 4. 1811 protocol (RFC 1833).
1812
1813 This option enables support in the kernel RPC client for
1814 querying rpcbind servers via versions 3 and 4 of the rpcbind
1815 protocol. The kernel automatically falls back to version 2
1816 if a remote rpcbind service does not support versions 3 or 4.
1817 By themselves, these new versions do not provide support for
1818 RPC over IPv6, but the new protocol versions are necessary to
1819 support it.
1820 1820
1821 If unsure, say N to get traditional behavior (version 2 rpcbind 1821 If unsure, say N to get traditional behavior (version 2 rpcbind
1822 requests only). 1822 requests only).
@@ -1830,12 +1830,13 @@ config RPCSEC_GSS_KRB5
1830 select CRYPTO_DES 1830 select CRYPTO_DES
1831 select CRYPTO_CBC 1831 select CRYPTO_CBC
1832 help 1832 help
1833 Provides for secure RPC calls by means of a gss-api 1833 Choose Y here to enable Secure RPC using the Kerberos version 5
1834 mechanism based on Kerberos V5. This is required for 1834 GSS-API mechanism (RFC 1964).
1835 NFSv4.
1836 1835
1837 Note: Requires an auxiliary userspace daemon which may be found on 1836 Secure RPC calls with Kerberos require an auxiliary user-space
1838 http://www.citi.umich.edu/projects/nfsv4/ 1837 daemon which may be found in the Linux nfs-utils package
1838 available from http://linux-nfs.org/. In addition, user-space
1839 Kerberos support should be installed.
1839 1840
1840 If unsure, say N. 1841 If unsure, say N.
1841 1842
@@ -1849,11 +1850,12 @@ config RPCSEC_GSS_SPKM3
1849 select CRYPTO_CAST5 1850 select CRYPTO_CAST5
1850 select CRYPTO_CBC 1851 select CRYPTO_CBC
1851 help 1852 help
1852 Provides for secure RPC calls by means of a gss-api 1853 Choose Y here to enable Secure RPC using the SPKM3 public key
1853 mechanism based on the SPKM3 public-key mechanism. 1854 GSS-API mechanism (RFC 2025).
1854 1855
1855 Note: Requires an auxiliary userspace daemon which may be found on 1856 Secure RPC calls with SPKM3 require an auxiliary userspace
1856 http://www.citi.umich.edu/projects/nfsv4/ 1857 daemon which may be found in the Linux nfs-utils package
1858 available from http://linux-nfs.org/.
1857 1859
1858 If unsure, say N. 1860 If unsure, say N.
1859 1861
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 853845abcca6..55e8ee1900a5 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 tristate "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c43..831157502d5a 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
75/* Misc */ 75/* Misc */
76void __adfs_error(struct super_block *sb, const char *function, 76void __adfs_error(struct super_block *sb, const char *function,
77 const char *fmt, ...); 77 const char *fmt, ...);
78#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) 78#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
79 79
80/* super.c */ 80/* super.c */
81 81
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index b9b2b27b68c3..ea7df2146921 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir)
122 ptr.ptr8 = bufoff(bh, i); 122 ptr.ptr8 = bufoff(bh, i);
123 end.ptr8 = ptr.ptr8 + last - i; 123 end.ptr8 = ptr.ptr8 + last - i;
124 124
125 do 125 do {
126 dircheck = *ptr.ptr8++ ^ ror13(dircheck); 126 dircheck = *ptr.ptr8++ ^ ror13(dircheck);
127 while (ptr.ptr8 < end.ptr8); 127 } while (ptr.ptr8 < end.ptr8);
128 } 128 }
129 129
130 /* 130 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6e0c9399200e..1a4f092f24ef 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -325,8 +325,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); 325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
326 326
327 327
328 if (block > (sector_t)0x7fffffffUL) 328 BUG_ON(block > (sector_t)0x7fffffffUL);
329 BUG();
330 329
331 if (block >= AFFS_I(inode)->i_blkcnt) { 330 if (block >= AFFS_I(inode)->i_blkcnt) {
332 if (block > AFFS_I(inode)->i_blkcnt || !create) 331 if (block > AFFS_I(inode)->i_blkcnt || !create)
@@ -493,8 +492,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
493 u32 tmp; 492 u32 tmp;
494 493
495 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 494 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
496 if (from > to || to > PAGE_CACHE_SIZE) 495 BUG_ON(from > to || to > PAGE_CACHE_SIZE);
497 BUG();
498 kmap(page); 496 kmap(page);
499 data = page_address(page); 497 data = page_address(page);
500 bsize = AFFS_SB(sb)->s_data_blksize; 498 bsize = AFFS_SB(sb)->s_data_blksize;
@@ -507,8 +505,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
507 if (IS_ERR(bh)) 505 if (IS_ERR(bh))
508 return PTR_ERR(bh); 506 return PTR_ERR(bh);
509 tmp = min(bsize - boff, to - from); 507 tmp = min(bsize - boff, to - from);
510 if (from + tmp > to || tmp > bsize) 508 BUG_ON(from + tmp > to || tmp > bsize);
511 BUG();
512 memcpy(data + from, AFFS_DATA(bh) + boff, tmp); 509 memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
513 affs_brelse(bh); 510 affs_brelse(bh);
514 bidx++; 511 bidx++;
@@ -540,10 +537,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
540 if (IS_ERR(bh)) 537 if (IS_ERR(bh))
541 return PTR_ERR(bh); 538 return PTR_ERR(bh);
542 tmp = min(bsize - boff, newsize - size); 539 tmp = min(bsize - boff, newsize - size);
543 if (boff + tmp > bsize || tmp > bsize) 540 BUG_ON(boff + tmp > bsize || tmp > bsize);
544 BUG();
545 memset(AFFS_DATA(bh) + boff, 0, tmp); 541 memset(AFFS_DATA(bh) + boff, 0, tmp);
546 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 542 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
547 affs_fix_checksum(sb, bh); 543 affs_fix_checksum(sb, bh);
548 mark_buffer_dirty_inode(bh, inode); 544 mark_buffer_dirty_inode(bh, inode);
549 size += tmp; 545 size += tmp;
@@ -560,8 +556,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
560 if (IS_ERR(bh)) 556 if (IS_ERR(bh))
561 goto out; 557 goto out;
562 tmp = min(bsize, newsize - size); 558 tmp = min(bsize, newsize - size);
563 if (tmp > bsize) 559 BUG_ON(tmp > bsize);
564 BUG();
565 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 560 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
566 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); 561 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
567 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); 562 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
@@ -683,10 +678,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
683 if (IS_ERR(bh)) 678 if (IS_ERR(bh))
684 return PTR_ERR(bh); 679 return PTR_ERR(bh);
685 tmp = min(bsize - boff, to - from); 680 tmp = min(bsize - boff, to - from);
686 if (boff + tmp > bsize || tmp > bsize) 681 BUG_ON(boff + tmp > bsize || tmp > bsize);
687 BUG();
688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 682 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
689 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 683 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
690 affs_fix_checksum(sb, bh); 684 affs_fix_checksum(sb, bh);
691 mark_buffer_dirty_inode(bh, inode); 685 mark_buffer_dirty_inode(bh, inode);
692 written += tmp; 686 written += tmp;
@@ -732,8 +726,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
732 if (IS_ERR(bh)) 726 if (IS_ERR(bh))
733 goto out; 727 goto out;
734 tmp = min(bsize, to - from); 728 tmp = min(bsize, to - from);
735 if (tmp > bsize) 729 BUG_ON(tmp > bsize);
736 BUG();
737 memcpy(AFFS_DATA(bh), data + from, tmp); 730 memcpy(AFFS_DATA(bh), data + from, tmp);
738 if (buffer_new(bh)) { 731 if (buffer_new(bh)) {
739 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 732 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d2dc047cb479..01d25d532541 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -199,7 +199,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
199 case Opt_prefix: 199 case Opt_prefix:
200 /* Free any previous prefix */ 200 /* Free any previous prefix */
201 kfree(*prefix); 201 kfree(*prefix);
202 *prefix = NULL;
203 *prefix = match_strdup(&args[0]); 202 *prefix = match_strdup(&args[0]);
204 if (!*prefix) 203 if (!*prefix)
205 return 0; 204 return 0;
@@ -233,6 +232,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 232 break;
234 case Opt_volume: { 233 case Opt_volume: {
235 char *vol = match_strdup(&args[0]); 234 char *vol = match_strdup(&args[0]);
235 if (!vol)
236 return 0;
236 strlcpy(volume, vol, 32); 237 strlcpy(volume, vol, 32);
237 kfree(vol); 238 kfree(vol);
238 break; 239 break;
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7b4d4fab4c80..255f5dd6040c 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -24,7 +24,8 @@ enum AFS_CM_Operations {
24 CBGetXStatsVersion = 209, /* get version of extended statistics */ 24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */ 25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ 26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */ 27 CBProbeUuid = 214, /* check the client hasn't rebooted */
28 CBTellMeAboutYourself = 65538, /* get client capabilities */
28}; 29};
29 30
30#define AFS_CAP_ERROR_TRANSLATION 0x1 31#define AFS_CAP_ERROR_TRANSLATION 0x1
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 584bb0f9c36a..5e1df14e16b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,7 +20,7 @@
20DECLARE_RWSEM(afs_proc_cells_sem); 20DECLARE_RWSEM(afs_proc_cells_sem);
21LIST_HEAD(afs_proc_cells); 21LIST_HEAD(afs_proc_cells);
22 22
23static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 23static LIST_HEAD(afs_cells);
24static DEFINE_RWLOCK(afs_cells_lock); 24static DEFINE_RWLOCK(afs_cells_lock);
25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 47b71c8947f9..eb765489164f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
26 struct sk_buff *, bool); 26 struct sk_buff *, bool);
27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); 27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); 28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
29static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, 29static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
30 bool); 30static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
31 struct sk_buff *, bool);
31static void afs_cm_destructor(struct afs_call *); 32static void afs_cm_destructor(struct afs_call *);
32 33
33/* 34/*
@@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = {
71}; 72};
72 73
73/* 74/*
74 * CB.GetCapabilities operation type 75 * CB.ProbeUuid operation type
75 */ 76 */
76static const struct afs_call_type afs_SRXCBGetCapabilites = { 77static const struct afs_call_type afs_SRXCBProbeUuid = {
77 .name = "CB.GetCapabilities", 78 .name = "CB.ProbeUuid",
78 .deliver = afs_deliver_cb_get_capabilities, 79 .deliver = afs_deliver_cb_probe_uuid,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor,
82};
83
84/*
85 * CB.TellMeAboutYourself operation type
86 */
87static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
88 .name = "CB.TellMeAboutYourself",
89 .deliver = afs_deliver_cb_tell_me_about_yourself,
79 .abort_to_error = afs_abort_to_error, 90 .abort_to_error = afs_abort_to_error,
80 .destructor = afs_cm_destructor, 91 .destructor = afs_cm_destructor,
81}; 92};
@@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
103 case CBProbe: 114 case CBProbe:
104 call->type = &afs_SRXCBProbe; 115 call->type = &afs_SRXCBProbe;
105 return true; 116 return true;
106 case CBGetCapabilities: 117 case CBTellMeAboutYourself:
107 call->type = &afs_SRXCBGetCapabilites; 118 call->type = &afs_SRXCBTellMeAboutYourself;
108 return true; 119 return true;
109 default: 120 default:
110 return false; 121 return false;
@@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
393} 404}
394 405
395/* 406/*
407 * allow the fileserver to quickly find out if the client (cache manager) has been rebooted
408 */
409static void SRXAFSCB_ProbeUuid(struct work_struct *work)
410{
/* Work-item handler: the afs_call is recovered from its embedded work member. */
411 struct afs_call *call = container_of(work, struct afs_call, work);
/* UUID supplied by the caller; afs_deliver_cb_probe_uuid() stores the unpacked copy in call->request. */
412 struct afs_uuid *r = call->request;
413
/* The reply is a single big-endian 32-bit word. */
414 struct {
415 __be32 match;
416 } reply;
417
418 _enter("");
419
420
/*
 * Byte-wise comparison against this module's afs_uuid: the reply word is 0
 * when the two are identical and 1 when they differ.
 * NOTE(review): memcmp() over the whole struct assumes struct afs_uuid has no
 * padding bytes - confirm against its declaration.
 */
421 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
422 reply.match = htonl(0);
423 else
424 reply.match = htonl(1);
425
426 afs_send_simple_reply(call, &reply, sizeof(reply));
427 _leave("");
428}
429
430/*
431 * deliver request data to a CB.ProbeUuid call
432 */
433static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
434 bool last)
435{
436 struct afs_uuid *r;
437 unsigned loop;
438 __be32 *b;
439 int ret;
440
441 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
442
443 if (skb->len > 0)
444 return -EBADMSG;
445 if (!last)
446 return 0;
447
448 switch (call->unmarshall) {
449 case 0:
450 call->offset = 0;
451 call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
452 if (!call->buffer)
453 return -ENOMEM;
454 call->unmarshall++;
455
456 case 1:
457 _debug("extract UUID");
458 ret = afs_extract_data(call, skb, last, call->buffer,
459 11 * sizeof(__be32));
460 switch (ret) {
461 case 0: break;
462 case -EAGAIN: return 0;
463 default: return ret;
464 }
465
466 _debug("unmarshall UUID");
467 call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
468 if (!call->request)
469 return -ENOMEM;
470
471 b = call->buffer;
472 r = call->request;
473 r->time_low = ntohl(b[0]);
474 r->time_mid = ntohl(b[1]);
475 r->time_hi_and_version = ntohl(b[2]);
476 r->clock_seq_hi_and_reserved = ntohl(b[3]);
477 r->clock_seq_low = ntohl(b[4]);
478
479 for (loop = 0; loop < 6; loop++)
480 r->node[loop] = ntohl(b[loop + 5]);
481
482 call->offset = 0;
483 call->unmarshall++;
484
485 case 2:
486 _debug("trailer");
487 if (skb->len != 0)
488 return -EBADMSG;
489 break;
490 }
491
492 if (!last)
493 return 0;
494
495 call->state = AFS_CALL_REPLYING;
496
497 INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
498 schedule_work(&call->work);
499 return 0;
500}
501
502/*
396 * allow the fileserver to ask about the cache manager's capabilities 503 * allow the fileserver to ask about the cache manager's capabilities
397 */ 504 */
398static void SRXAFSCB_GetCapabilities(struct work_struct *work) 505static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
399{ 506{
400 struct afs_interface *ifs; 507 struct afs_interface *ifs;
401 struct afs_call *call = container_of(work, struct afs_call, work); 508 struct afs_call *call = container_of(work, struct afs_call, work);
@@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
456} 563}
457 564
458/* 565/*
459 * deliver request data to a CB.GetCapabilities call 566 * deliver request data to a CB.TellMeAboutYourself call
460 */ 567 */
461static int afs_deliver_cb_get_capabilities(struct afs_call *call, 568static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
462 struct sk_buff *skb, bool last) 569 struct sk_buff *skb, bool last)
463{ 570{
464 _enter(",{%u},%d", skb->len, last); 571 _enter(",{%u},%d", skb->len, last);
465 572
@@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call,
471 /* no unmarshalling required */ 578 /* no unmarshalling required */
472 call->state = AFS_CALL_REPLYING; 579 call->state = AFS_CALL_REPLYING;
473 580
474 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); 581 INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
475 schedule_work(&call->work); 582 schedule_work(&call->work);
476 return 0; 583 return 0;
477} 584}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc4..dfda03d4397d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
140 140
141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { 141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
143 __FUNCTION__, dir->i_ino, qty, 143 __func__, dir->i_ino, qty,
144 ntohs(dbuf->blocks[0].pagehdr.npages)); 144 ntohs(dbuf->blocks[0].pagehdr.npages));
145 goto error; 145 goto error;
146 } 146 }
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
159 for (tmp = 0; tmp < qty; tmp++) { 159 for (tmp = 0; tmp < qty; tmp++) {
160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
162 __FUNCTION__, dir->i_ino, tmp, qty, 162 __func__, dir->i_ino, tmp, qty,
163 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 163 ntohs(dbuf->blocks[tmp].pagehdr.magic));
164 goto error; 164 goto error;
165 } 165 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de72..7102824ba847 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
757{ 757{
758} 758}
759 759
760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) 762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
763 763
764 764
@@ -791,8 +791,8 @@ do { \
791} while (0) 791} while (0)
792 792
793#else 793#else
794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) 796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
797#endif 797#endif
798 798
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 846c7615ac9e..9f7d1ae70269 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = {
41 .write = afs_proc_cells_write, 41 .write = afs_proc_cells_write,
42 .llseek = seq_lseek, 42 .llseek = seq_lseek,
43 .release = seq_release, 43 .release = seq_release,
44 .owner = THIS_MODULE,
44}; 45};
45 46
46static int afs_proc_rootcell_open(struct inode *inode, struct file *file); 47static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
@@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = {
56 .read = afs_proc_rootcell_read, 57 .read = afs_proc_rootcell_read,
57 .write = afs_proc_rootcell_write, 58 .write = afs_proc_rootcell_write,
58 .llseek = no_llseek, 59 .llseek = no_llseek,
59 .release = afs_proc_rootcell_release 60 .release = afs_proc_rootcell_release,
61 .owner = THIS_MODULE,
60}; 62};
61 63
62static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); 64static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
@@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
80 .read = seq_read, 82 .read = seq_read,
81 .llseek = seq_lseek, 83 .llseek = seq_lseek,
82 .release = afs_proc_cell_volumes_release, 84 .release = afs_proc_cell_volumes_release,
85 .owner = THIS_MODULE,
83}; 86};
84 87
85static int afs_proc_cell_vlservers_open(struct inode *inode, 88static int afs_proc_cell_vlservers_open(struct inode *inode,
@@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
104 .read = seq_read, 107 .read = seq_read,
105 .llseek = seq_lseek, 108 .llseek = seq_lseek,
106 .release = afs_proc_cell_vlservers_release, 109 .release = afs_proc_cell_vlservers_release,
110 .owner = THIS_MODULE,
107}; 111};
108 112
109static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 113static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
@@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
127 .read = seq_read, 131 .read = seq_read,
128 .llseek = seq_lseek, 132 .llseek = seq_lseek,
129 .release = afs_proc_cell_servers_release, 133 .release = afs_proc_cell_servers_release,
134 .owner = THIS_MODULE,
130}; 135};
131 136
132/* 137/*
@@ -143,17 +148,13 @@ int afs_proc_init(void)
143 goto error_dir; 148 goto error_dir;
144 proc_afs->owner = THIS_MODULE; 149 proc_afs->owner = THIS_MODULE;
145 150
146 p = create_proc_entry("cells", 0, proc_afs); 151 p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
147 if (!p) 152 if (!p)
148 goto error_cells; 153 goto error_cells;
149 p->proc_fops = &afs_proc_cells_fops;
150 p->owner = THIS_MODULE;
151 154
152 p = create_proc_entry("rootcell", 0, proc_afs); 155 p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
153 if (!p) 156 if (!p)
154 goto error_rootcell; 157 goto error_rootcell;
155 p->proc_fops = &afs_proc_rootcell_fops;
156 p->owner = THIS_MODULE;
157 158
158 _leave(" = 0"); 159 _leave(" = 0");
159 return 0; 160 return 0;
@@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell)
395 if (!cell->proc_dir) 396 if (!cell->proc_dir)
396 goto error_dir; 397 goto error_dir;
397 398
398 p = create_proc_entry("servers", 0, cell->proc_dir); 399 p = proc_create_data("servers", 0, cell->proc_dir,
400 &afs_proc_cell_servers_fops, cell);
399 if (!p) 401 if (!p)
400 goto error_servers; 402 goto error_servers;
401 p->proc_fops = &afs_proc_cell_servers_fops;
402 p->owner = THIS_MODULE;
403 p->data = cell;
404 403
405 p = create_proc_entry("vlservers", 0, cell->proc_dir); 404 p = proc_create_data("vlservers", 0, cell->proc_dir,
405 &afs_proc_cell_vlservers_fops, cell);
406 if (!p) 406 if (!p)
407 goto error_vlservers; 407 goto error_vlservers;
408 p->proc_fops = &afs_proc_cell_vlservers_fops;
409 p->owner = THIS_MODULE;
410 p->data = cell;
411 408
412 p = create_proc_entry("volumes", 0, cell->proc_dir); 409 p = proc_create_data("volumes", 0, cell->proc_dir,
410 &afs_proc_cell_volumes_fops, cell);
413 if (!p) 411 if (!p)
414 goto error_volumes; 412 goto error_volumes;
415 p->proc_fops = &afs_proc_cell_volumes_fops;
416 p->owner = THIS_MODULE;
417 p->data = cell;
418 413
419 _leave(" = 0"); 414 _leave(" = 0");
420 return 0; 415 return 0;
diff --git a/fs/aio.c b/fs/aio.c
index 228368610dfa..b5253e77eb2f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194
195/* __put_ioctx
196 * Called when the last user of an aio context has gone away,
197 * and the struct needs to be freed.
198 */
199static void __put_ioctx(struct kioctx *ctx)
200{
/* Snapshot max_reqs now: it is needed after ctx has been freed below. */
201 unsigned nr_events = ctx->max_reqs;
202
/* No request may still be outstanding when the context is torn down. */
203 BUG_ON(ctx->reqs_active);
204
/* Stop the context's delayed work and wait for any running instance before freeing. */
205 cancel_delayed_work(&ctx->wq);
206 cancel_work_sync(&ctx->wq.work);
207 aio_free_ring(ctx);
208 mmdrop(ctx->mm);
209 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx);
212
/* ctx must not be touched past this point; only the saved nr_events is used. */
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
/* Trips if the subtraction would wrap, i.e. nr_events exceeds aio_nr (assumes aio_nr is unsigned - TODO confirm). */
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219}
220
/* get_ioctx: take an extra reference on kioctx; BUGs if the users count is not already positive. */
221#define get_ioctx(kioctx) do { \
222 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
223 atomic_inc(&(kioctx)->users); \
224} while (0)
/* put_ioctx: drop a reference on kioctx and call __put_ioctx() when the count reaches zero. */
225#define put_ioctx(kioctx) do { \
226 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
227 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
228 __put_ioctx(kioctx); \
229} while (0)
230
194/* ioctx_alloc 231/* ioctx_alloc
195 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 232 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
196 */ 233 */
@@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
240 if (ctx->max_reqs == 0) 277 if (ctx->max_reqs == 0)
241 goto out_cleanup; 278 goto out_cleanup;
242 279
243 /* now link into global list. kludge. FIXME */ 280 /* now link into global list. */
244 write_lock(&mm->ioctx_list_lock); 281 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list; 282 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx; 283 mm->ioctx_list = ctx;
@@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm)
361 } 398 }
362} 399}
363 400
364/* __put_ioctx
365 * Called when the last user of an aio context has gone away,
366 * and the struct needs to be freed.
367 */
368void __put_ioctx(struct kioctx *ctx)
369{
370 unsigned nr_events = ctx->max_reqs;
371
372 BUG_ON(ctx->reqs_active);
373
374 cancel_delayed_work(&ctx->wq);
375 cancel_work_sync(&ctx->wq.work);
376 aio_free_ring(ctx);
377 mmdrop(ctx->mm);
378 ctx->mm = NULL;
379 pr_debug("__put_ioctx: freeing %p\n", ctx);
380 kmem_cache_free(kioctx_cachep, ctx);
381
382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
388}
389
390/* aio_get_req 401/* aio_get_req
391 * Allocate a slot for an aio request. Increments the users count 402 * Allocate a slot for an aio request. Increments the users count
392 * of the kioctx so that the kioctx stays around until all requests are 403 * of the kioctx so that the kioctx stays around until all requests are
@@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req)
542 return ret; 553 return ret;
543} 554}
544 555
545/* Lookup an ioctx id. ioctx_list is lockless for reads. 556static struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 * FIXME: this is O(n) and is only suitable for development.
547 */
548struct kioctx *lookup_ioctx(unsigned long ctx_id)
549{ 557{
550 struct kioctx *ioctx; 558 struct kioctx *ioctx;
551 struct mm_struct *mm; 559 struct mm_struct *mm;
@@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data)
1070 1078
1071static inline void init_timeout(struct aio_timeout *to) 1079static inline void init_timeout(struct aio_timeout *to)
1072{ 1080{
1073 init_timer(&to->timer); 1081 setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
1074 to->timer.data = (unsigned long)to;
1075 to->timer.function = timeout_func;
1076 to->timed_out = 0; 1082 to->timed_out = 0;
1077 to->p = current; 1083 to->p = current;
1078} 1084}
@@ -1166,7 +1172,10 @@ retry:
1166 break; 1172 break;
1167 if (min_nr <= i) 1173 if (min_nr <= i)
1168 break; 1174 break;
1169 ret = 0; 1175 if (unlikely(ctx->dead)) {
1176 ret = -EINVAL;
1177 break;
1178 }
1170 if (to.timed_out) /* Only check after read evt */ 1179 if (to.timed_out) /* Only check after read evt */
1171 break; 1180 break;
1172 /* Try to only show up in io wait if there are ops 1181 /* Try to only show up in io wait if there are ops
@@ -1202,6 +1211,7 @@ retry:
1202 if (timeout) 1211 if (timeout)
1203 clear_timeout(&to); 1212 clear_timeout(&to);
1204out: 1213out:
1214 destroy_timer_on_stack(&to.timer);
1205 return i ? i : ret; 1215 return i ? i : ret;
1206} 1216}
1207 1217
@@ -1231,6 +1241,13 @@ static void io_destroy(struct kioctx *ioctx)
1231 1241
1232 aio_cancel_all(ioctx); 1242 aio_cancel_all(ioctx);
1233 wait_for_all_aios(ioctx); 1243 wait_for_all_aios(ioctx);
1244
1245 /*
1246 * Wake up any waiters. The setting of ctx->dead must be seen
1247 * by other CPUs at this point. Right now, we rely on the
1248 * locking done by the above calls to ensure this consistency.
1249 */
1250 wake_up(&ioctx->wait);
1234 put_ioctx(ioctx); /* once for the lookup */ 1251 put_ioctx(ioctx); /* once for the lookup */
1235} 1252}
1236 1253
@@ -1542,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1542 return 1; 1559 return 1;
1543} 1560}
1544 1561
1545int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1562static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1546 struct iocb *iocb) 1563 struct iocb *iocb)
1547{ 1564{
1548 struct kiocb *req; 1565 struct kiocb *req;
@@ -1583,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1583 * event using the eventfd_signal() function. 1600 * event using the eventfd_signal() function.
1584 */ 1601 */
1585 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1602 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1586 if (unlikely(IS_ERR(req->ki_eventfd))) { 1603 if (IS_ERR(req->ki_eventfd)) {
1587 ret = PTR_ERR(req->ki_eventfd); 1604 ret = PTR_ERR(req->ki_eventfd);
1588 goto out_put_req; 1605 goto out_put_req;
1589 } 1606 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e085..977ef208c051 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
57 * anonymous inode, and a dentry that describe the "class" 57 * anonymous inode, and a dentry that describe the "class"
58 * of the file 58 * of the file
59 * 59 *
60 * @pfd: [out] pointer to the file descriptor
61 * @dpinode: [out] pointer to the inode
62 * @pfile: [out] pointer to the file struct
63 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
64 * @fops [in] file operations for the new file 61 * @fops [in] file operations for the new file
65 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * that do not need to have a full-fledged inode in order to operate correctly. 65 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, 66 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 67 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 68 * setup. Returns new descriptor or -error.
72 */ 69 */
73int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 70int anon_inode_getfd(const char *name, const struct file_operations *fops,
74 const char *name, const struct file_operations *fops,
75 void *priv) 71 void *priv)
76{ 72{
77 struct qstr this; 73 struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
125 121
126 fd_install(fd, file); 122 fd_install(fd, file);
127 123
128 *pfd = fd; 124 return fd;
129 *pinode = anon_inode_inode;
130 *pfile = file;
131 return 0;
132 125
133err_dput: 126err_dput:
134 dput(dentry); 127 dput(dentry);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d9..c3d352d7fa93 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37#ifdef DEBUG 37#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) 38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
39#else 39#else
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9ca..894fee54d4d8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
73 status = 0; 73 status = 0;
74done: 74done:
75 DPRINTK("returning = %d", status); 75 DPRINTK("returning = %d", status);
76 mntput(mnt);
77 dput(dentry); 76 dput(dentry);
77 mntput(mnt);
78 return status; 78 return status;
79} 79}
80 80
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
333 /* Can we expire this guy */ 333 /* Can we expire this guy */
334 if (autofs4_can_expire(dentry, timeout, do_now)) { 334 if (autofs4_can_expire(dentry, timeout, do_now)) {
335 expired = dentry; 335 expired = dentry;
336 break; 336 goto found;
337 } 337 }
338 goto next; 338 goto next;
339 } 339 }
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
352 inf->flags |= AUTOFS_INF_EXPIRING; 352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock); 353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 354 expired = dentry;
355 break; 355 goto found;
356 } 356 }
357 spin_unlock(&sbi->fs_lock); 357 spin_unlock(&sbi->fs_lock);
358 /* 358 /*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 364 if (expired) {
365 dput(dentry); 365 dput(dentry);
366 break; 366 goto found;
367 } 367 }
368 } 368 }
369next: 369next:
@@ -371,18 +371,16 @@ next:
371 spin_lock(&dcache_lock); 371 spin_lock(&dcache_lock);
372 next = next->next; 372 next = next->next;
373 } 373 }
374
375 if (expired) {
376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock);
379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 spin_unlock(&dcache_lock);
381 return expired;
382 }
383 spin_unlock(&dcache_lock); 374 spin_unlock(&dcache_lock);
384
385 return NULL; 375 return NULL;
376
377found:
378 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name);
380 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock);
383 return expired;
386} 384}
387 385
388/* Perform an expiry operation */ 386/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a54a946a50ae..edf5b6bddb52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
146 146
147 if (d_mountpoint(dentry)) { 147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL; 148 struct file *fp = NULL;
149 struct vfsmount *fp_mnt = mntget(mnt); 149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150 struct dentry *fp_dentry = dget(dentry);
151 150
152 if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { 151 path_get(&fp_path);
153 dput(fp_dentry); 152
154 mntput(fp_mnt); 153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file); 155 dcache_dir_close(inode, file);
156 goto out; 156 goto out;
157 } 157 }
158 158
159 fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); 159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp); 160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) { 161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file); 162 dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 242{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 244 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 int status = 0; 245 struct dentry *new;
246 int status;
246 247
247 /* Block on any pending expiry here; invalidate the dentry 248 /* Block on any pending expiry here; invalidate the dentry
248 when expiration is done to trigger mount request with a new 249 when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
318 spin_lock(&dentry->d_lock); 319 spin_lock(&dentry->d_lock);
319 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
320 spin_unlock(&dentry->d_lock); 321 spin_unlock(&dentry->d_lock);
321 return status; 322
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0;
322} 344}
323 345
324/* For autofs direct mounts the follow link triggers the mount */ 346/* For autofs direct mounts the follow link triggers the mount */
@@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
533 goto next; 555 goto next;
534 556
535 if (d_unhashed(dentry)) { 557 if (d_unhashed(dentry)) {
536 struct autofs_info *ino = autofs4_dentry_ino(dentry);
537 struct inode *inode = dentry->d_inode; 558 struct inode *inode = dentry->d_inode;
538 559
560 ino = autofs4_dentry_ino(dentry);
539 list_del_init(&ino->rehash); 561 list_del_init(&ino->rehash);
540 dget(dentry); 562 dget(dentry);
541 /* 563 /*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c2..75e5955c3f6d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
172 len += tmp->d_name.len + 1; 172 len += tmp->d_name.len + 1;
173 173
174 if (--len > NAME_MAX) { 174 if (!len || --len > NAME_MAX) {
175 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
176 return 0; 176 return 0;
177 } 177 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 82123ff3e1dd..e8717de3bab3 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
489{ 489{
490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
492 char *p = nd_get_link(nd); 492 char *link = nd_get_link(nd);
493 if (!IS_ERR(p)) 493 if (!IS_ERR(link))
494 kfree(p); 494 kfree(link);
495 } 495 }
496} 496}
497 497
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d23908..70f5d3a8eede 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
42 42
43 43
44#define printf(format, args...) \ 44#define printf(format, args...) \
45 printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) 45 printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a1bb2244cac7..ba4cddb92f1d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
372 372
373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); 373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374 } else { 374 } else {
375 static unsigned long error_time, error_time2;
376 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 375 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
377 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) 376 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
378 { 377 {
379 printk(KERN_NOTICE "executable not page aligned\n"); 378 printk(KERN_NOTICE "executable not page aligned\n");
380 error_time2 = jiffies;
381 } 379 }
382 380
383 if ((fd_offset & ~PAGE_MASK) != 0 && 381 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
384 (jiffies-error_time) > 5*HZ)
385 { 382 {
386 printk(KERN_WARNING 383 printk(KERN_WARNING
387 "fd_offset is not page aligned. Please convert program: %s\n", 384 "fd_offset is not page aligned. Please convert program: %s\n",
388 bprm->file->f_path.dentry->d_name.name); 385 bprm->file->f_path.dentry->d_name.name);
389 error_time = jiffies;
390 } 386 }
391 387
392 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 388 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -495,15 +491,13 @@ static int load_aout_library(struct file *file)
495 start_addr = ex.a_entry & 0xfffff000; 491 start_addr = ex.a_entry & 0xfffff000;
496 492
497 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 493 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
498 static unsigned long error_time;
499 loff_t pos = N_TXTOFF(ex); 494 loff_t pos = N_TXTOFF(ex);
500 495
501 if ((jiffies-error_time) > 5*HZ) 496 if (printk_ratelimit())
502 { 497 {
503 printk(KERN_WARNING 498 printk(KERN_WARNING
504 "N_TXTOFF is not page aligned. Please convert library: %s\n", 499 "N_TXTOFF is not page aligned. Please convert library: %s\n",
505 file->f_path.dentry->d_name.name); 500 file->f_path.dentry->d_name.name);
506 error_time = jiffies;
507 } 501 }
508 down_write(&current->mm->mmap_sem); 502 down_write(&current->mm->mmap_sem);
509 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 503 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5e1a4fb5cacb..b25707fee2cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -543,7 +543,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
543 unsigned long interp_load_addr = 0; 543 unsigned long interp_load_addr = 0;
544 unsigned long start_code, end_code, start_data, end_data; 544 unsigned long start_code, end_code, start_data, end_data;
545 unsigned long reloc_func_desc = 0; 545 unsigned long reloc_func_desc = 0;
546 struct files_struct *files;
547 int executable_stack = EXSTACK_DEFAULT; 546 int executable_stack = EXSTACK_DEFAULT;
548 unsigned long def_flags = 0; 547 unsigned long def_flags = 0;
549 struct { 548 struct {
@@ -593,20 +592,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
593 goto out_free_ph; 592 goto out_free_ph;
594 } 593 }
595 594
596 files = current->files; /* Refcounted so ok */
597 retval = unshare_files();
598 if (retval < 0)
599 goto out_free_ph;
600 if (files == current->files) {
601 put_files_struct(files);
602 files = NULL;
603 }
604
605 /* exec will make our files private anyway, but for the a.out
606 loader stuff we need to do it earlier */
607 retval = get_unused_fd(); 595 retval = get_unused_fd();
608 if (retval < 0) 596 if (retval < 0)
609 goto out_free_fh; 597 goto out_free_ph;
610 get_file(bprm->file); 598 get_file(bprm->file);
611 fd_install(elf_exec_fileno = retval, bprm->file); 599 fd_install(elf_exec_fileno = retval, bprm->file);
612 600
@@ -728,12 +716,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
728 if (retval) 716 if (retval)
729 goto out_free_dentry; 717 goto out_free_dentry;
730 718
731 /* Discard our unneeded old files struct */
732 if (files) {
733 put_files_struct(files);
734 files = NULL;
735 }
736
737 /* OK, This is the point of no return */ 719 /* OK, This is the point of no return */
738 current->flags &= ~PF_FORKNOEXEC; 720 current->flags &= ~PF_FORKNOEXEC;
739 current->mm->def_flags = def_flags; 721 current->mm->def_flags = def_flags;
@@ -1016,9 +998,6 @@ out_free_interp:
1016 kfree(elf_interpreter); 998 kfree(elf_interpreter);
1017out_free_file: 999out_free_file:
1018 sys_close(elf_exec_fileno); 1000 sys_close(elf_exec_fileno);
1019out_free_fh:
1020 if (files)
1021 reset_files_struct(current, files);
1022out_free_ph: 1001out_free_ph:
1023 kfree(elf_phdata); 1002 kfree(elf_phdata);
1024 goto out; 1003 goto out;
@@ -1276,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file,
1276static void fill_elf_header(struct elfhdr *elf, int segs, 1255static void fill_elf_header(struct elfhdr *elf, int segs,
1277 u16 machine, u32 flags, u8 osabi) 1256 u16 machine, u32 flags, u8 osabi)
1278{ 1257{
1258 memset(elf, 0, sizeof(*elf));
1259
1279 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1260 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1280 elf->e_ident[EI_CLASS] = ELF_CLASS; 1261 elf->e_ident[EI_CLASS] = ELF_CLASS;
1281 elf->e_ident[EI_DATA] = ELF_DATA; 1262 elf->e_ident[EI_DATA] = ELF_DATA;
1282 elf->e_ident[EI_VERSION] = EV_CURRENT; 1263 elf->e_ident[EI_VERSION] = EV_CURRENT;
1283 elf->e_ident[EI_OSABI] = ELF_OSABI; 1264 elf->e_ident[EI_OSABI] = ELF_OSABI;
1284 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1285 1265
1286 elf->e_type = ET_CORE; 1266 elf->e_type = ET_CORE;
1287 elf->e_machine = machine; 1267 elf->e_machine = machine;
1288 elf->e_version = EV_CURRENT; 1268 elf->e_version = EV_CURRENT;
1289 elf->e_entry = 0;
1290 elf->e_phoff = sizeof(struct elfhdr); 1269 elf->e_phoff = sizeof(struct elfhdr);
1291 elf->e_shoff = 0;
1292 elf->e_flags = flags; 1270 elf->e_flags = flags;
1293 elf->e_ehsize = sizeof(struct elfhdr); 1271 elf->e_ehsize = sizeof(struct elfhdr);
1294 elf->e_phentsize = sizeof(struct elf_phdr); 1272 elf->e_phentsize = sizeof(struct elf_phdr);
1295 elf->e_phnum = segs; 1273 elf->e_phnum = segs;
1296 elf->e_shentsize = 0; 1274
1297 elf->e_shnum = 0;
1298 elf->e_shstrndx = 0;
1299 return; 1275 return;
1300} 1276}
1301 1277
@@ -1746,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1746 1722
1747 info->thread_status_size = 0; 1723 info->thread_status_size = 0;
1748 if (signr) { 1724 if (signr) {
1749 struct elf_thread_status *tmp; 1725 struct elf_thread_status *ets;
1750 rcu_read_lock(); 1726 rcu_read_lock();
1751 do_each_thread(g, p) 1727 do_each_thread(g, p)
1752 if (current->mm == p->mm && current != p) { 1728 if (current->mm == p->mm && current != p) {
1753 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1729 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1754 if (!tmp) { 1730 if (!ets) {
1755 rcu_read_unlock(); 1731 rcu_read_unlock();
1756 return 0; 1732 return 0;
1757 } 1733 }
1758 tmp->thread = p; 1734 ets->thread = p;
1759 list_add(&tmp->list, &info->thread_list); 1735 list_add(&ets->list, &info->thread_list);
1760 } 1736 }
1761 while_each_thread(g, p); 1737 while_each_thread(g, p);
1762 rcu_read_unlock(); 1738 rcu_read_unlock();
1763 list_for_each(t, &info->thread_list) { 1739 list_for_each(t, &info->thread_list) {
1764 struct elf_thread_status *tmp;
1765 int sz; 1740 int sz;
1766 1741
1767 tmp = list_entry(t, struct elf_thread_status, list); 1742 ets = list_entry(t, struct elf_thread_status, list);
1768 sz = elf_dump_thread_status(signr, tmp); 1743 sz = elf_dump_thread_status(signr, ets);
1769 info->thread_status_size += sz; 1744 info->thread_status_size += sz;
1770 } 1745 }
1771 } 1746 }
@@ -2021,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2021 1996
2022 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 1997 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2023 struct page *page; 1998 struct page *page;
2024 struct vm_area_struct *vma; 1999 struct vm_area_struct *tmp_vma;
2025 2000
2026 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2001 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2027 &page, &vma) <= 0) { 2002 &page, &tmp_vma) <= 0) {
2028 DUMP_SEEK(PAGE_SIZE); 2003 DUMP_SEEK(PAGE_SIZE);
2029 } else { 2004 } else {
2030 if (page == ZERO_PAGE(0)) { 2005 if (page == ZERO_PAGE(0)) {
@@ -2034,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2034 } 2009 }
2035 } else { 2010 } else {
2036 void *kaddr; 2011 void *kaddr;
2037 flush_cache_page(vma, addr, 2012 flush_cache_page(tmp_vma, addr,
2038 page_to_pfn(page)); 2013 page_to_pfn(page));
2039 kaddr = kmap(page); 2014 kaddr = kmap(page);
2040 if ((size += PAGE_SIZE) > limit || 2015 if ((size += PAGE_SIZE) > limit ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32649f2a1654..ddd35d873391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
136 136
137 retval = kernel_read(file, params->hdr.e_phoff, 137 retval = kernel_read(file, params->hdr.e_phoff,
138 (char *) params->phdrs, size); 138 (char *) params->phdrs, size);
139 if (retval < 0) 139 if (unlikely(retval != size))
140 return retval; 140 return retval < 0 ? retval : -ENOEXEC;
141 141
142 /* determine stack size for this binary */ 142 /* determine stack size for this binary */
143 phdr = params->phdrs; 143 phdr = params->phdrs;
@@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
218 phdr->p_offset, 218 phdr->p_offset,
219 interpreter_name, 219 interpreter_name,
220 phdr->p_filesz); 220 phdr->p_filesz);
221 if (retval < 0) 221 if (unlikely(retval != phdr->p_filesz)) {
222 if (retval >= 0)
223 retval = -ENOEXEC;
222 goto error; 224 goto error;
225 }
223 226
224 retval = -ENOENT; 227 retval = -ENOENT;
225 if (interpreter_name[phdr->p_filesz - 1] != '\0') 228 if (interpreter_name[phdr->p_filesz - 1] != '\0')
@@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 248
246 retval = kernel_read(interpreter, 0, bprm->buf, 249 retval = kernel_read(interpreter, 0, bprm->buf,
247 BINPRM_BUF_SIZE); 250 BINPRM_BUF_SIZE);
248 if (retval < 0) 251 if (unlikely(retval != BINPRM_BUF_SIZE)) {
252 if (retval >= 0)
253 retval = -ENOEXEC;
249 goto error; 254 goto error;
255 }
250 256
251 interp_params.hdr = *((struct elfhdr *) bprm->buf); 257 interp_params.hdr = *((struct elfhdr *) bprm->buf);
252 break; 258 break;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f95ae9789c91..f9c88d0c8ced 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang++; /* Well, the bang-shell is implicit... */ 46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 0498b181dd52..3b40d45a3a16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm,
531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
532 532
533 down_write(&current->mm->mmap_sem); 533 down_write(&current->mm->mmap_sem);
534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); 534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
535 MAP_PRIVATE|MAP_EXECUTABLE, 0);
535 up_write(&current->mm->mmap_sem); 536 up_write(&current->mm->mmap_sem);
536 if (!textpos || textpos >= (unsigned long) -4096) { 537 if (!textpos || textpos >= (unsigned long) -4096) {
537 if (!textpos) 538 if (!textpos)
@@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void)
932 return register_binfmt(&flat_format); 933 return register_binfmt(&flat_format);
933} 934}
934 935
935static void __exit exit_flat_binfmt(void)
936{
937 unregister_binfmt(&flat_format);
938}
939
940/****************************************************************************/ 936/****************************************************************************/
941 937
942core_initcall(init_flat_binfmt); 938core_initcall(init_flat_binfmt);
943module_exit(exit_flat_binfmt);
944 939
945/****************************************************************************/ 940/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index b53c7e5f41bb..7191306367c5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -110,12 +110,17 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
110 char *iname_addr = iname; 110 char *iname_addr = iname;
111 int retval; 111 int retval;
112 int fd_binary = -1; 112 int fd_binary = -1;
113 struct files_struct *files = NULL;
114 113
115 retval = -ENOEXEC; 114 retval = -ENOEXEC;
116 if (!enabled) 115 if (!enabled)
117 goto _ret; 116 goto _ret;
118 117
118 retval = -ENOEXEC;
119 if (bprm->misc_bang)
120 goto _ret;
121
122 bprm->misc_bang = 1;
123
119 /* to keep locking time low, we copy the interpreter string */ 124 /* to keep locking time low, we copy the interpreter string */
120 read_lock(&entries_lock); 125 read_lock(&entries_lock);
121 fmt = check_file(bprm); 126 fmt = check_file(bprm);
@@ -133,21 +138,13 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
133 138
134 if (fmt->flags & MISC_FMT_OPEN_BINARY) { 139 if (fmt->flags & MISC_FMT_OPEN_BINARY) {
135 140
136 files = current->files;
137 retval = unshare_files();
138 if (retval < 0)
139 goto _ret;
140 if (files == current->files) {
141 put_files_struct(files);
142 files = NULL;
143 }
144 /* if the binary should be opened on behalf of the 141 /* if the binary should be opened on behalf of the
145 * interpreter than keep it open and assign descriptor 142 * interpreter than keep it open and assign descriptor
146 * to it */ 143 * to it */
147 fd_binary = get_unused_fd(); 144 fd_binary = get_unused_fd();
148 if (fd_binary < 0) { 145 if (fd_binary < 0) {
149 retval = fd_binary; 146 retval = fd_binary;
150 goto _unshare; 147 goto _ret;
151 } 148 }
152 fd_install(fd_binary, bprm->file); 149 fd_install(fd_binary, bprm->file);
153 150
@@ -205,10 +202,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
205 if (retval < 0) 202 if (retval < 0)
206 goto _error; 203 goto _error;
207 204
208 if (files) {
209 put_files_struct(files);
210 files = NULL;
211 }
212_ret: 205_ret:
213 return retval; 206 return retval;
214_error: 207_error:
@@ -216,9 +209,6 @@ _error:
216 sys_close(fd_binary); 209 sys_close(fd_binary);
217 bprm->interp_flags = 0; 210 bprm->interp_flags = 0;
218 bprm->interp_data = 0; 211 bprm->interp_data = 0;
219_unshare:
220 if (files)
221 reset_files_struct(current, files);
222 goto _ret; 212 goto _ret;
223} 213}
224 214
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index ab33939b12a7..9e3963f7ebf1 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
29 * Sorta complicated, but hopefully it will work. -TYT 29 * Sorta complicated, but hopefully it will work. -TYT
30 */ 30 */
31 31
32 bprm->sh_bang++; 32 bprm->sh_bang = 1;
33 allow_write_access(bprm->file); 33 allow_write_access(bprm->file);
34 fput(bprm->file); 34 fput(bprm->file);
35 bprm->file = NULL; 35 bprm->file = NULL;
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 14c63527c762..fdc36bfd6a7b 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -194,7 +194,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
194 unsigned long som_entry; 194 unsigned long som_entry;
195 struct som_hdr *som_ex; 195 struct som_hdr *som_ex;
196 struct som_exec_auxhdr *hpuxhdr; 196 struct som_exec_auxhdr *hpuxhdr;
197 struct files_struct *files;
198 197
199 /* Get the exec-header */ 198 /* Get the exec-header */
200 som_ex = (struct som_hdr *) bprm->buf; 199 som_ex = (struct som_hdr *) bprm->buf;
@@ -221,15 +220,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
221 goto out_free; 220 goto out_free;
222 } 221 }
223 222
224 files = current->files; /* Refcounted so ok */
225 retval = unshare_files();
226 if (retval < 0)
227 goto out_free;
228 if (files == current->files) {
229 put_files_struct(files);
230 files = NULL;
231 }
232
233 retval = get_unused_fd(); 223 retval = get_unused_fd();
234 if (retval < 0) 224 if (retval < 0)
235 goto out_free; 225 goto out_free;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df03..799f86deff24 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
937 return ERR_PTR(-EINVAL); 937 return ERR_PTR(-EINVAL);
938} 938}
939 939
940static void bio_copy_kern_endio(struct bio *bio, int err)
941{
942 struct bio_vec *bvec;
943 const int read = bio_data_dir(bio) == READ;
944 char *p = bio->bi_private;
945 int i;
946
947 __bio_for_each_segment(bvec, bio, i, 0) {
948 char *addr = page_address(bvec->bv_page);
949
950 if (read && !err)
951 memcpy(p, addr, bvec->bv_len);
952
953 __free_page(bvec->bv_page);
954 p += bvec->bv_len;
955 }
956
957 bio_put(bio);
958}
959
960/**
961 * bio_copy_kern - copy kernel address into bio
962 * @q: the struct request_queue for the bio
963 * @data: pointer to buffer to copy
964 * @len: length in bytes
965 * @gfp_mask: allocation flags for bio and page allocation
966 *
967 * copy the kernel address into a bio suitable for io to a block
968 * device. Returns an error pointer in case of error.
969 */
970struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
971 gfp_t gfp_mask, int reading)
972{
973 unsigned long kaddr = (unsigned long)data;
974 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
975 unsigned long start = kaddr >> PAGE_SHIFT;
976 const int nr_pages = end - start;
977 struct bio *bio;
978 struct bio_vec *bvec;
979 int i, ret;
980
981 bio = bio_alloc(gfp_mask, nr_pages);
982 if (!bio)
983 return ERR_PTR(-ENOMEM);
984
985 while (len) {
986 struct page *page;
987 unsigned int bytes = PAGE_SIZE;
988
989 if (bytes > len)
990 bytes = len;
991
992 page = alloc_page(q->bounce_gfp | gfp_mask);
993 if (!page) {
994 ret = -ENOMEM;
995 goto cleanup;
996 }
997
998 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
999 ret = -EINVAL;
1000 goto cleanup;
1001 }
1002
1003 len -= bytes;
1004 }
1005
1006 if (!reading) {
1007 void *p = data;
1008
1009 bio_for_each_segment(bvec, bio, i) {
1010 char *addr = page_address(bvec->bv_page);
1011
1012 memcpy(addr, p, bvec->bv_len);
1013 p += bvec->bv_len;
1014 }
1015 }
1016
1017 bio->bi_private = data;
1018 bio->bi_end_io = bio_copy_kern_endio;
1019 return bio;
1020cleanup:
1021 bio_for_each_segment(bvec, bio, i)
1022 __free_page(bvec->bv_page);
1023
1024 bio_put(bio);
1025
1026 return ERR_PTR(ret);
1027}
1028
940/* 1029/*
941 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 1030 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
942 * for performing direct-IO in BIOs. 1031 * for performing direct-IO in BIOs.
@@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user); 1362EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user); 1363EXPORT_SYMBOL(bio_unmap_user);
1275EXPORT_SYMBOL(bio_map_kern); 1364EXPORT_SYMBOL(bio_map_kern);
1365EXPORT_SYMBOL(bio_copy_kern);
1276EXPORT_SYMBOL(bio_pair_release); 1366EXPORT_SYMBOL(bio_pair_release);
1277EXPORT_SYMBOL(bio_split); 1367EXPORT_SYMBOL(bio_split);
1278EXPORT_SYMBOL(bio_split_pool); 1368EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 39ff14403d13..a073f3f4f013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,16 +360,19 @@ void invalidate_bdev(struct block_device *bdev)
360 */ 360 */
361static void free_more_memory(void) 361static void free_more_memory(void)
362{ 362{
363 struct zone **zones; 363 struct zone *zone;
364 pg_data_t *pgdat; 364 int nid;
365 365
366 wakeup_pdflush(1024); 366 wakeup_pdflush(1024);
367 yield(); 367 yield();
368 368
369 for_each_online_pgdat(pgdat) { 369 for_each_online_node(nid) {
370 zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; 370 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
371 if (*zones) 371 gfp_zone(GFP_NOFS), NULL,
372 try_to_free_pages(zones, 0, GFP_NOFS); 372 &zone);
373 if (zone)
374 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
375 GFP_NOFS);
373 } 376 }
374} 377}
375 378
@@ -1098,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1098 1101
1099 printk(KERN_ERR "%s: requested out-of-range block %llu for " 1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1100 "device %s\n", 1103 "device %s\n",
1101 __FUNCTION__, (unsigned long long)block, 1104 __func__, (unsigned long long)block,
1102 bdevname(bdev, b)); 1105 bdevname(bdev, b));
1103 return -EIO; 1106 return -EIO;
1104 } 1107 }
@@ -2208,8 +2211,8 @@ out:
2208 return err; 2211 return err;
2209} 2212}
2210 2213
2211int cont_expand_zero(struct file *file, struct address_space *mapping, 2214static int cont_expand_zero(struct file *file, struct address_space *mapping,
2212 loff_t pos, loff_t *bytes) 2215 loff_t pos, loff_t *bytes)
2213{ 2216{
2214 struct inode *inode = mapping->host; 2217 struct inode *inode = mapping->host;
2215 unsigned blocksize = 1 << inode->i_blkbits; 2218 unsigned blocksize = 1 << inode->i_blkbits;
@@ -2243,6 +2246,8 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
2243 goto out; 2246 goto out;
2244 BUG_ON(err != len); 2247 BUG_ON(err != len);
2245 err = 0; 2248 err = 0;
2249
2250 balance_dirty_pages_ratelimited(mapping);
2246 } 2251 }
2247 2252
2248 /* page covers the boundary, find the boundary offset */ 2253 /* page covers the boundary, find the boundary offset */
@@ -2323,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2323 return 0; 2328 return 0;
2324} 2329}
2325 2330
2326int generic_commit_write(struct file *file, struct page *page,
2327 unsigned from, unsigned to)
2328{
2329 struct inode *inode = page->mapping->host;
2330 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2331 __block_commit_write(inode,page,from,to);
2332 /*
2333 * No need to use i_size_read() here, the i_size
2334 * cannot change under us because we hold i_mutex.
2335 */
2336 if (pos > inode->i_size) {
2337 i_size_write(inode, pos);
2338 mark_inode_dirty(inode);
2339 }
2340 return 0;
2341}
2342
2343/* 2331/*
2344 * block_page_mkwrite() is not allowed to change the file size as it gets 2332 * block_page_mkwrite() is not allowed to change the file size as it gets
2345 * called from a page fault handler when a page is first dirtied. Hence we must 2333 * called from a page fault handler when a page is first dirtied. Hence we must
@@ -3180,8 +3168,7 @@ static void recalc_bh_state(void)
3180 3168
3181struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 3169struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3182{ 3170{
3183 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, 3171 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3184 set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
3185 if (ret) { 3172 if (ret) {
3186 INIT_LIST_HEAD(&ret->b_assoc_buffers); 3173 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3187 get_cpu_var(bh_accounting).nr++; 3174 get_cpu_var(bh_accounting).nr++;
@@ -3311,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync);
3311EXPORT_SYMBOL(file_fsync); 3298EXPORT_SYMBOL(file_fsync);
3312EXPORT_SYMBOL(fsync_bdev); 3299EXPORT_SYMBOL(fsync_bdev);
3313EXPORT_SYMBOL(generic_block_bmap); 3300EXPORT_SYMBOL(generic_block_bmap);
3314EXPORT_SYMBOL(generic_commit_write);
3315EXPORT_SYMBOL(generic_cont_expand_simple); 3301EXPORT_SYMBOL(generic_cont_expand_simple);
3316EXPORT_SYMBOL(init_buffer); 3302EXPORT_SYMBOL(init_buffer);
3317EXPORT_SYMBOL(invalidate_bdev); 3303EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 038674aa88a7..68e510b88457 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,6 @@ static struct char_device_struct {
55 unsigned int baseminor; 55 unsigned int baseminor;
56 int minorct; 56 int minorct;
57 char name[64]; 57 char name[64];
58 struct file_operations *fops;
59 struct cdev *cdev; /* will die */ 58 struct cdev *cdev; /* will die */
60} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
61 60
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index dbd91461853c..05c9da6181c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -8,7 +8,8 @@ of second share to disconnected server session (autoreconnect on this).
8Add ability to modify cifs acls for handling chmod (when mounted with 8Add ability to modify cifs acls for handling chmod (when mounted with
9cifsacl flag). Fix prefixpath path separator so we can handle mounts 9cifsacl flag). Fix prefixpath path separator so we can handle mounts
10with prefixpaths longer than one directory (one path component) when 10with prefixpaths longer than one directory (one path component) when
11mounted to Windows servers. 11mounted to Windows servers. Fix slow file open when cifsacl
12enabled.
12 13
13Version 1.51 14Version 1.51
14------------ 15------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 50306229b0f9..621aa1a85971 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -3,7 +3,14 @@ features such as hierarchical dfs like namespace, hardlinks, locking and more.
3It was designed to comply with the SNIA CIFS Technical Reference (which 3It was designed to comply with the SNIA CIFS Technical Reference (which
4supersedes the 1992 X/Open SMB Standard) as well as to perform best practice 4supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
5practical interoperability with Windows 2000, Windows XP, Samba and equivalent 5practical interoperability with Windows 2000, Windows XP, Samba and equivalent
6servers. 6servers. This code was developed in participation with the Protocol Freedom
7Information Foundation.
8
9Please see
10 http://protocolfreedom.org/ and
11 http://samba.org/samba/PFIF/
12for more details.
13
7 14
8For questions or bug reports please contact: 15For questions or bug reports please contact:
9 sfrench@samba.org (sfrench@us.ibm.com) 16 sfrench@samba.org (sfrench@us.ibm.com)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0228ed06069e..cc950f69e51e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -468,7 +468,7 @@ cifs_proc_init(void)
468{ 468{
469 struct proc_dir_entry *pde; 469 struct proc_dir_entry *pde;
470 470
471 proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); 471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 472 if (proc_fs_cifs == NULL)
473 return; 473 return;
474 474
@@ -559,7 +559,7 @@ cifs_proc_clean(void)
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 560 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("cifs", proc_root_fs); 562 remove_proc_entry("fs/cifs", NULL);
563} 563}
564 564
565static int 565static int
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 56c924033b78..95024c066d89 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -23,16 +23,28 @@
23#include "dns_resolve.h" 23#include "dns_resolve.h"
24#include "cifs_debug.h" 24#include "cifs_debug.h"
25 25
26LIST_HEAD(cifs_dfs_automount_list); 26static LIST_HEAD(cifs_dfs_automount_list);
27 27
28/* 28static void cifs_dfs_expire_automounts(struct work_struct *work);
29 * DFS functions 29static DECLARE_DELAYED_WORK(cifs_dfs_automount_task,
30*/ 30 cifs_dfs_expire_automounts);
31static int cifs_dfs_mountpoint_expiry_timeout = 500 * HZ;
32
33static void cifs_dfs_expire_automounts(struct work_struct *work)
34{
35 struct list_head *list = &cifs_dfs_automount_list;
36
37 mark_mounts_for_expiry(list);
38 if (!list_empty(list))
39 schedule_delayed_work(&cifs_dfs_automount_task,
40 cifs_dfs_mountpoint_expiry_timeout);
41}
31 42
32void dfs_shrink_umount_helper(struct vfsmount *vfsmnt) 43void cifs_dfs_release_automount_timer(void)
33{ 44{
34 mark_mounts_for_expiry(&cifs_dfs_automount_list); 45 BUG_ON(!list_empty(&cifs_dfs_automount_list));
35 mark_mounts_for_expiry(&cifs_dfs_automount_list); 46 cancel_delayed_work(&cifs_dfs_automount_task);
47 flush_scheduled_work();
36} 48}
37 49
38/** 50/**
@@ -261,10 +273,11 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
261 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); 273 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
262 switch (err) { 274 switch (err) {
263 case 0: 275 case 0:
264 dput(nd->path.dentry); 276 path_put(&nd->path);
265 mntput(nd->path.mnt);
266 nd->path.mnt = newmnt; 277 nd->path.mnt = newmnt;
267 nd->path.dentry = dget(newmnt->mnt_root); 278 nd->path.dentry = dget(newmnt->mnt_root);
279 schedule_delayed_work(&cifs_dfs_automount_task,
280 cifs_dfs_mountpoint_expiry_timeout);
268 break; 281 break;
269 case -EBUSY: 282 case -EBUSY:
270 /* someone else made a mount here whilst we were busy */ 283 /* someone else made a mount here whilst we were busy */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1cb5b0a9f2ac..e99d4faf5f02 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -516,7 +516,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
516 516
517/* Convert permission bits from mode to equivalent CIFS ACL */ 517/* Convert permission bits from mode to equivalent CIFS ACL */
518static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, 518static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
519 int acl_len, struct inode *inode, __u64 nmode) 519 struct inode *inode, __u64 nmode)
520{ 520{
521 int rc = 0; 521 int rc = 0;
522 __u32 dacloffset; 522 __u32 dacloffset;
@@ -692,14 +692,14 @@ void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid)
692int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) 692int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
693{ 693{
694 int rc = 0; 694 int rc = 0;
695 __u32 acllen = 0; 695 __u32 secdesclen = 0;
696 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ 696 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
697 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ 697 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
698 698
699 cFYI(DBG2, ("set ACL from mode for %s", path)); 699 cFYI(DBG2, ("set ACL from mode for %s", path));
700 700
701 /* Get the security descriptor */ 701 /* Get the security descriptor */
702 pntsd = get_cifs_acl(&acllen, inode, path, NULL); 702 pntsd = get_cifs_acl(&secdesclen, inode, path, NULL);
703 703
704 /* Add three ACEs for owner, group, everyone getting rid of 704 /* Add three ACEs for owner, group, everyone getting rid of
705 other ACEs as chmod disables ACEs and set the security descriptor */ 705 other ACEs as chmod disables ACEs and set the security descriptor */
@@ -709,20 +709,22 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
709 set security descriptor request security descriptor 709 set security descriptor request security descriptor
710 parameters, and secuirty descriptor itself */ 710 parameters, and secuirty descriptor itself */
711 711
712 pnntsd = kmalloc(acllen, GFP_KERNEL); 712 secdesclen = secdesclen < DEFSECDESCLEN ?
713 DEFSECDESCLEN : secdesclen;
714 pnntsd = kmalloc(secdesclen, GFP_KERNEL);
713 if (!pnntsd) { 715 if (!pnntsd) {
714 cERROR(1, ("Unable to allocate security descriptor")); 716 cERROR(1, ("Unable to allocate security descriptor"));
715 kfree(pntsd); 717 kfree(pntsd);
716 return (-ENOMEM); 718 return (-ENOMEM);
717 } 719 }
718 720
719 rc = build_sec_desc(pntsd, pnntsd, acllen, inode, nmode); 721 rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
720 722
721 cFYI(DBG2, ("build_sec_desc rc: %d", rc)); 723 cFYI(DBG2, ("build_sec_desc rc: %d", rc));
722 724
723 if (!rc) { 725 if (!rc) {
724 /* Set the security descriptor */ 726 /* Set the security descriptor */
725 rc = set_cifs_acl(pnntsd, acllen, inode, path); 727 rc = set_cifs_acl(pnntsd, secdesclen, inode, path);
726 cFYI(DBG2, ("set_cifs_acl rc: %d", rc)); 728 cFYI(DBG2, ("set_cifs_acl rc: %d", rc));
727 } 729 }
728 730
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 93a7c3462ea2..6c8096cf5155 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -27,6 +27,7 @@
27#define NUM_SUBAUTHS 5 /* number of sub authority fields */ 27#define NUM_SUBAUTHS 5 /* number of sub authority fields */
28#define NUM_WK_SIDS 7 /* number of well known sids */ 28#define NUM_WK_SIDS 7 /* number of well known sids */
29#define SIDNAMELENGTH 20 /* long enough for the ones we care about */ 29#define SIDNAMELENGTH 20 /* long enough for the ones we care about */
30#define DEFSECDESCLEN 192 /* sec desc len contaiting a dacl with three aces */
30 31
31#define READ_BIT 0x4 32#define READ_BIT 0x4
32#define WRITE_BIT 0x2 33#define WRITE_BIT 0x2
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a04b17e5a9d0..39c2cbdface7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -466,16 +466,11 @@ static struct quotactl_ops cifs_quotactl_ops = {
466}; 466};
467#endif 467#endif
468 468
469static void cifs_umount_begin(struct vfsmount *vfsmnt, int flags) 469static void cifs_umount_begin(struct super_block *sb)
470{ 470{
471 struct cifs_sb_info *cifs_sb; 471 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
472 struct cifsTconInfo *tcon; 472 struct cifsTconInfo *tcon;
473 473
474 dfs_shrink_umount_helper(vfsmnt);
475
476 if (!(flags & MNT_FORCE))
477 return;
478 cifs_sb = CIFS_SB(vfsmnt->mnt_sb);
479 if (cifs_sb == NULL) 474 if (cifs_sb == NULL)
480 return; 475 return;
481 476
@@ -1100,6 +1095,7 @@ exit_cifs(void)
1100 cFYI(DBG2, ("exit_cifs")); 1095 cFYI(DBG2, ("exit_cifs"));
1101 cifs_proc_clean(); 1096 cifs_proc_clean();
1102#ifdef CONFIG_CIFS_DFS_UPCALL 1097#ifdef CONFIG_CIFS_DFS_UPCALL
1098 cifs_dfs_release_automount_timer();
1103 unregister_key_type(&key_type_dns_resolver); 1099 unregister_key_type(&key_type_dns_resolver);
1104#endif 1100#endif
1105#ifdef CONFIG_CIFS_UPCALL 1101#ifdef CONFIG_CIFS_UPCALL
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68978306c3ca..e1dd9f32e1d7 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -62,11 +62,9 @@ extern int cifs_setattr(struct dentry *, struct iattr *);
62 62
63extern const struct inode_operations cifs_file_inode_ops; 63extern const struct inode_operations cifs_file_inode_ops;
64extern const struct inode_operations cifs_symlink_inode_ops; 64extern const struct inode_operations cifs_symlink_inode_ops;
65extern struct list_head cifs_dfs_automount_list;
66extern struct inode_operations cifs_dfs_referral_inode_operations; 65extern struct inode_operations cifs_dfs_referral_inode_operations;
67 66
68 67
69
70/* Functions related to files and directories */ 68/* Functions related to files and directories */
71extern const struct file_operations cifs_file_ops; 69extern const struct file_operations cifs_file_ops;
72extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ 70extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 47f79504f57b..9f49c2f3582c 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifspdu.h 2 * fs/cifs/cifspdu.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002,2007 4 * Copyright (c) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -163,7 +163,10 @@
163 path names in response */ 163 path names in response */
164#define SMBFLG2_KNOWS_EAS cpu_to_le16(2) 164#define SMBFLG2_KNOWS_EAS cpu_to_le16(2)
165#define SMBFLG2_SECURITY_SIGNATURE cpu_to_le16(4) 165#define SMBFLG2_SECURITY_SIGNATURE cpu_to_le16(4)
166#define SMBFLG2_COMPRESSED (8)
167#define SMBFLG2_SECURITY_SIGNATURE_REQUIRED (0x10)
166#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40) 168#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40)
169#define SMBFLG2_REPARSE_PATH (0x400)
167#define SMBFLG2_EXT_SEC cpu_to_le16(0x800) 170#define SMBFLG2_EXT_SEC cpu_to_le16(0x800)
168#define SMBFLG2_DFS cpu_to_le16(0x1000) 171#define SMBFLG2_DFS cpu_to_le16(0x1000)
169#define SMBFLG2_PAGING_IO cpu_to_le16(0x2000) 172#define SMBFLG2_PAGING_IO cpu_to_le16(0x2000)
@@ -305,7 +308,7 @@
305#define FILE_SHARE_DELETE 0x00000004 308#define FILE_SHARE_DELETE 0x00000004
306#define FILE_SHARE_ALL 0x00000007 309#define FILE_SHARE_ALL 0x00000007
307 310
308/* CreateDisposition flags */ 311/* CreateDisposition flags, similar to CreateAction as well */
309#define FILE_SUPERSEDE 0x00000000 312#define FILE_SUPERSEDE 0x00000000
310#define FILE_OPEN 0x00000001 313#define FILE_OPEN 0x00000001
311#define FILE_CREATE 0x00000002 314#define FILE_CREATE 0x00000002
@@ -317,15 +320,25 @@
317#define CREATE_NOT_FILE 0x00000001 /* if set must not be file */ 320#define CREATE_NOT_FILE 0x00000001 /* if set must not be file */
318#define CREATE_WRITE_THROUGH 0x00000002 321#define CREATE_WRITE_THROUGH 0x00000002
319#define CREATE_SEQUENTIAL 0x00000004 322#define CREATE_SEQUENTIAL 0x00000004
320#define CREATE_SYNC_ALERT 0x00000010 323#define CREATE_NO_BUFFER 0x00000008 /* should not buffer on srv */
321#define CREATE_ASYNC_ALERT 0x00000020 324#define CREATE_SYNC_ALERT 0x00000010 /* MBZ */
325#define CREATE_ASYNC_ALERT 0x00000020 /* MBZ */
322#define CREATE_NOT_DIR 0x00000040 /* if set must not be directory */ 326#define CREATE_NOT_DIR 0x00000040 /* if set must not be directory */
327#define CREATE_TREE_CONNECTION 0x00000080 /* should be zero */
328#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */
323#define CREATE_NO_EA_KNOWLEDGE 0x00000200 329#define CREATE_NO_EA_KNOWLEDGE 0x00000200
324#define CREATE_EIGHT_DOT_THREE 0x00000400 330#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete
331 open for recovery flag - should
332 be zero */
325#define CREATE_RANDOM_ACCESS 0x00000800 333#define CREATE_RANDOM_ACCESS 0x00000800
326#define CREATE_DELETE_ON_CLOSE 0x00001000 334#define CREATE_DELETE_ON_CLOSE 0x00001000
327#define CREATE_OPEN_BY_ID 0x00002000 335#define CREATE_OPEN_BY_ID 0x00002000
336#define CREATE_OPEN_BACKUP_INTN 0x00004000
337#define CREATE_NO_COMPRESSION 0x00008000
338#define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */
328#define OPEN_REPARSE_POINT 0x00200000 339#define OPEN_REPARSE_POINT 0x00200000
340#define OPEN_NO_RECALL 0x00400000
341#define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */
329#define CREATE_OPTIONS_MASK 0x007FFFFF 342#define CREATE_OPTIONS_MASK 0x007FFFFF
330#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ 343#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */
331 344
@@ -470,7 +483,7 @@ typedef struct lanman_neg_rsp {
470 483
471typedef struct negotiate_rsp { 484typedef struct negotiate_rsp {
472 struct smb_hdr hdr; /* wct = 17 */ 485 struct smb_hdr hdr; /* wct = 17 */
473 __le16 DialectIndex; 486 __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */
474 __u8 SecurityMode; 487 __u8 SecurityMode;
475 __le16 MaxMpxCount; 488 __le16 MaxMpxCount;
476 __le16 MaxNumberVcs; 489 __le16 MaxNumberVcs;
@@ -516,10 +529,11 @@ typedef struct negotiate_rsp {
516#define CAP_INFOLEVEL_PASSTHRU 0x00002000 529#define CAP_INFOLEVEL_PASSTHRU 0x00002000
517#define CAP_LARGE_READ_X 0x00004000 530#define CAP_LARGE_READ_X 0x00004000
518#define CAP_LARGE_WRITE_X 0x00008000 531#define CAP_LARGE_WRITE_X 0x00008000
532#define CAP_LWIO 0x00010000 /* support fctl_srv_req_resume_key */
519#define CAP_UNIX 0x00800000 533#define CAP_UNIX 0x00800000
520#define CAP_RESERVED 0x02000000 534#define CAP_COMPRESSED_DATA 0x02000000
521#define CAP_BULK_TRANSFER 0x20000000 535#define CAP_DYNAMIC_REAUTH 0x20000000
522#define CAP_COMPRESSED_DATA 0x40000000 536#define CAP_PERSISTENT_HANDLES 0x40000000
523#define CAP_EXTENDED_SECURITY 0x80000000 537#define CAP_EXTENDED_SECURITY 0x80000000
524 538
525typedef union smb_com_session_setup_andx { 539typedef union smb_com_session_setup_andx {
@@ -668,9 +682,7 @@ typedef struct smb_com_tconx_req {
668} __attribute__((packed)) TCONX_REQ; 682} __attribute__((packed)) TCONX_REQ;
669 683
670typedef struct smb_com_tconx_rsp { 684typedef struct smb_com_tconx_rsp {
671 struct smb_hdr hdr; /* wct = 3 note that Win2000 has sent wct = 7 685 struct smb_hdr hdr; /* wct = 3 , not extended response */
672 in some cases on responses. Four unspecified
673 words followed OptionalSupport */
674 __u8 AndXCommand; 686 __u8 AndXCommand;
675 __u8 AndXReserved; 687 __u8 AndXReserved;
676 __le16 AndXOffset; 688 __le16 AndXOffset;
@@ -680,13 +692,48 @@ typedef struct smb_com_tconx_rsp {
680 /* STRING NativeFileSystem */ 692 /* STRING NativeFileSystem */
681} __attribute__((packed)) TCONX_RSP; 693} __attribute__((packed)) TCONX_RSP;
682 694
695typedef struct smb_com_tconx_rsp_ext {
696 struct smb_hdr hdr; /* wct = 7, extended response */
697 __u8 AndXCommand;
698 __u8 AndXReserved;
699 __le16 AndXOffset;
700 __le16 OptionalSupport; /* see below */
701 __le32 MaximalShareAccessRights;
702 __le32 GuestMaximalShareAccessRights;
703 __u16 ByteCount;
704 unsigned char Service[1]; /* always ASCII, not Unicode */
705 /* STRING NativeFileSystem */
706} __attribute__((packed)) TCONX_RSP_EXT;
707
708
683/* tree connect Flags */ 709/* tree connect Flags */
684#define DISCONNECT_TID 0x0001 710#define DISCONNECT_TID 0x0001
711#define TCON_EXTENDED_SIGNATURES 0x0004
685#define TCON_EXTENDED_SECINFO 0x0008 712#define TCON_EXTENDED_SECINFO 0x0008
713
686/* OptionalSupport bits */ 714/* OptionalSupport bits */
687#define SMB_SUPPORT_SEARCH_BITS 0x0001 /* "must have" directory search bits 715#define SMB_SUPPORT_SEARCH_BITS 0x0001 /* "must have" directory search bits
688 (exclusive searches supported) */ 716 (exclusive searches supported) */
689#define SMB_SHARE_IS_IN_DFS 0x0002 717#define SMB_SHARE_IS_IN_DFS 0x0002
718#define SMB_CSC_MASK 0x000C
719/* CSC flags defined as follows */
720#define SMB_CSC_CACHE_MANUAL_REINT 0x0000
721#define SMB_CSC_CACHE_AUTO_REINT 0x0004
722#define SMB_CSC_CACHE_VDO 0x0008
723#define SMB_CSC_NO_CACHING 0x000C
724
725#define SMB_UNIQUE_FILE_NAME 0x0010
726#define SMB_EXTENDED_SIGNATURES 0x0020
727
728/* services
729 *
730 * A: ie disk
731 * LPT1: ie printer
732 * IPC ie named pipe
733 * COMM
734 * ????? ie any type
735 *
736 */
690 737
691typedef struct smb_com_logoff_andx_req { 738typedef struct smb_com_logoff_andx_req {
692 struct smb_hdr hdr; /* wct = 2 */ 739 struct smb_hdr hdr; /* wct = 2 */
@@ -750,6 +797,17 @@ typedef struct smb_com_findclose_req {
750#define COMM_DEV_TYPE 0x0004 797#define COMM_DEV_TYPE 0x0004
751#define UNKNOWN_TYPE 0xFFFF 798#define UNKNOWN_TYPE 0xFFFF
752 799
800/* Device Type or File Status Flags */
801#define NO_EAS 0x0001
802#define NO_SUBSTREAMS 0x0002
803#define NO_REPARSETAG 0x0004
804/* following flags can apply if pipe */
805#define ICOUNT_MASK 0x00FF
806#define PIPE_READ_MODE 0x0100
807#define NAMED_PIPE_TYPE 0x0400
808#define PIPE_END_POINT 0x0800
809#define BLOCKING_NAMED_PIPE 0x8000
810
753typedef struct smb_com_open_req { /* also handles create */ 811typedef struct smb_com_open_req { /* also handles create */
754 struct smb_hdr hdr; /* wct = 24 */ 812 struct smb_hdr hdr; /* wct = 24 */
755 __u8 AndXCommand; 813 __u8 AndXCommand;
@@ -758,7 +816,7 @@ typedef struct smb_com_open_req { /* also handles create */
758 __u8 Reserved; /* Must Be Zero */ 816 __u8 Reserved; /* Must Be Zero */
759 __le16 NameLength; 817 __le16 NameLength;
760 __le32 OpenFlags; 818 __le32 OpenFlags;
761 __le32 RootDirectoryFid; 819 __u32 RootDirectoryFid;
762 __le32 DesiredAccess; 820 __le32 DesiredAccess;
763 __le64 AllocationSize; 821 __le64 AllocationSize;
764 __le32 FileAttributes; 822 __le32 FileAttributes;
@@ -801,6 +859,32 @@ typedef struct smb_com_open_rsp {
801 __u16 ByteCount; /* bct = 0 */ 859 __u16 ByteCount; /* bct = 0 */
802} __attribute__((packed)) OPEN_RSP; 860} __attribute__((packed)) OPEN_RSP;
803 861
862typedef struct smb_com_open_rsp_ext {
863 struct smb_hdr hdr; /* wct = 42 but meaningless due to MS bug? */
864 __u8 AndXCommand;
865 __u8 AndXReserved;
866 __le16 AndXOffset;
867 __u8 OplockLevel;
868 __u16 Fid;
869 __le32 CreateAction;
870 __le64 CreationTime;
871 __le64 LastAccessTime;
872 __le64 LastWriteTime;
873 __le64 ChangeTime;
874 __le32 FileAttributes;
875 __le64 AllocationSize;
876 __le64 EndOfFile;
877 __le16 FileType;
878 __le16 DeviceState;
879 __u8 DirectoryFlag;
880 __u8 VolumeGUID[16];
881 __u64 FileId; /* note no endian conversion - is opaque UniqueID */
882 __le32 MaximalAccessRights;
883 __le32 GuestMaximalAccessRights;
884 __u16 ByteCount; /* bct = 0 */
885} __attribute__((packed)) OPEN_RSP_EXT;
886
887
804/* format of legacy open request */ 888/* format of legacy open request */
805typedef struct smb_com_openx_req { 889typedef struct smb_com_openx_req {
806 struct smb_hdr hdr; /* wct = 15 */ 890 struct smb_hdr hdr; /* wct = 15 */
@@ -1703,6 +1787,12 @@ typedef struct smb_com_transaction2_fnext_rsp_parms {
1703#define SMB_QUERY_CIFS_UNIX_INFO 0x200 1787#define SMB_QUERY_CIFS_UNIX_INFO 0x200
1704#define SMB_QUERY_POSIX_FS_INFO 0x201 1788#define SMB_QUERY_POSIX_FS_INFO 0x201
1705#define SMB_QUERY_POSIX_WHO_AM_I 0x202 1789#define SMB_QUERY_POSIX_WHO_AM_I 0x202
1790#define SMB_REQUEST_TRANSPORT_ENCRYPTION 0x203
1791#define SMB_QUERY_FS_PROXY 0x204 /* WAFS enabled. Returns structure
1792 FILE_SYSTEM__UNIX_INFO to tell
1793 whether new NTIOCTL available
1794 (0xACE) for WAN friendly SMB
1795 operations to be carried */
1706#define SMB_QUERY_LABEL_INFO 0x3ea 1796#define SMB_QUERY_LABEL_INFO 0x3ea
1707#define SMB_QUERY_FS_QUOTA_INFO 0x3ee 1797#define SMB_QUERY_FS_QUOTA_INFO 0x3ee
1708#define SMB_QUERY_FS_FULL_SIZE_INFO 0x3ef 1798#define SMB_QUERY_FS_FULL_SIZE_INFO 0x3ef
@@ -1959,7 +2049,10 @@ typedef struct {
1959#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up 2049#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up
1960 to 0xFFFF00 */ 2050 to 0xFFFF00 */
1961#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 2051#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080
1962 2052#define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */
2053#define CIFS_UNIX_TRANPSORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */
2054#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and
2055 QFS PROXY call */
1963#ifdef CONFIG_CIFS_POSIX 2056#ifdef CONFIG_CIFS_POSIX
1964/* Can not set pathnames cap yet until we send new posix create SMB since 2057/* Can not set pathnames cap yet until we send new posix create SMB since
1965 otherwise server can treat such handles opened with older ntcreatex 2058 otherwise server can treat such handles opened with older ntcreatex
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 7e5e0e78cd72..50f9fdae19b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -84,6 +84,7 @@ extern __u16 GetNextMid(struct TCP_Server_Info *server);
84extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, 84extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
85 struct cifsTconInfo *); 85 struct cifsTconInfo *);
86extern void DeleteOplockQEntry(struct oplock_q_entry *); 86extern void DeleteOplockQEntry(struct oplock_q_entry *);
87extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
87extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601); 88extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601);
88extern u64 cifs_UnixTimeToNT(struct timespec); 89extern u64 cifs_UnixTimeToNT(struct timespec);
89extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); 90extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
@@ -103,13 +104,7 @@ extern int mode_to_acl(struct inode *inode, const char *path, __u64);
103extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 104extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
104 const char *); 105 const char *);
105extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 106extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
106#ifdef CONFIG_CIFS_DFS_UPCALL 107extern void cifs_dfs_release_automount_timer(void);
107extern void dfs_shrink_umount_helper(struct vfsmount *vfsmnt);
108#else
109static inline void dfs_shrink_umount_helper(struct vfsmount *vfsmnt)
110{
111}
112#endif /* DFS_UPCALL */
113void cifs_proc_init(void); 108void cifs_proc_init(void);
114void cifs_proc_clean(void); 109void cifs_proc_clean(void);
115 110
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 30bbe448e260..4728fa982a4e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -165,17 +165,19 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
165 rc = CIFSTCon(0, tcon->ses, tcon->treeName, 165 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
166 tcon, nls_codepage); 166 tcon, nls_codepage);
167 up(&tcon->ses->sesSem); 167 up(&tcon->ses->sesSem);
168 /* tell server which Unix caps we support */
169 if (tcon->ses->capabilities & CAP_UNIX)
170 reset_cifs_unix_caps(0 /* no xid */,
171 tcon,
172 NULL /* we do not know sb */,
173 NULL /* no vol info */);
174 /* BB FIXME add code to check if wsize needs 168 /* BB FIXME add code to check if wsize needs
175 update due to negotiated smb buffer size 169 update due to negotiated smb buffer size
176 shrinking */ 170 shrinking */
177 if (rc == 0) 171 if (rc == 0) {
178 atomic_inc(&tconInfoReconnectCount); 172 atomic_inc(&tconInfoReconnectCount);
173 /* tell server Unix caps we support */
174 if (tcon->ses->capabilities & CAP_UNIX)
175 reset_cifs_unix_caps(
176 0 /* no xid */,
177 tcon,
178 NULL /* we do not know sb */,
179 NULL /* no vol info */);
180 }
179 181
180 cFYI(1, ("reconnect tcon rc = %d", rc)); 182 cFYI(1, ("reconnect tcon rc = %d", rc));
181 /* Removed call to reopen open files here. 183 /* Removed call to reopen open files here.
@@ -310,17 +312,19 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
310 rc = CIFSTCon(0, tcon->ses, tcon->treeName, 312 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
311 tcon, nls_codepage); 313 tcon, nls_codepage);
312 up(&tcon->ses->sesSem); 314 up(&tcon->ses->sesSem);
313 /* tell server which Unix caps we support */
314 if (tcon->ses->capabilities & CAP_UNIX)
315 reset_cifs_unix_caps(0 /* no xid */,
316 tcon,
317 NULL /* do not know sb */,
318 NULL /* no vol info */);
319 /* BB FIXME add code to check if wsize needs 315 /* BB FIXME add code to check if wsize needs
320 update due to negotiated smb buffer size 316 update due to negotiated smb buffer size
321 shrinking */ 317 shrinking */
322 if (rc == 0) 318 if (rc == 0) {
323 atomic_inc(&tconInfoReconnectCount); 319 atomic_inc(&tconInfoReconnectCount);
320 /* tell server Unix caps we support */
321 if (tcon->ses->capabilities & CAP_UNIX)
322 reset_cifs_unix_caps(
323 0 /* no xid */,
324 tcon,
325 NULL /* do not know sb */,
326 NULL /* no vol info */);
327 }
324 328
325 cFYI(1, ("reconnect tcon rc = %d", rc)); 329 cFYI(1, ("reconnect tcon rc = %d", rc));
326 /* Removed call to reopen open files here. 330 /* Removed call to reopen open files here.
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8dbfa97cd18c..e17106730168 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3527,6 +3527,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3527 FreeXid(xid); 3527 FreeXid(xid);
3528 return 0; 3528 return 0;
3529 } 3529 }
3530 DeleteTconOplockQEntries(cifs_sb->tcon);
3530 tconInfoFree(cifs_sb->tcon); 3531 tconInfoFree(cifs_sb->tcon);
3531 if ((ses) && (ses->server)) { 3532 if ((ses) && (ses->server)) {
3532 /* save off task so we do not refer to ses later */ 3533 /* save off task so we do not refer to ses later */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index bc673c8c1e6b..e1031b9e2c55 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -161,12 +161,14 @@ static void cifs_unix_info_to_inode(struct inode *inode,
161 spin_unlock(&inode->i_lock); 161 spin_unlock(&inode->i_lock);
162} 162}
163 163
164static const unsigned char *cifs_get_search_path(struct cifsTconInfo *pTcon, 164static const unsigned char *cifs_get_search_path(struct cifs_sb_info *cifs_sb,
165 const char *search_path) 165 const char *search_path)
166{ 166{
167 int tree_len; 167 int tree_len;
168 int path_len; 168 int path_len;
169 int i;
169 char *tmp_path; 170 char *tmp_path;
171 struct cifsTconInfo *pTcon = cifs_sb->tcon;
170 172
171 if (!(pTcon->Flags & SMB_SHARE_IS_IN_DFS)) 173 if (!(pTcon->Flags & SMB_SHARE_IS_IN_DFS))
172 return search_path; 174 return search_path;
@@ -180,6 +182,11 @@ static const unsigned char *cifs_get_search_path(struct cifsTconInfo *pTcon,
180 return search_path; 182 return search_path;
181 183
182 strncpy(tmp_path, pTcon->treeName, tree_len); 184 strncpy(tmp_path, pTcon->treeName, tree_len);
185 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
186 for (i = 0; i < tree_len; i++) {
187 if (tmp_path[i] == '\\')
188 tmp_path[i] = '/';
189 }
183 strncpy(tmp_path+tree_len, search_path, path_len); 190 strncpy(tmp_path+tree_len, search_path, path_len);
184 tmp_path[tree_len+path_len] = 0; 191 tmp_path[tree_len+path_len] = 0;
185 return tmp_path; 192 return tmp_path;
@@ -199,7 +206,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
199 pTcon = cifs_sb->tcon; 206 pTcon = cifs_sb->tcon;
200 cFYI(1, ("Getting info on %s", search_path)); 207 cFYI(1, ("Getting info on %s", search_path));
201 208
202 full_path = cifs_get_search_path(pTcon, search_path); 209 full_path = cifs_get_search_path(cifs_sb, search_path);
203 210
204try_again_CIFSSMBUnixQPathInfo: 211try_again_CIFSSMBUnixQPathInfo:
205 /* could have done a find first instead but this returns more info */ 212 /* could have done a find first instead but this returns more info */
@@ -402,7 +409,7 @@ int cifs_get_inode_info(struct inode **pinode,
402 return -ENOMEM; 409 return -ENOMEM;
403 pfindData = (FILE_ALL_INFO *)buf; 410 pfindData = (FILE_ALL_INFO *)buf;
404 411
405 full_path = cifs_get_search_path(pTcon, search_path); 412 full_path = cifs_get_search_path(cifs_sb, search_path);
406 413
407try_again_CIFSSMBQPathInfo: 414try_again_CIFSSMBQPathInfo:
408 /* could do find first instead but this returns more info */ 415 /* could do find first instead but this returns more info */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3612d6c0a0bb..000ac509c98a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -142,6 +142,24 @@ void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
142 kmem_cache_free(cifs_oplock_cachep, oplockEntry); 142 kmem_cache_free(cifs_oplock_cachep, oplockEntry);
143} 143}
144 144
145
146void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
147{
148 struct oplock_q_entry *temp;
149
150 if (tcon == NULL)
151 return;
152
153 spin_lock(&GlobalMid_Lock);
154 list_for_each_entry(temp, &GlobalOplock_Q, qhead) {
155 if ((temp->tcon) && (temp->tcon == tcon)) {
156 list_del(&temp->qhead);
157 kmem_cache_free(cifs_oplock_cachep, temp);
158 }
159 }
160 spin_unlock(&GlobalMid_Lock);
161}
162
145int 163int
146smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, 164smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
147 unsigned int smb_buf_length, struct sockaddr *sin) 165 unsigned int smb_buf_length, struct sockaddr *sin)
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 95a54253c047..e1c854890f94 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
134 unsigned int valid; 134 unsigned int valid;
135 135
136 /* clean out */ 136 /* clean out */
137 vattr->va_mode = (umode_t) -1; 137 vattr->va_mode = -1;
138 vattr->va_uid = (vuid_t) -1; 138 vattr->va_uid = (vuid_t) -1;
139 vattr->va_gid = (vgid_t) -1; 139 vattr->va_gid = (vgid_t) -1;
140 vattr->va_size = (off_t) -1; 140 vattr->va_size = (off_t) -1;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index f89ff083079b..3d2580e00a3e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
345} 345}
346 346
347/* destruction routines: unlink, rmdir */ 347/* destruction routines: unlink, rmdir */
348int coda_unlink(struct inode *dir, struct dentry *de) 348static int coda_unlink(struct inode *dir, struct dentry *de)
349{ 349{
350 int error; 350 int error;
351 const char *name = de->d_name.name; 351 const char *name = de->d_name.name;
@@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de)
365 return 0; 365 return 0;
366} 366}
367 367
368int coda_rmdir(struct inode *dir, struct dentry *de) 368static int coda_rmdir(struct inode *dir, struct dentry *de)
369{ 369{
370 const char *name = de->d_name.name; 370 const char *name = de->d_name.name;
371 int len = de->d_name.len; 371 int len = de->d_name.len;
@@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
424 424
425 425
426/* file operations for directories */ 426/* file operations for directories */
427int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) 427static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
428{ 428{
429 struct coda_file_info *cfi; 429 struct coda_file_info *cfi;
430 struct file *host_file; 430 struct file *host_file;
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad30..332a869d2c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/namei.h> 25#include <linux/namei.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/vfs.h> 28#include <linux/vfs.h>
28#include <linux/ioctl.h> 29#include <linux/ioctl.h>
29#include <linux/init.h> 30#include <linux/init.h>
@@ -1634,7 +1635,7 @@ sticky:
1634 return ret; 1635 return ret;
1635} 1636}
1636 1637
1637#ifdef TIF_RESTORE_SIGMASK 1638#ifdef HAVE_SET_RESTORE_SIGMASK
1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, 1639asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1640 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1641 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1721,7 @@ sticky:
1720 if (sigmask) { 1721 if (sigmask) {
1721 memcpy(&current->saved_sigmask, &sigsaved, 1722 memcpy(&current->saved_sigmask, &sigsaved,
1722 sizeof(sigsaved)); 1723 sizeof(sigsaved));
1723 set_thread_flag(TIF_RESTORE_SIGMASK); 1724 set_restore_sigmask();
1724 } 1725 }
1725 } else if (sigmask) 1726 } else if (sigmask)
1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1727 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1791 if (sigmask) { 1792 if (sigmask) {
1792 memcpy(&current->saved_sigmask, &sigsaved, 1793 memcpy(&current->saved_sigmask, &sigsaved,
1793 sizeof(sigsaved)); 1794 sizeof(sigsaved));
1794 set_thread_flag(TIF_RESTORE_SIGMASK); 1795 set_restore_sigmask();
1795 } 1796 }
1796 ret = -ERESTARTNOHAND; 1797 ret = -ERESTARTNOHAND;
1797 } else if (sigmask) 1798 } else if (sigmask)
@@ -1825,7 +1826,7 @@ sticky:
1825 1826
1826 return ret; 1827 return ret;
1827} 1828}
1828#endif /* TIF_RESTORE_SIGMASK */ 1829#endif /* HAVE_SET_RESTORE_SIGMASK */
1829 1830
1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1831#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1831/* Stuff for NFS server syscalls... */ 1832/* Stuff for NFS server syscalls... */
@@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
2080 2081
2081#ifdef CONFIG_EPOLL 2082#ifdef CONFIG_EPOLL
2082 2083
2083#ifdef TIF_RESTORE_SIGMASK 2084#ifdef HAVE_SET_RESTORE_SIGMASK
2084asmlinkage long compat_sys_epoll_pwait(int epfd, 2085asmlinkage long compat_sys_epoll_pwait(int epfd,
2085 struct compat_epoll_event __user *events, 2086 struct compat_epoll_event __user *events,
2086 int maxevents, int timeout, 2087 int maxevents, int timeout,
@@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2117 if (err == -EINTR) { 2118 if (err == -EINTR) {
2118 memcpy(&current->saved_sigmask, &sigsaved, 2119 memcpy(&current->saved_sigmask, &sigsaved,
2119 sizeof(sigsaved)); 2120 sizeof(sigsaved));
2120 set_thread_flag(TIF_RESTORE_SIGMASK); 2121 set_restore_sigmask();
2121 } else 2122 } else
2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 2123 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2123 } 2124 }
2124 2125
2125 return err; 2126 return err;
2126} 2127}
2127#endif /* TIF_RESTORE_SIGMASK */ 2128#endif /* HAVE_SET_RESTORE_SIGMASK */
2128 2129
2129#endif /* CONFIG_EPOLL */ 2130#endif /* CONFIG_EPOLL */
2130 2131
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16b..97dba0d92348 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
1046 struct inode *inode = file->f_path.dentry->d_inode; 1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct vc_data *vc; 1047 struct vc_data *vc;
1048 1048
1049 if (file->f_op->ioctl != tty_ioctl) 1049 if (file->f_op->unlocked_ioctl != tty_ioctl)
1050 return -EINVAL; 1050 return -EINVAL;
1051 1051
1052 tty = (struct tty_struct *)file->private_data; 1052 tty = (struct tty_struct *)file->private_data;
1053 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 1053 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1054 return -EINVAL; 1054 return -EINVAL;
1055 1055
1056 if (tty->driver->ioctl != vt_ioctl) 1056 if (tty->ops->ioctl != vt_ioctl)
1057 return -EINVAL; 1057 return -EINVAL;
1058 1058
1059 vc = (struct vc_data *)tty->driver_data; 1059 vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a180..2b6cb23dd14e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
115 goto out; 115 goto out;
116 } 116 }
117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
118 __FUNCTION__, count, *ppos, buffer->page); 118 __func__, count, *ppos, buffer->page);
119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
120 buffer->count); 120 buffer->count);
121out: 121out:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778ee..b9a1d810346d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
49 .ra_pages = 0, /* No readahead */ 49 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
51}; 51};
52 52
53static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37d..8421cea7d8c7 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
92 92
93 root = d_alloc_root(inode); 93 root = d_alloc_root(inode);
94 if (!root) { 94 if (!root) {
95 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 95 pr_debug("%s: could not get root dentry!\n",__func__);
96 iput(inode); 96 iput(inode);
97 return -ENOMEM; 97 return -ENOMEM;
98 } 98 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff2..2a731ef5f305 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
210 if (size > PATH_MAX) 210 if (size > PATH_MAX)
211 return -ENAMETOOLONG; 211 return -ENAMETOOLONG;
212 212
213 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 213 pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
214 214
215 for (s = path; depth--; s += 3) 215 for (s = path; depth--; s += 3)
216 strcpy(s,"../"); 216 strcpy(s,"../");
217 217
218 fill_item_path(target, path, size); 218 fill_item_path(target, path, size);
219 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 219 pr_debug("%s: path = '%s'\n", __func__, path);
220 220
221 return 0; 221 return 0;
222} 222}
diff --git a/fs/dcache.c b/fs/dcache.c
index 43455776711e..3ee588d5f585 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1746,12 +1746,21 @@ shouldnt_be_hashed:
1746 goto shouldnt_be_hashed; 1746 goto shouldnt_be_hashed;
1747} 1747}
1748 1748
1749static int prepend(char **buffer, int *buflen, const char *str,
1750 int namelen)
1751{
1752 *buflen -= namelen;
1753 if (*buflen < 0)
1754 return -ENAMETOOLONG;
1755 *buffer -= namelen;
1756 memcpy(*buffer, str, namelen);
1757 return 0;
1758}
1759
1749/** 1760/**
1750 * d_path - return the path of a dentry 1761 * d_path - return the path of a dentry
1751 * @dentry: dentry to report 1762 * @path: the dentry/vfsmount to report
1752 * @vfsmnt: vfsmnt to which the dentry belongs 1763 * @root: root vfsmnt/dentry (may be modified by this function)
1753 * @root: root dentry
1754 * @rootmnt: vfsmnt to which the root dentry belongs
1755 * @buffer: buffer to return value in 1764 * @buffer: buffer to return value in
1756 * @buflen: buffer length 1765 * @buflen: buffer length
1757 * 1766 *
@@ -1761,23 +1770,22 @@ shouldnt_be_hashed:
1761 * Returns the buffer or an error code if the path was too long. 1770 * Returns the buffer or an error code if the path was too long.
1762 * 1771 *
1763 * "buflen" should be positive. Caller holds the dcache_lock. 1772 * "buflen" should be positive. Caller holds the dcache_lock.
1773 *
1774 * If path is not reachable from the supplied root, then the value of
1775 * root is changed (without modifying refcounts).
1764 */ 1776 */
1765static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, 1777char *__d_path(const struct path *path, struct path *root,
1766 struct path *root, char *buffer, int buflen) 1778 char *buffer, int buflen)
1767{ 1779{
1780 struct dentry *dentry = path->dentry;
1781 struct vfsmount *vfsmnt = path->mnt;
1768 char * end = buffer+buflen; 1782 char * end = buffer+buflen;
1769 char * retval; 1783 char * retval;
1770 int namelen; 1784
1771 1785 prepend(&end, &buflen, "\0", 1);
1772 *--end = '\0'; 1786 if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
1773 buflen--; 1787 (prepend(&end, &buflen, " (deleted)", 10) != 0))
1774 if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
1775 buflen -= 10;
1776 end -= 10;
1777 if (buflen < 0)
1778 goto Elong; 1788 goto Elong;
1779 memcpy(end, " (deleted)", 10);
1780 }
1781 1789
1782 if (buflen < 1) 1790 if (buflen < 1)
1783 goto Elong; 1791 goto Elong;
@@ -1804,13 +1812,10 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1804 } 1812 }
1805 parent = dentry->d_parent; 1813 parent = dentry->d_parent;
1806 prefetch(parent); 1814 prefetch(parent);
1807 namelen = dentry->d_name.len; 1815 if ((prepend(&end, &buflen, dentry->d_name.name,
1808 buflen -= namelen + 1; 1816 dentry->d_name.len) != 0) ||
1809 if (buflen < 0) 1817 (prepend(&end, &buflen, "/", 1) != 0))
1810 goto Elong; 1818 goto Elong;
1811 end -= namelen;
1812 memcpy(end, dentry->d_name.name, namelen);
1813 *--end = '/';
1814 retval = end; 1819 retval = end;
1815 dentry = parent; 1820 dentry = parent;
1816 } 1821 }
@@ -1818,12 +1823,12 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1818 return retval; 1823 return retval;
1819 1824
1820global_root: 1825global_root:
1821 namelen = dentry->d_name.len; 1826 retval += 1; /* hit the slash */
1822 buflen -= namelen; 1827 if (prepend(&retval, &buflen, dentry->d_name.name,
1823 if (buflen < 0) 1828 dentry->d_name.len) != 0)
1824 goto Elong; 1829 goto Elong;
1825 retval -= namelen-1; /* hit the slash */ 1830 root->mnt = vfsmnt;
1826 memcpy(retval, dentry->d_name.name, namelen); 1831 root->dentry = dentry;
1827 return retval; 1832 return retval;
1828Elong: 1833Elong:
1829 return ERR_PTR(-ENAMETOOLONG); 1834 return ERR_PTR(-ENAMETOOLONG);
@@ -1846,6 +1851,7 @@ char *d_path(struct path *path, char *buf, int buflen)
1846{ 1851{
1847 char *res; 1852 char *res;
1848 struct path root; 1853 struct path root;
1854 struct path tmp;
1849 1855
1850 /* 1856 /*
1851 * We have various synthetic filesystems that never get mounted. On 1857 * We have various synthetic filesystems that never get mounted. On
@@ -1859,10 +1865,11 @@ char *d_path(struct path *path, char *buf, int buflen)
1859 1865
1860 read_lock(&current->fs->lock); 1866 read_lock(&current->fs->lock);
1861 root = current->fs->root; 1867 root = current->fs->root;
1862 path_get(&current->fs->root); 1868 path_get(&root);
1863 read_unlock(&current->fs->lock); 1869 read_unlock(&current->fs->lock);
1864 spin_lock(&dcache_lock); 1870 spin_lock(&dcache_lock);
1865 res = __d_path(path->dentry, path->mnt, &root, buf, buflen); 1871 tmp = root;
1872 res = __d_path(path, &tmp, buf, buflen);
1866 spin_unlock(&dcache_lock); 1873 spin_unlock(&dcache_lock);
1867 path_put(&root); 1874 path_put(&root);
1868 return res; 1875 return res;
@@ -1890,6 +1897,48 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
1890} 1897}
1891 1898
1892/* 1899/*
1900 * Write full pathname from the root of the filesystem into the buffer.
1901 */
1902char *dentry_path(struct dentry *dentry, char *buf, int buflen)
1903{
1904 char *end = buf + buflen;
1905 char *retval;
1906
1907 spin_lock(&dcache_lock);
1908 prepend(&end, &buflen, "\0", 1);
1909 if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
1910 (prepend(&end, &buflen, "//deleted", 9) != 0))
1911 goto Elong;
1912 if (buflen < 1)
1913 goto Elong;
1914 /* Get '/' right */
1915 retval = end-1;
1916 *retval = '/';
1917
1918 for (;;) {
1919 struct dentry *parent;
1920 if (IS_ROOT(dentry))
1921 break;
1922
1923 parent = dentry->d_parent;
1924 prefetch(parent);
1925
1926 if ((prepend(&end, &buflen, dentry->d_name.name,
1927 dentry->d_name.len) != 0) ||
1928 (prepend(&end, &buflen, "/", 1) != 0))
1929 goto Elong;
1930
1931 retval = end;
1932 dentry = parent;
1933 }
1934 spin_unlock(&dcache_lock);
1935 return retval;
1936Elong:
1937 spin_unlock(&dcache_lock);
1938 return ERR_PTR(-ENAMETOOLONG);
1939}
1940
1941/*
1893 * NOTE! The user-level library version returns a 1942 * NOTE! The user-level library version returns a
1894 * character pointer. The kernel system call just 1943 * character pointer. The kernel system call just
1895 * returns the length of the buffer filled (which 1944 * returns the length of the buffer filled (which
@@ -1918,9 +1967,9 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1918 1967
1919 read_lock(&current->fs->lock); 1968 read_lock(&current->fs->lock);
1920 pwd = current->fs->pwd; 1969 pwd = current->fs->pwd;
1921 path_get(&current->fs->pwd); 1970 path_get(&pwd);
1922 root = current->fs->root; 1971 root = current->fs->root;
1923 path_get(&current->fs->root); 1972 path_get(&root);
1924 read_unlock(&current->fs->lock); 1973 read_unlock(&current->fs->lock);
1925 1974
1926 error = -ENOENT; 1975 error = -ENOENT;
@@ -1928,9 +1977,10 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1928 spin_lock(&dcache_lock); 1977 spin_lock(&dcache_lock);
1929 if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { 1978 if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
1930 unsigned long len; 1979 unsigned long len;
1980 struct path tmp = root;
1931 char * cwd; 1981 char * cwd;
1932 1982
1933 cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE); 1983 cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
1934 spin_unlock(&dcache_lock); 1984 spin_unlock(&dcache_lock);
1935 1985
1936 error = PTR_ERR(cwd); 1986 error = PTR_ERR(cwd);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f5..159a5efd6a8a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 * 13 *
14 */ 14 */
15 15
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e1207874..285b64a8b06e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h>
21#include <linux/idr.h>
20#include <linux/devpts_fs.h> 22#include <linux/devpts_fs.h>
21#include <linux/parser.h> 23#include <linux/parser.h>
22#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
26 28
27#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
28 30
31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock);
34
29static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
30static struct dentry *devpts_root; 36static struct dentry *devpts_root;
31 37
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
171 return lookup_one_len(s, root, sprintf(s, "%d", num)); 177 return lookup_one_len(s, root, sprintf(s, "%d", num));
172} 178}
173 179
180int devpts_new_index(void)
181{
182 int index;
183 int idr_ret;
184
185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM;
188 }
189
190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
192 if (idr_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN)
195 goto retry;
196 return -EIO;
197 }
198
199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO;
203 }
204 mutex_unlock(&allocated_ptys_lock);
205 return index;
206}
207
208void devpts_kill_index(int idx)
209{
210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock);
213}
214
174int devpts_pty_new(struct tty_struct *tty) 215int devpts_pty_new(struct tty_struct *tty)
175{ 216{
176 int number = tty->index; 217 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
177 struct tty_driver *driver = tty->driver; 218 struct tty_driver *driver = tty->driver;
178 dev_t device = MKDEV(driver->major, driver->minor_start+number); 219 dev_t device = MKDEV(driver->major, driver->minor_start+number);
179 struct dentry *dentry; 220 struct dentry *dentry;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515d..499e16759e96 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
200 200
201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
202 if (!dlm_kset) { 202 if (!dlm_kset) {
203 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 203 printk(KERN_WARNING "%s: can not create kset\n", __func__);
204 return -ENOMEM; 204 return -ENOMEM;
205 } 205 }
206 return 0; 206 return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de0..676073b8dda5 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/fdtable.h>
23 24
24int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
25 26
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
66 struct dnotify_struct **prev; 67 struct dnotify_struct **prev;
67 struct inode *inode; 68 struct inode *inode;
68 fl_owner_t id = current->files; 69 fl_owner_t id = current->files;
70 struct file *f;
69 int error = 0; 71 int error = 0;
70 72
71 if ((arg & ~DN_MULTISHOT) == 0) { 73 if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 94 prev = &odn->dn_next;
93 } 95 }
94 96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 107 if (error)
97 goto out_free; 108 goto out_free;
diff --git a/fs/dquot.c b/fs/dquot.c
index 41b9dbd68b0e..dfba1623cccb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -289,7 +289,15 @@ static void wait_on_dquot(struct dquot *dquot)
289 mutex_unlock(&dquot->dq_lock); 289 mutex_unlock(&dquot->dq_lock);
290} 290}
291 291
292#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) 292static inline int dquot_dirty(struct dquot *dquot)
293{
294 return test_bit(DQ_MOD_B, &dquot->dq_flags);
295}
296
297static inline int mark_dquot_dirty(struct dquot *dquot)
298{
299 return dquot->dq_sb->dq_op->mark_dirty(dquot);
300}
293 301
294int dquot_mark_dquot_dirty(struct dquot *dquot) 302int dquot_mark_dquot_dirty(struct dquot *dquot)
295{ 303{
@@ -1441,31 +1449,43 @@ static inline void set_enable_flags(struct quota_info *dqopt, int type)
1441 switch (type) { 1449 switch (type) {
1442 case USRQUOTA: 1450 case USRQUOTA:
1443 dqopt->flags |= DQUOT_USR_ENABLED; 1451 dqopt->flags |= DQUOT_USR_ENABLED;
1452 dqopt->flags &= ~DQUOT_USR_SUSPENDED;
1444 break; 1453 break;
1445 case GRPQUOTA: 1454 case GRPQUOTA:
1446 dqopt->flags |= DQUOT_GRP_ENABLED; 1455 dqopt->flags |= DQUOT_GRP_ENABLED;
1456 dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
1447 break; 1457 break;
1448 } 1458 }
1449} 1459}
1450 1460
1451static inline void reset_enable_flags(struct quota_info *dqopt, int type) 1461static inline void reset_enable_flags(struct quota_info *dqopt, int type,
1462 int remount)
1452{ 1463{
1453 switch (type) { 1464 switch (type) {
1454 case USRQUOTA: 1465 case USRQUOTA:
1455 dqopt->flags &= ~DQUOT_USR_ENABLED; 1466 dqopt->flags &= ~DQUOT_USR_ENABLED;
1467 if (remount)
1468 dqopt->flags |= DQUOT_USR_SUSPENDED;
1469 else
1470 dqopt->flags &= ~DQUOT_USR_SUSPENDED;
1456 break; 1471 break;
1457 case GRPQUOTA: 1472 case GRPQUOTA:
1458 dqopt->flags &= ~DQUOT_GRP_ENABLED; 1473 dqopt->flags &= ~DQUOT_GRP_ENABLED;
1474 if (remount)
1475 dqopt->flags |= DQUOT_GRP_SUSPENDED;
1476 else
1477 dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
1459 break; 1478 break;
1460 } 1479 }
1461} 1480}
1462 1481
1482
1463/* 1483/*
1464 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) 1484 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
1465 */ 1485 */
1466int vfs_quota_off(struct super_block *sb, int type) 1486int vfs_quota_off(struct super_block *sb, int type, int remount)
1467{ 1487{
1468 int cnt; 1488 int cnt, ret = 0;
1469 struct quota_info *dqopt = sb_dqopt(sb); 1489 struct quota_info *dqopt = sb_dqopt(sb);
1470 struct inode *toputinode[MAXQUOTAS]; 1490 struct inode *toputinode[MAXQUOTAS];
1471 1491
@@ -1475,9 +1495,17 @@ int vfs_quota_off(struct super_block *sb, int type)
1475 toputinode[cnt] = NULL; 1495 toputinode[cnt] = NULL;
1476 if (type != -1 && cnt != type) 1496 if (type != -1 && cnt != type)
1477 continue; 1497 continue;
1498 /* If we keep inodes of quota files after remount and quotaoff
1499 * is called, drop kept inodes. */
1500 if (!remount && sb_has_quota_suspended(sb, cnt)) {
1501 iput(dqopt->files[cnt]);
1502 dqopt->files[cnt] = NULL;
1503 reset_enable_flags(dqopt, cnt, 0);
1504 continue;
1505 }
1478 if (!sb_has_quota_enabled(sb, cnt)) 1506 if (!sb_has_quota_enabled(sb, cnt))
1479 continue; 1507 continue;
1480 reset_enable_flags(dqopt, cnt); 1508 reset_enable_flags(dqopt, cnt, remount);
1481 1509
1482 /* Note: these are blocking operations */ 1510 /* Note: these are blocking operations */
1483 drop_dquot_ref(sb, cnt); 1511 drop_dquot_ref(sb, cnt);
@@ -1493,7 +1521,8 @@ int vfs_quota_off(struct super_block *sb, int type)
1493 put_quota_format(dqopt->info[cnt].dqi_format); 1521 put_quota_format(dqopt->info[cnt].dqi_format);
1494 1522
1495 toputinode[cnt] = dqopt->files[cnt]; 1523 toputinode[cnt] = dqopt->files[cnt];
1496 dqopt->files[cnt] = NULL; 1524 if (!remount)
1525 dqopt->files[cnt] = NULL;
1497 dqopt->info[cnt].dqi_flags = 0; 1526 dqopt->info[cnt].dqi_flags = 0;
1498 dqopt->info[cnt].dqi_igrace = 0; 1527 dqopt->info[cnt].dqi_igrace = 0;
1499 dqopt->info[cnt].dqi_bgrace = 0; 1528 dqopt->info[cnt].dqi_bgrace = 0;
@@ -1523,12 +1552,19 @@ int vfs_quota_off(struct super_block *sb, int type)
1523 mutex_unlock(&toputinode[cnt]->i_mutex); 1552 mutex_unlock(&toputinode[cnt]->i_mutex);
1524 mark_inode_dirty(toputinode[cnt]); 1553 mark_inode_dirty(toputinode[cnt]);
1525 } 1554 }
1526 iput(toputinode[cnt]);
1527 mutex_unlock(&dqopt->dqonoff_mutex); 1555 mutex_unlock(&dqopt->dqonoff_mutex);
1556 /* On remount RO, we keep the inode pointer so that we
1557 * can reenable quota on the subsequent remount RW.
1558 * But we have better not keep inode pointer when there
1559 * is pending delete on the quota file... */
1560 if (!remount)
1561 iput(toputinode[cnt]);
1562 else if (!toputinode[cnt]->i_nlink)
1563 ret = -EBUSY;
1528 } 1564 }
1529 if (sb->s_bdev) 1565 if (sb->s_bdev)
1530 invalidate_bdev(sb->s_bdev); 1566 invalidate_bdev(sb->s_bdev);
1531 return 0; 1567 return ret;
1532} 1568}
1533 1569
1534/* 1570/*
@@ -1566,7 +1602,8 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1566 invalidate_bdev(sb->s_bdev); 1602 invalidate_bdev(sb->s_bdev);
1567 mutex_lock(&inode->i_mutex); 1603 mutex_lock(&inode->i_mutex);
1568 mutex_lock(&dqopt->dqonoff_mutex); 1604 mutex_lock(&dqopt->dqonoff_mutex);
1569 if (sb_has_quota_enabled(sb, type)) { 1605 if (sb_has_quota_enabled(sb, type) ||
1606 sb_has_quota_suspended(sb, type)) {
1570 error = -EBUSY; 1607 error = -EBUSY;
1571 goto out_lock; 1608 goto out_lock;
1572 } 1609 }
@@ -1589,6 +1626,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1589 1626
1590 dqopt->ops[type] = fmt->qf_ops; 1627 dqopt->ops[type] = fmt->qf_ops;
1591 dqopt->info[type].dqi_format = fmt; 1628 dqopt->info[type].dqi_format = fmt;
1629 dqopt->info[type].dqi_fmt_id = format_id;
1592 INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); 1630 INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
1593 mutex_lock(&dqopt->dqio_mutex); 1631 mutex_lock(&dqopt->dqio_mutex);
1594 if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { 1632 if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
@@ -1624,12 +1662,41 @@ out_fmt:
1624 return error; 1662 return error;
1625} 1663}
1626 1664
1665/* Reenable quotas on remount RW */
1666static int vfs_quota_on_remount(struct super_block *sb, int type)
1667{
1668 struct quota_info *dqopt = sb_dqopt(sb);
1669 struct inode *inode;
1670 int ret;
1671
1672 mutex_lock(&dqopt->dqonoff_mutex);
1673 if (!sb_has_quota_suspended(sb, type)) {
1674 mutex_unlock(&dqopt->dqonoff_mutex);
1675 return 0;
1676 }
1677 BUG_ON(sb_has_quota_enabled(sb, type));
1678
1679 inode = dqopt->files[type];
1680 dqopt->files[type] = NULL;
1681 reset_enable_flags(dqopt, type, 0);
1682 mutex_unlock(&dqopt->dqonoff_mutex);
1683
1684 ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
1685 iput(inode);
1686
1687 return ret;
1688}
1689
1627/* Actual function called from quotactl() */ 1690/* Actual function called from quotactl() */
1628int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) 1691int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1692 int remount)
1629{ 1693{
1630 struct nameidata nd; 1694 struct nameidata nd;
1631 int error; 1695 int error;
1632 1696
1697 if (remount)
1698 return vfs_quota_on_remount(sb, type);
1699
1633 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1700 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1634 if (error < 0) 1701 if (error < 0)
1635 return error; 1702 return error;
@@ -1709,10 +1776,19 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
1709} 1776}
1710 1777
1711/* Generic routine for setting common part of quota structure */ 1778/* Generic routine for setting common part of quota structure */
1712static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) 1779static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1713{ 1780{
1714 struct mem_dqblk *dm = &dquot->dq_dqb; 1781 struct mem_dqblk *dm = &dquot->dq_dqb;
1715 int check_blim = 0, check_ilim = 0; 1782 int check_blim = 0, check_ilim = 0;
1783 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
1784
1785 if ((di->dqb_valid & QIF_BLIMITS &&
1786 (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
1787 di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
1788 (di->dqb_valid & QIF_ILIMITS &&
1789 (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
1790 di->dqb_isoftlimit > dqi->dqi_maxilimit)))
1791 return -ERANGE;
1716 1792
1717 spin_lock(&dq_data_lock); 1793 spin_lock(&dq_data_lock);
1718 if (di->dqb_valid & QIF_SPACE) { 1794 if (di->dqb_valid & QIF_SPACE) {
@@ -1744,7 +1820,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1744 clear_bit(DQ_BLKS_B, &dquot->dq_flags); 1820 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
1745 } 1821 }
1746 else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ 1822 else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */
1747 dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; 1823 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
1748 } 1824 }
1749 if (check_ilim) { 1825 if (check_ilim) {
1750 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { 1826 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
@@ -1752,7 +1828,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1752 clear_bit(DQ_INODES_B, &dquot->dq_flags); 1828 clear_bit(DQ_INODES_B, &dquot->dq_flags);
1753 } 1829 }
1754 else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ 1830 else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */
1755 dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; 1831 dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
1756 } 1832 }
1757 if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) 1833 if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
1758 clear_bit(DQ_FAKE_B, &dquot->dq_flags); 1834 clear_bit(DQ_FAKE_B, &dquot->dq_flags);
@@ -1760,21 +1836,24 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1760 set_bit(DQ_FAKE_B, &dquot->dq_flags); 1836 set_bit(DQ_FAKE_B, &dquot->dq_flags);
1761 spin_unlock(&dq_data_lock); 1837 spin_unlock(&dq_data_lock);
1762 mark_dquot_dirty(dquot); 1838 mark_dquot_dirty(dquot);
1839
1840 return 0;
1763} 1841}
1764 1842
1765int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) 1843int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
1766{ 1844{
1767 struct dquot *dquot; 1845 struct dquot *dquot;
1846 int rc;
1768 1847
1769 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 1848 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
1770 if (!(dquot = dqget(sb, id, type))) { 1849 if (!(dquot = dqget(sb, id, type))) {
1771 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 1850 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
1772 return -ESRCH; 1851 return -ESRCH;
1773 } 1852 }
1774 do_set_dqblk(dquot, di); 1853 rc = do_set_dqblk(dquot, di);
1775 dqput(dquot); 1854 dqput(dquot);
1776 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 1855 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
1777 return 0; 1856 return rc;
1778} 1857}
1779 1858
1780/* Generic routine for getting common part of quota file information */ 1859/* Generic routine for getting common part of quota file information */
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 59375efcf39d..3e5637fc3779 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,18 +14,26 @@ int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb)
16{ 16{
17 struct inode *inode; 17 struct inode *inode, *toput_inode = NULL;
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0)
24 continue;
25 __iget(inode);
26 spin_unlock(&inode_lock);
23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
28 iput(toput_inode);
29 toput_inode = inode;
30 spin_lock(&inode_lock);
24 } 31 }
25 spin_unlock(&inode_lock); 32 spin_unlock(&inode_lock);
33 iput(toput_inode);
26} 34}
27 35
28void drop_pagecache(void) 36static void drop_pagecache(void)
29{ 37{
30 struct super_block *sb; 38 struct super_block *sb;
31 39
@@ -45,7 +53,7 @@ restart:
45 spin_unlock(&sb_lock); 53 spin_unlock(&sb_lock);
46} 54}
47 55
48void drop_slab(void) 56static void drop_slab(void)
49{ 57{
50 int nr_objects; 58 int nr_objects;
51 59
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 768857015516..1e34a7fd4884 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a066e109ad9c..cd62d75b2cc0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst,
119 if (rc) { 119 if (rc) {
120 printk(KERN_ERR 120 printk(KERN_ERR
121 "%s: Error initializing crypto hash; rc = [%d]\n", 121 "%s: Error initializing crypto hash; rc = [%d]\n",
122 __FUNCTION__, rc); 122 __func__, rc);
123 goto out; 123 goto out;
124 } 124 }
125 rc = crypto_hash_update(&desc, &sg, len); 125 rc = crypto_hash_update(&desc, &sg, len);
126 if (rc) { 126 if (rc) {
127 printk(KERN_ERR 127 printk(KERN_ERR
128 "%s: Error updating crypto hash; rc = [%d]\n", 128 "%s: Error updating crypto hash; rc = [%d]\n",
129 __FUNCTION__, rc); 129 __func__, rc);
130 goto out; 130 goto out;
131 } 131 }
132 rc = crypto_hash_final(&desc, dst); 132 rc = crypto_hash_final(&desc, dst);
133 if (rc) { 133 if (rc) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "%s: Error finalizing crypto hash; rc = [%d]\n", 135 "%s: Error finalizing crypto hash; rc = [%d]\n",
136 __FUNCTION__, rc); 136 __func__, rc);
137 goto out; 137 goto out;
138 } 138 }
139out: 139out:
@@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
437 if (rc < 0) { 437 if (rc < 0) {
438 printk(KERN_ERR "%s: Error attempting to encrypt page with " 438 printk(KERN_ERR "%s: Error attempting to encrypt page with "
439 "page->index = [%ld], extent_offset = [%ld]; " 439 "page->index = [%ld], extent_offset = [%ld]; "
440 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 440 "rc = [%d]\n", __func__, page->index, extent_offset,
441 rc); 441 rc);
442 goto out; 442 goto out;
443 } 443 }
@@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page)
487 0, PAGE_CACHE_SIZE); 487 0, PAGE_CACHE_SIZE);
488 if (rc) 488 if (rc)
489 printk(KERN_ERR "%s: Error attempting to copy " 489 printk(KERN_ERR "%s: Error attempting to copy "
490 "page at index [%ld]\n", __FUNCTION__, 490 "page at index [%ld]\n", __func__,
491 page->index); 491 page->index);
492 goto out; 492 goto out;
493 } 493 }
@@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page)
508 extent_offset); 508 extent_offset);
509 if (rc) { 509 if (rc) {
510 printk(KERN_ERR "%s: Error encrypting extent; " 510 printk(KERN_ERR "%s: Error encrypting extent; "
511 "rc = [%d]\n", __FUNCTION__, rc); 511 "rc = [%d]\n", __func__, rc);
512 goto out; 512 goto out;
513 } 513 }
514 ecryptfs_lower_offset_for_extent( 514 ecryptfs_lower_offset_for_extent(
@@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page,
569 if (rc < 0) { 569 if (rc < 0) {
570 printk(KERN_ERR "%s: Error attempting to decrypt to page with " 570 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
571 "page->index = [%ld], extent_offset = [%ld]; " 571 "page->index = [%ld], extent_offset = [%ld]; "
572 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 572 "rc = [%d]\n", __func__, page->index, extent_offset,
573 rc); 573 rc);
574 goto out; 574 goto out;
575 } 575 }
@@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page)
622 ecryptfs_inode); 622 ecryptfs_inode);
623 if (rc) 623 if (rc)
624 printk(KERN_ERR "%s: Error attempting to copy " 624 printk(KERN_ERR "%s: Error attempting to copy "
625 "page at index [%ld]\n", __FUNCTION__, 625 "page at index [%ld]\n", __func__,
626 page->index); 626 page->index);
627 goto out; 627 goto out;
628 } 628 }
@@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page)
656 extent_offset); 656 extent_offset);
657 if (rc) { 657 if (rc) {
658 printk(KERN_ERR "%s: Error encrypting extent; " 658 printk(KERN_ERR "%s: Error encrypting extent; "
659 "rc = [%d]\n", __FUNCTION__, rc); 659 "rc = [%d]\n", __func__, rc);
660 goto out; 660 goto out;
661 } 661 }
662 } 662 }
@@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
1215 ecryptfs_inode); 1215 ecryptfs_inode);
1216 if (rc) { 1216 if (rc) {
1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1218 __FUNCTION__, rc); 1218 __func__, rc);
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt,
1246 (*written) = 6; 1246 (*written) = 6;
1247} 1247}
1248 1248
1249struct kmem_cache *ecryptfs_header_cache_0;
1250struct kmem_cache *ecryptfs_header_cache_1; 1249struct kmem_cache *ecryptfs_header_cache_1;
1251struct kmem_cache *ecryptfs_header_cache_2; 1250struct kmem_cache *ecryptfs_header_cache_2;
1252 1251
@@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1320 0, crypt_stat->num_header_bytes_at_front); 1319 0, crypt_stat->num_header_bytes_at_front);
1321 if (rc) 1320 if (rc)
1322 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1323 "information to lower file; rc = [%d]\n", __FUNCTION__, 1322 "information to lower file; rc = [%d]\n", __func__,
1324 rc); 1323 rc);
1325 return rc; 1324 return rc;
1326} 1325}
@@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365 } 1364 }
1366 } else { 1365 } else {
1367 printk(KERN_WARNING "%s: Encrypted flag not set\n", 1366 printk(KERN_WARNING "%s: Encrypted flag not set\n",
1368 __FUNCTION__); 1367 __func__);
1369 rc = -EINVAL; 1368 rc = -EINVAL;
1370 goto out; 1369 goto out;
1371 } 1370 }
1372 /* Released in this function */ 1371 /* Released in this function */
1373 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1372 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL);
1374 if (!virt) { 1373 if (!virt) {
1375 printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); 1374 printk(KERN_ERR "%s: Out of memory\n", __func__);
1376 rc = -ENOMEM; 1375 rc = -ENOMEM;
1377 goto out; 1376 goto out;
1378 } 1377 }
@@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1380 ecryptfs_dentry); 1379 ecryptfs_dentry);
1381 if (unlikely(rc)) { 1380 if (unlikely(rc)) {
1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1381 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1383 __FUNCTION__, rc); 1382 __func__, rc);
1384 goto out_free; 1383 goto out_free;
1385 } 1384 }
1386 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1385 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
@@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 ecryptfs_dentry, virt); 1390 ecryptfs_dentry, virt);
1392 if (rc) { 1391 if (rc) {
1393 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1392 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1394 "rc = [%d]\n", __FUNCTION__, rc); 1393 "rc = [%d]\n", __func__, rc);
1395 goto out_free; 1394 goto out_free;
1396 } 1395 }
1397out_free: 1396out_free:
@@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1585 if (!page_virt) { 1584 if (!page_virt) {
1586 rc = -ENOMEM; 1585 rc = -ENOMEM;
1587 printk(KERN_ERR "%s: Unable to allocate page_virt\n", 1586 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1588 __FUNCTION__); 1587 __func__);
1589 goto out; 1588 goto out;
1590 } 1589 }
1591 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1590 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5007f788da01..951ee33a022d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2007 International Business Machines Corp. 7 * Copyright (C) 2004-2008 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com> 9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu> 10 * Tyler Hicks <tyhicks@ou.edu>
@@ -34,6 +34,7 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h>
37 38
38/* Version verification for shared data structures w/ userspace */ 39/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 40#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -49,11 +50,13 @@
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 50#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
54 | ECRYPTFS_VERSIONING_PUBKEY \ 56 | ECRYPTFS_VERSIONING_PUBKEY \
55 | ECRYPTFS_VERSIONING_XATTR \ 57 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY) 58 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC)
57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
59#define ECRYPTFS_SALT_SIZE 8 62#define ECRYPTFS_SALT_SIZE 8
@@ -73,17 +76,14 @@
73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 76#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
74#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ 77#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
75#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) 78#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
76#define ECRYPTFS_NLMSG_HELO 100
77#define ECRYPTFS_NLMSG_QUIT 101
78#define ECRYPTFS_NLMSG_REQUEST 102
79#define ECRYPTFS_NLMSG_RESPONSE 103
80#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
81#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
82#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
83#define ECRYPTFS_TRANSPORT_NETLINK 0 82#define ECRYPTFS_TRANSPORT_NETLINK 0
84#define ECRYPTFS_TRANSPORT_CONNECTOR 1 83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
85#define ECRYPTFS_TRANSPORT_RELAYFS 2 84#define ECRYPTFS_TRANSPORT_RELAYFS 2
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK 85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 87#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 88
89#define RFC2440_CIPHER_DES3_EDE 0x02 89#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item {
366}; 366};
367 367
368struct ecryptfs_message { 368struct ecryptfs_message {
369 /* Can never be greater than ecryptfs_message_buf_len */
370 /* Used to find the parent msg_ctx */
371 /* Inherits from msg_ctx->index */
369 u32 index; 372 u32 index;
370 u32 data_len; 373 u32 data_len;
371 u8 data[]; 374 u8 data[];
372}; 375};
373 376
374struct ecryptfs_msg_ctx { 377struct ecryptfs_msg_ctx {
375#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 378#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01
376#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 379#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02
377#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 380#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03
378 u32 state; 381#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04
379 unsigned int index; 382 u8 state;
380 unsigned int counter; 383#define ECRYPTFS_MSG_HELO 100
384#define ECRYPTFS_MSG_QUIT 101
385#define ECRYPTFS_MSG_REQUEST 102
386#define ECRYPTFS_MSG_RESPONSE 103
387 u8 type;
388 u32 index;
389 /* Counter converts to a sequence number. Each message sent
390 * out for which we expect a response has an associated
391 * sequence number. The response must have the same sequence
392 * number as the counter for the msg_stc for the message to be
393 * valid. */
394 u32 counter;
395 size_t msg_size;
381 struct ecryptfs_message *msg; 396 struct ecryptfs_message *msg;
382 struct task_struct *task; 397 struct task_struct *task;
383 struct list_head node; 398 struct list_head node;
399 struct list_head daemon_out_list;
384 struct mutex mux; 400 struct mutex mux;
385}; 401};
386 402
387extern unsigned int ecryptfs_transport; 403extern unsigned int ecryptfs_transport;
388 404
389struct ecryptfs_daemon_id { 405struct ecryptfs_daemon;
390 pid_t pid; 406
391 uid_t uid; 407struct ecryptfs_daemon {
392 struct hlist_node id_chain; 408#define ECRYPTFS_DAEMON_IN_READ 0x00000001
409#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
410#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004
411#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
412 u32 flags;
413 u32 num_queued_msg_ctx;
414 struct pid *pid;
415 uid_t euid;
416 struct user_namespace *user_ns;
417 struct task_struct *task;
418 struct mutex mux;
419 struct list_head msg_ctx_out_queue;
420 wait_queue_head_t wait;
421 struct hlist_node euid_chain;
393}; 422};
394 423
424extern struct mutex ecryptfs_daemon_hash_mux;
425
395static inline struct ecryptfs_file_info * 426static inline struct ecryptfs_file_info *
396ecryptfs_file_to_private(struct file *file) 427ecryptfs_file_to_private(struct file *file)
397{ 428{
@@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
500} 531}
501 532
502#define ecryptfs_printk(type, fmt, arg...) \ 533#define ecryptfs_printk(type, fmt, arg...) \
503 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); 534 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
504void __ecryptfs_printk(const char *fmt, ...); 535void __ecryptfs_printk(const char *fmt, ...);
505 536
506extern const struct file_operations ecryptfs_main_fops; 537extern const struct file_operations ecryptfs_main_fops;
@@ -581,10 +612,13 @@ int
581ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 612ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
582 size_t size, int flags); 613 size_t size, int flags);
583int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 614int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
584int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 615int ecryptfs_process_helo(unsigned int transport, uid_t euid,
585int ecryptfs_process_quit(uid_t uid, pid_t pid); 616 struct user_namespace *user_ns, struct pid *pid);
586int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 617int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
587 pid_t pid, u32 seq); 618 struct pid *pid);
619int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
620 struct user_namespace *user_ns, struct pid *pid,
621 u32 seq);
588int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 622int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
589 struct ecryptfs_msg_ctx **msg_ctx); 623 struct ecryptfs_msg_ctx **msg_ctx);
590int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 624int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport);
593void ecryptfs_release_messaging(unsigned int transport); 627void ecryptfs_release_messaging(unsigned int transport);
594 628
595int ecryptfs_send_netlink(char *data, int data_len, 629int ecryptfs_send_netlink(char *data, int data_len,
596 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 630 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
597 u16 msg_flags, pid_t daemon_pid); 631 u16 msg_flags, struct pid *daemon_pid);
598int ecryptfs_init_netlink(void); 632int ecryptfs_init_netlink(void);
599void ecryptfs_release_netlink(void); 633void ecryptfs_release_netlink(void);
600 634
601int ecryptfs_send_connector(char *data, int data_len, 635int ecryptfs_send_connector(char *data, int data_len,
602 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 636 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
603 u16 msg_flags, pid_t daemon_pid); 637 u16 msg_flags, struct pid *daemon_pid);
604int ecryptfs_init_connector(void); 638int ecryptfs_init_connector(void);
605void ecryptfs_release_connector(void); 639void ecryptfs_release_connector(void);
606void 640void
@@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
642 size_t offset_in_page, size_t size, 676 size_t offset_in_page, size_t size,
643 struct inode *ecryptfs_inode); 677 struct inode *ecryptfs_inode);
644struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 678struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
679int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
680int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
681 struct user_namespace *user_ns);
682int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
683 size_t *length_size);
684int ecryptfs_write_packet_length(char *dest, size_t size,
685 size_t *packet_size_length);
686int ecryptfs_init_ecryptfs_miscdev(void);
687void ecryptfs_destroy_ecryptfs_miscdev(void);
688int ecryptfs_send_miscdev(char *data, size_t data_size,
689 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
690 u16 msg_flags, struct ecryptfs_daemon *daemon);
691void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
692int
693ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
694 struct user_namespace *user_ns, struct pid *pid);
645 695
646#endif /* #ifndef ECRYPTFS_KERNEL_H */ 696#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b8f5ed4adea..2258b8f654a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
195 file, ecryptfs_inode_to_private(inode)->lower_file); 195 file, ecryptfs_inode_to_private(inode)->lower_file);
196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
198 mutex_lock(&crypt_stat->cs_mutex);
198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
200 mutex_unlock(&crypt_stat->cs_mutex);
199 rc = 0; 201 rc = 0;
200 goto out; 202 goto out;
201 } 203 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23861152101..0a1397335a8e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -111,7 +111,7 @@ ecryptfs_do_create(struct inode *directory_inode,
111 111
112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
113 lower_dir_dentry = lock_parent(lower_dentry); 113 lower_dir_dentry = lock_parent(lower_dentry);
114 if (unlikely(IS_ERR(lower_dir_dentry))) { 114 if (IS_ERR(lower_dir_dentry)) {
115 ecryptfs_printk(KERN_ERR, "Error locking directory of " 115 ecryptfs_printk(KERN_ERR, "Error locking directory of "
116 "dentry\n"); 116 "dentry\n");
117 rc = PTR_ERR(lower_dir_dentry); 117 rc = PTR_ERR(lower_dir_dentry);
@@ -121,7 +121,7 @@ ecryptfs_do_create(struct inode *directory_inode,
121 ecryptfs_dentry, mode, nd); 121 ecryptfs_dentry, mode, nd);
122 if (rc) { 122 if (rc) {
123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
124 "rc = [%d]\n", __FUNCTION__, rc); 124 "rc = [%d]\n", __func__, rc);
125 goto out_lock; 125 goto out_lock;
126 } 126 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
@@ -908,7 +908,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
909 ia->ia_valid &= ~ATTR_MODE; 909 ia->ia_valid &= ~ATTR_MODE;
910 910
911 mutex_lock(&lower_dentry->d_inode->i_mutex);
911 rc = notify_change(lower_dentry, ia); 912 rc = notify_change(lower_dentry, ia);
913 mutex_unlock(&lower_dentry->d_inode->i_mutex);
912out: 914out:
913 fsstack_copy_attr_all(inode, lower_inode, NULL); 915 fsstack_copy_attr_all(inode, lower_inode, NULL);
914 return rc; 916 return rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 682b1b2482c2..e82b457180be 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,7 +65,7 @@ static int process_request_key_err(long err_code)
65} 65}
66 66
67/** 67/**
68 * parse_packet_length 68 * ecryptfs_parse_packet_length
69 * @data: Pointer to memory containing length at offset 69 * @data: Pointer to memory containing length at offset
70 * @size: This function writes the decoded size to this memory 70 * @size: This function writes the decoded size to this memory
71 * address; zero on error 71 * address; zero on error
@@ -73,8 +73,8 @@ static int process_request_key_err(long err_code)
73 * 73 *
74 * Returns zero on success; non-zero on error 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
78{ 78{
79 int rc = 0; 79 int rc = 0;
80 80
@@ -105,7 +105,7 @@ out:
105} 105}
106 106
107/** 107/**
108 * write_packet_length 108 * ecryptfs_write_packet_length
109 * @dest: The byte array target into which to write the length. Must 109 * @dest: The byte array target into which to write the length. Must
110 * have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
@@ -114,8 +114,8 @@ out:
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
117static int write_packet_length(char *dest, size_t size, 117int ecryptfs_write_packet_length(char *dest, size_t size,
118 size_t *packet_size_length) 118 size_t *packet_size_length)
119{ 119{
120 int rc = 0; 120 int rc = 0;
121 121
@@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
162 goto out; 162 goto out;
163 } 163 }
164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; 164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
165 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 165 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
166 &packet_size_len); 166 &packet_size_len);
167 if (rc) { 167 if (rc) {
168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
169 "header; cannot generate packet length\n"); 169 "header; cannot generate packet length\n");
@@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
172 i += packet_size_len; 172 i += packet_size_len;
173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
174 i += ECRYPTFS_SIG_SIZE_HEX; 174 i += ECRYPTFS_SIG_SIZE_HEX;
175 rc = write_packet_length(&message[i], session_key->encrypted_key_size, 175 rc = ecryptfs_write_packet_length(&message[i],
176 &packet_size_len); 176 session_key->encrypted_key_size,
177 &packet_size_len);
177 if (rc) { 178 if (rc) {
178 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
179 "header; cannot generate packet length\n"); 180 "header; cannot generate packet length\n");
@@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
225 rc = -EIO; 226 rc = -EIO;
226 goto out; 227 goto out;
227 } 228 }
228 rc = parse_packet_length(&data[i], &m_size, &data_len); 229 rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len);
229 if (rc) { 230 if (rc) {
230 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
231 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
@@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
304 goto out; 305 goto out;
305 } 306 }
306 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; 307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
307 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 308 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
308 &packet_size_len); 309 &packet_size_len);
309 if (rc) { 310 if (rc) {
310 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
311 "header; cannot generate packet length\n"); 312 "header; cannot generate packet length\n");
@@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
315 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
316 i += ECRYPTFS_SIG_SIZE_HEX; 317 i += ECRYPTFS_SIG_SIZE_HEX;
317 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ 318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
318 rc = write_packet_length(&message[i], crypt_stat->key_size + 3, 319 rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
319 &packet_size_len); 320 &packet_size_len);
320 if (rc) { 321 if (rc) {
321 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
322 "header; cannot generate packet length\n"); 323 "header; cannot generate packet length\n");
@@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
357 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
358 if (message_len < 4) { 359 if (message_len < 4) {
359 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4);
360 goto out; 363 goto out;
361 } 364 }
362 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { 365 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
363 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
364 rc = -EIO; 366 rc = -EIO;
367 printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n",
368 __func__);
365 goto out; 369 goto out;
366 } 370 }
367 if (data[i++]) { 371 if (data[i++]) {
368 ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
369 " [%d]\n", data[i-1]);
370 rc = -EIO; 372 rc = -EIO;
373 printk(KERN_ERR "%s: Status indicator has non zero "
374 "value [%d]\n", __func__, data[i-1]);
375
371 goto out; 376 goto out;
372 } 377 }
373 rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); 378 rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size,
379 &data_len);
374 if (rc) { 380 if (rc) {
375 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 381 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
376 "rc = [%d]\n", rc); 382 "rc = [%d]\n", rc);
@@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
378 } 384 }
379 i += data_len; 385 i += data_len;
380 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
381 ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
382 message_len, (i + key_rec->enc_key_size));
383 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size));
384 goto out; 390 goto out;
385 } 391 }
386 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
387 ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
388 "the maximum key size [%d]\n",
389 key_rec->enc_key_size,
390 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
391 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
392 goto out; 398 goto out;
393 } 399 }
394 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); 400 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
@@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
446 &netlink_message, &netlink_message_length); 452 &netlink_message, &netlink_message_length);
447 if (rc) { 453 if (rc) {
448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
449 goto out; 455 goto out;
450 } 456 }
451 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
@@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
570 goto out; 576 goto out;
571 } 577 }
572 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 578 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
573 rc = parse_packet_length(&data[(*packet_size)], &body_size, 579 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
574 &length_size); 580 &length_size);
575 if (rc) { 581 if (rc) {
576 printk(KERN_WARNING "Error parsing packet length; " 582 printk(KERN_WARNING "Error parsing packet length; "
577 "rc = [%d]\n", rc); 583 "rc = [%d]\n", rc);
@@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
704 goto out; 710 goto out;
705 } 711 }
706 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 712 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
707 rc = parse_packet_length(&data[(*packet_size)], &body_size, 713 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
708 &length_size); 714 &length_size);
709 if (rc) { 715 if (rc) {
710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", 716 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
711 rc); 717 rc);
@@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
852 rc = -EINVAL; 858 rc = -EINVAL;
853 goto out; 859 goto out;
854 } 860 }
855 rc = parse_packet_length(&data[(*packet_size)], &body_size, 861 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
856 &length_size); 862 &length_size);
857 if (rc) { 863 if (rc) {
858 printk(KERN_WARNING "Invalid tag 11 packet format\n"); 864 printk(KERN_WARNING "Invalid tag 11 packet format\n");
859 goto out; 865 goto out;
@@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
1405 auth_tok->token.private_key.key_size; 1411 auth_tok->token.private_key.key_size;
1406 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); 1412 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1407 if (rc) { 1413 if (rc) {
1408 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " 1414 printk(KERN_ERR "Failed to encrypt session key via a key "
1409 "via a pki"); 1415 "module; rc = [%d]\n", rc);
1410 goto out; 1416 goto out;
1411 } 1417 }
1412 if (ecryptfs_verbosity > 0) { 1418 if (ecryptfs_verbosity > 0) {
@@ -1430,8 +1436,9 @@ encrypted_session_key_set:
1430 goto out; 1436 goto out;
1431 } 1437 }
1432 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1438 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1433 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1439 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1434 &packet_size_length); 1440 (max_packet_size - 4),
1441 &packet_size_length);
1435 if (rc) { 1442 if (rc) {
1436 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1443 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1437 "header; cannot generate packet length\n"); 1444 "header; cannot generate packet length\n");
@@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1489 goto out; 1496 goto out;
1490 } 1497 }
1491 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1498 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1492 rc = write_packet_length(&dest[(*packet_length)], 1499 rc = ecryptfs_write_packet_length(&dest[(*packet_length)],
1493 (max_packet_size - 4), &packet_size_length); 1500 (max_packet_size - 4),
1501 &packet_size_length);
1494 if (rc) { 1502 if (rc) {
1495 printk(KERN_ERR "Error generating tag 11 packet header; cannot " 1503 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1496 "generate packet length. rc = [%d]\n", rc); 1504 "generate packet length. rc = [%d]\n", rc);
@@ -1682,8 +1690,9 @@ encrypted_session_key_set:
1682 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1690 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1683 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) 1691 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1684 * to get the number of octets in the actual Tag 3 packet */ 1692 * to get the number of octets in the actual Tag 3 packet */
1685 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1693 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1686 &packet_size_length); 1694 (max_packet_size - 4),
1695 &packet_size_length);
1687 if (rc) { 1696 if (rc) {
1688 printk(KERN_ERR "Error generating tag 3 packet header; cannot " 1697 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1689 "generate packet length. rc = [%d]\n", rc); 1698 "generate packet length. rc = [%d]\n", rc);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac9500a92..d603631601eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
219 if (rc) { 219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the " 220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; " 221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); 222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out; 223 goto out;
224 } 224 }
225out: 225out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 9cc2aec27b0d..1b5c20058acb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * eCryptfs: Linux filesystem encryption layer 2 * eCryptfs: Linux filesystem encryption layer
3 * 3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp. 4 * Copyright (C) 2004-2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu> 6 * Tyler Hicks <tyhicks@ou.edu>
7 * 7 *
@@ -20,19 +20,21 @@
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/user_namespace.h>
24#include <linux/nsproxy.h>
23#include "ecryptfs_kernel.h" 25#include "ecryptfs_kernel.h"
24 26
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 27static LIST_HEAD(ecryptfs_msg_ctx_free_list);
26static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); 28static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
27static struct mutex ecryptfs_msg_ctx_lists_mux; 29static struct mutex ecryptfs_msg_ctx_lists_mux;
28 30
29static struct hlist_head *ecryptfs_daemon_id_hash; 31static struct hlist_head *ecryptfs_daemon_hash;
30static struct mutex ecryptfs_daemon_id_hash_mux; 32struct mutex ecryptfs_daemon_hash_mux;
31static int ecryptfs_hash_buckets; 33static int ecryptfs_hash_buckets;
32#define ecryptfs_uid_hash(uid) \ 34#define ecryptfs_uid_hash(uid) \
33 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 35 hash_long((unsigned long)uid, ecryptfs_hash_buckets)
34 36
35static unsigned int ecryptfs_msg_counter; 37static u32 ecryptfs_msg_counter;
36static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 38static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37 39
38/** 40/**
@@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
40 * @msg_ctx: The context that was acquired from the free list 42 * @msg_ctx: The context that was acquired from the free list
41 * 43 *
42 * Acquires a context element from the free list and locks the mutex 44 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon 45 * on the context. Sets the msg_ctx task to current. Returns zero on
44 * failure to acquire a free context element. Be sure to lock the 46 * success; non-zero on error or upon failure to acquire a free
45 * list mutex before calling. 47 * context element. Must be called with ecryptfs_msg_ctx_lists_mux
48 * held.
46 */ 49 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) 50static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{ 51{
@@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
50 int rc; 53 int rc;
51 54
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) { 55 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free " 56 printk(KERN_WARNING "%s: The eCryptfs free "
54 "context list is empty. It may be helpful to " 57 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len " 58 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current " 59 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len); 60 "value of [%d]\n", __func__, ecryptfs_message_buf_len);
58 rc = -ENOMEM; 61 rc = -ENOMEM;
59 goto out; 62 goto out;
60 } 63 }
@@ -75,8 +78,7 @@ out:
75 * ecryptfs_msg_ctx_free_to_alloc 78 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list 79 * @msg_ctx: The context to move from the free list to the alloc list
77 * 80 *
78 * Be sure to lock the list mutex and the context mutex before 81 * Must be called with ecryptfs_msg_ctx_lists_mux held.
79 * calling.
80 */ 82 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) 83static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{ 84{
@@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
89 * ecryptfs_msg_ctx_alloc_to_free 91 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list 92 * @msg_ctx: The context to move from the alloc list to the free list
91 * 93 *
92 * Be sure to lock the list mutex and the context mutex before 94 * Must be called with ecryptfs_msg_ctx_lists_mux held.
93 * calling.
94 */ 95 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 96void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{ 97{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 98 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg) 99 if (msg_ctx->msg)
99 kfree(msg_ctx->msg); 100 kfree(msg_ctx->msg);
101 msg_ctx->msg = NULL;
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101} 103}
102 104
103/** 105/**
104 * ecryptfs_find_daemon_id 106 * ecryptfs_find_daemon_by_euid
105 * @uid: The user id which maps to the desired daemon id 107 * @euid: The effective user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id 108 * @user_ns: The namespace in which @euid applies
107 * pointer 109 * @daemon: If return value is zero, points to the desired daemon pointer
108 * 110 *
109 * Search the hash list for the given user id. Returns zero if the 111 * Must be called with ecryptfs_daemon_hash_mux held.
110 * user id exists in the list; non-zero otherwise. The daemon id hash 112 *
111 * mutex should be held before calling this function. 113 * Search the hash list for the given user id.
114 *
115 * Returns zero if the user id exists in the list; non-zero otherwise.
112 */ 116 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) 117int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
118 struct user_namespace *user_ns)
114{ 119{
115 struct hlist_node *elem; 120 struct hlist_node *elem;
116 int rc; 121 int rc;
117 122
118 hlist_for_each_entry(*id, elem, 123 hlist_for_each_entry(*daemon, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], 124 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
120 id_chain) { 125 euid_chain) {
121 if ((*id)->uid == uid) { 126 if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
122 rc = 0; 127 rc = 0;
123 goto out; 128 goto out;
124 } 129 }
@@ -128,181 +133,325 @@ out:
128 return rc; 133 return rc;
129} 134}
130 135
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, 136static int
132 pid_t pid) 137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message
145 *
146 * A raw message is one that does not include an ecryptfs_message
147 * struct. It simply has a type.
148 *
149 * Must be called with ecryptfs_daemon_hash_mux held.
150 *
151 * Returns zero on success; non-zero otherwise
152 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type,
154 struct ecryptfs_daemon *daemon)
133{ 155{
156 struct ecryptfs_msg_ctx *msg_ctx;
134 int rc; 157 int rc;
135 158
136 switch(transport) { 159 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK: 160 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); 161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0,
162 daemon->pid);
163 break;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
139 break; 179 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR: 180 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS: 181 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default: 182 default:
143 rc = -ENOSYS; 183 rc = -ENOSYS;
144 } 184 }
185out:
186 return rc;
187}
188
189/**
190 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
191 * @daemon: Pointer to set to newly allocated daemon struct
192 * @euid: Effective user id for the daemon
193 * @user_ns: The namespace in which @euid applies
194 * @pid: Process id for the daemon
195 *
196 * Must be called ceremoniously while in possession of
197 * ecryptfs_sacred_daemon_hash_mux
198 *
199 * Returns zero on success; non-zero otherwise
200 */
201int
202ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
203 struct user_namespace *user_ns, struct pid *pid)
204{
205 int rc = 0;
206
207 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
208 if (!(*daemon)) {
209 rc = -ENOMEM;
210 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
211 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
212 goto out;
213 }
214 (*daemon)->euid = euid;
215 (*daemon)->user_ns = get_user_ns(user_ns);
216 (*daemon)->pid = get_pid(pid);
217 (*daemon)->task = current;
218 mutex_init(&(*daemon)->mux);
219 INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
220 init_waitqueue_head(&(*daemon)->wait);
221 (*daemon)->num_queued_msg_ctx = 0;
222 hlist_add_head(&(*daemon)->euid_chain,
223 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
224out:
145 return rc; 225 return rc;
146} 226}
147 227
148/** 228/**
149 * ecryptfs_process_helo 229 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.) 230 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message 231 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies
152 * @pid: The process ID for the userspace program that sent the 233 * @pid: The process ID for the userspace program that sent the
153 * message 234 * message
154 * 235 *
155 * Adds the uid and pid values to the daemon id hash. If a uid 236 * Adds the euid and pid values to the daemon euid hash. If an euid
156 * already has a daemon pid registered, the daemon will be 237 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list. 238 * unregistered before the new daemon is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list; 239 * Returns zero after adding a new daemon to the hash list;
159 * non-zero otherwise. 240 * non-zero otherwise.
160 */ 241 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) 242int ecryptfs_process_helo(unsigned int transport, uid_t euid,
243 struct user_namespace *user_ns, struct pid *pid)
162{ 244{
163 struct ecryptfs_daemon_id *new_id; 245 struct ecryptfs_daemon *new_daemon;
164 struct ecryptfs_daemon_id *old_id; 246 struct ecryptfs_daemon *old_daemon;
165 int rc; 247 int rc;
166 248
167 mutex_lock(&ecryptfs_daemon_id_hash_mux); 249 mutex_lock(&ecryptfs_daemon_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); 250 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
169 if (!new_id) { 251 if (rc != 0) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user [%d]\n",
173 pid, uid);
174 goto unlock;
175 }
176 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
177 printk(KERN_WARNING "Received request from user [%d] " 252 printk(KERN_WARNING "Received request from user [%d] "
178 "to register daemon [%d]; unregistering daemon " 253 "to register daemon [0x%p]; unregistering daemon "
179 "[%d]\n", uid, pid, old_id->pid); 254 "[0x%p]\n", euid, pid, old_daemon->pid);
180 hlist_del(&old_id->id_chain); 255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT,
181 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, 256 old_daemon);
182 old_id->pid);
183 if (rc) 257 if (rc)
184 printk(KERN_WARNING "Failed to send QUIT " 258 printk(KERN_WARNING "Failed to send QUIT "
185 "message to daemon [%d]; rc = [%d]\n", 259 "message to daemon [0x%p]; rc = [%d]\n",
186 old_id->pid, rc); 260 old_daemon->pid, rc);
187 kfree(old_id); 261 hlist_del(&old_daemon->euid_chain);
262 kfree(old_daemon);
188 } 263 }
189 new_id->uid = uid; 264 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
190 new_id->pid = pid; 265 if (rc)
191 hlist_add_head(&new_id->id_chain, 266 printk(KERN_ERR "%s: The gods are displeased with this attempt "
192 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); 267 "to create a new daemon object for euid [%d]; pid "
193 rc = 0; 268 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
194unlock: 269 mutex_unlock(&ecryptfs_daemon_hash_mux);
195 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 270 return rc;
271}
272
273/**
274 * ecryptfs_exorcise_daemon - Destroy the daemon struct
275 *
276 * Must be called ceremoniously while in possession of
277 * ecryptfs_daemon_hash_mux and the daemon's own mux.
278 */
279int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
280{
281 struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp;
282 int rc = 0;
283
284 mutex_lock(&daemon->mux);
285 if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
286 || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
287 rc = -EBUSY;
288 printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
289 "[0x%p], but it is in the midst of a read or a poll\n",
290 __func__, daemon->pid);
291 mutex_unlock(&daemon->mux);
292 goto out;
293 }
294 list_for_each_entry_safe(msg_ctx, msg_ctx_tmp,
295 &daemon->msg_ctx_out_queue, daemon_out_list) {
296 list_del(&msg_ctx->daemon_out_list);
297 daemon->num_queued_msg_ctx--;
298 printk(KERN_WARNING "%s: Warning: dropping message that is in "
299 "the out queue of a dying daemon\n", __func__);
300 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
301 }
302 hlist_del(&daemon->euid_chain);
303 if (daemon->task)
304 wake_up_process(daemon->task);
305 if (daemon->pid)
306 put_pid(daemon->pid);
307 if (daemon->user_ns)
308 put_user_ns(daemon->user_ns);
309 mutex_unlock(&daemon->mux);
310 memset(daemon, 0, sizeof(*daemon));
311 kfree(daemon);
312out:
196 return rc; 313 return rc;
197} 314}
198 315
199/** 316/**
200 * ecryptfs_process_quit 317 * ecryptfs_process_quit
201 * @uid: The user ID owner of the message 318 * @euid: The user ID owner of the message
319 * @user_ns: The namespace in which @euid applies
202 * @pid: The process ID for the userspace program that sent the 320 * @pid: The process ID for the userspace program that sent the
203 * message 321 * message
204 * 322 *
205 * Deletes the corresponding daemon id for the given uid and pid, if 323 * Deletes the corresponding daemon for the given euid and pid, if
206 * it is the registered that is requesting the deletion. Returns zero 324 * it is the registered that is requesting the deletion. Returns zero
207 * after deleting the desired daemon id; non-zero otherwise. 325 * after deleting the desired daemon; non-zero otherwise.
208 */ 326 */
209int ecryptfs_process_quit(uid_t uid, pid_t pid) 327int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
328 struct pid *pid)
210{ 329{
211 struct ecryptfs_daemon_id *id; 330 struct ecryptfs_daemon *daemon;
212 int rc; 331 int rc;
213 332
214 mutex_lock(&ecryptfs_daemon_id_hash_mux); 333 mutex_lock(&ecryptfs_daemon_hash_mux);
215 if (ecryptfs_find_daemon_id(uid, &id)) { 334 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
335 if (rc || !daemon) {
216 rc = -EINVAL; 336 rc = -EINVAL;
217 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " 337 printk(KERN_ERR "Received request from user [%d] to "
218 "unregister unrecognized daemon [%d]\n", uid, 338 "unregister unrecognized daemon [0x%p]\n", euid, pid);
219 pid); 339 goto out_unlock;
220 goto unlock;
221 } 340 }
222 if (id->pid != pid) { 341 rc = ecryptfs_exorcise_daemon(daemon);
223 rc = -EINVAL; 342out_unlock:
224 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " 343 mutex_unlock(&ecryptfs_daemon_hash_mux);
225 "with pid [%d] to unregister daemon [%d]\n",
226 uid, pid, id->pid);
227 goto unlock;
228 }
229 hlist_del(&id->id_chain);
230 kfree(id);
231 rc = 0;
232unlock:
233 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
234 return rc; 344 return rc;
235} 345}
236 346
237/** 347/**
238 * ecryptfs_process_reponse 348 * ecryptfs_process_reponse
239 * @msg: The ecryptfs message received; the caller should sanity check 349 * @msg: The ecryptfs message received; the caller should sanity check
240 * msg->data_len 350 * msg->data_len and free the memory
241 * @pid: The process ID of the userspace application that sent the 351 * @pid: The process ID of the userspace application that sent the
242 * message 352 * message
243 * @seq: The sequence number of the message 353 * @seq: The sequence number of the message; must match the sequence
354 * number for the existing message context waiting for this
355 * response
356 *
357 * Processes a response message after sending an operation request to
358 * userspace. Some other process is awaiting this response. Before
359 * sending out its first communications, the other process allocated a
360 * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The
361 * response message contains this index so that we can copy over the
362 * response message into the msg_ctx that the process holds a
363 * reference to. The other process is going to wake up, check to see
364 * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then
365 * proceed to read off and process the response message. Returns zero
366 * upon delivery to desired context element; non-zero upon delivery
367 * failure or error.
244 * 368 *
245 * Processes a response message after sending a operation request to 369 * Returns zero on success; non-zero otherwise
246 * userspace. Returns zero upon delivery to desired context element;
247 * non-zero upon delivery failure or error.
248 */ 370 */
249int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 371int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
250 pid_t pid, u32 seq) 372 struct user_namespace *user_ns, struct pid *pid,
373 u32 seq)
251{ 374{
252 struct ecryptfs_daemon_id *id; 375 struct ecryptfs_daemon *daemon;
253 struct ecryptfs_msg_ctx *msg_ctx; 376 struct ecryptfs_msg_ctx *msg_ctx;
254 int msg_size; 377 size_t msg_size;
378 struct nsproxy *nsproxy;
379 struct user_namespace *current_user_ns;
255 int rc; 380 int rc;
256 381
257 if (msg->index >= ecryptfs_message_buf_len) { 382 if (msg->index >= ecryptfs_message_buf_len) {
258 rc = -EINVAL; 383 rc = -EINVAL;
259 ecryptfs_printk(KERN_ERR, "Attempt to reference " 384 printk(KERN_ERR "%s: Attempt to reference "
260 "context buffer at index [%d]; maximum " 385 "context buffer at index [%d]; maximum "
261 "allowable is [%d]\n", msg->index, 386 "allowable is [%d]\n", __func__, msg->index,
262 (ecryptfs_message_buf_len - 1)); 387 (ecryptfs_message_buf_len - 1));
263 goto out; 388 goto out;
264 } 389 }
265 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; 390 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
266 mutex_lock(&msg_ctx->mux); 391 mutex_lock(&msg_ctx->mux);
267 if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { 392 mutex_lock(&ecryptfs_daemon_hash_mux);
393 rcu_read_lock();
394 nsproxy = task_nsproxy(msg_ctx->task);
395 if (nsproxy == NULL) {
268 rc = -EBADMSG; 396 rc = -EBADMSG;
269 ecryptfs_printk(KERN_WARNING, "User [%d] received a " 397 printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
270 "message response from process [%d] but does " 398 "message.\n", __func__);
271 "not have a registered daemon\n", 399 rcu_read_unlock();
272 msg_ctx->task->euid, pid); 400 mutex_unlock(&ecryptfs_daemon_hash_mux);
273 goto wake_up; 401 goto wake_up;
274 } 402 }
275 if (msg_ctx->task->euid != uid) { 403 current_user_ns = nsproxy->user_ns;
404 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
405 current_user_ns);
406 rcu_read_unlock();
407 mutex_unlock(&ecryptfs_daemon_hash_mux);
408 if (rc) {
409 rc = -EBADMSG;
410 printk(KERN_WARNING "%s: User [%d] received a "
411 "message response from process [0x%p] but does "
412 "not have a registered daemon\n", __func__,
413 msg_ctx->task->euid, pid);
414 goto wake_up;
415 }
416 if (msg_ctx->task->euid != euid) {
276 rc = -EBADMSG; 417 rc = -EBADMSG;
277 ecryptfs_printk(KERN_WARNING, "Received message from user " 418 printk(KERN_WARNING "%s: Received message from user "
278 "[%d]; expected message from user [%d]\n", 419 "[%d]; expected message from user [%d]\n", __func__,
279 uid, msg_ctx->task->euid); 420 euid, msg_ctx->task->euid);
280 goto unlock; 421 goto unlock;
281 } 422 }
282 if (id->pid != pid) { 423 if (current_user_ns != user_ns) {
283 rc = -EBADMSG; 424 rc = -EBADMSG;
284 ecryptfs_printk(KERN_ERR, "User [%d] received a " 425 printk(KERN_WARNING "%s: Received message from user_ns "
285 "message response from an unrecognized " 426 "[0x%p]; expected message from user_ns [0x%p]\n",
286 "process [%d]\n", msg_ctx->task->euid, pid); 427 __func__, user_ns, nsproxy->user_ns);
428 goto unlock;
429 }
430 if (daemon->pid != pid) {
431 rc = -EBADMSG;
432 printk(KERN_ERR "%s: User [%d] sent a message response "
433 "from an unrecognized process [0x%p]\n",
434 __func__, msg_ctx->task->euid, pid);
287 goto unlock; 435 goto unlock;
288 } 436 }
289 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 437 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
290 rc = -EINVAL; 438 rc = -EINVAL;
291 ecryptfs_printk(KERN_WARNING, "Desired context element is not " 439 printk(KERN_WARNING "%s: Desired context element is not "
292 "pending a response\n"); 440 "pending a response\n", __func__);
293 goto unlock; 441 goto unlock;
294 } else if (msg_ctx->counter != seq) { 442 } else if (msg_ctx->counter != seq) {
295 rc = -EINVAL; 443 rc = -EINVAL;
296 ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " 444 printk(KERN_WARNING "%s: Invalid message sequence; "
297 "expected [%d]; received [%d]\n", 445 "expected [%d]; received [%d]\n", __func__,
298 msg_ctx->counter, seq); 446 msg_ctx->counter, seq);
299 goto unlock; 447 goto unlock;
300 } 448 }
301 msg_size = sizeof(*msg) + msg->data_len; 449 msg_size = (sizeof(*msg) + msg->data_len);
302 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 450 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
303 if (!msg_ctx->msg) { 451 if (!msg_ctx->msg) {
304 rc = -ENOMEM; 452 rc = -ENOMEM;
305 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 453 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
454 "GFP_KERNEL memory\n", __func__, msg_size);
306 goto unlock; 455 goto unlock;
307 } 456 }
308 memcpy(msg_ctx->msg, msg, msg_size); 457 memcpy(msg_ctx->msg, msg, msg_size);
@@ -317,34 +466,38 @@ out:
317} 466}
318 467
319/** 468/**
320 * ecryptfs_send_message 469 * ecryptfs_send_message_locked
321 * @transport: The transport over which to send the message (i.e., 470 * @transport: The transport over which to send the message (i.e.,
322 * netlink) 471 * netlink)
323 * @data: The data to send 472 * @data: The data to send
324 * @data_len: The length of data 473 * @data_len: The length of data
325 * @msg_ctx: The message context allocated for the send 474 * @msg_ctx: The message context allocated for the send
475 *
476 * Must be called with ecryptfs_daemon_hash_mux held.
477 *
478 * Returns zero on success; non-zero otherwise
326 */ 479 */
327int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 480static int
328 struct ecryptfs_msg_ctx **msg_ctx) 481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx)
329{ 483{
330 struct ecryptfs_daemon_id *id; 484 struct ecryptfs_daemon *daemon;
331 int rc; 485 int rc;
332 486
333 mutex_lock(&ecryptfs_daemon_id_hash_mux); 487 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
334 if (ecryptfs_find_daemon_id(current->euid, &id)) { 488 current->nsproxy->user_ns);
335 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 489 if (rc || !daemon) {
336 rc = -ENOTCONN; 490 rc = -ENOTCONN;
337 ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " 491 printk(KERN_ERR "%s: User [%d] does not have a daemon "
338 "registered\n", current->euid); 492 "registered\n", __func__, current->euid);
339 goto out; 493 goto out;
340 } 494 }
341 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
342 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 495 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
343 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); 496 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
344 if (rc) { 497 if (rc) {
345 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 498 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
346 ecryptfs_printk(KERN_WARNING, "Could not claim a free " 499 printk(KERN_WARNING "%s: Could not claim a free "
347 "context element\n"); 500 "context element\n", __func__);
348 goto out; 501 goto out;
349 } 502 }
350 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
@@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
352 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
353 switch (transport) { 506 switch (transport) {
354 case ECRYPTFS_TRANSPORT_NETLINK: 507 case ECRYPTFS_TRANSPORT_NETLINK:
355 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, 508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
356 ECRYPTFS_NLMSG_REQUEST, 0, id->pid); 509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
357 break; 514 break;
358 case ECRYPTFS_TRANSPORT_CONNECTOR: 515 case ECRYPTFS_TRANSPORT_CONNECTOR:
359 case ECRYPTFS_TRANSPORT_RELAYFS: 516 case ECRYPTFS_TRANSPORT_RELAYFS:
360 default: 517 default:
361 rc = -ENOSYS; 518 rc = -ENOSYS;
362 } 519 }
363 if (rc) { 520 if (rc)
364 printk(KERN_ERR "Error attempting to send message to userspace " 521 printk(KERN_ERR "%s: Error attempting to send message to "
365 "daemon; rc = [%d]\n", rc); 522 "userspace daemon; rc = [%d]\n", __func__, rc);
366 }
367out: 523out:
368 return rc; 524 return rc;
369} 525}
370 526
371/** 527/**
528 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send
532 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send
534 *
535 * Grabs ecryptfs_daemon_hash_mux.
536 *
537 * Returns zero on success; non-zero otherwise
538 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx)
541{
542 int rc;
543
544 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len,
546 ECRYPTFS_MSG_REQUEST, msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc;
549}
550
551/**
372 * ecryptfs_wait_for_response 552 * ecryptfs_wait_for_response
373 * @msg_ctx: The context that was assigned when sending a message 553 * @msg_ctx: The context that was assigned when sending a message
374 * @msg: The incoming message from userspace; not set if rc != 0 554 * @msg: The incoming message from userspace; not set if rc != 0
@@ -377,7 +557,7 @@ out:
377 * of time exceeds ecryptfs_message_wait_timeout. If zero is 557 * of time exceeds ecryptfs_message_wait_timeout. If zero is
378 * returned, msg will point to a valid message from userspace; a 558 * returned, msg will point to a valid message from userspace; a
379 * non-zero value is returned upon failure to receive a message or an 559 * non-zero value is returned upon failure to receive a message or an
380 * error occurs. 560 * error occurs. Callee must free @msg on success.
381 */ 561 */
382int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
383 struct ecryptfs_message **msg) 563 struct ecryptfs_message **msg)
@@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport)
413 593
414 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { 594 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
415 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; 595 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
416 ecryptfs_printk(KERN_WARNING, "Specified number of users is " 596 printk(KERN_WARNING "%s: Specified number of users is "
417 "too large, defaulting to [%d] users\n", 597 "too large, defaulting to [%d] users\n", __func__,
418 ecryptfs_number_of_users); 598 ecryptfs_number_of_users);
419 } 599 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 600 mutex_init(&ecryptfs_daemon_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 601 mutex_lock(&ecryptfs_daemon_hash_mux);
422 ecryptfs_hash_buckets = 1; 602 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 603 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++; 604 ecryptfs_hash_buckets++;
425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 605 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
426 * ecryptfs_hash_buckets, GFP_KERNEL); 606 * ecryptfs_hash_buckets), GFP_KERNEL);
427 if (!ecryptfs_daemon_id_hash) { 607 if (!ecryptfs_daemon_hash) {
428 rc = -ENOMEM; 608 rc = -ENOMEM;
429 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 609 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
430 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 610 mutex_unlock(&ecryptfs_daemon_hash_mux);
431 goto out; 611 goto out;
432 } 612 }
433 for (i = 0; i < ecryptfs_hash_buckets; i++) 613 for (i = 0; i < ecryptfs_hash_buckets; i++)
434 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); 614 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
435 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 615 mutex_unlock(&ecryptfs_daemon_hash_mux);
436
437 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 616 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
438 * ecryptfs_message_buf_len), GFP_KERNEL); 617 * ecryptfs_message_buf_len),
618 GFP_KERNEL);
439 if (!ecryptfs_msg_ctx_arr) { 619 if (!ecryptfs_msg_ctx_arr) {
440 rc = -ENOMEM; 620 rc = -ENOMEM;
441 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 621 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
442 goto out; 622 goto out;
443 } 623 }
444 mutex_init(&ecryptfs_msg_ctx_lists_mux); 624 mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport)
446 ecryptfs_msg_counter = 0; 626 ecryptfs_msg_counter = 0;
447 for (i = 0; i < ecryptfs_message_buf_len; i++) { 627 for (i = 0; i < ecryptfs_message_buf_len; i++) {
448 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); 628 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
629 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list);
449 mutex_init(&ecryptfs_msg_ctx_arr[i].mux); 630 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
450 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); 631 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
451 ecryptfs_msg_ctx_arr[i].index = i; 632 ecryptfs_msg_ctx_arr[i].index = i;
@@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport)
464 if (rc) 645 if (rc)
465 ecryptfs_release_messaging(transport); 646 ecryptfs_release_messaging(transport);
466 break; 647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
467 case ECRYPTFS_TRANSPORT_CONNECTOR: 653 case ECRYPTFS_TRANSPORT_CONNECTOR:
468 case ECRYPTFS_TRANSPORT_RELAYFS: 654 case ECRYPTFS_TRANSPORT_RELAYFS:
469 default: 655 default:
@@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport)
488 kfree(ecryptfs_msg_ctx_arr); 674 kfree(ecryptfs_msg_ctx_arr);
489 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 675 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
490 } 676 }
491 if (ecryptfs_daemon_id_hash) { 677 if (ecryptfs_daemon_hash) {
492 struct hlist_node *elem; 678 struct hlist_node *elem;
493 struct ecryptfs_daemon_id *id; 679 struct ecryptfs_daemon *daemon;
494 int i; 680 int i;
495 681
496 mutex_lock(&ecryptfs_daemon_id_hash_mux); 682 mutex_lock(&ecryptfs_daemon_hash_mux);
497 for (i = 0; i < ecryptfs_hash_buckets; i++) { 683 for (i = 0; i < ecryptfs_hash_buckets; i++) {
498 hlist_for_each_entry(id, elem, 684 int rc;
499 &ecryptfs_daemon_id_hash[i], 685
500 id_chain) { 686 hlist_for_each_entry(daemon, elem,
501 hlist_del(elem); 687 &ecryptfs_daemon_hash[i],
502 kfree(id); 688 euid_chain) {
689 rc = ecryptfs_exorcise_daemon(daemon);
690 if (rc)
691 printk(KERN_ERR "%s: Error whilst "
692 "attempting to destroy daemon; "
693 "rc = [%d]. Dazed and confused, "
694 "but trying to continue.\n",
695 __func__, rc);
503 } 696 }
504 } 697 }
505 kfree(ecryptfs_daemon_id_hash); 698 kfree(ecryptfs_daemon_hash);
506 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 699 mutex_unlock(&ecryptfs_daemon_hash_mux);
507 } 700 }
508 switch(transport) { 701 switch(transport) {
509 case ECRYPTFS_TRANSPORT_NETLINK: 702 case ECRYPTFS_TRANSPORT_NETLINK:
510 ecryptfs_release_netlink(); 703 ecryptfs_release_netlink();
511 break; 704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
512 case ECRYPTFS_TRANSPORT_CONNECTOR: 708 case ECRYPTFS_TRANSPORT_CONNECTOR:
513 case ECRYPTFS_TRANSPORT_RELAYFS: 709 case ECRYPTFS_TRANSPORT_RELAYFS:
514 default: 710 default:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
new file mode 100644
index 000000000000..788995efd1d3
--- /dev/null
+++ b/fs/ecryptfs/miscdev.c
@@ -0,0 +1,598 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <linux/fs.h>
23#include <linux/hash.h>
24#include <linux/random.h>
25#include <linux/miscdevice.h>
26#include <linux/poll.h>
27#include <linux/wait.h>
28#include <linux/module.h>
29#include "ecryptfs_kernel.h"
30
31static atomic_t ecryptfs_num_miscdev_opens;
32
33/**
34 * ecryptfs_miscdev_poll
35 * @file: dev file (ignored)
36 * @pt: dev poll table (ignored)
37 *
38 * Returns the poll mask
39 */
40static unsigned int
41ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{
43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0;
45 int rc;
46
47 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
55 printk(KERN_WARNING "%s: Attempt to poll on zombified "
56 "daemon\n", __func__);
57 goto out_unlock_daemon;
58 }
59 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ)
60 goto out_unlock_daemon;
61 if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)
62 goto out_unlock_daemon;
63 daemon->flags |= ECRYPTFS_DAEMON_IN_POLL;
64 mutex_unlock(&daemon->mux);
65 poll_wait(file, &daemon->wait, pt);
66 mutex_lock(&daemon->mux);
67 if (!list_empty(&daemon->msg_ctx_out_queue))
68 mask |= POLLIN | POLLRDNORM;
69out_unlock_daemon:
70 daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
71 mutex_unlock(&daemon->mux);
72 return mask;
73}
74
75/**
76 * ecryptfs_miscdev_open
77 * @inode: inode of miscdev handle (ignored)
78 * @file: file for miscdev handle (ignored)
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{
85 struct ecryptfs_daemon *daemon = NULL;
86 int rc;
87
88 mutex_lock(&ecryptfs_daemon_hash_mux);
89 rc = try_module_get(THIS_MODULE);
90 if (rc == 0) {
91 rc = -EIO;
92 printk(KERN_ERR "%s: Error attempting to increment module use "
93 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list;
95 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
97 current->nsproxy->user_ns);
98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid,
100 current->nsproxy->user_ns,
101 task_pid(current));
102 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
104 "rc = [%d]\n", __func__, rc);
105 goto out_module_put_unlock_daemon_list;
106 }
107 }
108 mutex_lock(&daemon->mux);
109 if (daemon->pid != task_pid(current)) {
110 rc = -EINVAL;
111 printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
112 "but pid [0x%p] has attempted to open the handle "
113 "instead\n", __func__, daemon->pid, daemon->euid,
114 task_pid(current));
115 goto out_unlock_daemon;
116 }
117 if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
118 rc = -EBUSY;
119 printk(KERN_ERR "%s: Miscellaneous device handle may only be "
120 "opened once per daemon; pid [0x%p] already has this "
121 "handle open\n", __func__, daemon->pid);
122 goto out_unlock_daemon;
123 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
125 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon:
127 mutex_unlock(&daemon->mux);
128out_module_put_unlock_daemon_list:
129 if (rc)
130 module_put(THIS_MODULE);
131out_unlock_daemon_list:
132 mutex_unlock(&ecryptfs_daemon_hash_mux);
133 return rc;
134}
135
136/**
137 * ecryptfs_miscdev_release
138 * @inode: inode of fs/ecryptfs/euid handle (ignored)
139 * @file: file for fs/ecryptfs/euid handle (ignored)
140 *
141 * This keeps the daemon registered until the daemon sends another
142 * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters.
143 *
144 * Returns zero on success; non-zero otherwise
145 */
146static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{
149 struct ecryptfs_daemon *daemon = NULL;
150 int rc;
151
152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens);
161 mutex_unlock(&daemon->mux);
162 rc = ecryptfs_exorcise_daemon(daemon);
163 if (rc) {
164 printk(KERN_CRIT "%s: Fatal error whilst attempting to "
165 "shut down daemon; rc = [%d]. Please report this "
166 "bug.\n", __func__, rc);
167 BUG();
168 }
169 module_put(THIS_MODULE);
170 mutex_unlock(&ecryptfs_daemon_hash_mux);
171 return rc;
172}
173
174/**
175 * ecryptfs_send_miscdev
176 * @data: Data to send to daemon; may be NULL
177 * @data_size: Amount of data to send to daemon
178 * @msg_ctx: Message context, which is used to handle the reply. If
179 * this is NULL, then we do not expect a reply.
180 * @msg_type: Type of message
181 * @msg_flags: Flags for message
182 * @daemon: eCryptfs daemon object
183 *
184 * Add msg_ctx to queue and then, if it exists, notify the blocked
185 * miscdevess about the data being available. Must be called with
186 * ecryptfs_daemon_hash_mux held.
187 *
188 * Returns zero on success; non-zero otherwise
189 */
190int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{
194 int rc = 0;
195
196 mutex_lock(&msg_ctx->mux);
197 if (data) {
198 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
199 GFP_KERNEL);
200 if (!msg_ctx->msg) {
201 rc = -ENOMEM;
202 printk(KERN_ERR "%s: Out of memory whilst attempting "
203 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
204 (sizeof(*msg_ctx->msg) + data_size));
205 goto out_unlock;
206 }
207 } else
208 msg_ctx->msg = NULL;
209 msg_ctx->msg->index = msg_ctx->index;
210 msg_ctx->msg->data_len = data_size;
211 msg_ctx->type = msg_type;
212 if (data) {
213 memcpy(msg_ctx->msg->data, data, data_size);
214 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
215 } else
216 msg_ctx->msg_size = 0;
217 mutex_lock(&daemon->mux);
218 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
219 daemon->num_queued_msg_ctx++;
220 wake_up_interruptible(&daemon->wait);
221 mutex_unlock(&daemon->mux);
222out_unlock:
223 mutex_unlock(&msg_ctx->mux);
224 return rc;
225}
226
227/**
228 * ecryptfs_miscdev_read - format and send message from queue
229 * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
230 * @buf: User buffer into which to copy the next message on the daemon queue
231 * @count: Amount of space available in @buf
232 * @ppos: Offset in file (ignored)
233 *
234 * Pulls the most recent message from the daemon queue, formats it for
235 * being sent via a miscdevfs handle, and copies it into @buf
236 *
237 * Returns the number of bytes copied into the user buffer
238 */
239static ssize_t
240ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
241 loff_t *ppos)
242{
243 struct ecryptfs_daemon *daemon;
244 struct ecryptfs_msg_ctx *msg_ctx;
245 size_t packet_length_size;
246 u32 counter_nbo;
247 char packet_length[3];
248 size_t i;
249 size_t total_length;
250 int rc;
251
252 mutex_lock(&ecryptfs_daemon_hash_mux);
253 /* TODO: Just use file->private_data? */
254 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
255 current->nsproxy->user_ns);
256 BUG_ON(rc || !daemon);
257 mutex_lock(&daemon->mux);
258 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
259 rc = 0;
260 printk(KERN_WARNING "%s: Attempt to read from zombified "
261 "daemon\n", __func__);
262 goto out_unlock_daemon;
263 }
264 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
265 rc = 0;
266 goto out_unlock_daemon;
267 }
268 /* This daemon will not go away so long as this flag is set */
269 daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
270 mutex_unlock(&ecryptfs_daemon_hash_mux);
271check_list:
272 if (list_empty(&daemon->msg_ctx_out_queue)) {
273 mutex_unlock(&daemon->mux);
274 rc = wait_event_interruptible(
275 daemon->wait, !list_empty(&daemon->msg_ctx_out_queue));
276 mutex_lock(&daemon->mux);
277 if (rc < 0) {
278 rc = 0;
279 goto out_unlock_daemon;
280 }
281 }
282 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
283 rc = 0;
284 goto out_unlock_daemon;
285 }
286 if (list_empty(&daemon->msg_ctx_out_queue)) {
287 /* Something else jumped in since the
288 * wait_event_interruptable() and removed the
289 * message from the queue; try again */
290 goto check_list;
291 }
292 BUG_ON(current->euid != daemon->euid);
293 BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
294 BUG_ON(task_pid(current) != daemon->pid);
295 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
296 struct ecryptfs_msg_ctx, daemon_out_list);
297 BUG_ON(!msg_ctx);
298 mutex_lock(&msg_ctx->mux);
299 if (msg_ctx->msg) {
300 rc = ecryptfs_write_packet_length(packet_length,
301 msg_ctx->msg_size,
302 &packet_length_size);
303 if (rc) {
304 rc = 0;
305 printk(KERN_WARNING "%s: Error writing packet length; "
306 "rc = [%d]\n", __func__, rc);
307 goto out_unlock_msg_ctx;
308 }
309 } else {
310 packet_length_size = 0;
311 msg_ctx->msg_size = 0;
312 }
313 /* miscdevfs packet format:
314 * Octet 0: Type
315 * Octets 1-4: network byte order msg_ctx->counter
316 * Octets 5-N0: Size of struct ecryptfs_message to follow
317 * Octets N0-N1: struct ecryptfs_message (including data)
318 *
319 * Octets 5-N1 not written if the packet type does not
320 * include a message */
321 total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size);
322 if (count < total_length) {
323 rc = 0;
324 printk(KERN_WARNING "%s: Only given user buffer of "
325 "size [%Zd], but we need [%Zd] to read the "
326 "pending message\n", __func__, count, total_length);
327 goto out_unlock_msg_ctx;
328 }
329 i = 0;
330 buf[i++] = msg_ctx->type;
331 counter_nbo = cpu_to_be32(msg_ctx->counter);
332 memcpy(&buf[i], (char *)&counter_nbo, 4);
333 i += 4;
334 if (msg_ctx->msg) {
335 memcpy(&buf[i], packet_length, packet_length_size);
336 i += packet_length_size;
337 rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size);
338 if (rc) {
339 printk(KERN_ERR "%s: copy_to_user returned error "
340 "[%d]\n", __func__, rc);
341 goto out_unlock_msg_ctx;
342 }
343 i += msg_ctx->msg_size;
344 }
345 rc = i;
346 list_del(&msg_ctx->daemon_out_list);
347 kfree(msg_ctx->msg);
348 msg_ctx->msg = NULL;
349 /* We do not expect a reply from the userspace daemon for any
350 * message type other than ECRYPTFS_MSG_REQUEST */
351 if (msg_ctx->type != ECRYPTFS_MSG_REQUEST)
352 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
353out_unlock_msg_ctx:
354 mutex_unlock(&msg_ctx->mux);
355out_unlock_daemon:
356 daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ;
357 mutex_unlock(&daemon->mux);
358 return rc;
359}
360
361/**
362 * ecryptfs_miscdev_helo
363 * @euid: effective user id of miscdevess sending helo packet
364 * @user_ns: The namespace in which @euid applies
365 * @pid: miscdevess id of miscdevess sending helo packet
366 *
367 * Returns zero on success; non-zero otherwise
368 */
369static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
370 struct pid *pid)
371{
372 int rc;
373
374 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
375 pid);
376 if (rc)
377 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
378 return rc;
379}
380
381/**
382 * ecryptfs_miscdev_quit
383 * @euid: effective user id of miscdevess sending quit packet
384 * @user_ns: The namespace in which @euid applies
385 * @pid: miscdevess id of miscdevess sending quit packet
386 *
387 * Returns zero on success; non-zero otherwise
388 */
389static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
390 struct pid *pid)
391{
392 int rc;
393
394 rc = ecryptfs_process_quit(euid, user_ns, pid);
395 if (rc)
396 printk(KERN_WARNING
397 "Error processing QUIT message; rc = [%d]\n", rc);
398 return rc;
399}
400
401/**
402 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
403 * @data: Bytes comprising struct ecryptfs_message
404 * @data_size: sizeof(struct ecryptfs_message) + data len
405 * @euid: Effective user id of miscdevess sending the miscdev response
406 * @user_ns: The namespace in which @euid applies
407 * @pid: Miscdevess id of miscdevess sending the miscdev response
408 * @seq: Sequence number for miscdev response packet
409 *
410 * Returns zero on success; non-zero otherwise
411 */
412static int ecryptfs_miscdev_response(char *data, size_t data_size,
413 uid_t euid, struct user_namespace *user_ns,
414 struct pid *pid, u32 seq)
415{
416 struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
417 int rc;
418
419 if ((sizeof(*msg) + msg->data_len) != data_size) {
420 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
421 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
422 (sizeof(*msg) + msg->data_len), data_size);
423 rc = -EINVAL;
424 goto out;
425 }
426 rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
427 if (rc)
428 printk(KERN_ERR
429 "Error processing response message; rc = [%d]\n", rc);
430out:
431 return rc;
432}
433
434/**
435 * ecryptfs_miscdev_write - handle write to daemon miscdev handle
436 * @file: File for misc dev handle (ignored)
437 * @buf: Buffer containing user data
438 * @count: Amount of data in @buf
439 * @ppos: Pointer to offset in file (ignored)
440 *
441 * miscdevfs packet format:
442 * Octet 0: Type
443 * Octets 1-4: network byte order msg_ctx->counter (0's for non-response)
444 * Octets 5-N0: Size of struct ecryptfs_message to follow
445 * Octets N0-N1: struct ecryptfs_message (including data)
446 *
447 * Returns the number of bytes read from @buf
448 */
449static ssize_t
450ecryptfs_miscdev_write(struct file *file, const char __user *buf,
451 size_t count, loff_t *ppos)
452{
453 u32 counter_nbo, seq;
454 size_t packet_size, packet_size_length, i;
455 ssize_t sz = 0;
456 char *data;
457 int rc;
458
459 if (count == 0)
460 goto out;
461 data = kmalloc(count, GFP_KERNEL);
462 if (!data) {
463 printk(KERN_ERR "%s: Out of memory whilst attempting to "
464 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
465 goto out;
466 }
467 rc = copy_from_user(data, buf, count);
468 if (rc) {
469 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
470 __func__, rc);
471 goto out_free;
472 }
473 sz = count;
474 i = 0;
475 switch (data[i++]) {
476 case ECRYPTFS_MSG_RESPONSE:
477 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
478 printk(KERN_WARNING "%s: Minimum acceptable packet "
479 "size is [%Zd], but amount of data written is "
480 "only [%Zd]. Discarding response packet.\n",
481 __func__,
482 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
483 count);
484 goto out_free;
485 }
486 memcpy((char *)&counter_nbo, &data[i], 4);
487 seq = be32_to_cpu(counter_nbo);
488 i += 4;
489 rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
490 &packet_size_length);
491 if (rc) {
492 printk(KERN_WARNING "%s: Error parsing packet length; "
493 "rc = [%d]\n", __func__, rc);
494 goto out_free;
495 }
496 i += packet_size_length;
497 if ((1 + 4 + packet_size_length + packet_size) != count) {
498 printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])"
499 " + packet_size([%Zd]))([%Zd]) != "
500 "count([%Zd]). Invalid packet format.\n",
501 __func__, packet_size_length, packet_size,
502 (1 + packet_size_length + packet_size), count);
503 goto out_free;
504 }
505 rc = ecryptfs_miscdev_response(&data[i], packet_size,
506 current->euid,
507 current->nsproxy->user_ns,
508 task_pid(current), seq);
509 if (rc)
510 printk(KERN_WARNING "%s: Failed to deliver miscdev "
511 "response to requesting operation; rc = [%d]\n",
512 __func__, rc);
513 break;
514 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break;
536 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
538 "message of unrecognized type [%d]\n",
539 data[0]);
540 break;
541 }
542out_free:
543 kfree(data);
544out:
545 return sz;
546}
547
548
549static const struct file_operations ecryptfs_miscdev_fops = {
550 .open = ecryptfs_miscdev_open,
551 .poll = ecryptfs_miscdev_poll,
552 .read = ecryptfs_miscdev_read,
553 .write = ecryptfs_miscdev_write,
554 .release = ecryptfs_miscdev_release,
555};
556
557static struct miscdevice ecryptfs_miscdev = {
558 .minor = MISC_DYNAMIC_MINOR,
559 .name = "ecryptfs",
560 .fops = &ecryptfs_miscdev_fops
561};
562
563/**
564 * ecryptfs_init_ecryptfs_miscdev
565 *
566 * Messages sent to the userspace daemon from the kernel are placed on
567 * a queue associated with the daemon. The next read against the
568 * miscdev handle by that daemon will return the oldest message placed
569 * on the message queue for the daemon.
570 *
571 * Returns zero on success; non-zero otherwise
572 */
573int ecryptfs_init_ecryptfs_miscdev(void)
574{
575 int rc;
576
577 atomic_set(&ecryptfs_num_miscdev_opens, 0);
578 mutex_lock(&ecryptfs_daemon_hash_mux);
579 rc = misc_register(&ecryptfs_miscdev);
580 if (rc)
581 printk(KERN_ERR "%s: Failed to register miscellaneous device "
582 "for communications with userspace daemons; rc = [%d]\n",
583 __func__, rc);
584 mutex_unlock(&ecryptfs_daemon_hash_mux);
585 return rc;
586}
587
588/**
589 * ecryptfs_destroy_ecryptfs_miscdev
590 *
591 * All of the daemons must be exorcised prior to calling this
592 * function.
593 */
594void ecryptfs_destroy_ecryptfs_miscdev(void)
595{
596 BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0);
597 misc_deregister(&ecryptfs_miscdev);
598}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6df1debdccce..2b6fe1e6e8ba 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
153 flush_dcache_page(page); 153 flush_dcache_page(page);
154 if (rc) { 154 if (rc) {
155 printk(KERN_ERR "%s: Error reading xattr " 155 printk(KERN_ERR "%s: Error reading xattr "
156 "region; rc = [%d]\n", __FUNCTION__, rc); 156 "region; rc = [%d]\n", __func__, rc);
157 goto out; 157 goto out;
158 } 158 }
159 } else { 159 } else {
@@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
169 if (rc) { 169 if (rc) {
170 printk(KERN_ERR "%s: Error attempting to read " 170 printk(KERN_ERR "%s: Error attempting to read "
171 "extent at offset [%lld] in the lower " 171 "extent at offset [%lld] in the lower "
172 "file; rc = [%d]\n", __FUNCTION__, 172 "file; rc = [%d]\n", __func__,
173 lower_offset, rc); 173 lower_offset, rc);
174 goto out; 174 goto out;
175 } 175 }
@@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
212 "the encrypted content from the lower " 212 "the encrypted content from the lower "
213 "file whilst inserting the metadata " 213 "file whilst inserting the metadata "
214 "from the xattr into the header; rc = " 214 "from the xattr into the header; rc = "
215 "[%d]\n", __FUNCTION__, rc); 215 "[%d]\n", __func__, rc);
216 goto out; 216 goto out;
217 } 217 }
218 218
@@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
293 if (rc) { 293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read " 294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n", 295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc); 296 __func__, rc);
297 ClearPageUptodate(page); 297 ClearPageUptodate(page);
298 goto out; 298 goto out;
299 } else 299 } else
@@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
308 "from the lower file whilst " 308 "from the lower file whilst "
309 "inserting the metadata from " 309 "inserting the metadata from "
310 "the xattr into the header; rc " 310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc); 311 "= [%d]\n", __func__, rc);
312 ClearPageUptodate(page); 312 ClearPageUptodate(page);
313 goto out; 313 goto out;
314 } 314 }
@@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
320 if (rc) { 320 if (rc) {
321 printk(KERN_ERR "%s: Error reading " 321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n", 322 "page; rc = [%d]\n",
323 __FUNCTION__, rc); 323 __func__, rc);
324 ClearPageUptodate(page); 324 ClearPageUptodate(page);
325 goto out; 325 goto out;
326 } 326 }
@@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
331 if (rc) { 331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page " 332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n", 333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc); 334 __func__, page->index, rc);
335 ClearPageUptodate(page); 335 ClearPageUptodate(page);
336 goto out; 336 goto out;
337 } 337 }
@@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
348 if (rc) { 348 if (rc) {
349 printk(KERN_ERR "%s: Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
350 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
351 " rc = [%d]\n", __FUNCTION__, 351 " rc = [%d]\n", __func__,
352 prev_page_end_size, rc); 352 prev_page_end_size, rc);
353 goto out; 353 goto out;
354 } 354 }
@@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
389 kfree(file_size_virt); 389 kfree(file_size_virt);
390 if (rc) 390 if (rc)
391 printk(KERN_ERR "%s: Error writing file size to header; " 391 printk(KERN_ERR "%s: Error writing file size to header; "
392 "rc = [%d]\n", __FUNCTION__, rc); 392 "rc = [%d]\n", __func__, rc);
393out: 393out:
394 return rc; 394 return rc;
395} 395}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index f638a698dc52..e0abad62b395 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock;
44 * upon sending the message; non-zero upon error. 44 * upon sending the message; non-zero upon error.
45 */ 45 */
46int ecryptfs_send_netlink(char *data, int data_len, 46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, pid_t daemon_pid) 48 u16 msg_flags, struct pid *daemon_pid)
49{ 49{
50 struct sk_buff *skb; 50 struct sk_buff *skb;
51 struct nlmsghdr *nlh; 51 struct nlmsghdr *nlh;
@@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); 60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out; 61 goto out;
62 } 62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, 63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len); 64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags; 65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) { 66 if (msg_ctx && payload_len) {
@@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
69 msg->data_len = data_len; 69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len); 70 memcpy(msg->data, data, data_len);
71 } 71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); 72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) { 73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " 74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc); 75 "message; rc = [%d]\n", rc);
@@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb); 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
102 int rc; 103 int rc;
103 104
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { 105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
@@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
107 "incorrectly specified data length\n"); 108 "incorrectly specified data length\n");
108 goto out; 109 goto out;
109 } 110 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, 111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); 112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
112 if (rc) 115 if (rc)
113 printk(KERN_ERR 116 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc); 117 "Error processing response message; rc = [%d]\n", rc);
@@ -126,11 +129,13 @@ out:
126 */ 129 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb) 130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{ 131{
132 struct pid *pid;
129 int rc; 133 int rc;
130 134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, 136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid, 137 NETLINK_CREDS(skb)->uid, NULL, pid);
133 NETLINK_CREDS(skb)->pid); 138 put_pid(pid);
134 if (rc) 139 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); 140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc; 141 return rc;
@@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb)
147 */ 152 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb) 153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{ 154{
155 struct pid *pid;
150 int rc; 156 int rc;
151 157
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, 158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
153 NETLINK_CREDS(skb)->pid); 159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
154 if (rc) 161 if (rc)
155 printk(KERN_WARNING 162 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc); 163 "Error processing QUIT message; rc = [%d]\n", rc);
@@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176 goto free; 183 goto free;
177 } 184 }
178 switch (nlh->nlmsg_type) { 185 switch (nlh->nlmsg_type) {
179 case ECRYPTFS_NLMSG_RESPONSE: 186 case ECRYPTFS_MSG_RESPONSE:
180 if (ecryptfs_process_nl_response(skb)) { 187 if (ecryptfs_process_nl_response(skb)) {
181 ecryptfs_printk(KERN_WARNING, "Failed to " 188 ecryptfs_printk(KERN_WARNING, "Failed to "
182 "deliver netlink response to " 189 "deliver netlink response to "
183 "requesting operation\n"); 190 "requesting operation\n");
184 } 191 }
185 break; 192 break;
186 case ECRYPTFS_NLMSG_HELO: 193 case ECRYPTFS_MSG_HELO:
187 if (ecryptfs_process_nl_helo(skb)) { 194 if (ecryptfs_process_nl_helo(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to " 195 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "fulfill HELO request\n"); 196 "fulfill HELO request\n");
190 } 197 }
191 break; 198 break;
192 case ECRYPTFS_NLMSG_QUIT: 199 case ECRYPTFS_MSG_QUIT:
193 if (ecryptfs_process_nl_quit(skb)) { 200 if (ecryptfs_process_nl_quit(skb)) {
194 ecryptfs_printk(KERN_WARNING, "Failed to " 201 ecryptfs_printk(KERN_WARNING, "Failed to "
195 "fulfill QUIT request\n"); 202 "fulfill QUIT request\n");
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0c4928623bbc..ebf55150be56 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
55 set_fs(fs_save); 55 set_fs(fs_save);
56 if (octets_written < 0) { 56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; " 57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size); 58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL; 59 rc = -EINVAL;
60 } 60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 61 mutex_unlock(&inode_info->lower_file_mutex);
@@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
153 rc = PTR_ERR(ecryptfs_page); 153 rc = PTR_ERR(ecryptfs_page);
154 printk(KERN_ERR "%s: Error getting page at " 154 printk(KERN_ERR "%s: Error getting page at "
155 "index [%ld] from eCryptfs inode " 155 "index [%ld] from eCryptfs inode "
156 "mapping; rc = [%d]\n", __FUNCTION__, 156 "mapping; rc = [%d]\n", __func__,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
@@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
165 if (rc) { 165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting " 166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n", 167 "page; rc = [%d]\n",
168 __FUNCTION__, rc); 168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page); 169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page); 170 page_cache_release(ecryptfs_page);
171 goto out; 171 goto out;
@@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
202 page_cache_release(ecryptfs_page); 202 page_cache_release(ecryptfs_page);
203 if (rc) { 203 if (rc) {
204 printk(KERN_ERR "%s: Error encrypting " 204 printk(KERN_ERR "%s: Error encrypting "
205 "page; rc = [%d]\n", __FUNCTION__, rc); 205 "page; rc = [%d]\n", __func__, rc);
206 goto out; 206 goto out;
207 } 207 }
208 pos += num_bytes; 208 pos += num_bytes;
@@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
254 set_fs(fs_save); 254 set_fs(fs_save);
255 if (octets_read < 0) { 255 if (octets_read < 0) {
256 printk(KERN_ERR "%s: octets_read = [%td]; " 256 printk(KERN_ERR "%s: octets_read = [%td]; "
257 "expected [%td]\n", __FUNCTION__, octets_read, size); 257 "expected [%td]\n", __func__, octets_read, size);
258 rc = -EINVAL; 258 rc = -EINVAL;
259 } 259 }
260 mutex_unlock(&inode_info->lower_file_mutex); 260 mutex_unlock(&inode_info->lower_file_mutex);
@@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 printk(KERN_ERR "%s: Attempt to read data past the end of the " 327 printk(KERN_ERR "%s: Attempt to read data past the end of the "
328 "file; offset = [%lld]; size = [%td]; " 328 "file; offset = [%lld]; size = [%td]; "
329 "ecryptfs_file_size = [%lld]\n", 329 "ecryptfs_file_size = [%lld]\n",
330 __FUNCTION__, offset, size, ecryptfs_file_size); 330 __func__, offset, size, ecryptfs_file_size);
331 goto out; 331 goto out;
332 } 332 }
333 pos = offset; 333 pos = offset;
@@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
345 rc = PTR_ERR(ecryptfs_page); 345 rc = PTR_ERR(ecryptfs_page);
346 printk(KERN_ERR "%s: Error getting page at " 346 printk(KERN_ERR "%s: Error getting page at "
347 "index [%ld] from eCryptfs inode " 347 "index [%ld] from eCryptfs inode "
348 "mapping; rc = [%d]\n", __FUNCTION__, 348 "mapping; rc = [%d]\n", __func__,
349 ecryptfs_page_idx, rc); 349 ecryptfs_page_idx, rc);
350 goto out; 350 goto out;
351 } 351 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page); 352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) { 353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting " 354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __FUNCTION__, rc); 355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page); 356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page); 357 page_cache_release(ecryptfs_page);
358 goto out; 358 goto out;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50ac..343942deeec1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd(unsigned int count)
202{ 202{
203 int error, fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207 205
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx) 207 if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
216 * When we call this, the initialization must be complete, since 214 * When we call this, the initialization must be complete, since
217 * anon_inode_getfd() will install the fd. 215 * anon_inode_getfd() will install the fd.
218 */ 216 */
219 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", 217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
220 &eventfd_fops, ctx); 218 if (fd < 0)
221 if (!error) 219 kfree(ctx);
222 return fd; 220 return fd;
223
224 kfree(ctx);
225 return error;
226} 221}
227 222
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a415f42d32cf..990c01d2d66b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258} 258}
259 259
260/* Special initialization for the RB tree node to detect linkage */
261static inline void ep_rb_initnode(struct rb_node *n)
262{
263 rb_set_parent(n, n);
264}
265
266/* Removes a node from the RB tree and marks it for a fast is-linked check */
267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
268{
269 rb_erase(n, r);
270 rb_set_parent(n, n);
271}
272
273/* Fast check to verify that the item is linked to the main RB tree */
274static inline int ep_rb_linked(struct rb_node *n)
275{
276 return rb_parent(n) != n;
277}
278
279/* Tells us if the item is currently linked */ 260/* Tells us if the item is currently linked */
280static inline int ep_is_linked(struct list_head *p) 261static inline int ep_is_linked(struct list_head *p)
281{ 262{
@@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p)
283} 264}
284 265
285/* Get the "struct epitem" from a wait queue pointer */ 266/* Get the "struct epitem" from a wait queue pointer */
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p) 267static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
287{ 268{
288 return container_of(p, struct eppoll_entry, wait)->base; 269 return container_of(p, struct eppoll_entry, wait)->base;
289} 270}
290 271
291/* Get the "struct epitem" from an epoll queue wrapper */ 272/* Get the "struct epitem" from an epoll queue wrapper */
292static inline struct epitem * ep_item_from_epqueue(poll_table *p) 273static inline struct epitem *ep_item_from_epqueue(poll_table *p)
293{ 274{
294 return container_of(p, struct ep_pqueue, pt)->epi; 275 return container_of(p, struct ep_pqueue, pt)->epi;
295} 276}
@@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
411 list_del_init(&epi->fllink); 392 list_del_init(&epi->fllink);
412 spin_unlock(&file->f_ep_lock); 393 spin_unlock(&file->f_ep_lock);
413 394
414 if (ep_rb_linked(&epi->rbn)) 395 rb_erase(&epi->rbn, &ep->rbr);
415 ep_rb_erase(&epi->rbn, &ep->rbr);
416 396
417 spin_lock_irqsave(&ep->lock, flags); 397 spin_lock_irqsave(&ep->lock, flags);
418 if (ep_is_linked(&epi->rdllink)) 398 if (ep_is_linked(&epi->rdllink))
@@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
728 goto error_return; 708 goto error_return;
729 709
730 /* Item initialization follow here ... */ 710 /* Item initialization follow here ... */
731 ep_rb_initnode(&epi->rbn);
732 INIT_LIST_HEAD(&epi->rdllink); 711 INIT_LIST_HEAD(&epi->rdllink);
733 INIT_LIST_HEAD(&epi->fllink); 712 INIT_LIST_HEAD(&epi->fllink);
734 INIT_LIST_HEAD(&epi->pwqlist); 713 INIT_LIST_HEAD(&epi->pwqlist);
@@ -1071,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
1071{ 1050{
1072 int error, fd = -1; 1051 int error, fd = -1;
1073 struct eventpoll *ep; 1052 struct eventpoll *ep;
1074 struct inode *inode;
1075 struct file *file;
1076 1053
1077 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1078 current, size)); 1055 current, size));
@@ -1082,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
1082 * structure ( "struct eventpoll" ). 1059 * structure ( "struct eventpoll" ).
1083 */ 1060 */
1084 error = -EINVAL; 1061 error = -EINVAL;
1085 if (size <= 0 || (error = ep_alloc(&ep)) != 0) 1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
1063 fd = error;
1086 goto error_return; 1064 goto error_return;
1065 }
1087 1066
1088 /* 1067 /*
1089 * Creates all the items needed to setup an eventpoll file. That is, 1068 * Creates all the items needed to setup an eventpoll file. That is,
1090 * a file structure, and inode and a free file descriptor. 1069 * a file structure and a free file descriptor.
1091 */ 1070 */
1092 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", 1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
1093 &eventpoll_fops, ep); 1072 if (fd < 0)
1094 if (error) 1073 ep_free(ep);
1095 goto error_free;
1096 1074
1075error_return:
1097 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1098 current, size, fd)); 1077 current, size, fd));
1099 1078
1100 return fd; 1079 return fd;
1101
1102error_free:
1103 ep_free(ep);
1104error_return:
1105 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1106 current, size, error));
1107 return error;
1108} 1080}
1109 1081
1110/* 1082/*
@@ -1262,7 +1234,7 @@ error_return:
1262 return error; 1234 return error;
1263} 1235}
1264 1236
1265#ifdef TIF_RESTORE_SIGMASK 1237#ifdef HAVE_SET_RESTORE_SIGMASK
1266 1238
1267/* 1239/*
1268 * Implement the event wait interface for the eventpoll file. It is the kernel 1240 * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1300,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1300 if (error == -EINTR) { 1272 if (error == -EINTR) {
1301 memcpy(&current->saved_sigmask, &sigsaved, 1273 memcpy(&current->saved_sigmask, &sigsaved,
1302 sizeof(sigsaved)); 1274 sizeof(sigsaved));
1303 set_thread_flag(TIF_RESTORE_SIGMASK); 1275 set_restore_sigmask();
1304 } else 1276 } else
1305 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1277 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1306 } 1278 }
@@ -1308,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1308 return error; 1280 return error;
1309} 1281}
1310 1282
1311#endif /* #ifdef TIF_RESTORE_SIGMASK */ 1283#endif /* HAVE_SET_RESTORE_SIGMASK */
1312 1284
1313static int __init eventpoll_init(void) 1285static int __init eventpoll_init(void)
1314{ 1286{
@@ -1330,4 +1302,3 @@ static int __init eventpoll_init(void)
1330 return 0; 1302 return 0;
1331} 1303}
1332fs_initcall(eventpoll_init); 1304fs_initcall(eventpoll_init);
1333
diff --git a/fs/exec.c b/fs/exec.c
index 54a0a557b678..aeaa9791d8be 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
28#include <linux/a.out.h> 29#include <linux/a.out.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
@@ -735,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
735 tsk->active_mm = mm; 736 tsk->active_mm = mm;
736 activate_mm(active_mm, mm); 737 activate_mm(active_mm, mm);
737 task_unlock(tsk); 738 task_unlock(tsk);
739 mm_update_next_owner(mm);
738 arch_pick_mmap_layout(mm); 740 arch_pick_mmap_layout(mm);
739 if (old_mm) { 741 if (old_mm) {
740 up_read(&old_mm->mmap_sem); 742 up_read(&old_mm->mmap_sem);
@@ -765,9 +767,7 @@ static int de_thread(struct task_struct *tsk)
765 767
766 /* 768 /*
767 * Kill all other threads in the thread group. 769 * Kill all other threads in the thread group.
768 * We must hold tasklist_lock to call zap_other_threads.
769 */ 770 */
770 read_lock(&tasklist_lock);
771 spin_lock_irq(lock); 771 spin_lock_irq(lock);
772 if (signal_group_exit(sig)) { 772 if (signal_group_exit(sig)) {
773 /* 773 /*
@@ -775,21 +775,10 @@ static int de_thread(struct task_struct *tsk)
775 * return so that the signal is processed. 775 * return so that the signal is processed.
776 */ 776 */
777 spin_unlock_irq(lock); 777 spin_unlock_irq(lock);
778 read_unlock(&tasklist_lock);
779 return -EAGAIN; 778 return -EAGAIN;
780 } 779 }
781
782 /*
783 * child_reaper ignores SIGKILL, change it now.
784 * Reparenting needs write_lock on tasklist_lock,
785 * so it is safe to do it under read_lock.
786 */
787 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
788 task_active_pid_ns(tsk)->child_reaper = tsk;
789
790 sig->group_exit_task = tsk; 780 sig->group_exit_task = tsk;
791 zap_other_threads(tsk); 781 zap_other_threads(tsk);
792 read_unlock(&tasklist_lock);
793 782
794 /* Account for the thread group leader hanging around: */ 783 /* Account for the thread group leader hanging around: */
795 count = thread_group_leader(tsk) ? 1 : 2; 784 count = thread_group_leader(tsk) ? 1 : 2;
@@ -810,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
810 if (!thread_group_leader(tsk)) { 799 if (!thread_group_leader(tsk)) {
811 leader = tsk->group_leader; 800 leader = tsk->group_leader;
812 801
813 sig->notify_count = -1; 802 sig->notify_count = -1; /* for exit_notify() */
814 for (;;) { 803 for (;;) {
815 write_lock_irq(&tasklist_lock); 804 write_lock_irq(&tasklist_lock);
816 if (likely(leader->exit_state)) 805 if (likely(leader->exit_state))
@@ -820,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
820 schedule(); 809 schedule();
821 } 810 }
822 811
812 if (unlikely(task_child_reaper(tsk) == leader))
813 task_active_pid_ns(tsk)->child_reaper = tsk;
823 /* 814 /*
824 * The only record we have of the real-time age of a 815 * The only record we have of the real-time age of a
825 * process, regardless of execs it's done, is start_time. 816 * process, regardless of execs it's done, is start_time.
@@ -953,7 +944,6 @@ int flush_old_exec(struct linux_binprm * bprm)
953{ 944{
954 char * name; 945 char * name;
955 int i, ch, retval; 946 int i, ch, retval;
956 struct files_struct *files;
957 char tcomm[sizeof(current->comm)]; 947 char tcomm[sizeof(current->comm)];
958 948
959 /* 949 /*
@@ -964,27 +954,18 @@ int flush_old_exec(struct linux_binprm * bprm)
964 if (retval) 954 if (retval)
965 goto out; 955 goto out;
966 956
967 /* 957 set_mm_exe_file(bprm->mm, bprm->file);
968 * Make sure we have private file handles. Ask the 958
969 * fork helper to do the work for us and the exit
970 * helper to do the cleanup of the old one.
971 */
972 files = current->files; /* refcounted so safe to hold */
973 retval = unshare_files();
974 if (retval)
975 goto out;
976 /* 959 /*
977 * Release all of the old mmap stuff 960 * Release all of the old mmap stuff
978 */ 961 */
979 retval = exec_mmap(bprm->mm); 962 retval = exec_mmap(bprm->mm);
980 if (retval) 963 if (retval)
981 goto mmap_failed; 964 goto out;
982 965
983 bprm->mm = NULL; /* We're using it now */ 966 bprm->mm = NULL; /* We're using it now */
984 967
985 /* This is the point of no return */ 968 /* This is the point of no return */
986 put_files_struct(files);
987
988 current->sas_ss_sp = current->sas_ss_size = 0; 969 current->sas_ss_sp = current->sas_ss_size = 0;
989 970
990 if (current->euid == current->uid && current->egid == current->gid) 971 if (current->euid == current->uid && current->egid == current->gid)
@@ -1034,8 +1015,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1034 1015
1035 return 0; 1016 return 0;
1036 1017
1037mmap_failed:
1038 reset_files_struct(current, files);
1039out: 1018out:
1040 return retval; 1019 return retval;
1041} 1020}
@@ -1282,13 +1261,17 @@ int do_execve(char * filename,
1282{ 1261{
1283 struct linux_binprm *bprm; 1262 struct linux_binprm *bprm;
1284 struct file *file; 1263 struct file *file;
1285 unsigned long env_p; 1264 struct files_struct *displaced;
1286 int retval; 1265 int retval;
1287 1266
1267 retval = unshare_files(&displaced);
1268 if (retval)
1269 goto out_ret;
1270
1288 retval = -ENOMEM; 1271 retval = -ENOMEM;
1289 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); 1272 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1290 if (!bprm) 1273 if (!bprm)
1291 goto out_ret; 1274 goto out_files;
1292 1275
1293 file = open_exec(filename); 1276 file = open_exec(filename);
1294 retval = PTR_ERR(file); 1277 retval = PTR_ERR(file);
@@ -1330,11 +1313,9 @@ int do_execve(char * filename,
1330 if (retval < 0) 1313 if (retval < 0)
1331 goto out; 1314 goto out;
1332 1315
1333 env_p = bprm->p;
1334 retval = copy_strings(bprm->argc, argv, bprm); 1316 retval = copy_strings(bprm->argc, argv, bprm);
1335 if (retval < 0) 1317 if (retval < 0)
1336 goto out; 1318 goto out;
1337 bprm->argv_len = env_p - bprm->p;
1338 1319
1339 retval = search_binary_handler(bprm,regs); 1320 retval = search_binary_handler(bprm,regs);
1340 if (retval >= 0) { 1321 if (retval >= 0) {
@@ -1343,6 +1324,8 @@ int do_execve(char * filename,
1343 security_bprm_free(bprm); 1324 security_bprm_free(bprm);
1344 acct_update_integrals(current); 1325 acct_update_integrals(current);
1345 kfree(bprm); 1326 kfree(bprm);
1327 if (displaced)
1328 put_files_struct(displaced);
1346 return retval; 1329 return retval;
1347 } 1330 }
1348 1331
@@ -1363,6 +1346,9 @@ out_file:
1363out_kfree: 1346out_kfree:
1364 kfree(bprm); 1347 kfree(bprm);
1365 1348
1349out_files:
1350 if (displaced)
1351 reset_files_struct(displaced);
1366out_ret: 1352out_ret:
1367 return retval; 1353 return retval;
1368} 1354}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44eca..cc91227d3bb8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
150 if (IS_ERR(ppd)) { 150 if (IS_ERR(ppd)) {
151 err = PTR_ERR(ppd); 151 err = PTR_ERR(ppd);
152 dprintk("%s: get_parent of %ld failed, err %d\n", 152 dprintk("%s: get_parent of %ld failed, err %d\n",
153 __FUNCTION__, pd->d_inode->i_ino, err); 153 __func__, pd->d_inode->i_ino, err);
154 dput(pd); 154 dput(pd);
155 break; 155 break;
156 } 156 }
157 157
158 dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, 158 dprintk("%s: find name of %lu in %lu\n", __func__,
159 pd->d_inode->i_ino, ppd->d_inode->i_ino); 159 pd->d_inode->i_ino, ppd->d_inode->i_ino);
160 err = exportfs_get_name(mnt, ppd, nbuf, pd); 160 err = exportfs_get_name(mnt, ppd, nbuf, pd);
161 if (err) { 161 if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
168 continue; 168 continue;
169 break; 169 break;
170 } 170 }
171 dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); 171 dprintk("%s: found name: %s\n", __func__, nbuf);
172 mutex_lock(&ppd->d_inode->i_mutex); 172 mutex_lock(&ppd->d_inode->i_mutex);
173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
174 mutex_unlock(&ppd->d_inode->i_mutex); 174 mutex_unlock(&ppd->d_inode->i_mutex);
175 if (IS_ERR(npd)) { 175 if (IS_ERR(npd)) {
176 err = PTR_ERR(npd); 176 err = PTR_ERR(npd);
177 dprintk("%s: lookup failed: %d\n", 177 dprintk("%s: lookup failed: %d\n",
178 __FUNCTION__, err); 178 __func__, err);
179 dput(ppd); 179 dput(ppd);
180 dput(pd); 180 dput(pd);
181 break; 181 break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
188 if (npd == pd) 188 if (npd == pd)
189 noprogress = 0; 189 noprogress = 0;
190 else 190 else
191 printk("%s: npd != pd\n", __FUNCTION__); 191 printk("%s: npd != pd\n", __func__);
192 dput(npd); 192 dput(npd);
193 dput(ppd); 193 dput(ppd);
194 if (IS_ROOT(pd)) { 194 if (IS_ROOT(pd)) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e7b2bafa1dd9..10bb02c3f25c 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -106,7 +106,7 @@ static int ext2_valid_block_bitmap(struct super_block *sb,
106 return 1; 106 return 1;
107 107
108err_out: 108err_out:
109 ext2_error(sb, __FUNCTION__, 109 ext2_error(sb, __func__,
110 "Invalid block bitmap - " 110 "Invalid block bitmap - "
111 "block_group = %d, block = %lu", 111 "block_group = %d, block = %lu",
112 block_group, bitmap_blk); 112 block_group, bitmap_blk);
@@ -132,7 +132,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
132 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 132 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
133 bh = sb_getblk(sb, bitmap_blk); 133 bh = sb_getblk(sb, bitmap_blk);
134 if (unlikely(!bh)) { 134 if (unlikely(!bh)) {
135 ext2_error(sb, __FUNCTION__, 135 ext2_error(sb, __func__,
136 "Cannot read block bitmap - " 136 "Cannot read block bitmap - "
137 "block_group = %d, block_bitmap = %u", 137 "block_group = %d, block_bitmap = %u",
138 block_group, le32_to_cpu(desc->bg_block_bitmap)); 138 block_group, le32_to_cpu(desc->bg_block_bitmap));
@@ -143,17 +143,18 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
143 143
144 if (bh_submit_read(bh) < 0) { 144 if (bh_submit_read(bh) < 0) {
145 brelse(bh); 145 brelse(bh);
146 ext2_error(sb, __FUNCTION__, 146 ext2_error(sb, __func__,
147 "Cannot read block bitmap - " 147 "Cannot read block bitmap - "
148 "block_group = %d, block_bitmap = %u", 148 "block_group = %d, block_bitmap = %u",
149 block_group, le32_to_cpu(desc->bg_block_bitmap)); 149 block_group, le32_to_cpu(desc->bg_block_bitmap));
150 return NULL; 150 return NULL;
151 } 151 }
152 if (!ext2_valid_block_bitmap(sb, desc, block_group, bh)) {
153 brelse(bh);
154 return NULL;
155 }
156 152
153 ext2_valid_block_bitmap(sb, desc, block_group, bh);
154 /*
155 * file system mounted not to panic on error, continue with corrupt
156 * bitmap
157 */
157 return bh; 158 return bh;
158} 159}
159 160
@@ -245,11 +246,10 @@ restart:
245 prev = rsv; 246 prev = rsv;
246 } 247 }
247 printk("Window map complete.\n"); 248 printk("Window map complete.\n");
248 if (bad) 249 BUG_ON(bad);
249 BUG();
250} 250}
251#define rsv_window_dump(root, verbose) \ 251#define rsv_window_dump(root, verbose) \
252 __rsv_window_dump((root), (verbose), __FUNCTION__) 252 __rsv_window_dump((root), (verbose), __func__)
253#else 253#else
254#define rsv_window_dump(root, verbose) do {} while (0) 254#define rsv_window_dump(root, verbose) do {} while (0)
255#endif 255#endif
@@ -548,7 +548,7 @@ do_more:
548 for (i = 0, group_freed = 0; i < count; i++) { 548 for (i = 0, group_freed = 0; i < count; i++) {
549 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 549 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
550 bit + i, bitmap_bh->b_data)) { 550 bit + i, bitmap_bh->b_data)) {
551 ext2_error(sb, __FUNCTION__, 551 ext2_error(sb, __func__,
552 "bit already cleared for block %lu", block + i); 552 "bit already cleared for block %lu", block + i);
553 } else { 553 } else {
554 group_freed++; 554 group_freed++;
@@ -1381,7 +1381,12 @@ allocated:
1381 "Allocating block in system zone - " 1381 "Allocating block in system zone - "
1382 "blocks from "E2FSBLK", length %lu", 1382 "blocks from "E2FSBLK", length %lu",
1383 ret_block, num); 1383 ret_block, num);
1384 goto out; 1384 /*
1385 * ext2_try_to_allocate marked the blocks we allocated as in
1386 * use. So we may want to selectively mark some of the blocks
1387 * as free
1388 */
1389 goto retry_alloc;
1385 } 1390 }
1386 1391
1387 performed_allocation = 1; 1392 performed_allocation = 1;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 8dededd80fe2..a78c6b4af060 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -41,8 +41,8 @@ static inline __le16 ext2_rec_len_to_disk(unsigned len)
41{ 41{
42 if (len == (1 << 16)) 42 if (len == (1 << 16))
43 return cpu_to_le16(EXT2_MAX_REC_LEN); 43 return cpu_to_le16(EXT2_MAX_REC_LEN);
44 else if (len > (1 << 16)) 44 else
45 BUG(); 45 BUG_ON(len > (1 << 16));
46 return cpu_to_le16(len); 46 return cpu_to_le16(len);
47} 47}
48 48
@@ -295,11 +295,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
295 struct page *page = ext2_get_page(inode, n); 295 struct page *page = ext2_get_page(inode, n);
296 296
297 if (IS_ERR(page)) { 297 if (IS_ERR(page)) {
298 ext2_error(sb, __FUNCTION__, 298 ext2_error(sb, __func__,
299 "bad page in #%lu", 299 "bad page in #%lu",
300 inode->i_ino); 300 inode->i_ino);
301 filp->f_pos += PAGE_CACHE_SIZE - offset; 301 filp->f_pos += PAGE_CACHE_SIZE - offset;
302 return -EIO; 302 return PTR_ERR(page);
303 } 303 }
304 kaddr = page_address(page); 304 kaddr = page_address(page);
305 if (unlikely(need_revalidate)) { 305 if (unlikely(need_revalidate)) {
@@ -314,7 +314,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
314 limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); 314 limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
315 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { 315 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
316 if (de->rec_len == 0) { 316 if (de->rec_len == 0) {
317 ext2_error(sb, __FUNCTION__, 317 ext2_error(sb, __func__,
318 "zero-length directory entry"); 318 "zero-length directory entry");
319 ext2_put_page(page); 319 ext2_put_page(page);
320 return -EIO; 320 return -EIO;
@@ -381,7 +381,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
381 kaddr += ext2_last_byte(dir, n) - reclen; 381 kaddr += ext2_last_byte(dir, n) - reclen;
382 while ((char *) de <= kaddr) { 382 while ((char *) de <= kaddr) {
383 if (de->rec_len == 0) { 383 if (de->rec_len == 0) {
384 ext2_error(dir->i_sb, __FUNCTION__, 384 ext2_error(dir->i_sb, __func__,
385 "zero-length directory entry"); 385 "zero-length directory entry");
386 ext2_put_page(page); 386 ext2_put_page(page);
387 goto out; 387 goto out;
@@ -396,7 +396,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
396 n = 0; 396 n = 0;
397 /* next page is past the blocks we've got */ 397 /* next page is past the blocks we've got */
398 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { 398 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
399 ext2_error(dir->i_sb, __FUNCTION__, 399 ext2_error(dir->i_sb, __func__,
400 "dir %lu size %lld exceeds block count %llu", 400 "dir %lu size %lld exceeds block count %llu",
401 dir->i_ino, dir->i_size, 401 dir->i_ino, dir->i_size,
402 (unsigned long long)dir->i_blocks); 402 (unsigned long long)dir->i_blocks);
@@ -506,7 +506,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
506 goto got_it; 506 goto got_it;
507 } 507 }
508 if (de->rec_len == 0) { 508 if (de->rec_len == 0) {
509 ext2_error(dir->i_sb, __FUNCTION__, 509 ext2_error(dir->i_sb, __func__,
510 "zero-length directory entry"); 510 "zero-length directory entry");
511 err = -EIO; 511 err = -EIO;
512 goto out_unlock; 512 goto out_unlock;
@@ -578,7 +578,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
578 578
579 while ((char*)de < (char*)dir) { 579 while ((char*)de < (char*)dir) {
580 if (de->rec_len == 0) { 580 if (de->rec_len == 0) {
581 ext2_error(inode->i_sb, __FUNCTION__, 581 ext2_error(inode->i_sb, __func__,
582 "zero-length directory entry"); 582 "zero-length directory entry");
583 err = -EIO; 583 err = -EIO;
584 goto out; 584 goto out;
@@ -670,7 +670,7 @@ int ext2_empty_dir (struct inode * inode)
670 670
671 while ((char *)de <= kaddr) { 671 while ((char *)de <= kaddr) {
672 if (de->rec_len == 0) { 672 if (de->rec_len == 0) {
673 ext2_error(inode->i_sb, __FUNCTION__, 673 ext2_error(inode->i_sb, __func__,
674 "zero-length directory entry"); 674 "zero-length directory entry");
675 printk("kaddr=%p, de=%p\n", kaddr, de); 675 printk("kaddr=%p, de=%p\n", kaddr, de);
676 goto not_empty; 676 goto not_empty;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 08f647d8188d..f59741346760 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -75,11 +75,9 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
75 } 75 }
76 76
77 spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); 77 spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
78 desc->bg_free_inodes_count = 78 le16_add_cpu(&desc->bg_free_inodes_count, 1);
79 cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
80 if (dir) 79 if (dir)
81 desc->bg_used_dirs_count = 80 le16_add_cpu(&desc->bg_used_dirs_count, -1);
82 cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
83 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); 81 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
84 if (dir) 82 if (dir)
85 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); 83 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
@@ -539,13 +537,11 @@ got:
539 percpu_counter_inc(&sbi->s_dirs_counter); 537 percpu_counter_inc(&sbi->s_dirs_counter);
540 538
541 spin_lock(sb_bgl_lock(sbi, group)); 539 spin_lock(sb_bgl_lock(sbi, group));
542 gdp->bg_free_inodes_count = 540 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
543 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
544 if (S_ISDIR(mode)) { 541 if (S_ISDIR(mode)) {
545 if (sbi->s_debts[group] < 255) 542 if (sbi->s_debts[group] < 255)
546 sbi->s_debts[group]++; 543 sbi->s_debts[group]++;
547 gdp->bg_used_dirs_count = 544 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
548 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
549 } else { 545 } else {
550 if (sbi->s_debts[group]) 546 if (sbi->s_debts[group])
551 sbi->s_debts[group]--; 547 sbi->s_debts[group]--;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b8a2990bab83..384fc0d1dd74 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -254,13 +254,13 @@ no_block:
254 * Caller must make sure that @ind is valid and will stay that way. 254 * Caller must make sure that @ind is valid and will stay that way.
255 */ 255 */
256 256
257static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) 257static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
258{ 258{
259 struct ext2_inode_info *ei = EXT2_I(inode); 259 struct ext2_inode_info *ei = EXT2_I(inode);
260 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; 260 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
261 __le32 *p; 261 __le32 *p;
262 unsigned long bg_start; 262 ext2_fsblk_t bg_start;
263 unsigned long colour; 263 ext2_fsblk_t colour;
264 264
265 /* Try to find previous block */ 265 /* Try to find previous block */
266 for (p = ind->p - 1; p >= start; p--) 266 for (p = ind->p - 1; p >= start; p--)
@@ -275,8 +275,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
275 * It is going to be refered from inode itself? OK, just put it into 275 * It is going to be refered from inode itself? OK, just put it into
276 * the same cylinder group then. 276 * the same cylinder group then.
277 */ 277 */
278 bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + 278 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
279 le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block);
280 colour = (current->pid % 16) * 279 colour = (current->pid % 16) *
281 (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); 280 (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16);
282 return bg_start + colour; 281 return bg_start + colour;
@@ -291,8 +290,8 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
291 * Returns preferred place for a block (the goal). 290 * Returns preferred place for a block (the goal).
292 */ 291 */
293 292
294static inline int ext2_find_goal(struct inode *inode, long block, 293static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block,
295 Indirect *partial) 294 Indirect *partial)
296{ 295{
297 struct ext2_block_alloc_info *block_i; 296 struct ext2_block_alloc_info *block_i;
298 297
@@ -796,7 +795,7 @@ const struct address_space_operations ext2_aops = {
796 795
797const struct address_space_operations ext2_aops_xip = { 796const struct address_space_operations ext2_aops_xip = {
798 .bmap = ext2_bmap, 797 .bmap = ext2_bmap,
799 .get_xip_page = ext2_get_xip_page, 798 .get_xip_mem = ext2_get_xip_mem,
800}; 799};
801 800
802const struct address_space_operations ext2_nobh_aops = { 801const struct address_space_operations ext2_nobh_aops = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 088b011bb97e..ef50cbc792db 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -51,8 +51,7 @@ void ext2_error (struct super_block * sb, const char * function,
51 51
52 if (!(sb->s_flags & MS_RDONLY)) { 52 if (!(sb->s_flags & MS_RDONLY)) {
53 sbi->s_mount_state |= EXT2_ERROR_FS; 53 sbi->s_mount_state |= EXT2_ERROR_FS;
54 es->s_state = 54 es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
55 cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
56 ext2_sync_super(sb, es); 55 ext2_sync_super(sb, es);
57 } 56 }
58 57
@@ -90,7 +89,7 @@ void ext2_update_dynamic_rev(struct super_block *sb)
90 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) 89 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
91 return; 90 return;
92 91
93 ext2_warning(sb, __FUNCTION__, 92 ext2_warning(sb, __func__,
94 "updating to rev %d because of new feature flag, " 93 "updating to rev %d because of new feature flag, "
95 "running e2fsck is recommended", 94 "running e2fsck is recommended",
96 EXT2_DYNAMIC_REV); 95 EXT2_DYNAMIC_REV);
@@ -604,7 +603,7 @@ static int ext2_setup_super (struct super_block * sb,
604 "running e2fsck is recommended\n"); 603 "running e2fsck is recommended\n");
605 if (!le16_to_cpu(es->s_max_mnt_count)) 604 if (!le16_to_cpu(es->s_max_mnt_count))
606 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 605 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
607 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 606 le16_add_cpu(&es->s_mnt_count, 1);
608 ext2_write_super(sb); 607 ext2_write_super(sb);
609 if (test_opt (sb, DEBUG)) 608 if (test_opt (sb, DEBUG))
610 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " 609 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
@@ -622,13 +621,13 @@ static int ext2_check_descriptors(struct super_block *sb)
622{ 621{
623 int i; 622 int i;
624 struct ext2_sb_info *sbi = EXT2_SB(sb); 623 struct ext2_sb_info *sbi = EXT2_SB(sb);
625 unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
626 unsigned long last_block;
627 624
628 ext2_debug ("Checking group descriptors"); 625 ext2_debug ("Checking group descriptors");
629 626
630 for (i = 0; i < sbi->s_groups_count; i++) { 627 for (i = 0; i < sbi->s_groups_count; i++) {
631 struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); 628 struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL);
629 ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i);
630 ext2_fsblk_t last_block;
632 631
633 if (i == sbi->s_groups_count - 1) 632 if (i == sbi->s_groups_count - 1)
634 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 633 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
@@ -664,7 +663,6 @@ static int ext2_check_descriptors(struct super_block *sb)
664 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); 663 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
665 return 0; 664 return 0;
666 } 665 }
667 first_block += EXT2_BLOCKS_PER_GROUP(sb);
668 } 666 }
669 return 1; 667 return 1;
670} 668}
@@ -721,10 +719,9 @@ static unsigned long descriptor_loc(struct super_block *sb,
721 int nr) 719 int nr)
722{ 720{
723 struct ext2_sb_info *sbi = EXT2_SB(sb); 721 struct ext2_sb_info *sbi = EXT2_SB(sb);
724 unsigned long bg, first_data_block, first_meta_bg; 722 unsigned long bg, first_meta_bg;
725 int has_super = 0; 723 int has_super = 0;
726 724
727 first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
728 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 725 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
729 726
730 if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) || 727 if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
@@ -733,7 +730,8 @@ static unsigned long descriptor_loc(struct super_block *sb,
733 bg = sbi->s_desc_per_block * nr; 730 bg = sbi->s_desc_per_block * nr;
734 if (ext2_bg_has_super(sb, bg)) 731 if (ext2_bg_has_super(sb, bg))
735 has_super = 1; 732 has_super = 1;
736 return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); 733
734 return ext2_group_first_block_no(sb, bg) + has_super;
737} 735}
738 736
739static int ext2_fill_super(struct super_block *sb, void *data, int silent) 737static int ext2_fill_super(struct super_block *sb, void *data, int silent)
@@ -1062,7 +1060,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1062 goto failed_mount3; 1060 goto failed_mount3;
1063 } 1061 }
1064 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1062 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1065 ext2_warning(sb, __FUNCTION__, 1063 ext2_warning(sb, __func__,
1066 "mounting ext3 filesystem as ext2"); 1064 "mounting ext3 filesystem as ext2");
1067 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1065 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1068 return 0; 1066 return 0;
@@ -1126,10 +1124,9 @@ void ext2_write_super (struct super_block * sb)
1126 if (!(sb->s_flags & MS_RDONLY)) { 1124 if (!(sb->s_flags & MS_RDONLY)) {
1127 es = EXT2_SB(sb)->s_es; 1125 es = EXT2_SB(sb)->s_es;
1128 1126
1129 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) { 1127 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1130 ext2_debug ("setting valid to 0\n"); 1128 ext2_debug ("setting valid to 0\n");
1131 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & 1129 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
1132 ~EXT2_VALID_FS);
1133 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1130 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1134 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1131 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1135 es->s_mtime = cpu_to_le32(get_seconds()); 1132 es->s_mtime = cpu_to_le32(get_seconds());
@@ -1180,7 +1177,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1180 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1177 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1181 (old_mount_opt & EXT2_MOUNT_XIP)) && 1178 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1182 invalidate_inodes(sb)) 1179 invalidate_inodes(sb))
1183 ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\ 1180 ext2_warning(sb, __func__, "busy inodes while remounting "\
1184 "xip remain in cache (no functional problem)"); 1181 "xip remain in cache (no functional problem)");
1185 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1182 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1186 return 0; 1183 return 0;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a99d46f3b26e..987a5261cc2e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -646,8 +646,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
646 unlock_buffer(new_bh); 646 unlock_buffer(new_bh);
647 goto cleanup; 647 goto cleanup;
648 } 648 }
649 HDR(new_bh)->h_refcount = cpu_to_le32(1 + 649 le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
650 le32_to_cpu(HDR(new_bh)->h_refcount));
651 ea_bdebug(new_bh, "refcount now=%d", 650 ea_bdebug(new_bh, "refcount now=%d",
652 le32_to_cpu(HDR(new_bh)->h_refcount)); 651 le32_to_cpu(HDR(new_bh)->h_refcount));
653 } 652 }
@@ -660,10 +659,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
660 ext2_xattr_cache_insert(new_bh); 659 ext2_xattr_cache_insert(new_bh);
661 } else { 660 } else {
662 /* We need to allocate a new block */ 661 /* We need to allocate a new block */
663 int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> 662 ext2_fsblk_t goal = ext2_group_first_block_no(sb,
664 s_first_data_block) + 663 EXT2_I(inode)->i_block_group);
665 EXT2_I(inode)->i_block_group *
666 EXT2_BLOCKS_PER_GROUP(sb);
667 int block = ext2_new_block(inode, goal, &error); 664 int block = ext2_new_block(inode, goal, &error);
668 if (error) 665 if (error)
669 goto cleanup; 666 goto cleanup;
@@ -731,8 +728,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
731 bforget(old_bh); 728 bforget(old_bh);
732 } else { 729 } else {
733 /* Decrement the refcount only. */ 730 /* Decrement the refcount only. */
734 HDR(old_bh)->h_refcount = cpu_to_le32( 731 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
735 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
736 if (ce) 732 if (ce)
737 mb_cache_entry_release(ce); 733 mb_cache_entry_release(ce);
738 DQUOT_FREE_BLOCK(inode, 1); 734 DQUOT_FREE_BLOCK(inode, 1);
@@ -789,8 +785,7 @@ ext2_xattr_delete_inode(struct inode *inode)
789 bforget(bh); 785 bforget(bh);
790 unlock_buffer(bh); 786 unlock_buffer(bh);
791 } else { 787 } else {
792 HDR(bh)->h_refcount = cpu_to_le32( 788 le32_add_cpu(&HDR(bh)->h_refcount, -1);
793 le32_to_cpu(HDR(bh)->h_refcount) - 1);
794 if (ce) 789 if (ce)
795 mb_cache_entry_release(ce); 790 mb_cache_entry_release(ce);
796 ea_bdebug(bh, "refcount now=%d", 791 ea_bdebug(bh, "refcount now=%d",
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index ca7f00312388..4fb94c20041b 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -15,24 +15,28 @@
15#include "xip.h" 15#include "xip.h"
16 16
17static inline int 17static inline int
18__inode_direct_access(struct inode *inode, sector_t sector, 18__inode_direct_access(struct inode *inode, sector_t block,
19 unsigned long *data) 19 void **kaddr, unsigned long *pfn)
20{ 20{
21 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access); 21 struct block_device *bdev = inode->i_sb->s_bdev;
22 return inode->i_sb->s_bdev->bd_disk->fops 22 struct block_device_operations *ops = bdev->bd_disk->fops;
23 ->direct_access(inode->i_sb->s_bdev,sector,data); 23 sector_t sector;
24
25 sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
26
27 BUG_ON(!ops->direct_access);
28 return ops->direct_access(bdev, sector, kaddr, pfn);
24} 29}
25 30
26static inline int 31static inline int
27__ext2_get_sector(struct inode *inode, sector_t offset, int create, 32__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
28 sector_t *result) 33 sector_t *result)
29{ 34{
30 struct buffer_head tmp; 35 struct buffer_head tmp;
31 int rc; 36 int rc;
32 37
33 memset(&tmp, 0, sizeof(struct buffer_head)); 38 memset(&tmp, 0, sizeof(struct buffer_head));
34 rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp, 39 rc = ext2_get_block(inode, pgoff, &tmp, create);
35 create);
36 *result = tmp.b_blocknr; 40 *result = tmp.b_blocknr;
37 41
38 /* did we get a sparse block (hole in the file)? */ 42 /* did we get a sparse block (hole in the file)? */
@@ -45,15 +49,15 @@ __ext2_get_sector(struct inode *inode, sector_t offset, int create,
45} 49}
46 50
47int 51int
48ext2_clear_xip_target(struct inode *inode, int block) 52ext2_clear_xip_target(struct inode *inode, sector_t block)
49{ 53{
50 sector_t sector = block * (PAGE_SIZE/512); 54 void *kaddr;
51 unsigned long data; 55 unsigned long pfn;
52 int rc; 56 int rc;
53 57
54 rc = __inode_direct_access(inode, sector, &data); 58 rc = __inode_direct_access(inode, block, &kaddr, &pfn);
55 if (!rc) 59 if (!rc)
56 clear_page((void*)data); 60 clear_page(kaddr);
57 return rc; 61 return rc;
58} 62}
59 63
@@ -64,30 +68,23 @@ void ext2_xip_verify_sb(struct super_block *sb)
64 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && 68 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
65 !sb->s_bdev->bd_disk->fops->direct_access) { 69 !sb->s_bdev->bd_disk->fops->direct_access) {
66 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); 70 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
67 ext2_warning(sb, __FUNCTION__, 71 ext2_warning(sb, __func__,
68 "ignoring xip option - not supported by bdev"); 72 "ignoring xip option - not supported by bdev");
69 } 73 }
70} 74}
71 75
72struct page * 76int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
73ext2_get_xip_page(struct address_space *mapping, sector_t offset, 77 void **kmem, unsigned long *pfn)
74 int create)
75{ 78{
76 int rc; 79 int rc;
77 unsigned long data; 80 sector_t block;
78 sector_t sector;
79 81
80 /* first, retrieve the sector number */ 82 /* first, retrieve the sector number */
81 rc = __ext2_get_sector(mapping->host, offset, create, &sector); 83 rc = __ext2_get_block(mapping->host, pgoff, create, &block);
82 if (rc) 84 if (rc)
83 goto error; 85 return rc;
84 86
85 /* retrieve address of the target data */ 87 /* retrieve address of the target data */
86 rc = __inode_direct_access 88 rc = __inode_direct_access(mapping->host, block, kmem, pfn);
87 (mapping->host, sector * (PAGE_SIZE/512), &data); 89 return rc;
88 if (!rc)
89 return virt_to_page(data);
90
91 error:
92 return ERR_PTR(rc);
93} 90}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
index aa85331d6c56..18b34d2f31b3 100644
--- a/fs/ext2/xip.h
+++ b/fs/ext2/xip.h
@@ -7,19 +7,20 @@
7 7
8#ifdef CONFIG_EXT2_FS_XIP 8#ifdef CONFIG_EXT2_FS_XIP
9extern void ext2_xip_verify_sb (struct super_block *); 9extern void ext2_xip_verify_sb (struct super_block *);
10extern int ext2_clear_xip_target (struct inode *, int); 10extern int ext2_clear_xip_target (struct inode *, sector_t);
11 11
12static inline int ext2_use_xip (struct super_block *sb) 12static inline int ext2_use_xip (struct super_block *sb)
13{ 13{
14 struct ext2_sb_info *sbi = EXT2_SB(sb); 14 struct ext2_sb_info *sbi = EXT2_SB(sb);
15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP); 15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
16} 16}
17struct page* ext2_get_xip_page (struct address_space *, sector_t, int); 17int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
18#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page) 18 void **, unsigned long *);
19#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem)
19#else 20#else
20#define mapping_is_xip(map) 0 21#define mapping_is_xip(map) 0
21#define ext2_xip_verify_sb(sb) do { } while (0) 22#define ext2_xip_verify_sb(sb) do { } while (0)
22#define ext2_use_xip(sb) 0 23#define ext2_use_xip(sb) 0
23#define ext2_clear_xip_target(inode, chain) 0 24#define ext2_clear_xip_target(inode, chain) 0
24#define ext2_get_xip_page NULL 25#define ext2_get_xip_mem NULL
25#endif 26#endif
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index da0cb2c0e437..92fd0338a6eb 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -117,7 +117,7 @@ static int ext3_valid_block_bitmap(struct super_block *sb,
117 return 1; 117 return 1;
118 118
119err_out: 119err_out:
120 ext3_error(sb, __FUNCTION__, 120 ext3_error(sb, __func__,
121 "Invalid block bitmap - " 121 "Invalid block bitmap - "
122 "block_group = %d, block = %lu", 122 "block_group = %d, block = %lu",
123 block_group, bitmap_blk); 123 block_group, bitmap_blk);
@@ -147,7 +147,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
147 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 147 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
148 bh = sb_getblk(sb, bitmap_blk); 148 bh = sb_getblk(sb, bitmap_blk);
149 if (unlikely(!bh)) { 149 if (unlikely(!bh)) {
150 ext3_error(sb, __FUNCTION__, 150 ext3_error(sb, __func__,
151 "Cannot read block bitmap - " 151 "Cannot read block bitmap - "
152 "block_group = %d, block_bitmap = %u", 152 "block_group = %d, block_bitmap = %u",
153 block_group, le32_to_cpu(desc->bg_block_bitmap)); 153 block_group, le32_to_cpu(desc->bg_block_bitmap));
@@ -158,16 +158,17 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
158 158
159 if (bh_submit_read(bh) < 0) { 159 if (bh_submit_read(bh) < 0) {
160 brelse(bh); 160 brelse(bh);
161 ext3_error(sb, __FUNCTION__, 161 ext3_error(sb, __func__,
162 "Cannot read block bitmap - " 162 "Cannot read block bitmap - "
163 "block_group = %d, block_bitmap = %u", 163 "block_group = %d, block_bitmap = %u",
164 block_group, le32_to_cpu(desc->bg_block_bitmap)); 164 block_group, le32_to_cpu(desc->bg_block_bitmap));
165 return NULL; 165 return NULL;
166 } 166 }
167 if (!ext3_valid_block_bitmap(sb, desc, block_group, bh)) { 167 ext3_valid_block_bitmap(sb, desc, block_group, bh);
168 brelse(bh); 168 /*
169 return NULL; 169 * file system mounted not to panic on error, continue with corrupt
170 } 170 * bitmap
171 */
171 return bh; 172 return bh;
172} 173}
173/* 174/*
@@ -232,11 +233,10 @@ restart:
232 prev = rsv; 233 prev = rsv;
233 } 234 }
234 printk("Window map complete.\n"); 235 printk("Window map complete.\n");
235 if (bad) 236 BUG_ON(bad);
236 BUG();
237} 237}
238#define rsv_window_dump(root, verbose) \ 238#define rsv_window_dump(root, verbose) \
239 __rsv_window_dump((root), (verbose), __FUNCTION__) 239 __rsv_window_dump((root), (verbose), __func__)
240#else 240#else
241#define rsv_window_dump(root, verbose) do {} while (0) 241#define rsv_window_dump(root, verbose) do {} while (0)
242#endif 242#endif
@@ -618,7 +618,7 @@ do_more:
618 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 618 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
619 bit + i, bitmap_bh->b_data)) { 619 bit + i, bitmap_bh->b_data)) {
620 jbd_unlock_bh_state(bitmap_bh); 620 jbd_unlock_bh_state(bitmap_bh);
621 ext3_error(sb, __FUNCTION__, 621 ext3_error(sb, __func__,
622 "bit already cleared for block "E3FSBLK, 622 "bit already cleared for block "E3FSBLK,
623 block + i); 623 block + i);
624 jbd_lock_bh_state(bitmap_bh); 624 jbd_lock_bh_state(bitmap_bh);
@@ -1642,7 +1642,11 @@ allocated:
1642 "Allocating block in system zone - " 1642 "Allocating block in system zone - "
1643 "blocks from "E3FSBLK", length %lu", 1643 "blocks from "E3FSBLK", length %lu",
1644 ret_block, num); 1644 ret_block, num);
1645 goto out; 1645 /*
1646 * claim_block() marked the blocks we allocated as in use. So we
1647 * may want to selectively mark some of the blocks as free.
1648 */
1649 goto retry_alloc;
1646 } 1650 }
1647 1651
1648 performed_allocation = 1; 1652 performed_allocation = 1;
@@ -1668,7 +1672,7 @@ allocated:
1668 if (ext3_test_bit(grp_alloc_blk+i, 1672 if (ext3_test_bit(grp_alloc_blk+i,
1669 bh2jh(bitmap_bh)->b_committed_data)) { 1673 bh2jh(bitmap_bh)->b_committed_data)) {
1670 printk("%s: block was unexpectedly set in " 1674 printk("%s: block was unexpectedly set in "
1671 "b_committed_data\n", __FUNCTION__); 1675 "b_committed_data\n", __func__);
1672 } 1676 }
1673 } 1677 }
1674 } 1678 }
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
index e1f91fd26a93..d401f148d74d 100644
--- a/fs/ext3/ext3_jbd.c
+++ b/fs/ext3/ext3_jbd.c
@@ -9,7 +9,7 @@ int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
9{ 9{
10 int err = journal_get_undo_access(handle, bh); 10 int err = journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext3_journal_abort_handle(where, __func__, bh, handle,err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext3_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = journal_get_write_access(handle, bh); 19 int err = journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext3_journal_abort_handle(where, __func__, bh, handle,err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext3_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = journal_forget(handle, bh); 28 int err = journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext3_journal_abort_handle(where, __func__, bh, handle,err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext3_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = journal_revoke(handle, blocknr, bh); 37 int err = journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext3_journal_abort_handle(where, __func__, bh, handle,err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext3_journal_get_create_access(const char *where,
45{ 45{
46 int err = journal_get_create_access(handle, bh); 46 int err = journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext3_journal_abort_handle(where, __func__, bh, handle,err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext3_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = journal_dirty_metadata(handle, bh); 55 int err = journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext3_journal_abort_handle(where, __func__, bh, handle,err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index a588e23841d4..d33634119e17 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -72,6 +72,9 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 96dd5573e49b..77126821b2e9 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -644,7 +644,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
644 644
645 /* Error cases - e2fsck has already cleaned up for us */ 645 /* Error cases - e2fsck has already cleaned up for us */
646 if (ino > max_ino) { 646 if (ino > max_ino) {
647 ext3_warning(sb, __FUNCTION__, 647 ext3_warning(sb, __func__,
648 "bad orphan ino %lu! e2fsck was run?", ino); 648 "bad orphan ino %lu! e2fsck was run?", ino);
649 goto error; 649 goto error;
650 } 650 }
@@ -653,7 +653,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
653 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); 653 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
654 bitmap_bh = read_inode_bitmap(sb, block_group); 654 bitmap_bh = read_inode_bitmap(sb, block_group);
655 if (!bitmap_bh) { 655 if (!bitmap_bh) {
656 ext3_warning(sb, __FUNCTION__, 656 ext3_warning(sb, __func__,
657 "inode bitmap error for orphan %lu", ino); 657 "inode bitmap error for orphan %lu", ino);
658 goto error; 658 goto error;
659 } 659 }
@@ -678,7 +678,7 @@ iget_failed:
678 err = PTR_ERR(inode); 678 err = PTR_ERR(inode);
679 inode = NULL; 679 inode = NULL;
680bad_orphan: 680bad_orphan:
681 ext3_warning(sb, __FUNCTION__, 681 ext3_warning(sb, __func__,
682 "bad orphan inode %lu! e2fsck was run?", ino); 682 "bad orphan inode %lu! e2fsck was run?", ino);
683 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", 683 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
684 bit, (unsigned long long)bitmap_bh->b_blocknr, 684 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c683609b0e3a..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -95,7 +95,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
95 BUFFER_TRACE(bh, "call ext3_journal_revoke"); 95 BUFFER_TRACE(bh, "call ext3_journal_revoke");
96 err = ext3_journal_revoke(handle, blocknr, bh); 96 err = ext3_journal_revoke(handle, blocknr, bh);
97 if (err) 97 if (err)
98 ext3_abort(inode->i_sb, __FUNCTION__, 98 ext3_abort(inode->i_sb, __func__,
99 "error %d when attempting revoke", err); 99 "error %d when attempting revoke", err);
100 BUFFER_TRACE(bh, "exit"); 100 BUFFER_TRACE(bh, "exit");
101 return err; 101 return err;
@@ -1190,7 +1190,7 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1190{ 1190{
1191 int err = journal_dirty_data(handle, bh); 1191 int err = journal_dirty_data(handle, bh);
1192 if (err) 1192 if (err)
1193 ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1193 ext3_journal_abort_handle(__func__, __func__,
1194 bh, handle, err); 1194 bh, handle, err);
1195 return err; 1195 return err;
1196} 1196}
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
@@ -2454,11 +2456,10 @@ out_stop:
2454static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2456static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2455 unsigned long ino, struct ext3_iloc *iloc) 2457 unsigned long ino, struct ext3_iloc *iloc)
2456{ 2458{
2457 unsigned long desc, group_desc, block_group; 2459 unsigned long block_group;
2458 unsigned long offset; 2460 unsigned long offset;
2459 ext3_fsblk_t block; 2461 ext3_fsblk_t block;
2460 struct buffer_head *bh; 2462 struct ext3_group_desc *gdp;
2461 struct ext3_group_desc * gdp;
2462 2463
2463 if (!ext3_valid_inum(sb, ino)) { 2464 if (!ext3_valid_inum(sb, ino)) {
2464 /* 2465 /*
@@ -2470,27 +2471,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2470 } 2471 }
2471 2472
2472 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); 2473 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
2473 if (block_group >= EXT3_SB(sb)->s_groups_count) { 2474 gdp = ext3_get_group_desc(sb, block_group, NULL);
2474 ext3_error(sb,"ext3_get_inode_block","group >= groups count"); 2475 if (!gdp)
2475 return 0;
2476 }
2477 smp_rmb();
2478 group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
2479 desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
2480 bh = EXT3_SB(sb)->s_group_desc[group_desc];
2481 if (!bh) {
2482 ext3_error (sb, "ext3_get_inode_block",
2483 "Descriptor not loaded");
2484 return 0; 2476 return 0;
2485 }
2486
2487 gdp = (struct ext3_group_desc *)bh->b_data;
2488 /* 2477 /*
2489 * Figure out the offset within the block group inode table 2478 * Figure out the offset within the block group inode table
2490 */ 2479 */
2491 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * 2480 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
2492 EXT3_INODE_SIZE(sb); 2481 EXT3_INODE_SIZE(sb);
2493 block = le32_to_cpu(gdp[desc].bg_inode_table) + 2482 block = le32_to_cpu(gdp->bg_inode_table) +
2494 (offset >> EXT3_BLOCK_SIZE_BITS(sb)); 2483 (offset >> EXT3_BLOCK_SIZE_BITS(sb));
2495 2484
2496 iloc->block_group = block_group; 2485 iloc->block_group = block_group;
@@ -3214,7 +3203,7 @@ void ext3_dirty_inode(struct inode *inode)
3214 current_handle->h_transaction != handle->h_transaction) { 3203 current_handle->h_transaction != handle->h_transaction) {
3215 /* This task has a transaction open against a different fs */ 3204 /* This task has a transaction open against a different fs */
3216 printk(KERN_EMERG "%s: transactions do not match!\n", 3205 printk(KERN_EMERG "%s: transactions do not match!\n",
3217 __FUNCTION__); 3206 __func__);
3218 } else { 3207 } else {
3219 jbd_debug(5, "marking dirty. outer handle=%p\n", 3208 jbd_debug(5, "marking dirty. outer handle=%p\n",
3220 current_handle); 3209 current_handle);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index dec3e0d88ab1..0b8cf80154f1 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -57,10 +57,15 @@ static struct buffer_head *ext3_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext3_bread(handle, inode, *block, 1, err))) { 60 bh = ext3_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT3_I(inode)->i_disksize = inode->i_size; 63 EXT3_I(inode)->i_disksize = inode->i_size;
63 ext3_journal_get_write_access(handle,bh); 64 *err = ext3_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -356,7 +361,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
356 if (root->info.hash_version != DX_HASH_TEA && 361 if (root->info.hash_version != DX_HASH_TEA &&
357 root->info.hash_version != DX_HASH_HALF_MD4 && 362 root->info.hash_version != DX_HASH_HALF_MD4 &&
358 root->info.hash_version != DX_HASH_LEGACY) { 363 root->info.hash_version != DX_HASH_LEGACY) {
359 ext3_warning(dir->i_sb, __FUNCTION__, 364 ext3_warning(dir->i_sb, __func__,
360 "Unrecognised inode hash code %d", 365 "Unrecognised inode hash code %d",
361 root->info.hash_version); 366 root->info.hash_version);
362 brelse(bh); 367 brelse(bh);
@@ -370,7 +375,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
370 hash = hinfo->hash; 375 hash = hinfo->hash;
371 376
372 if (root->info.unused_flags & 1) { 377 if (root->info.unused_flags & 1) {
373 ext3_warning(dir->i_sb, __FUNCTION__, 378 ext3_warning(dir->i_sb, __func__,
374 "Unimplemented inode hash flags: %#06x", 379 "Unimplemented inode hash flags: %#06x",
375 root->info.unused_flags); 380 root->info.unused_flags);
376 brelse(bh); 381 brelse(bh);
@@ -379,7 +384,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 } 384 }
380 385
381 if ((indirect = root->info.indirect_levels) > 1) { 386 if ((indirect = root->info.indirect_levels) > 1) {
382 ext3_warning(dir->i_sb, __FUNCTION__, 387 ext3_warning(dir->i_sb, __func__,
383 "Unimplemented inode hash depth: %#06x", 388 "Unimplemented inode hash depth: %#06x",
384 root->info.indirect_levels); 389 root->info.indirect_levels);
385 brelse(bh); 390 brelse(bh);
@@ -392,7 +397,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
392 397
393 if (dx_get_limit(entries) != dx_root_limit(dir, 398 if (dx_get_limit(entries) != dx_root_limit(dir,
394 root->info.info_length)) { 399 root->info.info_length)) {
395 ext3_warning(dir->i_sb, __FUNCTION__, 400 ext3_warning(dir->i_sb, __func__,
396 "dx entry: limit != root limit"); 401 "dx entry: limit != root limit");
397 brelse(bh); 402 brelse(bh);
398 *err = ERR_BAD_DX_DIR; 403 *err = ERR_BAD_DX_DIR;
@@ -404,7 +409,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
404 { 409 {
405 count = dx_get_count(entries); 410 count = dx_get_count(entries);
406 if (!count || count > dx_get_limit(entries)) { 411 if (!count || count > dx_get_limit(entries)) {
407 ext3_warning(dir->i_sb, __FUNCTION__, 412 ext3_warning(dir->i_sb, __func__,
408 "dx entry: no count or count > limit"); 413 "dx entry: no count or count > limit");
409 brelse(bh); 414 brelse(bh);
410 *err = ERR_BAD_DX_DIR; 415 *err = ERR_BAD_DX_DIR;
@@ -449,7 +454,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
449 goto fail2; 454 goto fail2;
450 at = entries = ((struct dx_node *) bh->b_data)->entries; 455 at = entries = ((struct dx_node *) bh->b_data)->entries;
451 if (dx_get_limit(entries) != dx_node_limit (dir)) { 456 if (dx_get_limit(entries) != dx_node_limit (dir)) {
452 ext3_warning(dir->i_sb, __FUNCTION__, 457 ext3_warning(dir->i_sb, __func__,
453 "dx entry: limit != node limit"); 458 "dx entry: limit != node limit");
454 brelse(bh); 459 brelse(bh);
455 *err = ERR_BAD_DX_DIR; 460 *err = ERR_BAD_DX_DIR;
@@ -465,7 +470,7 @@ fail2:
465 } 470 }
466fail: 471fail:
467 if (*err == ERR_BAD_DX_DIR) 472 if (*err == ERR_BAD_DX_DIR)
468 ext3_warning(dir->i_sb, __FUNCTION__, 473 ext3_warning(dir->i_sb, __func__,
469 "Corrupt dir inode %ld, running e2fsck is " 474 "Corrupt dir inode %ld, running e2fsck is "
470 "recommended.", dir->i_ino); 475 "recommended.", dir->i_ino);
471 return NULL; 476 return NULL;
@@ -913,7 +918,7 @@ restart:
913 wait_on_buffer(bh); 918 wait_on_buffer(bh);
914 if (!buffer_uptodate(bh)) { 919 if (!buffer_uptodate(bh)) {
915 /* read error, skip block & hope for the best */ 920 /* read error, skip block & hope for the best */
916 ext3_error(sb, __FUNCTION__, "reading directory #%lu " 921 ext3_error(sb, __func__, "reading directory #%lu "
917 "offset %lu", dir->i_ino, block); 922 "offset %lu", dir->i_ino, block);
918 brelse(bh); 923 brelse(bh);
919 goto next; 924 goto next;
@@ -1005,7 +1010,7 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
1005 retval = ext3_htree_next_block(dir, hash, frame, 1010 retval = ext3_htree_next_block(dir, hash, frame,
1006 frames, NULL); 1011 frames, NULL);
1007 if (retval < 0) { 1012 if (retval < 0) {
1008 ext3_warning(sb, __FUNCTION__, 1013 ext3_warning(sb, __func__,
1009 "error reading index page in directory #%lu", 1014 "error reading index page in directory #%lu",
1010 dir->i_ino); 1015 dir->i_ino);
1011 *err = retval; 1016 *err = retval;
@@ -1530,7 +1535,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1530 1535
1531 if (levels && (dx_get_count(frames->entries) == 1536 if (levels && (dx_get_count(frames->entries) ==
1532 dx_get_limit(frames->entries))) { 1537 dx_get_limit(frames->entries))) {
1533 ext3_warning(sb, __FUNCTION__, 1538 ext3_warning(sb, __func__,
1534 "Directory index full!"); 1539 "Directory index full!");
1535 err = -ENOSPC; 1540 err = -ENOSPC;
1536 goto cleanup; 1541 goto cleanup;
@@ -1832,11 +1837,11 @@ static int empty_dir (struct inode * inode)
1832 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || 1837 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
1833 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { 1838 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
1834 if (err) 1839 if (err)
1835 ext3_error(inode->i_sb, __FUNCTION__, 1840 ext3_error(inode->i_sb, __func__,
1836 "error %d reading directory #%lu offset 0", 1841 "error %d reading directory #%lu offset 0",
1837 err, inode->i_ino); 1842 err, inode->i_ino);
1838 else 1843 else
1839 ext3_warning(inode->i_sb, __FUNCTION__, 1844 ext3_warning(inode->i_sb, __func__,
1840 "bad directory (dir #%lu) - no data block", 1845 "bad directory (dir #%lu) - no data block",
1841 inode->i_ino); 1846 inode->i_ino);
1842 return 1; 1847 return 1;
@@ -1865,7 +1870,7 @@ static int empty_dir (struct inode * inode)
1865 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); 1870 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
1866 if (!bh) { 1871 if (!bh) {
1867 if (err) 1872 if (err)
1868 ext3_error(sb, __FUNCTION__, 1873 ext3_error(sb, __func__,
1869 "error %d reading directory" 1874 "error %d reading directory"
1870 " #%lu offset %lu", 1875 " #%lu offset %lu",
1871 err, inode->i_ino, offset); 1876 err, inode->i_ino, offset);
@@ -2318,6 +2323,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2318 EXT3_FEATURE_INCOMPAT_FILETYPE)) 2323 EXT3_FEATURE_INCOMPAT_FILETYPE))
2319 new_de->file_type = old_de->file_type; 2324 new_de->file_type = old_de->file_type;
2320 new_dir->i_version++; 2325 new_dir->i_version++;
2326 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
2327 ext3_mark_inode_dirty(handle, new_dir);
2321 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); 2328 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
2322 ext3_journal_dirty_metadata(handle, new_bh); 2329 ext3_journal_dirty_metadata(handle, new_bh);
2323 brelse(new_bh); 2330 brelse(new_bh);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0e97b6e07cb0..28cfd0b40527 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -48,60 +48,60 @@ static int verify_group_input(struct super_block *sb,
48 free_blocks_count, input->reserved_blocks); 48 free_blocks_count, input->reserved_blocks);
49 49
50 if (group != sbi->s_groups_count) 50 if (group != sbi->s_groups_count)
51 ext3_warning(sb, __FUNCTION__, 51 ext3_warning(sb, __func__,
52 "Cannot add at group %u (only %lu groups)", 52 "Cannot add at group %u (only %lu groups)",
53 input->group, sbi->s_groups_count); 53 input->group, sbi->s_groups_count);
54 else if ((start - le32_to_cpu(es->s_first_data_block)) % 54 else if ((start - le32_to_cpu(es->s_first_data_block)) %
55 EXT3_BLOCKS_PER_GROUP(sb)) 55 EXT3_BLOCKS_PER_GROUP(sb))
56 ext3_warning(sb, __FUNCTION__, "Last group not full"); 56 ext3_warning(sb, __func__, "Last group not full");
57 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
58 ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
59 input->reserved_blocks); 59 input->reserved_blocks);
60 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
61 ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext3_warning(sb, __func__, "Bad blocks count %u",
62 input->blocks_count); 62 input->blocks_count);
63 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
64 ext3_warning(sb, __FUNCTION__, 64 ext3_warning(sb, __func__,
65 "Cannot read last block ("E3FSBLK")", 65 "Cannot read last block ("E3FSBLK")",
66 end - 1); 66 end - 1);
67 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
68 ext3_warning(sb, __FUNCTION__, 68 ext3_warning(sb, __func__,
69 "Block bitmap not in group (block %u)", 69 "Block bitmap not in group (block %u)",
70 input->block_bitmap); 70 input->block_bitmap);
71 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
72 ext3_warning(sb, __FUNCTION__, 72 ext3_warning(sb, __func__,
73 "Inode bitmap not in group (block %u)", 73 "Inode bitmap not in group (block %u)",
74 input->inode_bitmap); 74 input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext3_warning(sb, __FUNCTION__, 77 ext3_warning(sb, __func__,
78 "Inode table not in group (blocks %u-"E3FSBLK")", 78 "Inode table not in group (blocks %u-"E3FSBLK")",
79 input->inode_table, itend - 1); 79 input->inode_table, itend - 1);
80 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
81 ext3_warning(sb, __FUNCTION__, 81 ext3_warning(sb, __func__,
82 "Block bitmap same as inode bitmap (%u)", 82 "Block bitmap same as inode bitmap (%u)",
83 input->block_bitmap); 83 input->block_bitmap);
84 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
85 ext3_warning(sb, __FUNCTION__, 85 ext3_warning(sb, __func__,
86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")", 86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
87 input->block_bitmap, input->inode_table, itend-1); 87 input->block_bitmap, input->inode_table, itend-1);
88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 88 else if (inside(input->inode_bitmap, input->inode_table, itend))
89 ext3_warning(sb, __FUNCTION__, 89 ext3_warning(sb, __func__,
90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", 90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
91 input->inode_bitmap, input->inode_table, itend-1); 91 input->inode_bitmap, input->inode_table, itend-1);
92 else if (inside(input->block_bitmap, start, metaend)) 92 else if (inside(input->block_bitmap, start, metaend))
93 ext3_warning(sb, __FUNCTION__, 93 ext3_warning(sb, __func__,
94 "Block bitmap (%u) in GDT table" 94 "Block bitmap (%u) in GDT table"
95 " ("E3FSBLK"-"E3FSBLK")", 95 " ("E3FSBLK"-"E3FSBLK")",
96 input->block_bitmap, start, metaend - 1); 96 input->block_bitmap, start, metaend - 1);
97 else if (inside(input->inode_bitmap, start, metaend)) 97 else if (inside(input->inode_bitmap, start, metaend))
98 ext3_warning(sb, __FUNCTION__, 98 ext3_warning(sb, __func__,
99 "Inode bitmap (%u) in GDT table" 99 "Inode bitmap (%u) in GDT table"
100 " ("E3FSBLK"-"E3FSBLK")", 100 " ("E3FSBLK"-"E3FSBLK")",
101 input->inode_bitmap, start, metaend - 1); 101 input->inode_bitmap, start, metaend - 1);
102 else if (inside(input->inode_table, start, metaend) || 102 else if (inside(input->inode_table, start, metaend) ||
103 inside(itend - 1, start, metaend)) 103 inside(itend - 1, start, metaend))
104 ext3_warning(sb, __FUNCTION__, 104 ext3_warning(sb, __func__,
105 "Inode table (%u-"E3FSBLK") overlaps" 105 "Inode table (%u-"E3FSBLK") overlaps"
106 "GDT table ("E3FSBLK"-"E3FSBLK")", 106 "GDT table ("E3FSBLK"-"E3FSBLK")",
107 input->inode_table, itend - 1, start, metaend - 1); 107 input->inode_table, itend - 1, start, metaend - 1);
@@ -386,7 +386,7 @@ static int verify_reserved_gdb(struct super_block *sb,
386 386
387 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 387 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
388 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 388 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
389 ext3_warning(sb, __FUNCTION__, 389 ext3_warning(sb, __func__,
390 "reserved GDT "E3FSBLK 390 "reserved GDT "E3FSBLK
391 " missing grp %d ("E3FSBLK")", 391 " missing grp %d ("E3FSBLK")",
392 blk, grp, 392 blk, grp,
@@ -440,7 +440,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
440 */ 440 */
441 if (EXT3_SB(sb)->s_sbh->b_blocknr != 441 if (EXT3_SB(sb)->s_sbh->b_blocknr !=
442 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { 442 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
443 ext3_warning(sb, __FUNCTION__, 443 ext3_warning(sb, __func__,
444 "won't resize using backup superblock at %llu", 444 "won't resize using backup superblock at %llu",
445 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); 445 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
446 return -EPERM; 446 return -EPERM;
@@ -464,7 +464,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
464 464
465 data = (__le32 *)dind->b_data; 465 data = (__le32 *)dind->b_data;
466 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 466 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
467 ext3_warning(sb, __FUNCTION__, 467 ext3_warning(sb, __func__,
468 "new group %u GDT block "E3FSBLK" not reserved", 468 "new group %u GDT block "E3FSBLK" not reserved",
469 input->group, gdblock); 469 input->group, gdblock);
470 err = -EINVAL; 470 err = -EINVAL;
@@ -488,7 +488,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
488 GFP_NOFS); 488 GFP_NOFS);
489 if (!n_group_desc) { 489 if (!n_group_desc) {
490 err = -ENOMEM; 490 err = -ENOMEM;
491 ext3_warning (sb, __FUNCTION__, 491 ext3_warning (sb, __func__,
492 "not enough memory for %lu groups", gdb_num + 1); 492 "not enough memory for %lu groups", gdb_num + 1);
493 goto exit_inode; 493 goto exit_inode;
494 } 494 }
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
586 /* Get each reserved primary GDT block and verify it holds backups */ 586 /* Get each reserved primary GDT block and verify it holds backups */
587 for (res = 0; res < reserved_gdb; res++, blk++) { 587 for (res = 0; res < reserved_gdb; res++, blk++) {
588 if (le32_to_cpu(*data) != blk) { 588 if (le32_to_cpu(*data) != blk) {
589 ext3_warning(sb, __FUNCTION__, 589 ext3_warning(sb, __func__,
590 "reserved block "E3FSBLK 590 "reserved block "E3FSBLK
591 " not at offset %ld", 591 " not at offset %ld",
592 blk, 592 blk,
@@ -730,7 +730,7 @@ static void update_backups(struct super_block *sb,
730 */ 730 */
731exit_err: 731exit_err:
732 if (err) { 732 if (err) {
733 ext3_warning(sb, __FUNCTION__, 733 ext3_warning(sb, __func__,
734 "can't update backup for group %d (err %d), " 734 "can't update backup for group %d (err %d), "
735 "forcing fsck on next reboot", group, err); 735 "forcing fsck on next reboot", group, err);
736 sbi->s_mount_state &= ~EXT3_VALID_FS; 736 sbi->s_mount_state &= ~EXT3_VALID_FS;
@@ -770,33 +770,33 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
770 770
771 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, 771 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
772 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 772 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
773 ext3_warning(sb, __FUNCTION__, 773 ext3_warning(sb, __func__,
774 "Can't resize non-sparse filesystem further"); 774 "Can't resize non-sparse filesystem further");
775 return -EPERM; 775 return -EPERM;
776 } 776 }
777 777
778 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < 778 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
779 le32_to_cpu(es->s_blocks_count)) { 779 le32_to_cpu(es->s_blocks_count)) {
780 ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 780 ext3_warning(sb, __func__, "blocks_count overflow\n");
781 return -EINVAL; 781 return -EINVAL;
782 } 782 }
783 783
784 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < 784 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
785 le32_to_cpu(es->s_inodes_count)) { 785 le32_to_cpu(es->s_inodes_count)) {
786 ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 786 ext3_warning(sb, __func__, "inodes_count overflow\n");
787 return -EINVAL; 787 return -EINVAL;
788 } 788 }
789 789
790 if (reserved_gdb || gdb_off == 0) { 790 if (reserved_gdb || gdb_off == 0) {
791 if (!EXT3_HAS_COMPAT_FEATURE(sb, 791 if (!EXT3_HAS_COMPAT_FEATURE(sb,
792 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 792 EXT3_FEATURE_COMPAT_RESIZE_INODE)){
793 ext3_warning(sb, __FUNCTION__, 793 ext3_warning(sb, __func__,
794 "No reserved GDT blocks, can't resize"); 794 "No reserved GDT blocks, can't resize");
795 return -EPERM; 795 return -EPERM;
796 } 796 }
797 inode = ext3_iget(sb, EXT3_RESIZE_INO); 797 inode = ext3_iget(sb, EXT3_RESIZE_INO);
798 if (IS_ERR(inode)) { 798 if (IS_ERR(inode)) {
799 ext3_warning(sb, __FUNCTION__, 799 ext3_warning(sb, __func__,
800 "Error opening resize inode"); 800 "Error opening resize inode");
801 return PTR_ERR(inode); 801 return PTR_ERR(inode);
802 } 802 }
@@ -825,7 +825,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
825 825
826 lock_super(sb); 826 lock_super(sb);
827 if (input->group != sbi->s_groups_count) { 827 if (input->group != sbi->s_groups_count) {
828 ext3_warning(sb, __FUNCTION__, 828 ext3_warning(sb, __func__,
829 "multiple resizers run on filesystem!"); 829 "multiple resizers run on filesystem!");
830 err = -EBUSY; 830 err = -EBUSY;
831 goto exit_journal; 831 goto exit_journal;
@@ -988,13 +988,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
988 " too large to resize to %lu blocks safely\n", 988 " too large to resize to %lu blocks safely\n",
989 sb->s_id, n_blocks_count); 989 sb->s_id, n_blocks_count);
990 if (sizeof(sector_t) < 8) 990 if (sizeof(sector_t) < 8)
991 ext3_warning(sb, __FUNCTION__, 991 ext3_warning(sb, __func__,
992 "CONFIG_LBD not enabled\n"); 992 "CONFIG_LBD not enabled\n");
993 return -EINVAL; 993 return -EINVAL;
994 } 994 }
995 995
996 if (n_blocks_count < o_blocks_count) { 996 if (n_blocks_count < o_blocks_count) {
997 ext3_warning(sb, __FUNCTION__, 997 ext3_warning(sb, __func__,
998 "can't shrink FS - resize aborted"); 998 "can't shrink FS - resize aborted");
999 return -EBUSY; 999 return -EBUSY;
1000 } 1000 }
@@ -1004,7 +1004,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1004 EXT3_BLOCKS_PER_GROUP(sb); 1004 EXT3_BLOCKS_PER_GROUP(sb);
1005 1005
1006 if (last == 0) { 1006 if (last == 0) {
1007 ext3_warning(sb, __FUNCTION__, 1007 ext3_warning(sb, __func__,
1008 "need to use ext2online to resize further"); 1008 "need to use ext2online to resize further");
1009 return -EPERM; 1009 return -EPERM;
1010 } 1010 }
@@ -1012,7 +1012,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1012 add = EXT3_BLOCKS_PER_GROUP(sb) - last; 1012 add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1013 1013
1014 if (o_blocks_count + add < o_blocks_count) { 1014 if (o_blocks_count + add < o_blocks_count) {
1015 ext3_warning(sb, __FUNCTION__, "blocks_count overflow"); 1015 ext3_warning(sb, __func__, "blocks_count overflow");
1016 return -EINVAL; 1016 return -EINVAL;
1017 } 1017 }
1018 1018
@@ -1020,7 +1020,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1020 add = n_blocks_count - o_blocks_count; 1020 add = n_blocks_count - o_blocks_count;
1021 1021
1022 if (o_blocks_count + add < n_blocks_count) 1022 if (o_blocks_count + add < n_blocks_count)
1023 ext3_warning(sb, __FUNCTION__, 1023 ext3_warning(sb, __func__,
1024 "will only finish group ("E3FSBLK 1024 "will only finish group ("E3FSBLK
1025 " blocks, %u new)", 1025 " blocks, %u new)",
1026 o_blocks_count + add, add); 1026 o_blocks_count + add, add);
@@ -1028,7 +1028,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1028 /* See if the device is actually as big as what was requested */ 1028 /* See if the device is actually as big as what was requested */
1029 bh = sb_bread(sb, o_blocks_count + add -1); 1029 bh = sb_bread(sb, o_blocks_count + add -1);
1030 if (!bh) { 1030 if (!bh) {
1031 ext3_warning(sb, __FUNCTION__, 1031 ext3_warning(sb, __func__,
1032 "can't read last block, resize aborted"); 1032 "can't read last block, resize aborted");
1033 return -ENOSPC; 1033 return -ENOSPC;
1034 } 1034 }
@@ -1040,22 +1040,23 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1040 handle = ext3_journal_start_sb(sb, 3); 1040 handle = ext3_journal_start_sb(sb, 3);
1041 if (IS_ERR(handle)) { 1041 if (IS_ERR(handle)) {
1042 err = PTR_ERR(handle); 1042 err = PTR_ERR(handle);
1043 ext3_warning(sb, __FUNCTION__, "error %d on journal start",err); 1043 ext3_warning(sb, __func__, "error %d on journal start",err);
1044 goto exit_put; 1044 goto exit_put;
1045 } 1045 }
1046 1046
1047 lock_super(sb); 1047 lock_super(sb);
1048 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1048 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1049 ext3_warning(sb, __FUNCTION__, 1049 ext3_warning(sb, __func__,
1050 "multiple resizers run on filesystem!"); 1050 "multiple resizers run on filesystem!");
1051 unlock_super(sb); 1051 unlock_super(sb);
1052 ext3_journal_stop(handle);
1052 err = -EBUSY; 1053 err = -EBUSY;
1053 goto exit_put; 1054 goto exit_put;
1054 } 1055 }
1055 1056
1056 if ((err = ext3_journal_get_write_access(handle, 1057 if ((err = ext3_journal_get_write_access(handle,
1057 EXT3_SB(sb)->s_sbh))) { 1058 EXT3_SB(sb)->s_sbh))) {
1058 ext3_warning(sb, __FUNCTION__, 1059 ext3_warning(sb, __func__,
1059 "error %d on journal write access", err); 1060 "error %d on journal write access", err);
1060 unlock_super(sb); 1061 unlock_super(sb);
1061 ext3_journal_stop(handle); 1062 ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ad5360664082..fe3119a71ada 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -84,7 +84,7 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
84 * take the FS itself readonly cleanly. */ 84 * take the FS itself readonly cleanly. */
85 journal = EXT3_SB(sb)->s_journal; 85 journal = EXT3_SB(sb)->s_journal;
86 if (is_journal_aborted(journal)) { 86 if (is_journal_aborted(journal)) {
87 ext3_abort(sb, __FUNCTION__, 87 ext3_abort(sb, __func__,
88 "Detected aborted journal"); 88 "Detected aborted journal");
89 return ERR_PTR(-EROFS); 89 return ERR_PTR(-EROFS);
90 } 90 }
@@ -304,7 +304,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
304 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 304 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
305 return; 305 return;
306 306
307 ext3_warning(sb, __FUNCTION__, 307 ext3_warning(sb, __func__,
308 "updating to rev %d because of new feature flag, " 308 "updating to rev %d because of new feature flag, "
309 "running e2fsck is recommended", 309 "running e2fsck is recommended",
310 EXT3_DYNAMIC_REV); 310 EXT3_DYNAMIC_REV);
@@ -685,7 +685,8 @@ static int ext3_acquire_dquot(struct dquot *dquot);
685static int ext3_release_dquot(struct dquot *dquot); 685static int ext3_release_dquot(struct dquot *dquot);
686static int ext3_mark_dquot_dirty(struct dquot *dquot); 686static int ext3_mark_dquot_dirty(struct dquot *dquot);
687static int ext3_write_info(struct super_block *sb, int type); 687static int ext3_write_info(struct super_block *sb, int type);
688static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); 688static int ext3_quota_on(struct super_block *sb, int type, int format_id,
689 char *path, int remount);
689static int ext3_quota_on_mount(struct super_block *sb, int type); 690static int ext3_quota_on_mount(struct super_block *sb, int type);
690static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 691static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
691 size_t len, loff_t off); 692 size_t len, loff_t off);
@@ -1096,6 +1097,9 @@ clear_qf_name:
1096 case Opt_quota: 1097 case Opt_quota:
1097 case Opt_usrquota: 1098 case Opt_usrquota:
1098 case Opt_grpquota: 1099 case Opt_grpquota:
1100 printk(KERN_ERR
1101 "EXT3-fs: quota options not supported.\n");
1102 break;
1099 case Opt_usrjquota: 1103 case Opt_usrjquota:
1100 case Opt_grpjquota: 1104 case Opt_grpjquota:
1101 case Opt_offusrjquota: 1105 case Opt_offusrjquota:
@@ -1103,7 +1107,7 @@ clear_qf_name:
1103 case Opt_jqfmt_vfsold: 1107 case Opt_jqfmt_vfsold:
1104 case Opt_jqfmt_vfsv0: 1108 case Opt_jqfmt_vfsv0:
1105 printk(KERN_ERR 1109 printk(KERN_ERR
1106 "EXT3-fs: journalled quota options not " 1110 "EXT3-fs: journaled quota options not "
1107 "supported.\n"); 1111 "supported.\n");
1108 break; 1112 break;
1109 case Opt_noquota: 1113 case Opt_noquota:
@@ -1218,7 +1222,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1218 inconsistencies, to force a fsck at reboot. But for 1222 inconsistencies, to force a fsck at reboot. But for
1219 a plain journaled filesystem we can keep it set as 1223 a plain journaled filesystem we can keep it set as
1220 valid forever! :) */ 1224 valid forever! :) */
1221 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); 1225 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1222#endif 1226#endif
1223 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1227 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1224 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1228 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
@@ -1253,14 +1257,14 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1253static int ext3_check_descriptors(struct super_block *sb) 1257static int ext3_check_descriptors(struct super_block *sb)
1254{ 1258{
1255 struct ext3_sb_info *sbi = EXT3_SB(sb); 1259 struct ext3_sb_info *sbi = EXT3_SB(sb);
1256 ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1257 ext3_fsblk_t last_block;
1258 int i; 1260 int i;
1259 1261
1260 ext3_debug ("Checking group descriptors"); 1262 ext3_debug ("Checking group descriptors");
1261 1263
1262 for (i = 0; i < sbi->s_groups_count; i++) { 1264 for (i = 0; i < sbi->s_groups_count; i++) {
1263 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1265 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1266 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1267 ext3_fsblk_t last_block;
1264 1268
1265 if (i == sbi->s_groups_count - 1) 1269 if (i == sbi->s_groups_count - 1)
1266 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1270 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
@@ -1299,7 +1303,6 @@ static int ext3_check_descriptors(struct super_block *sb)
1299 le32_to_cpu(gdp->bg_inode_table)); 1303 le32_to_cpu(gdp->bg_inode_table));
1300 return 0; 1304 return 0;
1301 } 1305 }
1302 first_block += EXT3_BLOCKS_PER_GROUP(sb);
1303 } 1306 }
1304 1307
1305 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1308 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
@@ -1387,7 +1390,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1387 if (inode->i_nlink) { 1390 if (inode->i_nlink) {
1388 printk(KERN_DEBUG 1391 printk(KERN_DEBUG
1389 "%s: truncating inode %lu to %Ld bytes\n", 1392 "%s: truncating inode %lu to %Ld bytes\n",
1390 __FUNCTION__, inode->i_ino, inode->i_size); 1393 __func__, inode->i_ino, inode->i_size);
1391 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1394 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1392 inode->i_ino, inode->i_size); 1395 inode->i_ino, inode->i_size);
1393 ext3_truncate(inode); 1396 ext3_truncate(inode);
@@ -1395,7 +1398,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1395 } else { 1398 } else {
1396 printk(KERN_DEBUG 1399 printk(KERN_DEBUG
1397 "%s: deleting unreferenced inode %lu\n", 1400 "%s: deleting unreferenced inode %lu\n",
1398 __FUNCTION__, inode->i_ino); 1401 __func__, inode->i_ino);
1399 jbd_debug(2, "deleting unreferenced inode %lu\n", 1402 jbd_debug(2, "deleting unreferenced inode %lu\n",
1400 inode->i_ino); 1403 inode->i_ino);
1401 nr_orphans++; 1404 nr_orphans++;
@@ -1415,7 +1418,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1415 /* Turn quotas off */ 1418 /* Turn quotas off */
1416 for (i = 0; i < MAXQUOTAS; i++) { 1419 for (i = 0; i < MAXQUOTAS; i++) {
1417 if (sb_dqopt(sb)->files[i]) 1420 if (sb_dqopt(sb)->files[i])
1418 vfs_quota_off(sb, i); 1421 vfs_quota_off(sb, i, 0);
1419 } 1422 }
1420#endif 1423#endif
1421 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1424 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2298,9 +2301,9 @@ static void ext3_clear_journal_err(struct super_block * sb,
2298 char nbuf[16]; 2301 char nbuf[16];
2299 2302
2300 errstr = ext3_decode_error(sb, j_errno, nbuf); 2303 errstr = ext3_decode_error(sb, j_errno, nbuf);
2301 ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " 2304 ext3_warning(sb, __func__, "Filesystem error recorded "
2302 "from previous mount: %s", errstr); 2305 "from previous mount: %s", errstr);
2303 ext3_warning(sb, __FUNCTION__, "Marking fs in need of " 2306 ext3_warning(sb, __func__, "Marking fs in need of "
2304 "filesystem check."); 2307 "filesystem check.");
2305 2308
2306 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2309 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
@@ -2427,7 +2430,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2427 } 2430 }
2428 2431
2429 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2432 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
2430 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 2433 ext3_abort(sb, __func__, "Abort forced by user");
2431 2434
2432 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2435 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2433 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2436 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -2639,8 +2642,14 @@ static int ext3_dquot_drop(struct inode *inode)
2639 2642
2640 /* We may delete quota structure so we need to reserve enough blocks */ 2643 /* We may delete quota structure so we need to reserve enough blocks */
2641 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); 2644 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
2642 if (IS_ERR(handle)) 2645 if (IS_ERR(handle)) {
2646 /*
2647 * We call dquot_drop() anyway to at least release references
2648 * to quota structures so that umount does not hang.
2649 */
2650 dquot_drop(inode);
2643 return PTR_ERR(handle); 2651 return PTR_ERR(handle);
2652 }
2644 ret = dquot_drop(inode); 2653 ret = dquot_drop(inode);
2645 err = ext3_journal_stop(handle); 2654 err = ext3_journal_stop(handle);
2646 if (!ret) 2655 if (!ret)
@@ -2743,17 +2752,17 @@ static int ext3_quota_on_mount(struct super_block *sb, int type)
2743 * Standard function to be called on quota_on 2752 * Standard function to be called on quota_on
2744 */ 2753 */
2745static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2754static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2746 char *path) 2755 char *path, int remount)
2747{ 2756{
2748 int err; 2757 int err;
2749 struct nameidata nd; 2758 struct nameidata nd;
2750 2759
2751 if (!test_opt(sb, QUOTA)) 2760 if (!test_opt(sb, QUOTA))
2752 return -EINVAL; 2761 return -EINVAL;
2753 /* Not journalling quota? */ 2762 /* Not journalling quota or remount? */
2754 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
2755 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2756 return vfs_quota_on(sb, type, format_id, path); 2765 return vfs_quota_on(sb, type, format_id, path, remount);
2757 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2758 if (err) 2767 if (err)
2759 return err; 2768 return err;
@@ -2762,13 +2771,13 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2762 path_put(&nd.path); 2771 path_put(&nd.path);
2763 return -EXDEV; 2772 return -EXDEV;
2764 } 2773 }
2765 /* Quotafile not of fs root? */ 2774 /* Quotafile not in fs root? */
2766 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2767 printk(KERN_WARNING 2776 printk(KERN_WARNING
2768 "EXT3-fs: Quota file not on filesystem root. " 2777 "EXT3-fs: Quota file not on filesystem root. "
2769 "Journalled quota will not work.\n"); 2778 "Journalled quota will not work.\n");
2770 path_put(&nd.path); 2779 path_put(&nd.path);
2771 return vfs_quota_on(sb, type, format_id, path); 2780 return vfs_quota_on(sb, type, format_id, path, remount);
2772} 2781}
2773 2782
2774/* Read data from quotafile - avoid pagecache and such because we cannot afford 2783/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 42856541e9a5..d4a4f0e9ff69 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,6 +99,8 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer,
103 size_t buffer_size);
102 104
103static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
104 106
@@ -232,7 +234,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
232 ea_bdebug(bh, "b_count=%d, refcount=%d", 234 ea_bdebug(bh, "b_count=%d, refcount=%d",
233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 235 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
234 if (ext3_xattr_check_block(bh)) { 236 if (ext3_xattr_check_block(bh)) {
235bad_block: ext3_error(inode->i_sb, __FUNCTION__, 237bad_block: ext3_error(inode->i_sb, __func__,
236 "inode %lu: bad block "E3FSBLK, inode->i_ino, 238 "inode %lu: bad block "E3FSBLK, inode->i_ino,
237 EXT3_I(inode)->i_file_acl); 239 EXT3_I(inode)->i_file_acl);
238 error = -EIO; 240 error = -EIO;
@@ -374,7 +376,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
374 ea_bdebug(bh, "b_count=%d, refcount=%d", 376 ea_bdebug(bh, "b_count=%d, refcount=%d",
375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 377 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
376 if (ext3_xattr_check_block(bh)) { 378 if (ext3_xattr_check_block(bh)) {
377 ext3_error(inode->i_sb, __FUNCTION__, 379 ext3_error(inode->i_sb, __func__,
378 "inode %lu: bad block "E3FSBLK, inode->i_ino, 380 "inode %lu: bad block "E3FSBLK, inode->i_ino,
379 EXT3_I(inode)->i_file_acl); 381 EXT3_I(inode)->i_file_acl);
380 error = -EIO; 382 error = -EIO;
@@ -427,7 +429,7 @@ cleanup:
427 * Returns a negative error number on failure, or the number of bytes 429 * Returns a negative error number on failure, or the number of bytes
428 * used / required on success. 430 * used / required on success.
429 */ 431 */
430int 432static int
431ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
432{ 434{
433 int i_error, b_error; 435 int i_error, b_error;
@@ -649,7 +651,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
649 atomic_read(&(bs->bh->b_count)), 651 atomic_read(&(bs->bh->b_count)),
650 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 652 le32_to_cpu(BHDR(bs->bh)->h_refcount));
651 if (ext3_xattr_check_block(bs->bh)) { 653 if (ext3_xattr_check_block(bs->bh)) {
652 ext3_error(sb, __FUNCTION__, 654 ext3_error(sb, __func__,
653 "inode %lu: bad block "E3FSBLK, inode->i_ino, 655 "inode %lu: bad block "E3FSBLK, inode->i_ino,
654 EXT3_I(inode)->i_file_acl); 656 EXT3_I(inode)->i_file_acl);
655 error = -EIO; 657 error = -EIO;
@@ -797,10 +799,8 @@ inserted:
797 get_bh(new_bh); 799 get_bh(new_bh);
798 } else { 800 } else {
799 /* We need to allocate a new block */ 801 /* We need to allocate a new block */
800 ext3_fsblk_t goal = le32_to_cpu( 802 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
801 EXT3_SB(sb)->s_es->s_first_data_block) + 803 EXT3_I(inode)->i_block_group);
802 (ext3_fsblk_t)EXT3_I(inode)->i_block_group *
803 EXT3_BLOCKS_PER_GROUP(sb);
804 ext3_fsblk_t block = ext3_new_block(handle, inode, 804 ext3_fsblk_t block = ext3_new_block(handle, inode,
805 goal, &error); 805 goal, &error);
806 if (error) 806 if (error)
@@ -852,7 +852,7 @@ cleanup_dquot:
852 goto cleanup; 852 goto cleanup;
853 853
854bad_block: 854bad_block:
855 ext3_error(inode->i_sb, __FUNCTION__, 855 ext3_error(inode->i_sb, __func__,
856 "inode %lu: bad block "E3FSBLK, inode->i_ino, 856 "inode %lu: bad block "E3FSBLK, inode->i_ino,
857 EXT3_I(inode)->i_file_acl); 857 EXT3_I(inode)->i_file_acl);
858 goto cleanup; 858 goto cleanup;
@@ -1081,14 +1081,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1081 goto cleanup; 1081 goto cleanup;
1082 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 1082 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
1083 if (!bh) { 1083 if (!bh) {
1084 ext3_error(inode->i_sb, __FUNCTION__, 1084 ext3_error(inode->i_sb, __func__,
1085 "inode %lu: block "E3FSBLK" read error", inode->i_ino, 1085 "inode %lu: block "E3FSBLK" read error", inode->i_ino,
1086 EXT3_I(inode)->i_file_acl); 1086 EXT3_I(inode)->i_file_acl);
1087 goto cleanup; 1087 goto cleanup;
1088 } 1088 }
1089 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || 1089 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1090 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1090 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1091 ext3_error(inode->i_sb, __FUNCTION__, 1091 ext3_error(inode->i_sb, __func__,
1092 "inode %lu: bad block "E3FSBLK, inode->i_ino, 1092 "inode %lu: bad block "E3FSBLK, inode->i_ino,
1093 EXT3_I(inode)->i_file_acl); 1093 EXT3_I(inode)->i_file_acl);
1094 goto cleanup; 1094 goto cleanup;
@@ -1215,7 +1215,7 @@ again:
1215 } 1215 }
1216 bh = sb_bread(inode->i_sb, ce->e_block); 1216 bh = sb_bread(inode->i_sb, ce->e_block);
1217 if (!bh) { 1217 if (!bh) {
1218 ext3_error(inode->i_sb, __FUNCTION__, 1218 ext3_error(inode->i_sb, __func__,
1219 "inode %lu: block %lu read error", 1219 "inode %lu: block %lu read error",
1220 inode->i_ino, (unsigned long) ce->e_block); 1220 inode->i_ino, (unsigned long) ce->e_block);
1221 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1221 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 6b1ae1c6182c..148a4dfc82ab 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -67,7 +67,6 @@ extern struct xattr_handler ext3_xattr_security_handler;
67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); 67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
68 68
69extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); 69extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
70extern int ext3_xattr_list(struct inode *, char *, size_t);
71extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 70extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
72extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 71extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
73 72
@@ -89,12 +88,6 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
89} 88}
90 89
91static inline int 90static inline int
92ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
93{
94 return -EOPNOTSUPP;
95}
96
97static inline int
98ext3_xattr_set(struct inode *inode, int name_index, const char *name, 91ext3_xattr_set(struct inode *inode, int name_index, const char *name,
99 const void *value, size_t size, int flags) 92 const void *value, size_t size, int flags)
100{ 93{
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..da994374ec3b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1796 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1797 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1798 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1799 "b_committed_data\n", __func__);
1802 } 1800 }
1803 } 1801 }
1804 } 1802 }
@@ -1823,8 +1821,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1821 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1822 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1823 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1824 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1825 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1826 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1827 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 000000000000..8158083f7ac0
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
1/*
2 * ext4.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_H
17#define _EXT4_H
18
19#include <linux/types.h>
20#include <linux/blkdev.h>
21#include <linux/magic.h>
22#include "ext4_i.h"
23
24/*
25 * The second extended filesystem constants/structures
26 */
27
28/*
29 * Define EXT4FS_DEBUG to produce debug messages
30 */
31#undef EXT4FS_DEBUG
32
33/*
34 * Define EXT4_RESERVATION to reserve data blocks for expanding files
35 */
36#define EXT4_DEFAULT_RESERVE_BLOCKS 8
37/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
38#define EXT4_MAX_RESERVE_BLOCKS 1027
39#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
40
41/*
42 * Debug code
43 */
44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \
46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \
49 printk (KERN_DEBUG f, ## a); \
50 } while (0)
51#else
52#define ext4_debug(f, a...) do {} while (0)
53#endif
54
55#define EXT4_MULTIBLOCK_ALLOCATOR 1
56
57/* prefer goal again. length */
58#define EXT4_MB_HINT_MERGE 1
59/* blocks already reserved */
60#define EXT4_MB_HINT_RESERVED 2
61/* metadata is being allocated */
62#define EXT4_MB_HINT_METADATA 4
63/* first blocks in the file */
64#define EXT4_MB_HINT_FIRST 8
65/* search for the best chunk */
66#define EXT4_MB_HINT_BEST 16
67/* data is being allocated */
68#define EXT4_MB_HINT_DATA 32
69/* don't preallocate (for tails) */
70#define EXT4_MB_HINT_NOPREALLOC 64
71/* allocate for locality group */
72#define EXT4_MB_HINT_GROUP_ALLOC 128
73/* allocate goal blocks or none */
74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512
77
78struct ext4_allocation_request {
79 /* target inode for block we're allocating */
80 struct inode *inode;
81 /* logical block in target inode */
82 ext4_lblk_t logical;
83 /* phys. target (a hint) */
84 ext4_fsblk_t goal;
85 /* the closest logical allocated block to the left */
86 ext4_lblk_t lleft;
87 /* phys. block for ^^^ */
88 ext4_fsblk_t pleft;
89 /* the closest logical allocated block to the right */
90 ext4_lblk_t lright;
91 /* phys. block for ^^^ */
92 ext4_fsblk_t pright;
93 /* how many blocks we want to allocate */
94 unsigned long len;
95 /* flags. see above EXT4_MB_HINT_* */
96 unsigned long flags;
97};
98
99/*
100 * Special inodes numbers
101 */
102#define EXT4_BAD_INO 1 /* Bad blocks inode */
103#define EXT4_ROOT_INO 2 /* Root inode */
104#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
105#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
106#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
107#define EXT4_JOURNAL_INO 8 /* Journal inode */
108
109/* First non-reserved inode for old ext4 filesystems */
110#define EXT4_GOOD_OLD_FIRST_INO 11
111
112/*
113 * Maximal count of links to a file
114 */
115#define EXT4_LINK_MAX 65000
116
117/*
118 * Macro-instructions used to manage several block sizes
119 */
120#define EXT4_MIN_BLOCK_SIZE 1024
121#define EXT4_MAX_BLOCK_SIZE 65536
122#define EXT4_MIN_BLOCK_LOG_SIZE 10
123#ifdef __KERNEL__
124# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
125#else
126# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
127#endif
128#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
129#ifdef __KERNEL__
130# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
131#else
132# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
133#endif
134#ifdef __KERNEL__
135#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
136#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
137#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
138#else
139#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
140 EXT4_GOOD_OLD_INODE_SIZE : \
141 (s)->s_inode_size)
142#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
143 EXT4_GOOD_OLD_FIRST_INO : \
144 (s)->s_first_ino)
145#endif
146#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
147
148/*
149 * Structure of a blocks group descriptor
150 */
151struct ext4_group_desc
152{
153 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
154 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
155 __le32 bg_inode_table_lo; /* Inodes table block */
156 __le16 bg_free_blocks_count; /* Free blocks count */
157 __le16 bg_free_inodes_count; /* Free inodes count */
158 __le16 bg_used_dirs_count; /* Directories count */
159 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
160 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
161 __le16 bg_itable_unused; /* Unused inodes count */
162 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
163 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
164 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
165 __le32 bg_inode_table_hi; /* Inodes table block MSB */
166 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
167 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
168 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
169 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
170 __u32 bg_reserved2[3];
171};
172
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
176
177#ifdef __KERNEL__
178#include "ext4_sb.h"
179#endif
180/*
181 * Macro-instructions used to manage group descriptors
182 */
183#define EXT4_MIN_DESC_SIZE 32
184#define EXT4_MIN_DESC_SIZE_64BIT 64
185#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
186#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
187#ifdef __KERNEL__
188# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
189# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
190# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
191# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
192#else
193# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
194# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
195# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
196#endif
197
198/*
199 * Constants relative to the data blocks
200 */
201#define EXT4_NDIR_BLOCKS 12
202#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
203#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
204#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
205#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
206
207/*
208 * Inode flags
209 */
210#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
211#define EXT4_UNRM_FL 0x00000002 /* Undelete */
212#define EXT4_COMPR_FL 0x00000004 /* Compress file */
213#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
214#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
215#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
216#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
217#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
218/* Reserved for compression usage... */
219#define EXT4_DIRTY_FL 0x00000100
220#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
221#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
222#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
223/* End compression flags --- maybe not all used */
224#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
225#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
226#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
227#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
228#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
234
235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
236#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
237
238/*
239 * Inode dynamic state flags
240 */
241#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
242#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
243#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
244#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
245
246/* Used to pass group descriptor data when online resize is done */
247struct ext4_new_group_input {
248 __u32 group; /* Group number for this data */
249 __u64 block_bitmap; /* Absolute block number of block bitmap */
250 __u64 inode_bitmap; /* Absolute block number of inode bitmap */
251 __u64 inode_table; /* Absolute block number of inode table start */
252 __u32 blocks_count; /* Total number of blocks in this group */
253 __u16 reserved_blocks; /* Number of reserved blocks in this group */
254 __u16 unused;
255};
256
257/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
258struct ext4_new_group_data {
259 __u32 group;
260 __u64 block_bitmap;
261 __u64 inode_bitmap;
262 __u64 inode_table;
263 __u32 blocks_count;
264 __u16 reserved_blocks;
265 __u16 unused;
266 __u32 free_blocks_count;
267};
268
269/*
270 * Following is used by preallocation code to tell get_blocks() that we
271 * want uninitialzed extents.
272 */
273#define EXT4_CREATE_UNINITIALIZED_EXT 2
274
275/*
276 * ioctl commands
277 */
278#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
279#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
280#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
281#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
282#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
283#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
284#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
285#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
286#ifdef CONFIG_JBD2_DEBUG
287#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
288#endif
289#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
290#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
291#define EXT4_IOC_MIGRATE _IO('f', 7)
292
293/*
294 * ioctl commands in 32 bit emulation
295 */
296#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
297#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
298#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
299#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
300#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
301#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
302#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
303#ifdef CONFIG_JBD2_DEBUG
304#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
305#endif
306#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
307#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
308
309
310/*
311 * Mount options
312 */
313struct ext4_mount_options {
314 unsigned long s_mount_opt;
315 uid_t s_resuid;
316 gid_t s_resgid;
317 unsigned long s_commit_interval;
318#ifdef CONFIG_QUOTA
319 int s_jquota_fmt;
320 char *s_qf_names[MAXQUOTAS];
321#endif
322};
323
324/*
325 * Structure of an inode on the disk
326 */
327struct ext4_inode {
328 __le16 i_mode; /* File mode */
329 __le16 i_uid; /* Low 16 bits of Owner Uid */
330 __le32 i_size_lo; /* Size in bytes */
331 __le32 i_atime; /* Access time */
332 __le32 i_ctime; /* Inode Change time */
333 __le32 i_mtime; /* Modification time */
334 __le32 i_dtime; /* Deletion Time */
335 __le16 i_gid; /* Low 16 bits of Group Id */
336 __le16 i_links_count; /* Links count */
337 __le32 i_blocks_lo; /* Blocks count */
338 __le32 i_flags; /* File flags */
339 union {
340 struct {
341 __le32 l_i_version;
342 } linux1;
343 struct {
344 __u32 h_i_translator;
345 } hurd1;
346 struct {
347 __u32 m_i_reserved1;
348 } masix1;
349 } osd1; /* OS dependent 1 */
350 __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
351 __le32 i_generation; /* File version (for NFS) */
352 __le32 i_file_acl_lo; /* File ACL */
353 __le32 i_size_high;
354 __le32 i_obso_faddr; /* Obsoleted fragment address */
355 union {
356 struct {
357 __le16 l_i_blocks_high; /* were l_i_reserved1 */
358 __le16 l_i_file_acl_high;
359 __le16 l_i_uid_high; /* these 2 fields */
360 __le16 l_i_gid_high; /* were reserved2[0] */
361 __u32 l_i_reserved2;
362 } linux2;
363 struct {
364 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
365 __u16 h_i_mode_high;
366 __u16 h_i_uid_high;
367 __u16 h_i_gid_high;
368 __u32 h_i_author;
369 } hurd2;
370 struct {
371 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
372 __le16 m_i_file_acl_high;
373 __u32 m_i_reserved2[2];
374 } masix2;
375 } osd2; /* OS dependent 2 */
376 __le16 i_extra_isize;
377 __le16 i_pad1;
378 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
379 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
380 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
381 __le32 i_crtime; /* File Creation time */
382 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
383 __le32 i_version_hi; /* high 32 bits for 64-bit version */
384};
385
386
387#define EXT4_EPOCH_BITS 2
388#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
389#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
390
391/*
392 * Extended fields will fit into an inode if the filesystem was formatted
393 * with large inodes (-I 256 or larger) and there are not currently any EAs
394 * consuming all of the available space. For new inodes we always reserve
395 * enough space for the kernel's known extended fields, but for inodes
396 * created with an old kernel this might not have been the case. None of
397 * the extended inode fields is critical for correct filesystem operation.
398 * This macro checks if a certain field fits in the inode. Note that
399 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
400 */
401#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
402 ((offsetof(typeof(*ext4_inode), field) + \
403 sizeof((ext4_inode)->field)) \
404 <= (EXT4_GOOD_OLD_INODE_SIZE + \
405 (einode)->i_extra_isize)) \
406
407static inline __le32 ext4_encode_extra_time(struct timespec *time)
408{
409 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
410 time->tv_sec >> 32 : 0) |
411 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
412}
413
414static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
415{
416 if (sizeof(time->tv_sec) > 4)
417 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
418 << 32;
419 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
420}
421
422#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
423do { \
424 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
425 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
426 (raw_inode)->xtime ## _extra = \
427 ext4_encode_extra_time(&(inode)->xtime); \
428} while (0)
429
430#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
431do { \
432 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
433 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
434 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
435 (raw_inode)->xtime ## _extra = \
436 ext4_encode_extra_time(&(einode)->xtime); \
437} while (0)
438
439#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
440do { \
441 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
442 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
443 ext4_decode_extra_time(&(inode)->xtime, \
444 raw_inode->xtime ## _extra); \
445} while (0)
446
447#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
448do { \
449 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
450 (einode)->xtime.tv_sec = \
451 (signed)le32_to_cpu((raw_inode)->xtime); \
452 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
453 ext4_decode_extra_time(&(einode)->xtime, \
454 raw_inode->xtime ## _extra); \
455} while (0)
456
457#define i_disk_version osd1.linux1.l_i_version
458
459#if defined(__KERNEL__) || defined(__linux__)
460#define i_reserved1 osd1.linux1.l_i_reserved1
461#define i_file_acl_high osd2.linux2.l_i_file_acl_high
462#define i_blocks_high osd2.linux2.l_i_blocks_high
463#define i_uid_low i_uid
464#define i_gid_low i_gid
465#define i_uid_high osd2.linux2.l_i_uid_high
466#define i_gid_high osd2.linux2.l_i_gid_high
467#define i_reserved2 osd2.linux2.l_i_reserved2
468
469#elif defined(__GNU__)
470
471#define i_translator osd1.hurd1.h_i_translator
472#define i_uid_high osd2.hurd2.h_i_uid_high
473#define i_gid_high osd2.hurd2.h_i_gid_high
474#define i_author osd2.hurd2.h_i_author
475
476#elif defined(__masix__)
477
478#define i_reserved1 osd1.masix1.m_i_reserved1
479#define i_file_acl_high osd2.masix2.m_i_file_acl_high
480#define i_reserved2 osd2.masix2.m_i_reserved2
481
482#endif /* defined(__KERNEL__) || defined(__linux__) */
483
/*
 * File system states
 */
#define EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
#define EXT4_ERROR_FS			0x0002	/* Errors detected */
#define EXT4_ORPHAN_FS			0x0004	/* Orphans being recovered */

/*
 * Misc. filesystem flags
 */
#define EXT2_FLAGS_SIGNED_HASH		0x0001	/* Signed dirhash in use */
#define EXT2_FLAGS_UNSIGNED_HASH	0x0002	/* Unsigned dirhash in use */
#define EXT2_FLAGS_TEST_FILESYS		0x0004	/* to test development code */
497
/*
 * Mount flags
 */
#define EXT4_MOUNT_CHECK		0x00001	/* Do mount-time checks */
#define EXT4_MOUNT_OLDALLOC		0x00002	/* Don't use the new Orlov allocator */
#define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
#define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT		0x00010	/* Continue on errors */
#define EXT4_MOUNT_ERRORS_RO		0x00020	/* Remount fs ro on errors */
#define EXT4_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
#define EXT4_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
#define EXT4_MOUNT_ABORT		0x00200	/* Fatal error detected */
/* Two-bit field selecting the mode for data writes: */
#define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mask of the field */
#define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
#define EXT4_MOUNT_WRITEBACK_DATA	0x00C00	/* No data ordering */
#define EXT4_MOUNT_UPDATE_JOURNAL	0x01000	/* Update the journal format */
#define EXT4_MOUNT_NO_UID32		0x02000	/* Disable 32-bit UIDs */
#define EXT4_MOUNT_XATTR_USER		0x04000	/* Extended user attributes */
#define EXT4_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
#define EXT4_MOUNT_RESERVATION		0x10000	/* Preallocation */
#define EXT4_MOUNT_BARRIER		0x20000	/* Use block barriers */
#define EXT4_MOUNT_NOBH			0x40000	/* No bufferheads */
#define EXT4_MOUNT_QUOTA		0x80000	/* Some quota option set */
#define EXT4_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
#define EXT4_MOUNT_EXTENTS		0x400000 /* Extents support */
#define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION		0x2000000 /* i_version support */
#define EXT4_MOUNT_MBALLOC		0x4000000 /* Buddy allocation support */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
/*
 * Mount-option helpers; "opt" is the suffix of an EXT4_MOUNT_* flag.
 * clear_opt()/set_opt() update the flag word "o" in place, test_opt()
 * yields the masked bit(s) of the superblock's s_mount_opt.  The argument
 * and the whole expansion are parenthesized so the macros can be used
 * safely inside larger expressions.
 */
#define clear_opt(o, opt)	((o) &= ~EXT4_MOUNT_##opt)
#define set_opt(o, opt)		((o) |= EXT4_MOUNT_##opt)
#define test_opt(sb, opt)	(EXT4_SB(sb)->s_mount_opt & \
				 EXT4_MOUNT_##opt)
#else
#define EXT2_MOUNT_NOLOAD	EXT4_MOUNT_NOLOAD
#define EXT2_MOUNT_ABORT	EXT4_MOUNT_ABORT
#define EXT2_MOUNT_DATA_FLAGS	EXT4_MOUNT_DATA_FLAGS
#endif
541
/* ext4 bitmap helpers are plain aliases of the ext2 ones */
#define ext4_set_bit			ext2_set_bit
#define ext4_set_bit_atomic		ext2_set_bit_atomic
#define ext4_clear_bit			ext2_clear_bit
#define ext4_clear_bit_atomic		ext2_clear_bit_atomic
#define ext4_test_bit			ext2_test_bit
#define ext4_find_first_zero_bit	ext2_find_first_zero_bit
#define ext4_find_next_zero_bit		ext2_find_next_zero_bit
#define ext4_find_next_bit		ext2_find_next_bit
550
/*
 * Maximal mount counts between two filesystem checks
 */
#define EXT4_DFL_MAX_MNT_COUNT		20	/* Allow 20 mounts */
#define EXT4_DFL_CHECKINTERVAL		0	/* Don't use interval check */

/*
 * Behaviour when detecting errors
 */
#define EXT4_ERRORS_CONTINUE		1	/* Continue execution */
#define EXT4_ERRORS_RO			2	/* Remount fs read-only */
#define EXT4_ERRORS_PANIC		3	/* Panic */
#define EXT4_ERRORS_DEFAULT		EXT4_ERRORS_CONTINUE
564
565/*
566 * Structure of the super block (multi-byte fields are little-endian)
567 */
568struct ext4_super_block {
569/*00*/ __le32 s_inodes_count; /* Inodes count */
570 __le32 s_blocks_count_lo; /* Blocks count (low 32 bits) */
571 __le32 s_r_blocks_count_lo; /* Reserved blocks count (low 32 bits) */
572 __le32 s_free_blocks_count_lo; /* Free blocks count (low 32 bits) */
573/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
574 __le32 s_first_data_block; /* First Data Block */
575 __le32 s_log_block_size; /* Block size */
576 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
577/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
578 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
579 __le32 s_inodes_per_group; /* # Inodes per group */
580 __le32 s_mtime; /* Mount time */
581/*30*/ __le32 s_wtime; /* Write time */
582 __le16 s_mnt_count; /* Mount count */
583 __le16 s_max_mnt_count; /* Maximal mount count */
584 __le16 s_magic; /* Magic signature */
585 __le16 s_state; /* File system state */
586 __le16 s_errors; /* Behaviour when detecting errors */
587 __le16 s_minor_rev_level; /* minor revision level */
588/*40*/ __le32 s_lastcheck; /* time of last check */
589 __le32 s_checkinterval; /* max. time between checks */
590 __le32 s_creator_os; /* OS */
591 __le32 s_rev_level; /* Revision level */
592/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
593 __le16 s_def_resgid; /* Default gid for reserved blocks */
594 /*
595 * These fields are for EXT4_DYNAMIC_REV superblocks only.
596 *
597 * Note: the difference between the compatible feature set and
598 * the incompatible feature set is that if there is a bit set
599 * in the incompatible feature set that the kernel doesn't
600 * know about, it should refuse to mount the filesystem.
601 *
602 * e2fsck's requirements are more strict; if it doesn't know
603 * about a feature in either the compatible or incompatible
604 * feature set, it must abort and not try to meddle with
605 * things it doesn't understand...
606 */
607 __le32 s_first_ino; /* First non-reserved inode */
608 __le16 s_inode_size; /* size of inode structure */
609 __le16 s_block_group_nr; /* block group # of this superblock */
610 __le32 s_feature_compat; /* compatible feature set */
611/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
612 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
613/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
614/*78*/ char s_volume_name[16]; /* volume name */
615/*88*/ char s_last_mounted[64]; /* directory where last mounted */
616/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
617 /*
618 * Performance hints. Directory preallocation should only
619 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
620 */
621 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
622 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
623 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
624 /*
625 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
626 */
627/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
628/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
629 __le32 s_journal_dev; /* device number of journal file */
630 __le32 s_last_orphan; /* start of list of inodes to delete */
631 __le32 s_hash_seed[4]; /* HTREE hash seed */
632 __u8 s_def_hash_version; /* Default hash version to use */
633 __u8 s_reserved_char_pad;
634 __le16 s_desc_size; /* size of group descriptor */
635/*100*/ __le32 s_default_mount_opts;
636 __le32 s_first_meta_bg; /* First metablock block group */
637 __le32 s_mkfs_time; /* When the filesystem was created */
638 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
639 /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
640/*150*/ __le32 s_blocks_count_hi; /* Blocks count (high 32 bits) */
641 __le32 s_r_blocks_count_hi; /* Reserved blocks count (high 32 bits) */
642 __le32 s_free_blocks_count_hi; /* Free blocks count (high 32 bits) */
643 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
644 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
645 __le32 s_flags; /* Miscellaneous flags */
646 __le16 s_raid_stride; /* RAID stride */
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */
651};
652
653#ifdef __KERNEL__
654static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
655{
656 return sb->s_fs_info;
657}
658static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
659{
660 return container_of(inode, struct ext4_inode_info, vfs_inode);
661}
662
663static inline struct timespec ext4_current_time(struct inode *inode)
664{
665 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
666 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
667}
668
669
670static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
671{
672 return ino == EXT4_ROOT_INO ||
673 ino == EXT4_JOURNAL_INO ||
674 ino == EXT4_RESIZE_INO ||
675 (ino >= EXT4_FIRST_INO(sb) &&
676 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
677}
678#else
679/* Assume that user mode programs are passing in an ext4fs superblock, not
680 * a kernel struct super_block. This will allow us to call the feature-test
681 * macros from user land. */
682#define EXT4_SB(sb) (sb)
683#endif
684
/* The i_dtime member of the inode's ext4 info; usable as an lvalue. */
#define NEXT_ORPHAN(inode)	(EXT4_I(inode)->i_dtime)
686
/*
 * Codes for operating systems
 */
#define EXT4_OS_LINUX		0
#define EXT4_OS_HURD		1
#define EXT4_OS_MASIX		2
#define EXT4_OS_FREEBSD		3
#define EXT4_OS_LITES		4

/*
 * Revision levels
 */
#define EXT4_GOOD_OLD_REV	0	/* The good old (original) format */
#define EXT4_DYNAMIC_REV	1	/* V2 format w/ dynamic inode sizes */

#define EXT4_CURRENT_REV	EXT4_GOOD_OLD_REV
#define EXT4_MAX_SUPP_REV	EXT4_DYNAMIC_REV

#define EXT4_GOOD_OLD_INODE_SIZE	128
706
707/*
708 * Feature set definitions
709 */
710
/*
 * Test, set and clear bits in the three feature words of the on-disk
 * superblock reached through EXT4_SB(sb)->s_es.  "mask" is given in CPU
 * byte order and converted with cpu_to_le32().
 *
 * The HAS variants yield the masked little-endian word (non-zero when any
 * requested bit is set), not a normalized 0/1.  The SET/CLEAR variants are
 * fully parenthesized so they remain correct inside larger expressions.
 */
#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask))
#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask))
#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask))
#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask))
#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask))
#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
	(EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask))
729
/* Compatible feature bits (s_feature_compat) */
#define EXT4_FEATURE_COMPAT_DIR_PREALLOC	0x0001
#define EXT4_FEATURE_COMPAT_IMAGIC_INODES	0x0002
#define EXT4_FEATURE_COMPAT_HAS_JOURNAL		0x0004
#define EXT4_FEATURE_COMPAT_EXT_ATTR		0x0008
#define EXT4_FEATURE_COMPAT_RESIZE_INODE	0x0010
#define EXT4_FEATURE_COMPAT_DIR_INDEX		0x0020

/* Read-only compatible feature bits (s_feature_ro_compat) */
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR	0x0004
#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE	0x0008
#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040

/* Incompatible feature bits (s_feature_incompat) */
#define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
#define EXT4_FEATURE_INCOMPAT_RECOVER		0x0004 /* Needs recovery */
#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
#define EXT4_FEATURE_INCOMPAT_META_BG		0x0010
#define EXT4_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
#define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
#define EXT4_FEATURE_INCOMPAT_MMP		0x0100
#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200

/*
 * Supported feature masks, built only from the EXT4_* flags defined above
 * so that this header does not depend on the ext2 header being included.
 */
#define EXT4_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
					 EXT4_FEATURE_INCOMPAT_RECOVER| \
					 EXT4_FEATURE_INCOMPAT_META_BG| \
					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
					 EXT4_FEATURE_INCOMPAT_64BIT| \
					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
#define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
769
/*
 * Default values for user and/or group using reserved blocks
 */
#define EXT4_DEF_RESUID			0
#define EXT4_DEF_RESGID			0

/*
 * Default mount options
 */
#define EXT4_DEFM_DEBUG			0x0001
#define EXT4_DEFM_BSDGROUPS		0x0002
#define EXT4_DEFM_XATTR_USER		0x0004
#define EXT4_DEFM_ACL			0x0008
#define EXT4_DEFM_UID16			0x0010
#define EXT4_DEFM_JMODE			0x0060
#define EXT4_DEFM_JMODE_DATA		0x0020
#define EXT4_DEFM_JMODE_ORDERED		0x0040
#define EXT4_DEFM_JMODE_WBACK		0x0060
788
789/*
790 * Structure of a directory entry
791 */
792#define EXT4_NAME_LEN 255 /* Size of the name[] array in a directory entry */
793
794struct ext4_dir_entry {
795 __le32 inode; /* Inode number */
796 __le16 rec_len; /* Directory entry length */
797 __le16 name_len; /* Name length */
798 char name[EXT4_NAME_LEN]; /* File name */
799};
800
801/*
802 * The new version of the directory entry. Since EXT4 structures are
803 * stored in Intel (little-endian) byte order, and the name_len field
804 * can never be bigger than 255 chars, it's safe to reclaim the upper
805 * byte of the old 16-bit name_len for the file_type field.
806 */
807struct ext4_dir_entry_2 {
808 __le32 inode; /* Inode number */
809 __le16 rec_len; /* Directory entry length */
810 __u8 name_len; /* Name length */
811 __u8 file_type; /* File type (see the EXT4_FT_* values) */
812 char name[EXT4_NAME_LEN]; /* File name */
813};
814
/*
 * Ext4 directory file types.  Only the low 3 bits are used.  The
 * other bits are reserved for now.
 */
#define EXT4_FT_UNKNOWN		0
#define EXT4_FT_REG_FILE	1
#define EXT4_FT_DIR		2
#define EXT4_FT_CHRDEV		3
#define EXT4_FT_BLKDEV		4
#define EXT4_FT_FIFO		5
#define EXT4_FT_SOCK		6
#define EXT4_FT_SYMLINK		7

#define EXT4_FT_MAX		8
829
/*
 * EXT4_DIR_PAD defines the directory entries boundaries
 *
 * NOTE: It must be a multiple of 4
 *
 * EXT4_DIR_REC_LEN() adds 8 to the name length (the combined size of the
 * fields preceding name[] in struct ext4_dir_entry_2) and rounds the sum
 * up to a multiple of EXT4_DIR_PAD.
 */
#define EXT4_DIR_PAD			4
#define EXT4_DIR_ROUND			(EXT4_DIR_PAD - 1)
#define EXT4_DIR_REC_LEN(name_len) \
	(((name_len) + 8 + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)
#define EXT4_MAX_REC_LEN		((1<<16)-1)
840
841static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
842{
843 unsigned len = le16_to_cpu(dlen);
844
845 if (len == EXT4_MAX_REC_LEN)
846 return 1 << 16;
847 return len;
848}
849
850static inline __le16 ext4_rec_len_to_disk(unsigned len)
851{
852 if (len == (1 << 16))
853 return cpu_to_le16(EXT4_MAX_REC_LEN);
854 else if (len > (1 << 16))
855 BUG();
856 return cpu_to_le16(len);
857}
858
859/*
860 * Hash Tree Directory indexing
861 * (c) Daniel Phillips, 2001
862 */
863
/*
 * is_dx(): the filesystem has the DIR_INDEX compat feature and the
 * directory inode carries EXT4_INDEX_FL.
 * EXT4_DIR_LINK_MAX(): a non-indexed directory has reached EXT4_LINK_MAX.
 * EXT4_DIR_LINK_EMPTY(): the directory's link count is 1 or 2.
 *
 * "dir" is parenthesized wherever it is dereferenced so that an
 * expression argument expands correctly.
 */
#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE((dir)->i_sb, \
				EXT4_FEATURE_COMPAT_DIR_INDEX) && \
		    (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
869
/* Legal values for the dx_root hash_version field: */

#define DX_HASH_LEGACY		0
#define DX_HASH_HALF_MD4	1
#define DX_HASH_TEA		2
875
876#ifdef __KERNEL__
877
878/* hash info structure used by the directory hash */
879struct dx_hash_info
880{
881 u32 hash;
882 u32 minor_hash;
883 int hash_version;
884 u32 *seed;
885};
886
#define EXT4_HTREE_EOF		0x7fffffff

/*
 * Control parameters used by ext4_htree_next_block
 */
#define HASH_NB_ALWAYS		1
893
894
895/*
896 * Describe an inode's exact location on disk and in memory
897 */
898struct ext4_iloc
899{
900 struct buffer_head *bh;
901 unsigned long offset;
902 ext4_group_t block_group;
903};
904
905static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
906{
907 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
908}
909
910/*
911 * This structure is stuffed into the struct file's private_data field
912 * for directories. It is where we put information so that we can do
913 * readdir operations in hash tree order.
914 */
915struct dir_private_info {
916 struct rb_root root;
917 struct rb_node *curr_node;
918 struct fname *extra_fname;
919 loff_t last_pos;
920 __u32 curr_hash;
921 __u32 curr_minor_hash;
922 __u32 next_hash;
923};
924
925/* calculate the first block number of the group */
926static inline ext4_fsblk_t
927ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
928{
929 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
930 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
931}
932
/*
 * Special error return code only used by dx_probe() and its callers.
 */
#define ERR_BAD_DX_DIR	(-75000)
937
938void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
939 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
940
941/*
942 * Function prototypes
943 */
944
/*
 * Ok, these declarations are also in <linux/kernel.h> but none of the
 * ext4 source programs needs to include it so they are duplicated here.
 */
#define NORET_TYPE	/**/
#define ATTRIB_NORET	__attribute__((noreturn))
#define NORET_AND	noreturn,
952
953/* balloc.c */
954extern unsigned int ext4_block_group(struct super_block *sb,
955 ext4_fsblk_t blocknr);
956extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
957 ext4_fsblk_t blocknr);
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
970 ext4_fsblk_t block, unsigned long count,
971 unsigned long *pdquot_freed_blocks);
972extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
973extern void ext4_check_blocks_bitmap (struct super_block *);
974extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
975 ext4_group_t block_group,
976 struct buffer_head ** bh);
977extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
978extern void ext4_init_block_alloc_info(struct inode *);
979extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
980
981/* dir.c */
982extern int ext4_check_dir_entry(const char *, struct inode *,
983 struct ext4_dir_entry_2 *,
984 struct buffer_head *, unsigned long);
985extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
986 __u32 minor_hash,
987 struct ext4_dir_entry_2 *dirent);
988extern void ext4_htree_free_dir_info(struct dir_private_info *p);
989
990/* fsync.c */
991extern int ext4_sync_file (struct file *, struct dentry *, int);
992
993/* hash.c */
994extern int ext4fs_dirhash(const char *name, int len, struct
995 dx_hash_info *hinfo);
996
997/* ialloc.c */
998extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
999extern void ext4_free_inode (handle_t *, struct inode *);
1000extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
1001extern unsigned long ext4_count_free_inodes (struct super_block *);
1002extern unsigned long ext4_count_dirs (struct super_block *);
1003extern void ext4_check_inodes_bitmap (struct super_block *);
1004extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
1005
1006/* mballoc.c */
1007extern long ext4_mb_stats;
1008extern long ext4_mb_max_to_scan;
1009extern int ext4_mb_init(struct super_block *, int);
1010extern int ext4_mb_release(struct super_block *);
1011extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1012 struct ext4_allocation_request *, int *);
1013extern int ext4_mb_reserve_blocks(struct super_block *, int);
1014extern void ext4_mb_discard_inode_preallocations(struct inode *);
1015extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *);
1019
1020
1021/* inode.c */
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1025 ext4_lblk_t, int, int *);
1026struct buffer_head *ext4_bread(handle_t *, struct inode *,
1027 ext4_lblk_t, int, int *);
1028int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1029 ext4_lblk_t iblock, unsigned long maxblocks,
1030 struct buffer_head *bh_result,
1031 int create, int extend_disksize);
1032
1033extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *);
1036extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1042extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1048 struct address_space *mapping, loff_t from);
1049
1050/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1053
1054/* migrate.c */
1055extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
1056 unsigned long);
1057/* namei.c */
1058extern int ext4_orphan_add(handle_t *, struct inode *);
1059extern int ext4_orphan_del(handle_t *, struct inode *);
1060extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1061 __u32 start_minor_hash, __u32 *next_hash);
1062
1063/* resize.c */
1064extern int ext4_group_add(struct super_block *sb,
1065 struct ext4_new_group_data *input);
1066extern int ext4_group_extend(struct super_block *sb,
1067 struct ext4_super_block *es,
1068 ext4_fsblk_t n_blocks_count);
1069
1070/* super.c */
1071extern void ext4_error (struct super_block *, const char *, const char *, ...)
1072 __attribute__ ((format (printf, 3, 4)));
1073extern void __ext4_std_error (struct super_block *, const char *, int);
1074extern void ext4_abort (struct super_block *, const char *, const char *, ...)
1075 __attribute__ ((format (printf, 3, 4)));
1076extern void ext4_warning (struct super_block *, const char *, const char *, ...)
1077 __attribute__ ((format (printf, 3, 4)));
1078extern void ext4_update_dynamic_rev (struct super_block *sb);
1079extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1080 __u32 compat);
1081extern int ext4_update_rocompat_feature(handle_t *handle,
1082 struct super_block *sb, __u32 rocompat);
1083extern int ext4_update_incompat_feature(handle_t *handle,
1084 struct super_block *sb, __u32 incompat);
1085extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
1086 struct ext4_group_desc *bg);
1087extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1088 struct ext4_group_desc *bg);
1089extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1090 struct ext4_group_desc *bg);
1091extern void ext4_block_bitmap_set(struct super_block *sb,
1092 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1093extern void ext4_inode_bitmap_set(struct super_block *sb,
1094 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1095extern void ext4_inode_table_set(struct super_block *sb,
1096 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1097
1098static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1099{
1100 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
1101 le32_to_cpu(es->s_blocks_count_lo);
1102}
1103
1104static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
1105{
1106 return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
1107 le32_to_cpu(es->s_r_blocks_count_lo);
1108}
1109
1110static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
1111{
1112 return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
1113 le32_to_cpu(es->s_free_blocks_count_lo);
1114}
1115
1116static inline void ext4_blocks_count_set(struct ext4_super_block *es,
1117 ext4_fsblk_t blk)
1118{
1119 es->s_blocks_count_lo = cpu_to_le32((u32)blk);
1120 es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
1121}
1122
1123static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
1124 ext4_fsblk_t blk)
1125{
1126 es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
1127 es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
1128}
1129
1130static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1131 ext4_fsblk_t blk)
1132{
1133 es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
1134 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
1135}
1136
1137static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1138{
1139 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1140 le32_to_cpu(raw_inode->i_size_lo);
1141}
1142
1143static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1144{
1145 raw_inode->i_size_lo = cpu_to_le32(i_size);
1146 raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
1147}
1148
1149static inline
1150struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1151 ext4_group_t group)
1152{
1153 struct ext4_group_info ***grp_info;
1154 long indexv, indexh;
1155 grp_info = EXT4_SB(sb)->s_group_info;
1156 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
1157 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
1158 return grp_info[indexv][indexh];
1159}
1160
1161
/*
 * Report a standard error for the calling function if errno is non-zero.
 *
 * The errno argument is evaluated exactly once (it is captured in a local
 * int, matching the int parameter of __ext4_std_error()), so arguments
 * with side effects are safe.  __func__ is the standard C99 spelling of
 * the function-name identifier.
 */
#define ext4_std_error(sb, errno) \
do { \
	int ext4_std_err_ = (errno); \
	if (ext4_std_err_) \
		__ext4_std_error((sb), __func__, ext4_std_err_); \
} while (0)
1167
1168/*
1169 * Inodes and files operations
1170 */
1171
1172/* dir.c */
1173extern const struct file_operations ext4_dir_operations;
1174
1175/* file.c */
1176extern const struct inode_operations ext4_file_inode_operations;
1177extern const struct file_operations ext4_file_operations;
1178
1179/* namei.c */
1180extern const struct inode_operations ext4_dir_inode_operations;
1181extern const struct inode_operations ext4_special_inode_operations;
1182
1183/* symlink.c */
1184extern const struct inode_operations ext4_symlink_inode_operations;
1185extern const struct inode_operations ext4_fast_symlink_inode_operations;
1186
1187/* extents.c */
1188extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1189extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1190extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *);
1195extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1198 loff_t len);
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create,
1202 int extend_disksize);
1203#endif /* __KERNEL__ */
1204
1205#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 000000000000..75333b595fab
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _EXT4_EXTENTS
20#define _EXT4_EXTENTS
21
22#include "ext4.h"
23
24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
26 * becomes very small, so index split, in-depth growing and
27 * other hard changes happen much more often.
28 * This is for debug purposes only.
29 */
30#define AGGRESSIVE_TEST_
31
32/*
33 * With EXTENTS_STATS defined, the number of blocks and extents
34 * are collected in the truncate path. They'll be shown at
35 * umount time.
36 */
37#define EXTENTS_STATS__
38
39/*
40 * If CHECK_BINSEARCH is defined, then the results of the binary search
41 * will also be checked by linear search.
42 */
43#define CHECK_BINSEARCH__
44
45/*
46 * If EXT_DEBUG is defined you can use the 'extdebug' mount option
47 * to get lots of info about what's going on.
48 */
49#define EXT_DEBUG__
50#ifdef EXT_DEBUG
51#define ext_debug(a...) printk(a)
52#else
53#define ext_debug(a...)
54#endif
55
56/*
57 * If EXT_STATS is defined then stats numbers are collected.
58 * These number will be displayed at umount time.
59 */
60#define EXT_STATS_
61
62
63/*
64 * ext4_inode has i_block array (60 bytes total).
65 * The first 12 bytes store ext4_extent_header;
66 * the remainder stores an array of ext4_extent.
67 */
68
69/*
70 * This is the extent on-disk structure.
71 * It's used at the bottom of the tree.
72 */
73struct ext4_extent {
74 __le32 ee_block; /* first logical block extent covers */
75 __le16 ee_len; /* number of blocks covered by extent */
76 __le16 ee_start_hi; /* high 16 bits of physical block */
77 __le32 ee_start_lo; /* low 32 bits of physical block */
78};
79
80/*
81 * This is index on-disk structure.
82 * It's used at all the levels except the bottom.
83 */
84struct ext4_extent_idx {
85 __le32 ei_block; /* index covers logical blocks from 'block' */
86 __le32 ei_leaf_lo; /* pointer to the physical block of the next *
87 * level. leaf or next index could be there */
88 __le16 ei_leaf_hi; /* high 16 bits of physical block */
89 __u16 ei_unused;
90};
91
92/*
93 * Each block (leaves and indexes), even inode-stored has header.
94 */
95struct ext4_extent_header {
96 __le16 eh_magic; /* probably will support different formats */
97 __le16 eh_entries; /* number of valid entries */
98 __le16 eh_max; /* capacity of store in entries */
99 __le16 eh_depth; /* has tree real underlying blocks? */
100 __le32 eh_generation; /* generation of the tree */
101};
102
103#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
104
105/*
106 * Array of ext4_ext_path contains path to some extent.
107 * Creation/lookup routines use it for traversal/splitting/etc.
108 * Truncate uses it to simulate recursive walking.
109 */
110struct ext4_ext_path {
111 ext4_fsblk_t p_block;
112 __u16 p_depth;
113 struct ext4_extent *p_ext;
114 struct ext4_extent_idx *p_idx;
115 struct ext4_extent_header *p_hdr;
116 struct buffer_head *p_bh;
117};
118
119/*
120 * structure for external API
121 */
122
123#define EXT4_EXT_CACHE_NO 0
124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2
126
127
128#define EXT_MAX_BLOCK 0xffffffff
129
130/*
131 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
132 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
133 * MSB of ee_len field in the extent datastructure to signify if this
134 * particular extent is an initialized extent or an uninitialized (i.e.
135 * preallocated).
136 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
137 * uninitialized extent.
138 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
139 * uninitialized one. In other words, if MSB of ee_len is set, it is an
140 * uninitialized extent with only one special scenario when ee_len = 0x8000.
141 * In this case we can not have an uninitialized extent of zero length and
142 * thus we make it as a special case of initialized extent with 0x8000 length.
143 * This way we get better extent-to-group alignment for initialized extents.
144 * Hence, the maximum number of blocks we can have in an *initialized*
145 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
146 */
147#define EXT_INIT_MAX_LEN (1UL << 15)
148#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
149
150
151#define EXT_FIRST_EXTENT(__hdr__) \
152 ((struct ext4_extent *) (((char *) (__hdr__)) + \
153 sizeof(struct ext4_extent_header)))
154#define EXT_FIRST_INDEX(__hdr__) \
155 ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
156 sizeof(struct ext4_extent_header)))
157#define EXT_HAS_FREE_INDEX(__path__) \
158 (le16_to_cpu((__path__)->p_hdr->eh_entries) \
159 < le16_to_cpu((__path__)->p_hdr->eh_max))
160#define EXT_LAST_EXTENT(__hdr__) \
161 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
162#define EXT_LAST_INDEX(__hdr__) \
163 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
164#define EXT_MAX_EXTENT(__hdr__) \
165 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
166#define EXT_MAX_INDEX(__hdr__) \
167 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
168
169static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
170{
171 return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
172}
173
174static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
175{
176 return (struct ext4_extent_header *) bh->b_data;
177}
178
179static inline unsigned short ext_depth(struct inode *inode)
180{
181 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
182}
183
184static inline void ext4_ext_tree_changed(struct inode *inode)
185{
186 EXT4_I(inode)->i_ext_generation++;
187}
188
189static inline void
190ext4_ext_invalidate_cache(struct inode *inode)
191{
192 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
193}
194
195static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
196{
197 /* Zero length plus the MSB would read back as an initialized 0x8000-block extent: forbid it */
198 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
199 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); /* set the MSB of the on-disk length */
200}
201
202static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
203{
204 /* Exactly 0x8000 is a full-length initialized extent; only larger values count */
205 return le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN;
206}
207
208static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
209{
210 unsigned short raw_len = le16_to_cpu(ext->ee_len);
211 /* Above 0x8000 the MSB flags "uninitialized"; strip it to get the length. */
212 return raw_len <= EXT_INIT_MAX_LEN ? raw_len : raw_len - EXT_INIT_MAX_LEN;
213}
214
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *);
218extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
219extern int ext4_ext_try_to_merge(struct inode *inode,
220 struct ext4_ext_path *path,
221 struct ext4_extent *);
222extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
223extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
224extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
225 struct ext4_ext_path *);
226extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
227 ext4_lblk_t *, ext4_fsblk_t *);
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _EXT4_EXTENTS */
232
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 000000000000..26a4ae255d79
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned long ext4_group_t;
35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * It is the physical block number of the block which was most-recently
62 * allocated to this file. This gives us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end
70
71/*
72 * storage for cached extent
73 */
74struct ext4_ext_cache {
75 ext4_fsblk_t ec_start;
76 ext4_lblk_t ec_block;
77 __u32 ec_len; /* must be 32bit to return holes */
78 __u32 ec_type; /* an EXT4_EXT_CACHE_* value (see ext4_extents.h) */
79};
80
81/*
82 * fourth extended file system inode data in memory
83 */
84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */
86 __u32 i_flags;
87 ext4_fsblk_t i_file_acl;
88 __u32 i_dtime;
89
90 /*
91 * i_block_group is the number of the block group which contains
92 * this file's inode. Constant across the lifetime of the inode,
93 * it is used for making block allocation decisions - we try to
94 * place a file's data blocks near its inode block, and new inodes
95 * near to their parent directory's inode.
96 */
97 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */
99
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR
105 /*
106 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention
108 * between readers of EAs and writers of regular file data, so
109 * instead we synchronize on xattr_sem when reading or changing
110 * EAs.
111 */
112 struct rw_semaphore xattr_sem;
113#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
115 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl;
117#endif
118
119 struct list_head i_orphan; /* unlinked but open inodes */
120
121 /*
122 * i_disksize keeps track of what the inode size is ON DISK, not
123 * in memory. During truncate, i_size is set to the new size by
124 * the VFS prior to calling ext4_truncate(), but the filesystem won't
125 * set i_disksize to 0 until the truncate is actually under way.
126 *
127 * The intent is that i_disksize always represents the blocks which
128 * are used by this file. This allows recovery to restart truncate
129 * on orphans if we crash during truncate. We actually write i_disksize
130 * into the on-disk inode when writing inodes out, instead of i_size.
131 *
132 * The only time when i_disksize and i_size may be different is when
133 * a truncate is in progress. The only things which change i_disksize
134 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
135 */
136 loff_t i_disksize;
137
138 /* on-disk additional length */
139 __u16 i_extra_isize;
140
141 /*
142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have i_data_sem.
150 */
151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode;
153
154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
161
162 /* mballoc */
163 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock;
165};
166
167#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 000000000000..9255a7d28b24
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
1/*
2 * ext4_jbd2.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions.
13 */
14
15#ifndef _EXT4_JBD2_H
16#define _EXT4_JBD2_H
17
18#include <linux/fs.h>
19#include <linux/jbd2.h>
20#include "ext4.h"
21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23
24/* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction.
30 *
31 * For extents-enabled fs we may have to allocate and modify up to
32 * 5 levels of tree + root which are stored in the inode. */
33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U)
37
38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode
40 * and the superblock, which are already accounted for. */
41
42#define EXT4_XATTR_TRANS_BLOCKS 6U
43
44/* Define the minimum size for a transaction which modifies data. This
45 * needs to take into account the fact that we may end up modifying two
46 * quota files too (one for the group, one for the user quota). The
47 * superblock only gets updated once, of course, so don't bother
48 * counting that again for the quota updates. */
49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53
54/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */
57
58#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
59
60/* Define an arbitrary limit for the amount of data we will anticipate
61 * writing to any given transaction. For unbounded transactions such as
62 * write(2) and truncate(2) we can write more than this, but we always
63 * start off at the maximum transaction size and grow the transaction
64 * optimistically as we go. */
65
66#define EXT4_MAX_TRANS_DATA 64U
67
68/* We break up a large truncate or write transaction once the handle's
69 * buffer credits gets this low, we need either to extend the
70 * transaction or to start a new one. Reserve enough space here for
71 * inode, bitmap, superblock, group and indirection updates for at least
72 * one block, plus two quota updates. Quota allocations are not
73 * needed. */
74
75#define EXT4_RESERVE_TRANS_BLOCKS 12U
76
77#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
78
79#ifdef CONFIG_QUOTA
80/* Amount of blocks needed for quota update - we know that the structure was
81 * allocated so we need to update only inode+data */
82#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
83/* Amount of blocks needed for quota insert/delete - we do some block writes
84 * but inode, sb and group updates are done only once */
85#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
86 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
87#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
88 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
89#else
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
91#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
92#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
93#endif
94
95int
96ext4_mark_iloc_dirty(handle_t *handle,
97 struct inode *inode,
98 struct ext4_iloc *iloc);
99
100/*
101 * On success, We end up with an outstanding reference count against
102 * iloc->bh. This _must_ be cleaned up later.
103 */
104
105int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
106 struct ext4_iloc *iloc);
107
108int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
109
110/*
111 * Wrapper functions with which ext4 calls into JBD. The intent here is
112 * to allow these to be turned into appropriate stubs so ext4 can control
113 * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't
114 * been done yet.
115 */
116
117static inline void ext4_journal_release_buffer(handle_t *handle,
118 struct buffer_head *bh)
119{
120 jbd2_journal_release_buffer(handle, bh);
121}
122
123void ext4_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err);
125
126int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
127 struct buffer_head *bh);
128
129int __ext4_journal_get_write_access(const char *where, handle_t *handle,
130 struct buffer_head *bh);
131
132int __ext4_journal_forget(const char *where, handle_t *handle,
133 struct buffer_head *bh);
134
135int __ext4_journal_revoke(const char *where, handle_t *handle,
136 ext4_fsblk_t blocknr, struct buffer_head *bh);
137
138int __ext4_journal_get_create_access(const char *where,
139 handle_t *handle, struct buffer_head *bh);
140
141int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh);
143
144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__func__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__func__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__func__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__func__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle);
161
162static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
163{
164 return ext4_journal_start_sb(inode->i_sb, nblocks);
165}
166
167#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__func__, (handle))
169
170static inline handle_t *ext4_journal_current_handle(void)
171{
172 return journal_current_handle();
173}
174
175static inline int ext4_journal_extend(handle_t *handle, int nblocks)
176{
177 return jbd2_journal_extend(handle, nblocks);
178}
179
180static inline int ext4_journal_restart(handle_t *handle, int nblocks)
181{
182 return jbd2_journal_restart(handle, nblocks);
183}
184
185static inline int ext4_journal_blocks_per_page(struct inode *inode)
186{
187 return jbd2_journal_blocks_per_page(inode);
188}
189
190static inline int ext4_journal_force_commit(journal_t *journal)
191{
192 return jbd2_journal_force_commit(journal);
193}
194
195/* super.c */
196int ext4_force_commit(struct super_block *sb);
197
198static inline int ext4_should_journal_data(struct inode *inode)
199{
200 /* Anything that is not a regular file always has its data journalled. */
201 if (!S_ISREG(inode->i_mode))
202 return 1;
203 /* Regular files: journal when the mount mode or the inode flag asks for it. */
204 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
205 return 1;
206 return (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) ? 1 : 0;
207}
208
209static inline int ext4_should_order_data(struct inode *inode)
210{
211 /* Ordered mode applies only to regular files ... */
212 if (!S_ISREG(inode->i_mode))
213 return 0;
214 /* ... that are not individually flagged for data journalling. */
215 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
216 return 0;
217 return test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA;
218}
219
220static inline int ext4_should_writeback_data(struct inode *inode)
221{
222 /* Writeback mode applies only to regular files ... */
223 if (!S_ISREG(inode->i_mode))
224 return 0;
225 /* ... that are not individually flagged for data journalling. */
226 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
227 return 0;
228 return test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA;
229}
230
231#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 000000000000..5802e69f2191
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 spinlock_t s_next_gen_lock;
56 u32 s_next_generation;
57 u32 s_hash_seed[4];
58 int s_def_hash_version;
59 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter;
62 struct blockgroup_lock s_blockgroup_lock;
63
64 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68
69 /* Journaling */
70 struct inode * s_journal_inode;
71 struct journal_s * s_journal;
72 struct list_head s_orphan;
73 unsigned long s_commit_interval;
74 struct block_device *journal_bdev;
75#ifdef CONFIG_JBD2_DEBUG
76 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
77 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
78#endif
79#ifdef CONFIG_QUOTA
80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
81 int s_jquota_fmt; /* Format of quota to use */
82#endif
83 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84
85#ifdef EXTENTS_STATS
86 /* ext4 extents stats */
87 unsigned long s_ext_min;
88 unsigned long s_ext_max;
89 unsigned long s_depth_max;
90 spinlock_t s_ext_stats_lock;
91 unsigned long s_ext_blocks;
92 unsigned long s_ext_extents;
93#endif
94
95 /* for buddy allocator */
96 struct ext4_group_info ***s_group_info;
97 struct inode *s_buddy_cache;
98 long s_blocks_reserved;
99 spinlock_t s_reserve_lock;
100 struct list_head s_active_transaction;
101 struct list_head s_closed_transaction;
102 struct list_head s_committed_transaction;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets, *s_mb_maxs;
106
107 /* tunables */
108 unsigned long s_stripe;
109 unsigned long s_mb_stream_request;
110 unsigned long s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan;
112 unsigned long s_mb_stats;
113 unsigned long s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start;
118
119 /* history to debug policy */
120 struct ext4_mb_history *s_mb_history;
121 int s_mb_history_cur;
122 int s_mb_history_max;
123 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146};
147
148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * an fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successfull insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
2405 * to insert a extent in the middle zerout directly
2406 * otherwise give the extent a chance to merge to left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks because
2893 * If blocksize = 4096 offset = 3072 and len = 2048
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
753 /* set extent flag only for directory and file */ 743 /* set extent flag only for diretory, file and normal symlink*/
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ef97f19c2f9d..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2449 int i; 2155 int i;
2450 2156
2451 if (sbi->s_mb_proc != NULL) { 2157 if (sbi->s_mb_proc != NULL) {
2452 struct proc_dir_entry *p; 2158 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
2453 p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); 2159 &ext4_mb_seq_history_fops, sb);
2454 if (p) { 2160 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
2455 p->proc_fops = &ext4_mb_seq_history_fops; 2161 &ext4_mb_seq_groups_fops, sb);
2456 p->data = sb;
2457 }
2458 p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
2459 if (p) {
2460 p->proc_fops = &ext4_mb_seq_groups_fops;
2461 p->data = sb;
2462 }
2463 } 2162 }
2464 2163
2465 sbi->s_mb_history_max = 1000; 2164 sbi->s_mb_history_max = 1000;
@@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2472 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2473} 2172}
2474 2173
2475static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2476{ 2176{
2477 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2478 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2572 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2573 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2574 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2575 i--;
2576 goto err_freebuddy; 2275 goto err_freebuddy;
2577 } 2276 }
2578 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2579 if (desc == NULL) { 2278 if (desc == NULL) {
2580 printk(KERN_ERR 2279 printk(KERN_ERR
2581 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2582 goto err_freebuddy; 2282 goto err_freebuddy;
2583 } 2283 }
2584 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2618 return 0; 2318 return 0;
2619 2319
2620err_freebuddy: 2320err_freebuddy:
2621 while (i >= 0) { 2321 while (i-- > 0)
2622 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2623 i--;
2624 }
2625 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2626err_freemeta: 2324err_freemeta:
2627 while (--i >= 0) 2325 while (i-- > 0)
2628 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2629 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2630err_freesgi: 2328err_freesgi:
@@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2808 return 0; 2506 return 0;
2809} 2507}
2810 2508
2811static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2812{ 2511{
2813 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2814 int err; 2513 int err;
@@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
2867 mb_debug("freed %u blocks in %u structures\n", count, count2); 2566 mb_debug("freed %u blocks in %u structures\n", count, count2);
2868} 2567}
2869 2568
2870#define EXT4_ROOT "ext4"
2871#define EXT4_MB_STATS_NAME "stats" 2569#define EXT4_MB_STATS_NAME "stats"
2872#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" 2570#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2873#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" 2571#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
@@ -3007,9 +2705,9 @@ int __init init_ext4_mballoc(void)
3007 return -ENOMEM; 2705 return -ENOMEM;
3008 } 2706 }
3009#ifdef CONFIG_PROC_FS 2707#ifdef CONFIG_PROC_FS
3010 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2708 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
3011 if (proc_root_ext4 == NULL) 2709 if (proc_root_ext4 == NULL)
3012 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2710 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
3013#endif 2711#endif
3014 return 0; 2712 return 0;
3015} 2713}
@@ -3020,7 +2718,7 @@ void exit_ext4_mballoc(void)
3020 kmem_cache_destroy(ext4_pspace_cachep); 2718 kmem_cache_destroy(ext4_pspace_cachep);
3021 kmem_cache_destroy(ext4_ac_cachep); 2719 kmem_cache_destroy(ext4_ac_cachep);
3022#ifdef CONFIG_PROC_FS 2720#ifdef CONFIG_PROC_FS
3023 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2721 remove_proc_entry("fs/ext4", NULL);
3024#endif 2722#endif
3025} 2723}
3026 2724
@@ -3029,7 +2727,8 @@ void exit_ext4_mballoc(void)
3029 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2727 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3030 * Returns 0 if success or error code 2728 * Returns 0 if success or error code
3031 */ 2729 */
3032static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2730static noinline_for_stack int
2731ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3033 handle_t *handle) 2732 handle_t *handle)
3034{ 2733{
3035 struct buffer_head *bitmap_bh = NULL; 2734 struct buffer_head *bitmap_bh = NULL;
@@ -3078,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3078 in_range(block, ext4_inode_table(sb, gdp), 2777 in_range(block, ext4_inode_table(sb, gdp),
3079 EXT4_SB(sb)->s_itb_per_group)) { 2778 EXT4_SB(sb)->s_itb_per_group)) {
3080 2779
3081 ext4_error(sb, __FUNCTION__, 2780 ext4_error(sb, __func__,
3082 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3083 block); 2782 block);
3084 } 2783 }
@@ -3102,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3102 ac->ac_b_ex.fe_group, 2801 ac->ac_b_ex.fe_group,
3103 gdp)); 2802 gdp));
3104 } 2803 }
3105 gdp->bg_free_blocks_count = 2804 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3106 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3107 - ac->ac_b_ex.fe_len);
3108 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2805 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3109 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2806 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3110 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2807 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3138,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2835 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3139 else 2836 else
3140 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2837 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3141 mb_debug("#%u: goal %lu blocks for locality group\n", 2838 mb_debug("#%u: goal %u blocks for locality group\n",
3142 current->pid, ac->ac_g_ex.fe_len); 2839 current->pid, ac->ac_g_ex.fe_len);
3143} 2840}
3144 2841
@@ -3146,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3146 * Normalization means making request better in terms of 2843 * Normalization means making request better in terms of
3147 * size and alignment 2844 * size and alignment
3148 */ 2845 */
3149static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2846static noinline_for_stack void
2847ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3150 struct ext4_allocation_request *ar) 2848 struct ext4_allocation_request *ar)
3151{ 2849{
3152 int bsbits, max; 2850 int bsbits, max;
3153 ext4_lblk_t end; 2851 ext4_lblk_t end;
3154 struct list_head *cur;
3155 loff_t size, orig_size, start_off; 2852 loff_t size, orig_size, start_off;
3156 ext4_lblk_t start, orig_start; 2853 ext4_lblk_t start, orig_start;
3157 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa;
3158 2856
3159 /* do normalize only data requests, metadata requests 2857 /* do normalize only data requests, metadata requests
3160 do not need preallocation */ 2858 do not need preallocation */
@@ -3240,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3240 2938
3241 /* check we don't cross already preallocated blocks */ 2939 /* check we don't cross already preallocated blocks */
3242 rcu_read_lock(); 2940 rcu_read_lock();
3243 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2941 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244 struct ext4_prealloc_space *pa;
3245 unsigned long pa_end; 2942 unsigned long pa_end;
3246 2943
3247 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3248
3249 if (pa->pa_deleted) 2944 if (pa->pa_deleted)
3250 continue; 2945 continue;
3251 spin_lock(&pa->pa_lock); 2946 spin_lock(&pa->pa_lock);
@@ -3287,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3287 2982
3288 /* XXX: extra loop to check we really don't overlap preallocations */ 2983 /* XXX: extra loop to check we really don't overlap preallocations */
3289 rcu_read_lock(); 2984 rcu_read_lock();
3290 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2985 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3291 struct ext4_prealloc_space *pa;
3292 unsigned long pa_end; 2986 unsigned long pa_end;
3293 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3294 spin_lock(&pa->pa_lock); 2987 spin_lock(&pa->pa_lock);
3295 if (pa->pa_deleted == 0) { 2988 if (pa->pa_deleted == 0) {
3296 pa_end = pa->pa_lstart + pa->pa_len; 2989 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3382,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3382 BUG_ON(pa->pa_free < len); 3075 BUG_ON(pa->pa_free < len);
3383 pa->pa_free -= len; 3076 pa->pa_free -= len;
3384 3077
3385 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3078 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3386} 3079}
3387 3080
3388/* 3081/*
@@ -3412,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3412/* 3105/*
3413 * search goal blocks in preallocated space 3106 * search goal blocks in preallocated space
3414 */ 3107 */
3415static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3108static noinline_for_stack int
3109ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3416{ 3110{
3417 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3111 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3418 struct ext4_locality_group *lg; 3112 struct ext4_locality_group *lg;
3419 struct ext4_prealloc_space *pa; 3113 struct ext4_prealloc_space *pa;
3420 struct list_head *cur;
3421 3114
3422 /* only data can be preallocated */ 3115 /* only data can be preallocated */
3423 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3116 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3425,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3425 3118
3426 /* first, try per-file preallocation */ 3119 /* first, try per-file preallocation */
3427 rcu_read_lock(); 3120 rcu_read_lock();
3428 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3121 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3429 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3430 3122
3431 /* all fields in this condition don't change, 3123 /* all fields in this condition don't change,
3432 * so we can skip locking for them */ 3124 * so we can skip locking for them */
@@ -3458,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3458 return 0; 3150 return 0;
3459 3151
3460 rcu_read_lock(); 3152 rcu_read_lock();
3461 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3153 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3462 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3463 spin_lock(&pa->pa_lock); 3154 spin_lock(&pa->pa_lock);
3464 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3155 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3465 atomic_inc(&pa->pa_count); 3156 atomic_inc(&pa->pa_count);
@@ -3579,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3579/* 3270/*
3580 * creates new preallocated space for given inode 3271 * creates new preallocated space for given inode
3581 */ 3272 */
3582static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3273static noinline_for_stack int
3274ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3583{ 3275{
3584 struct super_block *sb = ac->ac_sb; 3276 struct super_block *sb = ac->ac_sb;
3585 struct ext4_prealloc_space *pa; 3277 struct ext4_prealloc_space *pa;
@@ -3666,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3666/* 3358/*
3667 * creates new preallocated space for locality group inodes belongs to 3359 * creates new preallocated space for locality group inodes belongs to
3668 */ 3360 */
3669static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3361static noinline_for_stack int
3362ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3670{ 3363{
3671 struct super_block *sb = ac->ac_sb; 3364 struct super_block *sb = ac->ac_sb;
3672 struct ext4_locality_group *lg; 3365 struct ext4_locality_group *lg;
@@ -3739,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3739 * the caller MUST hold group/inode locks. 3432 * the caller MUST hold group/inode locks.
3740 * TODO: optimize the case when there are no in-core structures yet 3433 * TODO: optimize the case when there are no in-core structures yet
3741 */ 3434 */
3742static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3435static noinline_for_stack int
3743 struct buffer_head *bitmap_bh, 3436ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3744 struct ext4_prealloc_space *pa) 3437 struct ext4_prealloc_space *pa,
3438 struct ext4_allocation_context *ac)
3745{ 3439{
3746 struct ext4_allocation_context *ac;
3747 struct super_block *sb = e4b->bd_sb; 3440 struct super_block *sb = e4b->bd_sb;
3748 struct ext4_sb_info *sbi = EXT4_SB(sb); 3441 struct ext4_sb_info *sbi = EXT4_SB(sb);
3749 unsigned long end; 3442 unsigned long end;
@@ -3759,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3759 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3452 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3760 end = bit + pa->pa_len; 3453 end = bit + pa->pa_len;
3761 3454
3762 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3763
3764 if (ac) { 3455 if (ac) {
3765 ac->ac_sb = sb; 3456 ac->ac_sb = sb;
3766 ac->ac_inode = pa->pa_inode; 3457 ac->ac_inode = pa->pa_inode;
@@ -3797,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 pa, (unsigned long) pa->pa_lstart, 3488 pa, (unsigned long) pa->pa_lstart,
3798 (unsigned long) pa->pa_pstart, 3489 (unsigned long) pa->pa_pstart,
3799 (unsigned long) pa->pa_len); 3490 (unsigned long) pa->pa_len);
3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3491 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3801 free, pa->pa_free); 3492 free, pa->pa_free);
3802 /* 3493 /*
3803 * pa is already deleted so we use the value obtained 3494 * pa is already deleted so we use the value obtained
@@ -3805,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3805 */ 3496 */
3806 } 3497 }
3807 atomic_add(free, &sbi->s_mb_discarded); 3498 atomic_add(free, &sbi->s_mb_discarded);
3808 if (ac)
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 3499
3811 return err; 3500 return err;
3812} 3501}
3813 3502
3814static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3503static noinline_for_stack int
3815 struct ext4_prealloc_space *pa) 3504ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3505 struct ext4_prealloc_space *pa,
3506 struct ext4_allocation_context *ac)
3816{ 3507{
3817 struct ext4_allocation_context *ac;
3818 struct super_block *sb = e4b->bd_sb; 3508 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group; 3509 ext4_group_t group;
3820 ext4_grpblk_t bit; 3510 ext4_grpblk_t bit;
3821 3511
3822 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3823
3824 if (ac) 3512 if (ac)
3825 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3513 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3826 3514
@@ -3838,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3838 ac->ac_b_ex.fe_len = pa->pa_len; 3526 ac->ac_b_ex.fe_len = pa->pa_len;
3839 ac->ac_b_ex.fe_logical = 0; 3527 ac->ac_b_ex.fe_logical = 0;
3840 ext4_mb_store_history(ac); 3528 ext4_mb_store_history(ac);
3841 kmem_cache_free(ext4_ac_cachep, ac);
3842 } 3529 }
3843 3530
3844 return 0; 3531 return 0;
@@ -3853,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3853 * - how many do we discard 3540 * - how many do we discard
3854 * 1) how many requested 3541 * 1) how many requested
3855 */ 3542 */
3856static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3543static noinline_for_stack int
3544ext4_mb_discard_group_preallocations(struct super_block *sb,
3857 ext4_group_t group, int needed) 3545 ext4_group_t group, int needed)
3858{ 3546{
3859 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3860 struct buffer_head *bitmap_bh = NULL; 3548 struct buffer_head *bitmap_bh = NULL;
3861 struct ext4_prealloc_space *pa, *tmp; 3549 struct ext4_prealloc_space *pa, *tmp;
3550 struct ext4_allocation_context *ac;
3862 struct list_head list; 3551 struct list_head list;
3863 struct ext4_buddy e4b; 3552 struct ext4_buddy e4b;
3864 int err; 3553 int err;
@@ -3886,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3886 grp = ext4_get_group_info(sb, group); 3575 grp = ext4_get_group_info(sb, group);
3887 INIT_LIST_HEAD(&list); 3576 INIT_LIST_HEAD(&list);
3888 3577
3578 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3889repeat: 3579repeat:
3890 ext4_lock_group(sb, group); 3580 ext4_lock_group(sb, group);
3891 list_for_each_entry_safe(pa, tmp, 3581 list_for_each_entry_safe(pa, tmp,
@@ -3940,9 +3630,9 @@ repeat:
3940 spin_unlock(pa->pa_obj_lock); 3630 spin_unlock(pa->pa_obj_lock);
3941 3631
3942 if (pa->pa_linear) 3632 if (pa->pa_linear)
3943 ext4_mb_release_group_pa(&e4b, pa); 3633 ext4_mb_release_group_pa(&e4b, pa, ac);
3944 else 3634 else
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3635 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3946 3636
3947 list_del(&pa->u.pa_tmp_list); 3637 list_del(&pa->u.pa_tmp_list);
3948 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3638 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3950,6 +3640,8 @@ repeat:
3950 3640
3951out: 3641out:
3952 ext4_unlock_group(sb, group); 3642 ext4_unlock_group(sb, group);
3643 if (ac)
3644 kmem_cache_free(ext4_ac_cachep, ac);
3953 ext4_mb_release_desc(&e4b); 3645 ext4_mb_release_desc(&e4b);
3954 put_bh(bitmap_bh); 3646 put_bh(bitmap_bh);
3955 return free; 3647 return free;
@@ -3970,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3970 struct super_block *sb = inode->i_sb; 3662 struct super_block *sb = inode->i_sb;
3971 struct buffer_head *bitmap_bh = NULL; 3663 struct buffer_head *bitmap_bh = NULL;
3972 struct ext4_prealloc_space *pa, *tmp; 3664 struct ext4_prealloc_space *pa, *tmp;
3665 struct ext4_allocation_context *ac;
3973 ext4_group_t group = 0; 3666 ext4_group_t group = 0;
3974 struct list_head list; 3667 struct list_head list;
3975 struct ext4_buddy e4b; 3668 struct ext4_buddy e4b;
@@ -3984,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3984 3677
3985 INIT_LIST_HEAD(&list); 3678 INIT_LIST_HEAD(&list);
3986 3679
3680 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3987repeat: 3681repeat:
3988 /* first, collect all pa's in the inode */ 3682 /* first, collect all pa's in the inode */
3989 spin_lock(&ei->i_prealloc_lock); 3683 spin_lock(&ei->i_prealloc_lock);
@@ -4048,7 +3742,7 @@ repeat:
4048 3742
4049 ext4_lock_group(sb, group); 3743 ext4_lock_group(sb, group);
4050 list_del(&pa->pa_group_list); 3744 list_del(&pa->pa_group_list);
4051 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3745 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4052 ext4_unlock_group(sb, group); 3746 ext4_unlock_group(sb, group);
4053 3747
4054 ext4_mb_release_desc(&e4b); 3748 ext4_mb_release_desc(&e4b);
@@ -4057,6 +3751,8 @@ repeat:
4057 list_del(&pa->u.pa_tmp_list); 3751 list_del(&pa->u.pa_tmp_list);
4058 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3752 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4059 } 3753 }
3754 if (ac)
3755 kmem_cache_free(ext4_ac_cachep, ac);
4060} 3756}
4061 3757
4062/* 3758/*
@@ -4116,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3812 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4117 start, pa->pa_len); 3813 start, pa->pa_len);
4118 } 3814 }
4119 ext4_lock_group(sb, i); 3815 ext4_unlock_group(sb, i);
4120 3816
4121 if (grp->bb_free == 0) 3817 if (grp->bb_free == 0)
4122 continue; 3818 continue;
@@ -4175,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4175 mutex_lock(&ac->ac_lg->lg_mutex); 3871 mutex_lock(&ac->ac_lg->lg_mutex);
4176} 3872}
4177 3873
4178static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3874static noinline_for_stack int
3875ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4179 struct ext4_allocation_request *ar) 3876 struct ext4_allocation_request *ar)
4180{ 3877{
4181 struct super_block *sb = ar->inode->i_sb; 3878 struct super_block *sb = ar->inode->i_sb;
@@ -4406,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4406 ext4_mb_free_committed_blocks(sb); 4103 ext4_mb_free_committed_blocks(sb);
4407} 4104}
4408 4105
4409static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4106static noinline_for_stack int
4107ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4410 ext4_group_t group, ext4_grpblk_t block, int count) 4108 ext4_group_t group, ext4_grpblk_t block, int count)
4411{ 4109{
4412 struct ext4_group_info *db = e4b->bd_info; 4110 struct ext4_group_info *db = e4b->bd_info;
@@ -4497,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4497 if (block < le32_to_cpu(es->s_first_data_block) || 4195 if (block < le32_to_cpu(es->s_first_data_block) ||
4498 block + count < block || 4196 block + count < block ||
4499 block + count > ext4_blocks_count(es)) { 4197 block + count > ext4_blocks_count(es)) {
4500 ext4_error(sb, __FUNCTION__, 4198 ext4_error(sb, __func__,
4501 "Freeing blocks not in datazone - " 4199 "Freeing blocks not in datazone - "
4502 "block = %lu, count = %lu", block, count); 4200 "block = %lu, count = %lu", block, count);
4503 goto error_return; 4201 goto error_return;
@@ -4538,7 +4236,7 @@ do_more:
4538 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4236 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4539 EXT4_SB(sb)->s_itb_per_group)) { 4237 EXT4_SB(sb)->s_itb_per_group)) {
4540 4238
4541 ext4_error(sb, __FUNCTION__, 4239 ext4_error(sb, __func__,
4542 "Freeing blocks in system zone - " 4240 "Freeing blocks in system zone - "
4543 "Block = %lu, count = %lu", block, count); 4241 "Block = %lu, count = %lu", block, count);
4544 } 4242 }
@@ -4596,8 +4294,7 @@ do_more:
4596 } 4294 }
4597 4295
4598 spin_lock(sb_bgl_lock(sbi, block_group)); 4296 spin_lock(sb_bgl_lock(sbi, block_group));
4599 gdp->bg_free_blocks_count = 4297 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4600 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4601 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4298 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4602 spin_unlock(sb_bgl_lock(sbi, block_group)); 4299 spin_unlock(sb_bgl_lock(sbi, block_group));
4603 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4300 percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK allocator runs consistency checks over
27 * structures. these checks slow things down a lot
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
37/*
38 */
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, which purpose is to pack requests
83 * as close each to other as possible to produce smooth I/O traffic
84 * We use locality group prealloc space for stream request.
85 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the bext found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map build with the tmp inode. 366 * We have the extent map build with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap write to holes. If we have allocated new blocks we fail
526 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 13383ba18f1d..52dd0679a4e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -813,7 +812,8 @@ static int ext4_acquire_dquot(struct dquot *dquot);
813static int ext4_release_dquot(struct dquot *dquot); 812static int ext4_release_dquot(struct dquot *dquot);
814static int ext4_mark_dquot_dirty(struct dquot *dquot); 813static int ext4_mark_dquot_dirty(struct dquot *dquot);
815static int ext4_write_info(struct super_block *sb, int type); 814static int ext4_write_info(struct super_block *sb, int type);
816static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); 815static int ext4_quota_on(struct super_block *sb, int type, int format_id,
816 char *path, int remount);
817static int ext4_quota_on_mount(struct super_block *sb, int type); 817static int ext4_quota_on_mount(struct super_block *sb, int type);
818static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 818static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
819 size_t len, loff_t off); 819 size_t len, loff_t off);
@@ -944,8 +944,8 @@ static match_table_t tokens = {
944 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
945 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
946 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
947 {Opt_err, NULL},
948 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
949}; 949};
950 950
951static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -1387,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1387 * a plain journaled filesystem we can keep it set as 1387 * a plain journaled filesystem we can keep it set as
1388 * valid forever! :) 1388 * valid forever! :)
1389 */ 1389 */
1390 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1390 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1391#endif 1391#endif
1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1394 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1394 le16_add_cpu(&es->s_mnt_count, 1);
1395 es->s_mtime = cpu_to_le32(get_seconds()); 1395 es->s_mtime = cpu_to_le32(get_seconds());
1396 ext4_update_dynamic_rev(sb); 1396 ext4_update_dynamic_rev(sb);
1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1484,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1484 block_bitmap = ext4_block_bitmap(sb, gdp); 1484 block_bitmap = ext4_block_bitmap(sb, gdp);
1485 if (block_bitmap < first_block || block_bitmap > last_block) 1485 if (block_bitmap < first_block || block_bitmap > last_block)
1486 { 1486 {
1487 ext4_error (sb, "ext4_check_descriptors", 1487 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1488 "Block bitmap for group %lu" 1488 "Block bitmap for group %lu not in group "
1489 " not in group (block %llu)!", 1489 "(block %llu)!", i, block_bitmap);
1490 i, block_bitmap);
1491 return 0; 1490 return 0;
1492 } 1491 }
1493 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1492 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1494 if (inode_bitmap < first_block || inode_bitmap > last_block) 1493 if (inode_bitmap < first_block || inode_bitmap > last_block)
1495 { 1494 {
1496 ext4_error (sb, "ext4_check_descriptors", 1495 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1497 "Inode bitmap for group %lu" 1496 "Inode bitmap for group %lu not in group "
1498 " not in group (block %llu)!", 1497 "(block %llu)!", i, inode_bitmap);
1499 i, inode_bitmap);
1500 return 0; 1498 return 0;
1501 } 1499 }
1502 inode_table = ext4_inode_table(sb, gdp); 1500 inode_table = ext4_inode_table(sb, gdp);
1503 if (inode_table < first_block || 1501 if (inode_table < first_block ||
1504 inode_table + sbi->s_itb_per_group - 1 > last_block) 1502 inode_table + sbi->s_itb_per_group - 1 > last_block)
1505 { 1503 {
1506 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1507 "Inode table for group %lu" 1505 "Inode table for group %lu not in group "
1508 " not in group (block %llu)!", 1506 "(block %llu)!", i, inode_table);
1509 i, inode_table);
1510 return 0; 1507 return 0;
1511 } 1508 }
1512 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1509 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1513 ext4_error(sb, __FUNCTION__, 1510 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1514 "Checksum for group %lu failed (%u!=%u)\n", 1511 "Checksum for group %lu failed (%u!=%u)\n",
1515 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1512 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1516 gdp)), le16_to_cpu(gdp->bg_checksum)); 1513 gdp)), le16_to_cpu(gdp->bg_checksum));
1517 return 0; 1514 return 0;
1518 } 1515 }
1519 if (!flexbg_flag) 1516 if (!flexbg_flag)
@@ -1593,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1593 while (es->s_last_orphan) { 1590 while (es->s_last_orphan) {
1594 struct inode *inode; 1591 struct inode *inode;
1595 1592
1596 if (!(inode = 1593 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1597 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1594 if (IS_ERR(inode)) {
1598 es->s_last_orphan = 0; 1595 es->s_last_orphan = 0;
1599 break; 1596 break;
1600 } 1597 }
@@ -1604,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1604 if (inode->i_nlink) { 1601 if (inode->i_nlink) {
1605 printk(KERN_DEBUG 1602 printk(KERN_DEBUG
1606 "%s: truncating inode %lu to %Ld bytes\n", 1603 "%s: truncating inode %lu to %Ld bytes\n",
1607 __FUNCTION__, inode->i_ino, inode->i_size); 1604 __func__, inode->i_ino, inode->i_size);
1608 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1605 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1609 inode->i_ino, inode->i_size); 1606 inode->i_ino, inode->i_size);
1610 ext4_truncate(inode); 1607 ext4_truncate(inode);
@@ -1612,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1612 } else { 1609 } else {
1613 printk(KERN_DEBUG 1610 printk(KERN_DEBUG
1614 "%s: deleting unreferenced inode %lu\n", 1611 "%s: deleting unreferenced inode %lu\n",
1615 __FUNCTION__, inode->i_ino); 1612 __func__, inode->i_ino);
1616 jbd_debug(2, "deleting unreferenced inode %lu\n", 1613 jbd_debug(2, "deleting unreferenced inode %lu\n",
1617 inode->i_ino); 1614 inode->i_ino);
1618 nr_orphans++; 1615 nr_orphans++;
@@ -1632,7 +1629,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1632 /* Turn quotas off */ 1629 /* Turn quotas off */
1633 for (i = 0; i < MAXQUOTAS; i++) { 1630 for (i = 0; i < MAXQUOTAS; i++) {
1634 if (sb_dqopt(sb)->files[i]) 1631 if (sb_dqopt(sb)->files[i])
1635 vfs_quota_off(sb, i); 1632 vfs_quota_off(sb, i, 0);
1636 } 1633 }
1637#endif 1634#endif
1638 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1635 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2698,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2698 char nbuf[16]; 2695 char nbuf[16];
2699 2696
2700 errstr = ext4_decode_error(sb, j_errno, nbuf); 2697 errstr = ext4_decode_error(sb, j_errno, nbuf);
2701 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2698 ext4_warning(sb, __func__, "Filesystem error recorded "
2702 "from previous mount: %s", errstr); 2699 "from previous mount: %s", errstr);
2703 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2700 ext4_warning(sb, __func__, "Marking fs in need of "
2704 "filesystem check."); 2701 "filesystem check.");
2705 2702
2706 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2703 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2827,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2827 } 2824 }
2828 2825
2829 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2826 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2830 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2827 ext4_abort(sb, __func__, "Abort forced by user");
2831 2828
2832 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2829 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2833 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2830 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3039,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
3039 3036
3040 /* We may delete quota structure so we need to reserve enough blocks */ 3037 /* We may delete quota structure so we need to reserve enough blocks */
3041 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3038 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3042 if (IS_ERR(handle)) 3039 if (IS_ERR(handle)) {
3040 /*
3041 * We call dquot_drop() anyway to at least release references
3042 * to quota structures so that umount does not hang.
3043 */
3044 dquot_drop(inode);
3043 return PTR_ERR(handle); 3045 return PTR_ERR(handle);
3046 }
3044 ret = dquot_drop(inode); 3047 ret = dquot_drop(inode);
3045 err = ext4_journal_stop(handle); 3048 err = ext4_journal_stop(handle);
3046 if (!ret) 3049 if (!ret)
@@ -3143,7 +3146,7 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
3143 * Standard function to be called on quota_on 3146 * Standard function to be called on quota_on
3144 */ 3147 */
3145static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3148static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3146 char *path) 3149 char *path, int remount)
3147{ 3150{
3148 int err; 3151 int err;
3149 struct nameidata nd; 3152 struct nameidata nd;
@@ -3151,9 +3154,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3151 if (!test_opt(sb, QUOTA)) 3154 if (!test_opt(sb, QUOTA))
3152 return -EINVAL; 3155 return -EINVAL;
3153 /* Not journalling quota? */ 3156 /* Not journalling quota? */
3154 if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 3157 if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] &&
3155 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) 3158 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
3156 return vfs_quota_on(sb, type, format_id, path); 3159 return vfs_quota_on(sb, type, format_id, path, remount);
3157 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3160 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
3158 if (err) 3161 if (err)
3159 return err; 3162 return err;
@@ -3168,7 +3171,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3168 "EXT4-fs: Quota file not on filesystem root. " 3171 "EXT4-fs: Quota file not on filesystem root. "
3169 "Journalled quota will not work.\n"); 3172 "Journalled quota will not work.\n");
3170 path_put(&nd.path); 3173 path_put(&nd.path);
3171 return vfs_quota_on(sb, type, format_id, path); 3174 return vfs_quota_on(sb, type, format_id, path, remount);
3172} 3175}
3173 3176
3174/* Read data from quotafile - avoid pagecache and such because we cannot afford 3177/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..3fbc2c6c3d0e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
1166 if (!bh) 1164 if (!bh)
1167 goto cleanup; 1165 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1166 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1167 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1168 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1169 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1170 error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1339 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1340 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1341 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1342 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1343 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1344 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1345 goto cleanup;
1348 } 1346 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1347 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1348 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1349 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1350 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1351 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1352 goto cleanup;
@@ -1475,7 +1473,7 @@ again:
1475 } 1473 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1474 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1475 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1476 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1477 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1478 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1479 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d1..fda25479af26 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_panic(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __FUNCTION__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
248 goto out; 248 goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_panic(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __FUNCTION__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
259 goto out; 259 goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
289 __FUNCTION__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
292 return dclus; 292 return dclus;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 72cbcd61bd95..486725ee99ae 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -124,8 +124,8 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 124 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 126 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate, 127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
128 struct nls_table *nls) 128 int uni_xlate, struct nls_table *nls)
129{ 129{
130 wchar_t *ip, ec; 130 wchar_t *ip, ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
@@ -135,10 +135,11 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate,
135 ip = uni; 135 ip = uni;
136 op = ascii; 136 op = ascii;
137 137
138 while (*ip) { 138 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) {
139 ec = *ip++; 139 ec = *ip++;
140 if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { 140 if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) {
141 op += charlen; 141 op += charlen;
142 len -= charlen;
142 } else { 143 } else {
143 if (uni_xlate == 1) { 144 if (uni_xlate == 1) {
144 *op = ':'; 145 *op = ':';
@@ -149,16 +150,19 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate,
149 ec >>= 4; 150 ec >>= 4;
150 } 151 }
151 op += 5; 152 op += 5;
153 len -= 5;
152 } else { 154 } else {
153 *op++ = '?'; 155 *op++ = '?';
156 len--;
154 } 157 }
155 } 158 }
156 /* We have some slack there, so it's OK */
157 if (op>ascii+256) {
158 op = ascii + 256;
159 break;
160 }
161 } 159 }
160
161 if (unlikely(*ip)) {
162 printk(KERN_WARNING "FAT: filename was truncated while "
163 "converting.");
164 }
165
162 *op = 0; 166 *op = 0;
163 return (op - ascii); 167 return (op - ascii);
164} 168}
@@ -243,7 +247,7 @@ static int fat_parse_long(struct inode *dir, loff_t *pos,
243 unsigned char id, slot, slots, alias_checksum; 247 unsigned char id, slot, slots, alias_checksum;
244 248
245 if (!*unicode) { 249 if (!*unicode) {
246 *unicode = (wchar_t *)__get_free_page(GFP_KERNEL); 250 *unicode = __getname();
247 if (!*unicode) { 251 if (!*unicode) {
248 brelse(*bh); 252 brelse(*bh);
249 return -ENOMEM; 253 return -ENOMEM;
@@ -311,9 +315,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
311 struct nls_table *nls_io = sbi->nls_io; 315 struct nls_table *nls_io = sbi->nls_io;
312 struct nls_table *nls_disk = sbi->nls_disk; 316 struct nls_table *nls_disk = sbi->nls_disk;
313 wchar_t bufuname[14]; 317 wchar_t bufuname[14];
314 unsigned char xlate_len, nr_slots; 318 unsigned char nr_slots;
319 int xlate_len;
315 wchar_t *unicode = NULL; 320 wchar_t *unicode = NULL;
316 unsigned char work[MSDOS_NAME], bufname[260]; /* 256 + 4 */ 321 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL;
317 int uni_xlate = sbi->options.unicode_xlate; 323 int uni_xlate = sbi->options.unicode_xlate;
318 int utf8 = sbi->options.utf8; 324 int utf8 = sbi->options.utf8;
319 int anycase = (sbi->options.name_check != 's'); 325 int anycase = (sbi->options.name_check != 's');
@@ -321,6 +327,10 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
321 loff_t cpos = 0; 327 loff_t cpos = 0;
322 int chl, i, j, last_u, err; 328 int chl, i, j, last_u, err;
323 329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333
324 err = -ENOENT; 334 err = -ENOENT;
325 while(1) { 335 while(1) {
326 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
@@ -386,8 +396,8 @@ parse_record:
386 396
387 bufuname[last_u] = 0x0000; 397 bufuname[last_u] = 0x0000;
388 xlate_len = utf8 398 xlate_len = utf8
389 ?utf8_wcstombs(bufname, bufuname, sizeof(bufname)) 399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
390 :uni16_to_x8(bufname, bufuname, uni_xlate, nls_io); 400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
391 if (xlate_len == name_len) 401 if (xlate_len == name_len)
392 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
393 (anycase && !nls_strnicmp(nls_io, name, bufname, 403 (anycase && !nls_strnicmp(nls_io, name, bufname,
@@ -396,8 +406,8 @@ parse_record:
396 406
397 if (nr_slots) { 407 if (nr_slots) {
398 xlate_len = utf8 408 xlate_len = utf8
399 ?utf8_wcstombs(bufname, unicode, sizeof(bufname)) 409 ?utf8_wcstombs(bufname, unicode, PATH_MAX)
400 :uni16_to_x8(bufname, unicode, uni_xlate, nls_io); 410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
401 if (xlate_len != name_len) 411 if (xlate_len != name_len)
402 continue; 412 continue;
403 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 413 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
@@ -416,8 +426,10 @@ Found:
416 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
417 err = 0; 427 err = 0;
418EODir: 428EODir:
429 if (bufname)
430 __putname(bufname);
419 if (unicode) 431 if (unicode)
420 free_page((unsigned long)unicode); 432 __putname(unicode);
421 433
422 return err; 434 return err;
423} 435}
@@ -598,7 +610,7 @@ parse_record:
598 if (isvfat) { 610 if (isvfat) {
599 bufuname[j] = 0x0000; 611 bufuname[j] = 0x0000;
600 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname)) 612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
601 : uni16_to_x8(bufname, bufuname, uni_xlate, nls_io); 613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
602 } 614 }
603 615
604 fill_name = bufname; 616 fill_name = bufname;
@@ -607,10 +619,10 @@ parse_record:
607 /* convert the unicode long name. 261 is maximum size 619 /* convert the unicode long name. 261 is maximum size
608 * of unicode buffer. (13 * slots + nul) */ 620 * of unicode buffer. (13 * slots + nul) */
609 void *longname = unicode + 261; 621 void *longname = unicode + 261;
610 int buf_size = PAGE_SIZE - (261 * sizeof(unicode[0])); 622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
611 int long_len = utf8 623 int long_len = utf8
612 ? utf8_wcstombs(longname, unicode, buf_size) 624 ? utf8_wcstombs(longname, unicode, buf_size)
613 : uni16_to_x8(longname, unicode, uni_xlate, nls_io); 625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
614 626
615 if (!both) { 627 if (!both) {
616 fill_name = longname; 628 fill_name = longname;
@@ -640,7 +652,7 @@ EODir:
640FillFailed: 652FillFailed:
641 brelse(bh); 653 brelse(bh);
642 if (unicode) 654 if (unicode)
643 free_page((unsigned long)unicode); 655 __putname(unicode);
644out: 656out:
645 unlock_kernel(); 657 unlock_kernel();
646 return ret; 658 return ret;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 5fb366992b73..302e95c4af7e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -450,7 +450,8 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
450 BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */ 450 BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */
451 451
452 lock_fat(sbi); 452 lock_fat(sbi);
453 if (sbi->free_clusters != -1 && sbi->free_clusters < nr_cluster) { 453 if (sbi->free_clusters != -1 && sbi->free_clus_valid &&
454 sbi->free_clusters < nr_cluster) {
454 unlock_fat(sbi); 455 unlock_fat(sbi);
455 return -ENOSPC; 456 return -ENOSPC;
456 } 457 }
@@ -504,6 +505,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
504 505
505 /* Couldn't allocate the free entries */ 506 /* Couldn't allocate the free entries */
506 sbi->free_clusters = 0; 507 sbi->free_clusters = 0;
508 sbi->free_clus_valid = 1;
507 sb->s_dirt = 1; 509 sb->s_dirt = 1;
508 err = -ENOSPC; 510 err = -ENOSPC;
509 511
@@ -544,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
544 goto error; 546 goto error;
545 } else if (cluster == FAT_ENT_FREE) { 547 } else if (cluster == FAT_ENT_FREE) {
546 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
547 __FUNCTION__); 549 __func__);
548 err = -EIO; 550 err = -EIO;
549 goto error; 551 goto error;
550 } 552 }
@@ -583,8 +585,6 @@ error:
583 brelse(bhs[i]); 585 brelse(bhs[i]);
584 unlock_fat(sbi); 586 unlock_fat(sbi);
585 587
586 fat_clusters_flush(sb);
587
588 return err; 588 return err;
589} 589}
590 590
@@ -615,7 +615,7 @@ int fat_count_free_clusters(struct super_block *sb)
615 int err = 0, free; 615 int err = 0, free;
616 616
617 lock_fat(sbi); 617 lock_fat(sbi);
618 if (sbi->free_clusters != -1) 618 if (sbi->free_clusters != -1 && sbi->free_clus_valid)
619 goto out; 619 goto out;
620 620
621 reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits; 621 reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
@@ -643,6 +643,7 @@ int fat_count_free_clusters(struct super_block *sb)
643 } while (fat_ent_next(sbi, &fatent)); 643 } while (fat_ent_next(sbi, &fatent));
644 } 644 }
645 sbi->free_clusters = free; 645 sbi->free_clusters = free;
646 sbi->free_clus_valid = 1;
646 sb->s_dirt = 1; 647 sb->s_dirt = 1;
647 fatent_brelse(&fatent); 648 fatent_brelse(&fatent);
648out: 649out:
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 2a3bed967041..27cc1164ec36 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -157,104 +157,6 @@ out:
157 return err; 157 return err;
158} 158}
159 159
160static int check_mode(const struct msdos_sb_info *sbi, mode_t mode)
161{
162 mode_t req = mode & ~S_IFMT;
163
164 /*
165 * Of the r and x bits, all (subject to umask) must be present. Of the
166 * w bits, either all (subject to umask) or none must be present.
167 */
168
169 if (S_ISREG(mode)) {
170 req &= ~sbi->options.fs_fmask;
171
172 if ((req & (S_IRUGO | S_IXUGO)) !=
173 ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_fmask))
174 return -EPERM;
175
176 if ((req & S_IWUGO) != 0 &&
177 (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_fmask))
178 return -EPERM;
179 } else if (S_ISDIR(mode)) {
180 req &= ~sbi->options.fs_dmask;
181
182 if ((req & (S_IRUGO | S_IXUGO)) !=
183 ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_dmask))
184 return -EPERM;
185
186 if ((req & S_IWUGO) != 0 &&
187 (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_dmask))
188 return -EPERM;
189 } else {
190 return -EPERM;
191 }
192
193 return 0;
194}
195
196int fat_notify_change(struct dentry *dentry, struct iattr *attr)
197{
198 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
199 struct inode *inode = dentry->d_inode;
200 int mask, error = 0;
201
202 lock_kernel();
203
204 /*
205 * Expand the file. Since inode_setattr() updates ->i_size
206 * before calling the ->truncate(), but FAT needs to fill the
207 * hole before it.
208 */
209 if (attr->ia_valid & ATTR_SIZE) {
210 if (attr->ia_size > inode->i_size) {
211 error = fat_cont_expand(inode, attr->ia_size);
212 if (error || attr->ia_valid == ATTR_SIZE)
213 goto out;
214 attr->ia_valid &= ~ATTR_SIZE;
215 }
216 }
217
218 error = inode_change_ok(inode, attr);
219 if (error) {
220 if (sbi->options.quiet)
221 error = 0;
222 goto out;
223 }
224 if (((attr->ia_valid & ATTR_UID) &&
225 (attr->ia_uid != sbi->options.fs_uid)) ||
226 ((attr->ia_valid & ATTR_GID) &&
227 (attr->ia_gid != sbi->options.fs_gid)))
228 error = -EPERM;
229
230 if (error) {
231 if (sbi->options.quiet)
232 error = 0;
233 goto out;
234 }
235
236 if (attr->ia_valid & ATTR_MODE) {
237 error = check_mode(sbi, attr->ia_mode);
238 if (error != 0 && !sbi->options.quiet)
239 goto out;
240 }
241
242 error = inode_setattr(inode, attr);
243 if (error)
244 goto out;
245
246 if (S_ISDIR(inode->i_mode))
247 mask = sbi->options.fs_dmask;
248 else
249 mask = sbi->options.fs_fmask;
250 inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
251out:
252 unlock_kernel();
253 return error;
254}
255
256EXPORT_SYMBOL_GPL(fat_notify_change);
257
258/* Free all clusters after the skip'th cluster. */ 160/* Free all clusters after the skip'th cluster. */
259static int fat_free(struct inode *inode, int skip) 161static int fat_free(struct inode *inode, int skip)
260{ 162{
@@ -306,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
306 } else if (ret == FAT_ENT_FREE) { 208 } else if (ret == FAT_ENT_FREE) {
307 fat_fs_panic(sb, 209 fat_fs_panic(sb,
308 "%s: invalid cluster chain (i_pos %lld)", 210 "%s: invalid cluster chain (i_pos %lld)",
309 __FUNCTION__, MSDOS_I(inode)->i_pos); 211 __func__, MSDOS_I(inode)->i_pos);
310 ret = -EIO; 212 ret = -EIO;
311 } else if (ret > 0) { 213 } else if (ret > 0) {
312 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); 214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
@@ -355,8 +257,112 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
355} 257}
356EXPORT_SYMBOL_GPL(fat_getattr); 258EXPORT_SYMBOL_GPL(fat_getattr);
357 259
260static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode,
261 mode_t mode)
262{
263 mode_t mask, req = mode & ~S_IFMT;
264
265 if (S_ISREG(mode))
266 mask = sbi->options.fs_fmask;
267 else
268 mask = sbi->options.fs_dmask;
269
270 /*
271 * Of the r and x bits, all (subject to umask) must be present. Of the
272 * w bits, either all (subject to umask) or none must be present.
273 */
274 req &= ~mask;
275 if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
276 return -EPERM;
277 if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask)))
278 return -EPERM;
279
280 return 0;
281}
282
283static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
284{
285 mode_t allow_utime = sbi->options.allow_utime;
286
287 if (current->fsuid != inode->i_uid) {
288 if (in_group_p(inode->i_gid))
289 allow_utime >>= 3;
290 if (allow_utime & MAY_WRITE)
291 return 1;
292 }
293
294 /* use a default check */
295 return 0;
296}
297
298int fat_setattr(struct dentry *dentry, struct iattr *attr)
299{
300 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
301 struct inode *inode = dentry->d_inode;
302 int mask, error = 0;
303 unsigned int ia_valid;
304
305 lock_kernel();
306
307 /*
308 * Expand the file. Since inode_setattr() updates ->i_size
309 * before calling the ->truncate(), but FAT needs to fill the
310 * hole before it.
311 */
312 if (attr->ia_valid & ATTR_SIZE) {
313 if (attr->ia_size > inode->i_size) {
314 error = fat_cont_expand(inode, attr->ia_size);
315 if (error || attr->ia_valid == ATTR_SIZE)
316 goto out;
317 attr->ia_valid &= ~ATTR_SIZE;
318 }
319 }
320
321 /* Check for setting the inode time. */
322 ia_valid = attr->ia_valid;
323 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
324 if (fat_allow_set_time(sbi, inode))
325 attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
326 }
327
328 error = inode_change_ok(inode, attr);
329 attr->ia_valid = ia_valid;
330 if (error) {
331 if (sbi->options.quiet)
332 error = 0;
333 goto out;
334 }
335 if (((attr->ia_valid & ATTR_UID) &&
336 (attr->ia_uid != sbi->options.fs_uid)) ||
337 ((attr->ia_valid & ATTR_GID) &&
338 (attr->ia_gid != sbi->options.fs_gid)) ||
339 ((attr->ia_valid & ATTR_MODE) &&
340 fat_check_mode(sbi, inode, attr->ia_mode) < 0))
341 error = -EPERM;
342
343 if (error) {
344 if (sbi->options.quiet)
345 error = 0;
346 goto out;
347 }
348
349 error = inode_setattr(inode, attr);
350 if (error)
351 goto out;
352
353 if (S_ISDIR(inode->i_mode))
354 mask = sbi->options.fs_dmask;
355 else
356 mask = sbi->options.fs_fmask;
357 inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
358out:
359 unlock_kernel();
360 return error;
361}
362EXPORT_SYMBOL_GPL(fat_setattr);
363
358const struct inode_operations fat_file_inode_operations = { 364const struct inode_operations fat_file_inode_operations = {
359 .truncate = fat_truncate, 365 .truncate = fat_truncate,
360 .setattr = fat_notify_change, 366 .setattr = fat_setattr,
361 .getattr = fat_getattr, 367 .getattr = fat_getattr,
362}; 368};
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 53f3cf62b7c1..4e0a3dd9d677 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -433,11 +433,8 @@ EXPORT_SYMBOL_GPL(fat_build_inode);
433static void fat_delete_inode(struct inode *inode) 433static void fat_delete_inode(struct inode *inode)
434{ 434{
435 truncate_inode_pages(&inode->i_data, 0); 435 truncate_inode_pages(&inode->i_data, 0);
436 436 inode->i_size = 0;
437 if (!is_bad_inode(inode)) { 437 fat_truncate(inode);
438 inode->i_size = 0;
439 fat_truncate(inode);
440 }
441 clear_inode(inode); 438 clear_inode(inode);
442} 439}
443 440
@@ -445,8 +442,6 @@ static void fat_clear_inode(struct inode *inode)
445{ 442{
446 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 443 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
447 444
448 if (is_bad_inode(inode))
449 return;
450 lock_kernel(); 445 lock_kernel();
451 spin_lock(&sbi->inode_hash_lock); 446 spin_lock(&sbi->inode_hash_lock);
452 fat_cache_inval_inode(inode); 447 fat_cache_inval_inode(inode);
@@ -542,7 +537,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
542 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 537 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
543 538
544 /* If the count of free cluster is still unknown, counts it here. */ 539 /* If the count of free cluster is still unknown, counts it here. */
545 if (sbi->free_clusters == -1) { 540 if (sbi->free_clusters == -1 || !sbi->free_clus_valid) {
546 int err = fat_count_free_clusters(dentry->d_sb); 541 int err = fat_count_free_clusters(dentry->d_sb);
547 if (err) 542 if (err)
548 return err; 543 return err;
@@ -790,6 +785,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
790 seq_printf(m, ",gid=%u", opts->fs_gid); 785 seq_printf(m, ",gid=%u", opts->fs_gid);
791 seq_printf(m, ",fmask=%04o", opts->fs_fmask); 786 seq_printf(m, ",fmask=%04o", opts->fs_fmask);
792 seq_printf(m, ",dmask=%04o", opts->fs_dmask); 787 seq_printf(m, ",dmask=%04o", opts->fs_dmask);
788 if (opts->allow_utime)
789 seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
793 if (sbi->nls_disk) 790 if (sbi->nls_disk)
794 seq_printf(m, ",codepage=%s", sbi->nls_disk->charset); 791 seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
795 if (isvfat) { 792 if (isvfat) {
@@ -845,9 +842,9 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
845 842
846enum { 843enum {
847 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid, 844 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
848 Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_usefree, Opt_nocase, 845 Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
849 Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable, 846 Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
850 Opt_dots, Opt_nodots, 847 Opt_immutable, Opt_dots, Opt_nodots,
851 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
852 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
853 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
@@ -866,6 +863,7 @@ static match_table_t fat_tokens = {
866 {Opt_umask, "umask=%o"}, 863 {Opt_umask, "umask=%o"},
867 {Opt_dmask, "dmask=%o"}, 864 {Opt_dmask, "dmask=%o"},
868 {Opt_fmask, "fmask=%o"}, 865 {Opt_fmask, "fmask=%o"},
866 {Opt_allow_utime, "allow_utime=%o"},
869 {Opt_codepage, "codepage=%u"}, 867 {Opt_codepage, "codepage=%u"},
870 {Opt_usefree, "usefree"}, 868 {Opt_usefree, "usefree"},
871 {Opt_nocase, "nocase"}, 869 {Opt_nocase, "nocase"},
@@ -937,6 +935,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
937 opts->fs_uid = current->uid; 935 opts->fs_uid = current->uid;
938 opts->fs_gid = current->gid; 936 opts->fs_gid = current->gid;
939 opts->fs_fmask = opts->fs_dmask = current->fs->umask; 937 opts->fs_fmask = opts->fs_dmask = current->fs->umask;
938 opts->allow_utime = -1;
940 opts->codepage = fat_default_codepage; 939 opts->codepage = fat_default_codepage;
941 opts->iocharset = fat_default_iocharset; 940 opts->iocharset = fat_default_iocharset;
942 if (is_vfat) 941 if (is_vfat)
@@ -1024,6 +1023,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1024 return 0; 1023 return 0;
1025 opts->fs_fmask = option; 1024 opts->fs_fmask = option;
1026 break; 1025 break;
1026 case Opt_allow_utime:
1027 if (match_octal(&args[0], &option))
1028 return 0;
1029 opts->allow_utime = option & (S_IWGRP | S_IWOTH);
1030 break;
1027 case Opt_codepage: 1031 case Opt_codepage:
1028 if (match_int(&args[0], &option)) 1032 if (match_int(&args[0], &option))
1029 return 0; 1033 return 0;
@@ -1106,6 +1110,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1106 " for FAT filesystems, filesystem will be case sensitive!\n"); 1110 " for FAT filesystems, filesystem will be case sensitive!\n");
1107 } 1111 }
1108 1112
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */
1114 if (opts->allow_utime == (unsigned short)-1)
1115 opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
1109 if (opts->unicode_xlate) 1116 if (opts->unicode_xlate)
1110 opts->utf8 = 0; 1117 opts->utf8 = 0;
1111 1118
@@ -1208,18 +1215,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1208 */ 1215 */
1209 1216
1210 media = b->media; 1217 media = b->media;
1211 if (!FAT_VALID_MEDIA(media)) { 1218 if (!fat_valid_media(media)) {
1212 if (!silent) 1219 if (!silent)
1213 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1220 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
1214 media); 1221 media);
1215 brelse(bh); 1222 brelse(bh);
1216 goto out_invalid; 1223 goto out_invalid;
1217 } 1224 }
1218 logical_sector_size = 1225 logical_sector_size = get_unaligned_le16(&b->sector_size);
1219 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1220 if (!is_power_of_2(logical_sector_size) 1226 if (!is_power_of_2(logical_sector_size)
1221 || (logical_sector_size < 512) 1227 || (logical_sector_size < 512)
1222 || (PAGE_CACHE_SIZE < logical_sector_size)) { 1228 || (logical_sector_size > 4096)) {
1223 if (!silent) 1229 if (!silent)
1224 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1230 printk(KERN_ERR "FAT: bogus logical sector size %u\n",
1225 logical_sector_size); 1231 logical_sector_size);
@@ -1267,6 +1273,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1267 sbi->fat_length = le16_to_cpu(b->fat_length); 1273 sbi->fat_length = le16_to_cpu(b->fat_length);
1268 sbi->root_cluster = 0; 1274 sbi->root_cluster = 0;
1269 sbi->free_clusters = -1; /* Don't know yet */ 1275 sbi->free_clusters = -1; /* Don't know yet */
1276 sbi->free_clus_valid = 0;
1270 sbi->prev_free = FAT_START_ENT; 1277 sbi->prev_free = FAT_START_ENT;
1271 1278
1272 if (!sbi->fat_length && b->fat32_length) { 1279 if (!sbi->fat_length && b->fat32_length) {
@@ -1302,8 +1309,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1302 sbi->fsinfo_sector); 1309 sbi->fsinfo_sector);
1303 } else { 1310 } else {
1304 if (sbi->options.usefree) 1311 if (sbi->options.usefree)
1305 sbi->free_clusters = 1312 sbi->free_clus_valid = 1;
1306 le32_to_cpu(fsinfo->free_clusters); 1313 sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters);
1307 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster); 1314 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
1308 } 1315 }
1309 1316
@@ -1314,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1314 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1321 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1315 1322
1316 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; 1323 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
1317 sbi->dir_entries = 1324 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1318 le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
1319 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1325 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1320 if (!silent) 1326 if (!silent)
1321 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1327 printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1327,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1327 rootdir_sectors = sbi->dir_entries 1333 rootdir_sectors = sbi->dir_entries
1328 * sizeof(struct msdos_dir_entry) / sb->s_blocksize; 1334 * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
1329 sbi->data_start = sbi->dir_start + rootdir_sectors; 1335 sbi->data_start = sbi->dir_start + rootdir_sectors;
1330 total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); 1336 total_sectors = get_unaligned_le16(&b->sectors);
1331 if (total_sectors == 0) 1337 if (total_sectors == 0)
1332 total_sectors = le32_to_cpu(b->total_sect); 1338 total_sectors = le32_to_cpu(b->total_sect);
1333 1339
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e632da761fc1..bfd776509a72 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/fdtable.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/dnotify.h> 14#include <linux/dnotify.h>
14#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
@@ -55,14 +56,16 @@ static int get_close_on_exec(unsigned int fd)
55 * file_lock held for write. 56 * file_lock held for write.
56 */ 57 */
57 58
58static int locate_fd(struct files_struct *files, 59static int locate_fd(unsigned int orig_start, int cloexec)
59 struct file *file, unsigned int orig_start)
60{ 60{
61 struct files_struct *files = current->files;
61 unsigned int newfd; 62 unsigned int newfd;
62 unsigned int start; 63 unsigned int start;
63 int error; 64 int error;
64 struct fdtable *fdt; 65 struct fdtable *fdt;
65 66
67 spin_lock(&files->file_lock);
68
66 error = -EINVAL; 69 error = -EINVAL;
67 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 70 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
68 goto out; 71 goto out;
@@ -97,42 +100,28 @@ repeat:
97 if (error) 100 if (error)
98 goto repeat; 101 goto repeat;
99 102
100 /*
101 * We reacquired files_lock, so we are safe as long as
102 * we reacquire the fdtable pointer and use it while holding
103 * the lock, no one can free it during that time.
104 */
105 if (start <= files->next_fd) 103 if (start <= files->next_fd)
106 files->next_fd = newfd + 1; 104 files->next_fd = newfd + 1;
107 105
106 FD_SET(newfd, fdt->open_fds);
107 if (cloexec)
108 FD_SET(newfd, fdt->close_on_exec);
109 else
110 FD_CLR(newfd, fdt->close_on_exec);
108 error = newfd; 111 error = newfd;
109 112
110out: 113out:
114 spin_unlock(&files->file_lock);
111 return error; 115 return error;
112} 116}
113 117
114static int dupfd(struct file *file, unsigned int start, int cloexec) 118static int dupfd(struct file *file, unsigned int start, int cloexec)
115{ 119{
116 struct files_struct * files = current->files; 120 int fd = locate_fd(start, cloexec);
117 struct fdtable *fdt; 121 if (fd >= 0)
118 int fd;
119
120 spin_lock(&files->file_lock);
121 fd = locate_fd(files, file, start);
122 if (fd >= 0) {
123 /* locate_fd() may have expanded fdtable, load the ptr */
124 fdt = files_fdtable(files);
125 FD_SET(fd, fdt->open_fds);
126 if (cloexec)
127 FD_SET(fd, fdt->close_on_exec);
128 else
129 FD_CLR(fd, fdt->close_on_exec);
130 spin_unlock(&files->file_lock);
131 fd_install(fd, file); 122 fd_install(fd, file);
132 } else { 123 else
133 spin_unlock(&files->file_lock);
134 fput(file); 124 fput(file);
135 }
136 125
137 return fd; 126 return fd;
138} 127}
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9ef..4c6f0ea12c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/fdtable.h>
15#include <linux/bitops.h> 16#include <linux/bitops.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
149 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
150 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
151 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
152 if (nr > sysctl_nr_open) 153 /*
153 nr = sysctl_nr_open; 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way.
157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */
161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
154 163
155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
156 if (!fdt) 165 if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
199 if (!new_fdt) 208 if (!new_fdt)
200 return -ENOMEM; 209 return -ENOMEM;
201 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */
214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt);
216 free_fdset(new_fdt);
217 kfree(new_fdt);
218 return -EMFILE;
219 }
220 /*
202 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
203 * we dropped the lock 222 * we dropped the lock
204 */ 223 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b872251..83084225b4c3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fdtable.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 2b46064f66b2..50ab5eecb99b 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
53/* vxfs_inode.c */ 56/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops;
54extern struct kmem_cache *vxfs_inode_cachep; 58extern struct kmem_cache *vxfs_inode_cachep;
55extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); 59extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
56extern struct inode * vxfs_get_fake_inode(struct super_block *, 60extern struct inode * vxfs_get_fake_inode(struct super_block *,
@@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations;
69extern int vxfs_read_olt(struct super_block *, u_long); 73extern int vxfs_read_olt(struct super_block *, u_long);
70 74
71/* vxfs_subr.c */ 75/* vxfs_subr.c */
76extern const struct address_space_operations vxfs_aops;
72extern struct page * vxfs_get_page(struct address_space *, u_long); 77extern struct page * vxfs_get_page(struct address_space *, u_long);
73extern void vxfs_put_page(struct page *); 78extern void vxfs_put_page(struct page *);
74extern struct buffer_head * vxfs_bread(struct inode *, int); 79extern struct buffer_head * vxfs_bread(struct inode *, int);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8a5959a61ba9..c36aeaf92e41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36 36
37#include "vxfs.h" 37#include "vxfs.h"
38#include "vxfs_extern.h"
38#include "vxfs_inode.h" 39#include "vxfs_inode.h"
39 40
40 41
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ad88d2364bc2..9f3f2ceb73f0 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,11 +41,6 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops;
46
47extern const struct inode_operations vxfs_immed_symlink_iops;
48
49struct kmem_cache *vxfs_inode_cachep; 44struct kmem_cache *vxfs_inode_cachep;
50 45
51 46
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06557679ca41..ae45f77765c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,45 @@
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include "internal.h" 26#include "internal.h"
27 27
28
29/**
30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */
41static int writeback_acquire(struct backing_dev_info *bdi)
42{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44}
45
46/**
47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure.
49 *
50 * Determine whether there is writeback in progress against a backing device.
51 */
52int writeback_in_progress(struct backing_dev_info *bdi)
53{
54 return test_bit(BDI_pdflush, &bdi->state);
55}
56
57/**
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{
63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state);
65}
66
28/** 67/**
29 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
30 * @inode: inode to mark 69 * @inode: inode to mark
@@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
747 786
748 return err; 787 return err;
749} 788}
750
751EXPORT_SYMBOL(generic_osync_inode); 789EXPORT_SYMBOL(generic_osync_inode);
752
753/**
754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure
756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on
758 * a single request queue. Exclusion at the request_queue level is obtained
759 * via a flag in the request_queue's backing_dev_info.state.
760 *
761 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
762 * unless they implement their own. Which is somewhat inefficient, as this
763 * may prevent concurrent writeback against multiple devices.
764 */
765int writeback_acquire(struct backing_dev_info *bdi)
766{
767 return !test_and_set_bit(BDI_pdflush, &bdi->state);
768}
769
770/**
771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure.
773 *
774 * Determine whether there is writeback in progress against a backing device.
775 */
776int writeback_in_progress(struct backing_dev_info *bdi)
777{
778 return test_bit(BDI_pdflush, &bdi->state);
779}
780
781/**
782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure
784 */
785void writeback_release(struct backing_dev_info *bdi)
786{
787 BUG_ON(!writeback_in_progress(bdi));
788 clear_bit(BDI_pdflush, &bdi->state);
789}
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e07..4f3cab321415 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
117 117
118 parent = fuse_control_sb->s_root; 118 parent = fuse_control_sb->s_root;
119 inc_nlink(parent->d_inode); 119 inc_nlink(parent->d_inode);
120 sprintf(name, "%llu", (unsigned long long) fc->id); 120 sprintf(name, "%u", fc->dev);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, 121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations, 122 &simple_dir_inode_operations,
123 &simple_dir_operations); 123 &simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524e..87250b6a8682 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
47 return req; 47 return req;
48} 48}
49 49
50struct fuse_req *fuse_request_alloc_nofs(void)
51{
52 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
53 if (req)
54 fuse_request_init(req);
55 return req;
56}
57
50void fuse_request_free(struct fuse_req *req) 58void fuse_request_free(struct fuse_req *req)
51{ 59{
52 kmem_cache_free(fuse_req_cachep, req); 60 kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
291 299
292static void wait_answer_interruptible(struct fuse_conn *fc, 300static void wait_answer_interruptible(struct fuse_conn *fc,
293 struct fuse_req *req) 301 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock)
294{ 303{
295 if (signal_pending(current)) 304 if (signal_pending(current))
296 return; 305 return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
307 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 316 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
308} 317}
309 318
310/* Called with fc->lock held. Releases, and then reacquires it. */
311static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock)
312{ 321{
313 if (!fc->no_interrupt) { 322 if (!fc->no_interrupt) {
314 /* Any signal may interrupt this */ 323 /* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
430} 439}
431 440
432/* 441/*
442 * Called under fc->lock
443 *
444 * fc->connected must have been checked previously
445 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
447{
448 req->isreply = 1;
449 request_send_nowait_locked(fc, req);
450}
451
452/*
433 * Lock the request. Up to the next unlock_request() there mustn't be 453 * Lock the request. Up to the next unlock_request() there mustn't be
434 * anything that could cause a page-fault. If the request was already 454 * anything that could cause a page-fault. If the request was already
435 * aborted bail out. 455 * aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
968 * locked). 988 * locked).
969 */ 989 */
970static void end_io_requests(struct fuse_conn *fc) 990static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock)
971{ 992{
972 while (!list_empty(&fc->io)) { 993 while (!list_empty(&fc->io)) {
973 struct fuse_req *req = 994 struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a3..2060bf06b906 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
132 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
133} 133}
134 134
135static u64 fuse_get_attr_version(struct fuse_conn *fc) 135u64 fuse_get_attr_version(struct fuse_conn *fc)
136{ 136{
137 u64 curr_version; 137 u64 curr_version;
138 138
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1107} 1107}
1108 1108
1109/* 1109/*
1110 * Prevent concurrent writepages on inode
1111 *
1112 * This is done by adding a negative bias to the inode write counter
1113 * and waiting for all pending writes to finish.
1114 */
1115void fuse_set_nowrite(struct inode *inode)
1116{
1117 struct fuse_conn *fc = get_fuse_conn(inode);
1118 struct fuse_inode *fi = get_fuse_inode(inode);
1119
1120 BUG_ON(!mutex_is_locked(&inode->i_mutex));
1121
1122 spin_lock(&fc->lock);
1123 BUG_ON(fi->writectr < 0);
1124 fi->writectr += FUSE_NOWRITE;
1125 spin_unlock(&fc->lock);
1126 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1127}
1128
1129/*
1130 * Allow writepages on inode
1131 *
1132 * Remove the bias from the writecounter and send any queued
1133 * writepages.
1134 */
1135static void __fuse_release_nowrite(struct inode *inode)
1136{
1137 struct fuse_inode *fi = get_fuse_inode(inode);
1138
1139 BUG_ON(fi->writectr != FUSE_NOWRITE);
1140 fi->writectr = 0;
1141 fuse_flush_writepages(inode);
1142}
1143
1144void fuse_release_nowrite(struct inode *inode)
1145{
1146 struct fuse_conn *fc = get_fuse_conn(inode);
1147
1148 spin_lock(&fc->lock);
1149 __fuse_release_nowrite(inode);
1150 spin_unlock(&fc->lock);
1151}
1152
1153/*
1110 * Set attributes, and at the same time refresh them. 1154 * Set attributes, and at the same time refresh them.
1111 * 1155 *
1112 * Truncation is slightly complicated, because the 'truncate' request 1156 * Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1122 struct fuse_req *req; 1166 struct fuse_req *req;
1123 struct fuse_setattr_in inarg; 1167 struct fuse_setattr_in inarg;
1124 struct fuse_attr_out outarg; 1168 struct fuse_attr_out outarg;
1169 bool is_truncate = false;
1170 loff_t oldsize;
1125 int err; 1171 int err;
1126 1172
1127 if (!fuse_allow_task(fc, current)) 1173 if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1145 send_sig(SIGXFSZ, current, 0); 1191 send_sig(SIGXFSZ, current, 0);
1146 return -EFBIG; 1192 return -EFBIG;
1147 } 1193 }
1194 is_truncate = true;
1148 } 1195 }
1149 1196
1150 req = fuse_get_req(fc); 1197 req = fuse_get_req(fc);
1151 if (IS_ERR(req)) 1198 if (IS_ERR(req))
1152 return PTR_ERR(req); 1199 return PTR_ERR(req);
1153 1200
1201 if (is_truncate)
1202 fuse_set_nowrite(inode);
1203
1154 memset(&inarg, 0, sizeof(inarg)); 1204 memset(&inarg, 0, sizeof(inarg));
1155 memset(&outarg, 0, sizeof(outarg)); 1205 memset(&outarg, 0, sizeof(outarg));
1156 iattr_to_fattr(attr, &inarg); 1206 iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1181 if (err) { 1231 if (err) {
1182 if (err == -EINTR) 1232 if (err == -EINTR)
1183 fuse_invalidate_attr(inode); 1233 fuse_invalidate_attr(inode);
1184 return err; 1234 goto error;
1185 } 1235 }
1186 1236
1187 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1237 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1188 make_bad_inode(inode); 1238 make_bad_inode(inode);
1189 return -EIO; 1239 err = -EIO;
1240 goto error;
1241 }
1242
1243 spin_lock(&fc->lock);
1244 fuse_change_attributes_common(inode, &outarg.attr,
1245 attr_timeout(&outarg));
1246 oldsize = inode->i_size;
1247 i_size_write(inode, outarg.attr.size);
1248
1249 if (is_truncate) {
1250 /* NOTE: this may release/reacquire fc->lock */
1251 __fuse_release_nowrite(inode);
1252 }
1253 spin_unlock(&fc->lock);
1254
1255 /*
1256 * Only call invalidate_inode_pages2() after removing
1257 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1258 */
1259 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1260 if (outarg.attr.size < oldsize)
1261 fuse_truncate(inode->i_mapping, outarg.attr.size);
1262 invalidate_inode_pages2(inode->i_mapping);
1190 } 1263 }
1191 1264
1192 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1193 return 0; 1265 return 0;
1266
1267error:
1268 if (is_truncate)
1269 fuse_release_nowrite(inode);
1270
1271 return err;
1194} 1272}
1195 1273
1196static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1274static int fuse_setattr(struct dentry *entry, struct iattr *attr)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86d..f28cf8b46f80 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
210 return (u64) v0 + ((u64) v1 << 32); 210 return (u64) v0 + ((u64) v1 << 32);
211} 211}
212 212
213/*
214 * Check if page is under writeback
215 *
216 * This is currently done by walking the list of writepage requests
217 * for the inode, which can be pretty inefficient.
218 */
219static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
220{
221 struct fuse_conn *fc = get_fuse_conn(inode);
222 struct fuse_inode *fi = get_fuse_inode(inode);
223 struct fuse_req *req;
224 bool found = false;
225
226 spin_lock(&fc->lock);
227 list_for_each_entry(req, &fi->writepages, writepages_entry) {
228 pgoff_t curr_index;
229
230 BUG_ON(req->inode != inode);
231 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
232 if (curr_index == index) {
233 found = true;
234 break;
235 }
236 }
237 spin_unlock(&fc->lock);
238
239 return found;
240}
241
242/*
243 * Wait for page writeback to be completed.
244 *
245 * Since fuse doesn't rely on the VM writeback tracking, this has to
246 * use some other means.
247 */
248static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
249{
250 struct fuse_inode *fi = get_fuse_inode(inode);
251
252 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
253 return 0;
254}
255
213static int fuse_flush(struct file *file, fl_owner_t id) 256static int fuse_flush(struct file *file, fl_owner_t id)
214{ 257{
215 struct inode *inode = file->f_path.dentry->d_inode; 258 struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
245 return err; 288 return err;
246} 289}
247 290
291/*
292 * Wait for all pending writepages on the inode to finish.
293 *
294 * This is currently done by blocking further writes with FUSE_NOWRITE
295 * and waiting for all sent writes to complete.
296 *
297 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
298 * could conflict with truncation.
299 */
300static void fuse_sync_writes(struct inode *inode)
301{
302 fuse_set_nowrite(inode);
303 fuse_release_nowrite(inode);
304}
305
248int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, 306int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
249 int isdir) 307 int isdir)
250{ 308{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
261 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 319 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
262 return 0; 320 return 0;
263 321
322 /*
323 * Start writeback against all dirty pages of the inode, then
324 * wait for all outstanding writes, before sending the FSYNC
325 * request.
326 */
327 err = write_inode_now(inode, 0);
328 if (err)
329 return err;
330
331 fuse_sync_writes(inode);
332
264 req = fuse_get_req(fc); 333 req = fuse_get_req(fc);
265 if (IS_ERR(req)) 334 if (IS_ERR(req))
266 return PTR_ERR(req); 335 return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
294void fuse_read_fill(struct fuse_req *req, struct file *file, 363void fuse_read_fill(struct fuse_req *req, struct file *file,
295 struct inode *inode, loff_t pos, size_t count, int opcode) 364 struct inode *inode, loff_t pos, size_t count, int opcode)
296{ 365{
297 struct fuse_read_in *inarg = &req->misc.read_in; 366 struct fuse_read_in *inarg = &req->misc.read.in;
298 struct fuse_file *ff = file->private_data; 367 struct fuse_file *ff = file->private_data;
299 368
300 inarg->fh = ff->fh; 369 inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
320 389
321 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 390 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
322 if (owner != NULL) { 391 if (owner != NULL) {
323 struct fuse_read_in *inarg = &req->misc.read_in; 392 struct fuse_read_in *inarg = &req->misc.read.in;
324 393
325 inarg->read_flags |= FUSE_READ_LOCKOWNER; 394 inarg->read_flags |= FUSE_READ_LOCKOWNER;
326 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 395 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
329 return req->out.args[0].size; 398 return req->out.args[0].size;
330} 399}
331 400
401static void fuse_read_update_size(struct inode *inode, loff_t size,
402 u64 attr_ver)
403{
404 struct fuse_conn *fc = get_fuse_conn(inode);
405 struct fuse_inode *fi = get_fuse_inode(inode);
406
407 spin_lock(&fc->lock);
408 if (attr_ver == fi->attr_version && size < inode->i_size) {
409 fi->attr_version = ++fc->attr_version;
410 i_size_write(inode, size);
411 }
412 spin_unlock(&fc->lock);
413}
414
332static int fuse_readpage(struct file *file, struct page *page) 415static int fuse_readpage(struct file *file, struct page *page)
333{ 416{
334 struct inode *inode = page->mapping->host; 417 struct inode *inode = page->mapping->host;
335 struct fuse_conn *fc = get_fuse_conn(inode); 418 struct fuse_conn *fc = get_fuse_conn(inode);
336 struct fuse_req *req; 419 struct fuse_req *req;
420 size_t num_read;
421 loff_t pos = page_offset(page);
422 size_t count = PAGE_CACHE_SIZE;
423 u64 attr_ver;
337 int err; 424 int err;
338 425
339 err = -EIO; 426 err = -EIO;
340 if (is_bad_inode(inode)) 427 if (is_bad_inode(inode))
341 goto out; 428 goto out;
342 429
430 /*
431 * Page writeback can extend beyond the liftime of the
432 * page-cache page, so make sure we read a properly synced
433 * page.
434 */
435 fuse_wait_on_page_writeback(inode, page->index);
436
343 req = fuse_get_req(fc); 437 req = fuse_get_req(fc);
344 err = PTR_ERR(req); 438 err = PTR_ERR(req);
345 if (IS_ERR(req)) 439 if (IS_ERR(req))
346 goto out; 440 goto out;
347 441
442 attr_ver = fuse_get_attr_version(fc);
443
348 req->out.page_zeroing = 1; 444 req->out.page_zeroing = 1;
349 req->num_pages = 1; 445 req->num_pages = 1;
350 req->pages[0] = page; 446 req->pages[0] = page;
351 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, 447 num_read = fuse_send_read(req, file, inode, pos, count, NULL);
352 NULL);
353 err = req->out.h.error; 448 err = req->out.h.error;
354 fuse_put_request(fc, req); 449 fuse_put_request(fc, req);
355 if (!err) 450
451 if (!err) {
452 /*
453 * Short read means EOF. If file size is larger, truncate it
454 */
455 if (num_read < count)
456 fuse_read_update_size(inode, pos + num_read, attr_ver);
457
356 SetPageUptodate(page); 458 SetPageUptodate(page);
459 }
460
357 fuse_invalidate_attr(inode); /* atime changed */ 461 fuse_invalidate_attr(inode); /* atime changed */
358 out: 462 out:
359 unlock_page(page); 463 unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
363static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 467static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
364{ 468{
365 int i; 469 int i;
470 size_t count = req->misc.read.in.size;
471 size_t num_read = req->out.args[0].size;
472 struct inode *inode = req->pages[0]->mapping->host;
473
474 /*
475 * Short read means EOF. If file size is larger, truncate it
476 */
477 if (!req->out.h.error && num_read < count) {
478 loff_t pos = page_offset(req->pages[0]) + num_read;
479 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
480 }
366 481
367 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ 482 fuse_invalidate_attr(inode); /* atime changed */
368 483
369 for (i = 0; i < req->num_pages; i++) { 484 for (i = 0; i < req->num_pages; i++) {
370 struct page *page = req->pages[i]; 485 struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
387 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 502 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
388 req->out.page_zeroing = 1; 503 req->out.page_zeroing = 1;
389 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 504 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
505 req->misc.read.attr_ver = fuse_get_attr_version(fc);
390 if (fc->async_read) { 506 if (fc->async_read) {
391 struct fuse_file *ff = file->private_data; 507 struct fuse_file *ff = file->private_data;
392 req->ff = fuse_file_get(ff); 508 req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
411 struct inode *inode = data->inode; 527 struct inode *inode = data->inode;
412 struct fuse_conn *fc = get_fuse_conn(inode); 528 struct fuse_conn *fc = get_fuse_conn(inode);
413 529
530 fuse_wait_on_page_writeback(inode, page->index);
531
414 if (req->num_pages && 532 if (req->num_pages &&
415 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 533 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
416 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 534 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
477} 595}
478 596
479static void fuse_write_fill(struct fuse_req *req, struct file *file, 597static void fuse_write_fill(struct fuse_req *req, struct file *file,
480 struct inode *inode, loff_t pos, size_t count, 598 struct fuse_file *ff, struct inode *inode,
481 int writepage) 599 loff_t pos, size_t count, int writepage)
482{ 600{
483 struct fuse_conn *fc = get_fuse_conn(inode); 601 struct fuse_conn *fc = get_fuse_conn(inode);
484 struct fuse_file *ff = file->private_data;
485 struct fuse_write_in *inarg = &req->misc.write.in; 602 struct fuse_write_in *inarg = &req->misc.write.in;
486 struct fuse_write_out *outarg = &req->misc.write.out; 603 struct fuse_write_out *outarg = &req->misc.write.out;
487 604
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
490 inarg->offset = pos; 607 inarg->offset = pos;
491 inarg->size = count; 608 inarg->size = count;
492 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; 609 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
493 inarg->flags = file->f_flags; 610 inarg->flags = file ? file->f_flags : 0;
494 req->in.h.opcode = FUSE_WRITE; 611 req->in.h.opcode = FUSE_WRITE;
495 req->in.h.nodeid = get_node_id(inode); 612 req->in.h.nodeid = get_node_id(inode);
496 req->in.argpages = 1; 613 req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
511 fl_owner_t owner) 628 fl_owner_t owner)
512{ 629{
513 struct fuse_conn *fc = get_fuse_conn(inode); 630 struct fuse_conn *fc = get_fuse_conn(inode);
514 fuse_write_fill(req, file, inode, pos, count, 0); 631 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
515 if (owner != NULL) { 632 if (owner != NULL) {
516 struct fuse_write_in *inarg = &req->misc.write.in; 633 struct fuse_write_in *inarg = &req->misc.write.in;
517 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 634 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
533 return 0; 650 return 0;
534} 651}
535 652
653static void fuse_write_update_size(struct inode *inode, loff_t pos)
654{
655 struct fuse_conn *fc = get_fuse_conn(inode);
656 struct fuse_inode *fi = get_fuse_inode(inode);
657
658 spin_lock(&fc->lock);
659 fi->attr_version = ++fc->attr_version;
660 if (pos > inode->i_size)
661 i_size_write(inode, pos);
662 spin_unlock(&fc->lock);
663}
664
536static int fuse_buffered_write(struct file *file, struct inode *inode, 665static int fuse_buffered_write(struct file *file, struct inode *inode,
537 loff_t pos, unsigned count, struct page *page) 666 loff_t pos, unsigned count, struct page *page)
538{ 667{
539 int err; 668 int err;
540 size_t nres; 669 size_t nres;
541 struct fuse_conn *fc = get_fuse_conn(inode); 670 struct fuse_conn *fc = get_fuse_conn(inode);
542 struct fuse_inode *fi = get_fuse_inode(inode);
543 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 671 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
544 struct fuse_req *req; 672 struct fuse_req *req;
545 673
546 if (is_bad_inode(inode)) 674 if (is_bad_inode(inode))
547 return -EIO; 675 return -EIO;
548 676
677 /*
678 * Make sure writepages on the same page are not mixed up with
679 * plain writes.
680 */
681 fuse_wait_on_page_writeback(inode, page->index);
682
549 req = fuse_get_req(fc); 683 req = fuse_get_req(fc);
550 if (IS_ERR(req)) 684 if (IS_ERR(req))
551 return PTR_ERR(req); 685 return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
560 err = -EIO; 694 err = -EIO;
561 if (!err) { 695 if (!err) {
562 pos += nres; 696 pos += nres;
563 spin_lock(&fc->lock); 697 fuse_write_update_size(inode, pos);
564 fi->attr_version = ++fc->attr_version;
565 if (pos > inode->i_size)
566 i_size_write(inode, pos);
567 spin_unlock(&fc->lock);
568
569 if (count == PAGE_CACHE_SIZE) 698 if (count == PAGE_CACHE_SIZE)
570 SetPageUptodate(page); 699 SetPageUptodate(page);
571 } 700 }
@@ -588,6 +717,198 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
588 return res; 717 return res;
589} 718}
590 719
720static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
721 struct inode *inode, loff_t pos,
722 size_t count)
723{
724 size_t res;
725 unsigned offset;
726 unsigned i;
727
728 for (i = 0; i < req->num_pages; i++)
729 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
730
731 res = fuse_send_write(req, file, inode, pos, count, NULL);
732
733 offset = req->page_offset;
734 count = res;
735 for (i = 0; i < req->num_pages; i++) {
736 struct page *page = req->pages[i];
737
738 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
739 SetPageUptodate(page);
740
741 if (count > PAGE_CACHE_SIZE - offset)
742 count -= PAGE_CACHE_SIZE - offset;
743 else
744 count = 0;
745 offset = 0;
746
747 unlock_page(page);
748 page_cache_release(page);
749 }
750
751 return res;
752}
753
754static ssize_t fuse_fill_write_pages(struct fuse_req *req,
755 struct address_space *mapping,
756 struct iov_iter *ii, loff_t pos)
757{
758 struct fuse_conn *fc = get_fuse_conn(mapping->host);
759 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
760 size_t count = 0;
761 int err;
762
763 req->page_offset = offset;
764
765 do {
766 size_t tmp;
767 struct page *page;
768 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
769 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
770 iov_iter_count(ii));
771
772 bytes = min_t(size_t, bytes, fc->max_write - count);
773
774 again:
775 err = -EFAULT;
776 if (iov_iter_fault_in_readable(ii, bytes))
777 break;
778
779 err = -ENOMEM;
780 page = __grab_cache_page(mapping, index);
781 if (!page)
782 break;
783
784 pagefault_disable();
785 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
786 pagefault_enable();
787 flush_dcache_page(page);
788
789 if (!tmp) {
790 unlock_page(page);
791 page_cache_release(page);
792 bytes = min(bytes, iov_iter_single_seg_count(ii));
793 goto again;
794 }
795
796 err = 0;
797 req->pages[req->num_pages] = page;
798 req->num_pages++;
799
800 iov_iter_advance(ii, tmp);
801 count += tmp;
802 pos += tmp;
803 offset += tmp;
804 if (offset == PAGE_CACHE_SIZE)
805 offset = 0;
806
807 } while (iov_iter_count(ii) && count < fc->max_write &&
808 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
809
810 return count > 0 ? count : err;
811}
812
813static ssize_t fuse_perform_write(struct file *file,
814 struct address_space *mapping,
815 struct iov_iter *ii, loff_t pos)
816{
817 struct inode *inode = mapping->host;
818 struct fuse_conn *fc = get_fuse_conn(inode);
819 int err = 0;
820 ssize_t res = 0;
821
822 if (is_bad_inode(inode))
823 return -EIO;
824
825 do {
826 struct fuse_req *req;
827 ssize_t count;
828
829 req = fuse_get_req(fc);
830 if (IS_ERR(req)) {
831 err = PTR_ERR(req);
832 break;
833 }
834
835 count = fuse_fill_write_pages(req, mapping, ii, pos);
836 if (count <= 0) {
837 err = count;
838 } else {
839 size_t num_written;
840
841 num_written = fuse_send_write_pages(req, file, inode,
842 pos, count);
843 err = req->out.h.error;
844 if (!err) {
845 res += num_written;
846 pos += num_written;
847
848 /* break out of the loop on short write */
849 if (num_written != count)
850 err = -EIO;
851 }
852 }
853 fuse_put_request(fc, req);
854 } while (!err && iov_iter_count(ii));
855
856 if (res > 0)
857 fuse_write_update_size(inode, pos);
858
859 fuse_invalidate_attr(inode);
860
861 return res > 0 ? res : err;
862}
863
864static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
865 unsigned long nr_segs, loff_t pos)
866{
867 struct file *file = iocb->ki_filp;
868 struct address_space *mapping = file->f_mapping;
869 size_t count = 0;
870 ssize_t written = 0;
871 struct inode *inode = mapping->host;
872 ssize_t err;
873 struct iov_iter i;
874
875 WARN_ON(iocb->ki_pos != pos);
876
877 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
878 if (err)
879 return err;
880
881 mutex_lock(&inode->i_mutex);
882 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
883
884 /* We can write back this queue in page reclaim */
885 current->backing_dev_info = mapping->backing_dev_info;
886
887 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
888 if (err)
889 goto out;
890
891 if (count == 0)
892 goto out;
893
894 err = remove_suid(file->f_path.dentry);
895 if (err)
896 goto out;
897
898 file_update_time(file);
899
900 iov_iter_init(&i, iov, nr_segs, count, 0);
901 written = fuse_perform_write(file, mapping, &i, pos);
902 if (written >= 0)
903 iocb->ki_pos = pos + written;
904
905out:
906 current->backing_dev_info = NULL;
907 mutex_unlock(&inode->i_mutex);
908
909 return written ? written : err;
910}
911
591static void fuse_release_user_pages(struct fuse_req *req, int write) 912static void fuse_release_user_pages(struct fuse_req *req, int write)
592{ 913{
593 unsigned i; 914 unsigned i;
@@ -613,7 +934,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
613 934
614 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 935 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
615 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 936 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
616 npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); 937 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
617 down_read(&current->mm->mmap_sem); 938 down_read(&current->mm->mmap_sem);
618 npages = get_user_pages(current, current->mm, user_addr, npages, write, 939 npages = get_user_pages(current, current->mm, user_addr, npages, write,
619 0, req->pages, NULL); 940 0, req->pages, NULL);
@@ -645,14 +966,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
645 966
646 while (count) { 967 while (count) {
647 size_t nres; 968 size_t nres;
648 size_t nbytes = min(count, nmax); 969 size_t nbytes_limit = min(count, nmax);
649 int err = fuse_get_user_pages(req, buf, nbytes, !write); 970 size_t nbytes;
971 int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
650 if (err) { 972 if (err) {
651 res = err; 973 res = err;
652 break; 974 break;
653 } 975 }
654 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 976 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
655 nbytes = min(count, nbytes); 977 nbytes = min(nbytes_limit, nbytes);
656 if (write) 978 if (write)
657 nres = fuse_send_write(req, file, inode, pos, nbytes, 979 nres = fuse_send_write(req, file, inode, pos, nbytes,
658 current->files); 980 current->files);
@@ -683,12 +1005,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
683 } 1005 }
684 fuse_put_request(fc, req); 1006 fuse_put_request(fc, req);
685 if (res > 0) { 1007 if (res > 0) {
686 if (write) { 1008 if (write)
687 spin_lock(&fc->lock); 1009 fuse_write_update_size(inode, pos);
688 if (pos > inode->i_size)
689 i_size_write(inode, pos);
690 spin_unlock(&fc->lock);
691 }
692 *ppos = pos; 1010 *ppos = pos;
693 } 1011 }
694 fuse_invalidate_attr(inode); 1012 fuse_invalidate_attr(inode);
@@ -716,21 +1034,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
716 return res; 1034 return res;
717} 1035}
718 1036
719static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1037static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
720{ 1038{
721 if ((vma->vm_flags & VM_SHARED)) { 1039 __free_page(req->pages[0]);
722 if ((vma->vm_flags & VM_WRITE)) 1040 fuse_file_put(req->ff);
723 return -ENODEV; 1041 fuse_put_request(fc, req);
724 else 1042}
725 vma->vm_flags &= ~VM_MAYWRITE; 1043
1044static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1045{
1046 struct inode *inode = req->inode;
1047 struct fuse_inode *fi = get_fuse_inode(inode);
1048 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1049
1050 list_del(&req->writepages_entry);
1051 dec_bdi_stat(bdi, BDI_WRITEBACK);
1052 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1053 bdi_writeout_inc(bdi);
1054 wake_up(&fi->page_waitq);
1055}
1056
1057/* Called under fc->lock, may release and reacquire it */
1058static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1059{
1060 struct fuse_inode *fi = get_fuse_inode(req->inode);
1061 loff_t size = i_size_read(req->inode);
1062 struct fuse_write_in *inarg = &req->misc.write.in;
1063
1064 if (!fc->connected)
1065 goto out_free;
1066
1067 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1068 inarg->size = PAGE_CACHE_SIZE;
1069 } else if (inarg->offset < size) {
1070 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1071 } else {
1072 /* Got truncated off completely */
1073 goto out_free;
1074 }
1075
1076 req->in.args[1].size = inarg->size;
1077 fi->writectr++;
1078 request_send_background_locked(fc, req);
1079 return;
1080
1081 out_free:
1082 fuse_writepage_finish(fc, req);
1083 spin_unlock(&fc->lock);
1084 fuse_writepage_free(fc, req);
1085 spin_lock(&fc->lock);
1086}
1087
1088/*
1089 * If fi->writectr is positive (no truncate or fsync going on) send
1090 * all queued writepage requests.
1091 *
1092 * Called with fc->lock
1093 */
1094void fuse_flush_writepages(struct inode *inode)
1095{
1096 struct fuse_conn *fc = get_fuse_conn(inode);
1097 struct fuse_inode *fi = get_fuse_inode(inode);
1098 struct fuse_req *req;
1099
1100 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1101 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1102 list_del_init(&req->list);
1103 fuse_send_writepage(fc, req);
1104 }
1105}
1106
1107static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1108{
1109 struct inode *inode = req->inode;
1110 struct fuse_inode *fi = get_fuse_inode(inode);
1111
1112 mapping_set_error(inode->i_mapping, req->out.h.error);
1113 spin_lock(&fc->lock);
1114 fi->writectr--;
1115 fuse_writepage_finish(fc, req);
1116 spin_unlock(&fc->lock);
1117 fuse_writepage_free(fc, req);
1118}
1119
1120static int fuse_writepage_locked(struct page *page)
1121{
1122 struct address_space *mapping = page->mapping;
1123 struct inode *inode = mapping->host;
1124 struct fuse_conn *fc = get_fuse_conn(inode);
1125 struct fuse_inode *fi = get_fuse_inode(inode);
1126 struct fuse_req *req;
1127 struct fuse_file *ff;
1128 struct page *tmp_page;
1129
1130 set_page_writeback(page);
1131
1132 req = fuse_request_alloc_nofs();
1133 if (!req)
1134 goto err;
1135
1136 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1137 if (!tmp_page)
1138 goto err_free;
1139
1140 spin_lock(&fc->lock);
1141 BUG_ON(list_empty(&fi->write_files));
1142 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1143 req->ff = fuse_file_get(ff);
1144 spin_unlock(&fc->lock);
1145
1146 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
1147
1148 copy_highpage(tmp_page, page);
1149 req->num_pages = 1;
1150 req->pages[0] = tmp_page;
1151 req->page_offset = 0;
1152 req->end = fuse_writepage_end;
1153 req->inode = inode;
1154
1155 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1156 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1157 end_page_writeback(page);
1158
1159 spin_lock(&fc->lock);
1160 list_add(&req->writepages_entry, &fi->writepages);
1161 list_add_tail(&req->list, &fi->queued_writes);
1162 fuse_flush_writepages(inode);
1163 spin_unlock(&fc->lock);
1164
1165 return 0;
1166
1167err_free:
1168 fuse_request_free(req);
1169err:
1170 end_page_writeback(page);
1171 return -ENOMEM;
1172}
1173
1174static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1175{
1176 int err;
1177
1178 err = fuse_writepage_locked(page);
1179 unlock_page(page);
1180
1181 return err;
1182}
1183
1184static int fuse_launder_page(struct page *page)
1185{
1186 int err = 0;
1187 if (clear_page_dirty_for_io(page)) {
1188 struct inode *inode = page->mapping->host;
1189 err = fuse_writepage_locked(page);
1190 if (!err)
1191 fuse_wait_on_page_writeback(inode, page->index);
726 } 1192 }
727 return generic_file_mmap(file, vma); 1193 return err;
728} 1194}
729 1195
730static int fuse_set_page_dirty(struct page *page) 1196/*
1197 * Write back dirty pages now, because there may not be any suitable
1198 * open files later
1199 */
1200static void fuse_vma_close(struct vm_area_struct *vma)
731{ 1201{
732 printk("fuse_set_page_dirty: should not happen\n"); 1202 filemap_write_and_wait(vma->vm_file->f_mapping);
733 dump_stack(); 1203}
1204
1205/*
1206 * Wait for writeback against this page to complete before allowing it
1207 * to be marked dirty again, and hence written back again, possibly
1208 * before the previous writepage completed.
1209 *
1210 * Block here, instead of in ->writepage(), so that the userspace fs
1211 * can only block processes actually operating on the filesystem.
1212 *
1213 * Otherwise unprivileged userspace fs would be able to block
1214 * unrelated:
1215 *
1216 * - page migration
1217 * - sync(2)
1218 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1219 */
1220static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1221{
1222 /*
1223 * Don't use page->mapping as it may become NULL from a
1224 * concurrent truncate.
1225 */
1226 struct inode *inode = vma->vm_file->f_mapping->host;
1227
1228 fuse_wait_on_page_writeback(inode, page->index);
1229 return 0;
1230}
1231
1232static struct vm_operations_struct fuse_file_vm_ops = {
1233 .close = fuse_vma_close,
1234 .fault = filemap_fault,
1235 .page_mkwrite = fuse_page_mkwrite,
1236};
1237
1238static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1239{
1240 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1241 struct inode *inode = file->f_dentry->d_inode;
1242 struct fuse_conn *fc = get_fuse_conn(inode);
1243 struct fuse_inode *fi = get_fuse_inode(inode);
1244 struct fuse_file *ff = file->private_data;
1245 /*
1246 * file may be written through mmap, so chain it onto the
1247 * inodes's write_file list
1248 */
1249 spin_lock(&fc->lock);
1250 if (list_empty(&ff->write_entry))
1251 list_add(&ff->write_entry, &fi->write_files);
1252 spin_unlock(&fc->lock);
1253 }
1254 file_accessed(file);
1255 vma->vm_ops = &fuse_file_vm_ops;
734 return 0; 1256 return 0;
735} 1257}
736 1258
@@ -909,12 +1431,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
909 return err ? 0 : outarg.block; 1431 return err ? 0 : outarg.block;
910} 1432}
911 1433
1434static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1435{
1436 loff_t retval;
1437 struct inode *inode = file->f_path.dentry->d_inode;
1438
1439 mutex_lock(&inode->i_mutex);
1440 switch (origin) {
1441 case SEEK_END:
1442 offset += i_size_read(inode);
1443 break;
1444 case SEEK_CUR:
1445 offset += file->f_pos;
1446 }
1447 retval = -EINVAL;
1448 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1449 if (offset != file->f_pos) {
1450 file->f_pos = offset;
1451 file->f_version = 0;
1452 }
1453 retval = offset;
1454 }
1455 mutex_unlock(&inode->i_mutex);
1456 return retval;
1457}
1458
912static const struct file_operations fuse_file_operations = { 1459static const struct file_operations fuse_file_operations = {
913 .llseek = generic_file_llseek, 1460 .llseek = fuse_file_llseek,
914 .read = do_sync_read, 1461 .read = do_sync_read,
915 .aio_read = fuse_file_aio_read, 1462 .aio_read = fuse_file_aio_read,
916 .write = do_sync_write, 1463 .write = do_sync_write,
917 .aio_write = generic_file_aio_write, 1464 .aio_write = fuse_file_aio_write,
918 .mmap = fuse_file_mmap, 1465 .mmap = fuse_file_mmap,
919 .open = fuse_open, 1466 .open = fuse_open,
920 .flush = fuse_flush, 1467 .flush = fuse_flush,
@@ -926,7 +1473,7 @@ static const struct file_operations fuse_file_operations = {
926}; 1473};
927 1474
928static const struct file_operations fuse_direct_io_file_operations = { 1475static const struct file_operations fuse_direct_io_file_operations = {
929 .llseek = generic_file_llseek, 1476 .llseek = fuse_file_llseek,
930 .read = fuse_direct_read, 1477 .read = fuse_direct_read,
931 .write = fuse_direct_write, 1478 .write = fuse_direct_write,
932 .open = fuse_open, 1479 .open = fuse_open,
@@ -940,10 +1487,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
940 1487
941static const struct address_space_operations fuse_file_aops = { 1488static const struct address_space_operations fuse_file_aops = {
942 .readpage = fuse_readpage, 1489 .readpage = fuse_readpage,
1490 .writepage = fuse_writepage,
1491 .launder_page = fuse_launder_page,
943 .write_begin = fuse_write_begin, 1492 .write_begin = fuse_write_begin,
944 .write_end = fuse_write_end, 1493 .write_end = fuse_write_end,
945 .readpages = fuse_readpages, 1494 .readpages = fuse_readpages,
946 .set_page_dirty = fuse_set_page_dirty, 1495 .set_page_dirty = __set_page_dirty_nobuffers,
947 .bmap = fuse_bmap, 1496 .bmap = fuse_bmap,
948}; 1497};
949 1498
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38ea..dadffa21a206 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/rwsem.h>
18 19
19/** Max number of pages that can be used in a single read request */ 20/** Max number of pages that can be used in a single read request */
20#define FUSE_MAX_PAGES_PER_REQ 32 21#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
25/** Congestion starts at 75% of maximum */ 26/** Congestion starts at 75% of maximum */
26#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) 27#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
27 28
29/** Bias for fi->writectr, meaning new writepages must not be sent */
30#define FUSE_NOWRITE INT_MIN
31
28/** It could be as large as PATH_MAX, but would that have any uses? */ 32/** It could be as large as PATH_MAX, but would that have any uses? */
29#define FUSE_NAME_MAX 1024 33#define FUSE_NAME_MAX 1024
30 34
@@ -73,6 +77,19 @@ struct fuse_inode {
73 77
74 /** Files usable in writepage. Protected by fc->lock */ 78 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files; 79 struct list_head write_files;
80
81 /** Writepages pending on truncate or fsync */
82 struct list_head queued_writes;
83
84 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
85 * means more writes are blocked */
86 int writectr;
87
88 /** Waitq for writepage completion */
89 wait_queue_head_t page_waitq;
90
91 /** List of writepage requestst (pending or sent) */
92 struct list_head writepages;
76}; 93};
77 94
78/** FUSE specific file data */ 95/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
222 } release; 239 } release;
223 struct fuse_init_in init_in; 240 struct fuse_init_in init_in;
224 struct fuse_init_out init_out; 241 struct fuse_init_out init_out;
225 struct fuse_read_in read_in; 242 struct {
243 struct fuse_read_in in;
244 u64 attr_ver;
245 } read;
226 struct { 246 struct {
227 struct fuse_write_in in; 247 struct fuse_write_in in;
228 struct fuse_write_out out; 248 struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
242 /** File used in the request (or NULL) */ 262 /** File used in the request (or NULL) */
243 struct fuse_file *ff; 263 struct fuse_file *ff;
244 264
265 /** Inode used in the request or NULL */
266 struct inode *inode;
267
268 /** Link on fi->writepages */
269 struct list_head writepages_entry;
270
245 /** Request completion callback */ 271 /** Request completion callback */
246 void (*end)(struct fuse_conn *, struct fuse_req *); 272 void (*end)(struct fuse_conn *, struct fuse_req *);
247 273
@@ -390,8 +416,8 @@ struct fuse_conn {
390 /** Entry on the fuse_conn_list */ 416 /** Entry on the fuse_conn_list */
391 struct list_head entry; 417 struct list_head entry;
392 418
393 /** Unique ID */ 419 /** Device ID from super block */
394 u64 id; 420 dev_t dev;
395 421
396 /** Dentries in the control filesystem */ 422 /** Dentries in the control filesystem */
397 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; 423 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +464,7 @@ extern const struct file_operations fuse_dev_operations;
438/** 464/**
439 * Get a filled in inode 465 * Get a filled in inode
440 */ 466 */
441struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 467struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
442 int generation, struct fuse_attr *attr, 468 int generation, struct fuse_attr *attr,
443 u64 attr_valid, u64 attr_version); 469 u64 attr_valid, u64 attr_version);
444 470
@@ -446,7 +472,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
446 * Send FORGET command 472 * Send FORGET command
447 */ 473 */
448void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 474void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
449 unsigned long nodeid, u64 nlookup); 475 u64 nodeid, u64 nlookup);
450 476
451/** 477/**
452 * Initialize READ or READDIR request 478 * Initialize READ or READDIR request
@@ -504,6 +530,11 @@ void fuse_init_symlink(struct inode *inode);
504void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 530void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
505 u64 attr_valid, u64 attr_version); 531 u64 attr_valid, u64 attr_version);
506 532
533void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
534 u64 attr_valid);
535
536void fuse_truncate(struct address_space *mapping, loff_t offset);
537
507/** 538/**
508 * Initialize the client device 539 * Initialize the client device
509 */ 540 */
@@ -522,6 +553,8 @@ void fuse_ctl_cleanup(void);
522 */ 553 */
523struct fuse_req *fuse_request_alloc(void); 554struct fuse_req *fuse_request_alloc(void);
524 555
556struct fuse_req *fuse_request_alloc_nofs(void);
557
525/** 558/**
526 * Free a request 559 * Free a request
527 */ 560 */
@@ -558,6 +591,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
558 */ 591 */
559void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 592void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
560 593
594void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
595
561/* Abort all requests */ 596/* Abort all requests */
562void fuse_abort_conn(struct fuse_conn *fc); 597void fuse_abort_conn(struct fuse_conn *fc);
563 598
@@ -600,3 +635,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
600 635
601int fuse_update_attributes(struct inode *inode, struct kstat *stat, 636int fuse_update_attributes(struct inode *inode, struct kstat *stat,
602 struct file *file, bool *refreshed); 637 struct file *file, bool *refreshed);
638
639void fuse_flush_writepages(struct inode *inode);
640
641void fuse_set_nowrite(struct inode *inode);
642void fuse_release_nowrite(struct inode *inode);
643
644u64 fuse_get_attr_version(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 033f7bdd47e8..79b615873838 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
59 fi->nodeid = 0; 59 fi->nodeid = 0;
60 fi->nlookup = 0; 60 fi->nlookup = 0;
61 fi->attr_version = 0; 61 fi->attr_version = 0;
62 fi->writectr = 0;
62 INIT_LIST_HEAD(&fi->write_files); 63 INIT_LIST_HEAD(&fi->write_files);
64 INIT_LIST_HEAD(&fi->queued_writes);
65 INIT_LIST_HEAD(&fi->writepages);
66 init_waitqueue_head(&fi->page_waitq);
63 fi->forget_req = fuse_request_alloc(); 67 fi->forget_req = fuse_request_alloc();
64 if (!fi->forget_req) { 68 if (!fi->forget_req) {
65 kmem_cache_free(fuse_inode_cachep, inode); 69 kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
73{ 77{
74 struct fuse_inode *fi = get_fuse_inode(inode); 78 struct fuse_inode *fi = get_fuse_inode(inode);
75 BUG_ON(!list_empty(&fi->write_files)); 79 BUG_ON(!list_empty(&fi->write_files));
80 BUG_ON(!list_empty(&fi->queued_writes));
76 if (fi->forget_req) 81 if (fi->forget_req)
77 fuse_request_free(fi->forget_req); 82 fuse_request_free(fi->forget_req);
78 kmem_cache_free(fuse_inode_cachep, inode); 83 kmem_cache_free(fuse_inode_cachep, inode);
79} 84}
80 85
81void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 86void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
82 unsigned long nodeid, u64 nlookup) 87 u64 nodeid, u64 nlookup)
83{ 88{
84 struct fuse_forget_in *inarg = &req->misc.forget_in; 89 struct fuse_forget_in *inarg = &req->misc.forget_in;
85 inarg->nlookup = nlookup; 90 inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
109 return 0; 114 return 0;
110} 115}
111 116
112static void fuse_truncate(struct address_space *mapping, loff_t offset) 117void fuse_truncate(struct address_space *mapping, loff_t offset)
113{ 118{
114 /* See vmtruncate() */ 119 /* See vmtruncate() */
115 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 120 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 122 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 123}
119 124
120 125void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
121void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 126 u64 attr_valid)
122 u64 attr_valid, u64 attr_version)
123{ 127{
124 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
125 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
126 loff_t oldsize;
127 130
128 spin_lock(&fc->lock);
129 if (attr_version != 0 && fi->attr_version > attr_version) {
130 spin_unlock(&fc->lock);
131 return;
132 }
133 fi->attr_version = ++fc->attr_version; 131 fi->attr_version = ++fc->attr_version;
134 fi->i_time = attr_valid; 132 fi->i_time = attr_valid;
135 133
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
159 fi->orig_i_mode = inode->i_mode; 157 fi->orig_i_mode = inode->i_mode;
160 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 158 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
161 inode->i_mode &= ~S_ISVTX; 159 inode->i_mode &= ~S_ISVTX;
160}
161
162void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
163 u64 attr_valid, u64 attr_version)
164{
165 struct fuse_conn *fc = get_fuse_conn(inode);
166 struct fuse_inode *fi = get_fuse_inode(inode);
167 loff_t oldsize;
168
169 spin_lock(&fc->lock);
170 if (attr_version != 0 && fi->attr_version > attr_version) {
171 spin_unlock(&fc->lock);
172 return;
173 }
174
175 fuse_change_attributes_common(inode, attr, attr_valid);
162 176
163 oldsize = inode->i_size; 177 oldsize = inode->i_size;
164 i_size_write(inode, attr->size); 178 i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
193 207
194static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 208static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
195{ 209{
196 unsigned long nodeid = *(unsigned long *) _nodeidp; 210 u64 nodeid = *(u64 *) _nodeidp;
197 if (get_node_id(inode) == nodeid) 211 if (get_node_id(inode) == nodeid)
198 return 1; 212 return 1;
199 else 213 else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
202 216
203static int fuse_inode_set(struct inode *inode, void *_nodeidp) 217static int fuse_inode_set(struct inode *inode, void *_nodeidp)
204{ 218{
205 unsigned long nodeid = *(unsigned long *) _nodeidp; 219 u64 nodeid = *(u64 *) _nodeidp;
206 get_fuse_inode(inode)->nodeid = nodeid; 220 get_fuse_inode(inode)->nodeid = nodeid;
207 return 0; 221 return 0;
208} 222}
209 223
210struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 224struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
211 int generation, struct fuse_attr *attr, 225 int generation, struct fuse_attr *attr,
212 u64 attr_valid, u64 attr_version) 226 u64 attr_valid, u64 attr_version)
213{ 227{
@@ -242,10 +256,9 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
242 return inode; 256 return inode;
243} 257}
244 258
245static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) 259static void fuse_umount_begin(struct super_block *sb)
246{ 260{
247 if (flags & MNT_FORCE) 261 fuse_abort_conn(get_fuse_conn_super(sb));
248 fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb));
249} 262}
250 263
251static void fuse_send_destroy(struct fuse_conn *fc) 264static void fuse_send_destroy(struct fuse_conn *fc)
@@ -448,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
448 return 0; 461 return 0;
449} 462}
450 463
451static struct fuse_conn *new_conn(void) 464static struct fuse_conn *new_conn(struct super_block *sb)
452{ 465{
453 struct fuse_conn *fc; 466 struct fuse_conn *fc;
454 int err; 467 int err;
@@ -469,19 +482,41 @@ static struct fuse_conn *new_conn(void)
469 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
470 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
471 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
485 /* fuse does its own writeback accounting */
486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
487 fc->dev = sb->s_dev;
472 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
473 if (err) { 489 if (err)
474 kfree(fc); 490 goto error_kfree;
475 fc = NULL; 491 err = bdi_register_dev(&fc->bdi, fc->dev);
476 goto out; 492 if (err)
477 } 493 goto error_bdi_destroy;
494 /*
495 * For a single fuse filesystem use max 1% of dirty +
496 * writeback threshold.
497 *
498 * This gives about 1M of write buffer for memory maps on a
499 * machine with 1G and 10% dirty_ratio, which should be more
500 * than enough.
501 *
502 * Privileged users can raise it by writing to
503 *
504 * /sys/class/bdi/<bdi>/max_ratio
505 */
506 bdi_set_max_ratio(&fc->bdi, 1);
478 fc->reqctr = 0; 507 fc->reqctr = 0;
479 fc->blocked = 1; 508 fc->blocked = 1;
480 fc->attr_version = 1; 509 fc->attr_version = 1;
481 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 510 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
482 } 511 }
483out:
484 return fc; 512 return fc;
513
514error_bdi_destroy:
515 bdi_destroy(&fc->bdi);
516error_kfree:
517 mutex_destroy(&fc->inst_mutex);
518 kfree(fc);
519 return NULL;
485} 520}
486 521
487void fuse_conn_put(struct fuse_conn *fc) 522void fuse_conn_put(struct fuse_conn *fc)
@@ -549,6 +584,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
549 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 584 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
550 fc->minor = arg->minor; 585 fc->minor = arg->minor;
551 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 586 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
587 fc->max_write = max_t(unsigned, 4096, fc->max_write);
552 fc->conn_init = 1; 588 fc->conn_init = 1;
553 } 589 }
554 fuse_put_request(fc, req); 590 fuse_put_request(fc, req);
@@ -579,12 +615,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
579 request_send_background(fc, req); 615 request_send_background(fc, req);
580} 616}
581 617
582static u64 conn_id(void)
583{
584 static u64 ctr = 1;
585 return ctr++;
586}
587
588static int fuse_fill_super(struct super_block *sb, void *data, int silent) 618static int fuse_fill_super(struct super_block *sb, void *data, int silent)
589{ 619{
590 struct fuse_conn *fc; 620 struct fuse_conn *fc;
@@ -622,14 +652,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
622 if (file->f_op != &fuse_dev_operations) 652 if (file->f_op != &fuse_dev_operations)
623 return -EINVAL; 653 return -EINVAL;
624 654
625 fc = new_conn(); 655 fc = new_conn(sb);
626 if (!fc) 656 if (!fc)
627 return -ENOMEM; 657 return -ENOMEM;
628 658
629 fc->flags = d.flags; 659 fc->flags = d.flags;
630 fc->user_id = d.user_id; 660 fc->user_id = d.user_id;
631 fc->group_id = d.group_id; 661 fc->group_id = d.group_id;
632 fc->max_read = d.max_read; 662 fc->max_read = max_t(unsigned, 4096, d.max_read);
633 663
634 /* Used by get_root_inode() */ 664 /* Used by get_root_inode() */
635 sb->s_fs_info = fc; 665 sb->s_fs_info = fc;
@@ -660,7 +690,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
660 if (file->private_data) 690 if (file->private_data)
661 goto err_unlock; 691 goto err_unlock;
662 692
663 fc->id = conn_id();
664 err = fuse_ctl_add_conn(fc); 693 err = fuse_ctl_add_conn(fc);
665 if (err) 694 if (err)
666 goto err_unlock; 695 goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049c..a4ff271df9ee 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
212{ 212{
213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); 213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
214 if (!gdlm_kset) { 214 if (!gdlm_kset) {
215 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 215 printk(KERN_WARNING "%s: can not create kset\n", __func__);
216 return -ENOMEM; 216 return -ENOMEM;
217 } 217 }
218 return 0; 218 return 0;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 90a04a6e3789..f55394e57cb2 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -438,7 +438,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
438 int error; 438 int error;
439 439
440 /* 440 /*
441 * Due to the order of unstuffing files and ->nopage(), we can be 441 * Due to the order of unstuffing files and ->fault(), we can be
442 * asked for a zero page in the case of a stuffed file being extended, 442 * asked for a zero page in the case of a stuffed file being extended,
443 * so we need to supply one here. It doesn't happen often. 443 * so we need to supply one here. It doesn't happen often.
444 */ 444 */
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd80..7f48576289c9 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 41
42#define gfs2_assert_withdraw(sdp, assertion) \ 42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ 43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__)) 44 __func__, __FILE__, __LINE__))
45 45
46 46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 49
50#define gfs2_assert_warn(sdp, assertion) \ 50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ 51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__)) 52 __func__, __FILE__, __LINE__))
53 53
54 54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, 55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line); 56 const char *function, char *file, unsigned int line);
57 57
58#define gfs2_consist(sdp) \ 58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) 59gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
60 60
61 61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, 62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line); 63 const char *function, char *file, unsigned int line);
64 64
65#define gfs2_consist_inode(ip) \ 65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) 66gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
67 67
68 68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, 69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line); 70 const char *function, char *file, unsigned int line);
71 71
72#define gfs2_consist_rgrpd(rgd) \ 72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) 73gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
74 74
75 75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
91} 91}
92 92
93#define gfs2_meta_check(sdp, bh) \ 93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) 94gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
95 95
96 96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
118} 118}
119 119
120#define gfs2_metatype_check(sdp, bh, type) \ 120#define gfs2_metatype_check(sdp, bh, type) \
121gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) 121gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
122 122
123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, 123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
124 u16 format) 124 u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
134 char *file, unsigned int line); 134 char *file, unsigned int line);
135 135
136#define gfs2_io_error(sdp) \ 136#define gfs2_io_error(sdp) \
137gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); 137gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
138 138
139 139
140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
141 const char *function, char *file, unsigned int line); 141 const char *function, char *file, unsigned int line);
142 142
143#define gfs2_io_error_bh(sdp, bh) \ 143#define gfs2_io_error_bh(sdp, bh) \
144gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); 144gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc43021..f6621a785202 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
208 struct hfs_bnode *node, *next_node; 208 struct hfs_bnode *node, *next_node;
209 struct page **pagep; 209 struct page **pagep;
210 u32 nidx, idx; 210 u32 nidx, idx;
211 u16 off, len; 211 unsigned off;
212 u16 off16;
213 u16 len;
212 u8 *data, byte, m; 214 u8 *data, byte, m;
213 int i; 215 int i;
214 216
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
235 node = hfs_bnode_find(tree, nidx); 237 node = hfs_bnode_find(tree, nidx);
236 if (IS_ERR(node)) 238 if (IS_ERR(node))
237 return node; 239 return node;
238 len = hfs_brec_lenoff(node, 2, &off); 240 len = hfs_brec_lenoff(node, 2, &off16);
241 off = off16;
239 242
240 off += node->page_offset; 243 off += node->page_offset;
241 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 244 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
280 return next_node; 283 return next_node;
281 node = next_node; 284 node = next_node;
282 285
283 len = hfs_brec_lenoff(node, 0, &off); 286 len = hfs_brec_lenoff(node, 0, &off16);
287 off = off16;
284 off += node->page_offset; 288 off += node->page_offset;
285 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 289 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
286 data = kmap(*pagep); 290 data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7f..36ca2e1a4fa3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); 215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); 216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
217 mdb->drAtrb = attrib; 217 mdb->drAtrb = attrib;
218 mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); 218 be32_add_cpu(&mdb->drWrCnt, 1);
219 mdb->drLsMod = hfs_mtime(); 219 mdb->drLsMod = hfs_mtime();
220 220
221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32de44ed0021..8cf67974adf6 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
297 return 0; 297 return 0;
298 } 298 }
299 p = match_strdup(&args[0]); 299 p = match_strdup(&args[0]);
300 hsb->nls_disk = load_nls(p); 300 if (p)
301 hsb->nls_disk = load_nls(p);
301 if (!hsb->nls_disk) { 302 if (!hsb->nls_disk) {
302 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); 303 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
303 kfree(p); 304 kfree(p);
@@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
311 return 0; 312 return 0;
312 } 313 }
313 p = match_strdup(&args[0]); 314 p = match_strdup(&args[0]);
314 hsb->nls_io = load_nls(p); 315 if (p)
316 hsb->nls_io = load_nls(p);
315 if (!hsb->nls_io) { 317 if (!hsb->nls_io) {
316 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); 318 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
317 kfree(p); 319 kfree(p);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a42..e49fcee1e293 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
184 struct hfs_bnode *node, *next_node; 184 struct hfs_bnode *node, *next_node;
185 struct page **pagep; 185 struct page **pagep;
186 u32 nidx, idx; 186 u32 nidx, idx;
187 u16 off, len; 187 unsigned off;
188 u16 off16;
189 u16 len;
188 u8 *data, byte, m; 190 u8 *data, byte, m;
189 int i; 191 int i;
190 192
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
211 node = hfs_bnode_find(tree, nidx); 213 node = hfs_bnode_find(tree, nidx);
212 if (IS_ERR(node)) 214 if (IS_ERR(node))
213 return node; 215 return node;
214 len = hfs_brec_lenoff(node, 2, &off); 216 len = hfs_brec_lenoff(node, 2, &off16);
217 off = off16;
215 218
216 off += node->page_offset; 219 off += node->page_offset;
217 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 220 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
256 return next_node; 259 return next_node;
257 node = next_node; 260 node = next_node;
258 261
259 len = hfs_brec_lenoff(node, 0, &off); 262 len = hfs_brec_lenoff(node, 0, &off16);
263 off = off16;
260 off += node->page_offset; 264 off += node->page_offset;
261 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 265 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
262 data = kmap(*pagep); 266 data = kmap(*pagep);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d72d0a8b25aa..9e59537b43d5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
311int hfsplus_rename_cat(u32, struct inode *, struct qstr *, 311int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
312 struct inode *, struct qstr *); 312 struct inode *, struct qstr *);
313 313
314/* dir.c */
315extern const struct inode_operations hfsplus_dir_inode_operations;
316extern const struct file_operations hfsplus_dir_operations;
317
314/* extents.c */ 318/* extents.c */
315int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 319int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
316void hfsplus_ext_write_extent(struct inode *); 320void hfsplus_ext_write_extent(struct inode *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37744cf3706a..d53b2af91c25 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -278,9 +278,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
278 return 0; 278 return 0;
279} 279}
280 280
281extern const struct inode_operations hfsplus_dir_inode_operations;
282extern struct file_operations hfsplus_dir_operations;
283
284static const struct inode_operations hfsplus_file_inode_operations = { 281static const struct inode_operations hfsplus_file_inode_operations = {
285 .lookup = hfsplus_file_lookup, 282 .lookup = hfsplus_file_lookup,
286 .truncate = hfsplus_file_truncate, 283 .truncate = hfsplus_file_truncate,
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index dc64fac00831..9997cbf8beb5 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 if (p)
136 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 137 if (!sbi->nls) {
137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 138 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 139 kfree(p);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b0f9ad362d1d..ce97a54518d8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
360 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
363 } 363 }
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
423 */ 423 */
424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
425 vhdr->modify_date = hfsp_now2mt(); 425 vhdr->modify_date = hfsp_now2mt();
426 vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); 426 be32_add_cpu(&vhdr->write_count, 1);
427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 72cab78f0509..175d08eacc86 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
47 return 0; 47 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 49
50 extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); 50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 51 wd->embed_start = (extent >> 16) & 0xFFFF;
52 wd->embed_count = extent & 0xFFFF; 52 wd->embed_count = extent & 0xFFFF;
53 53
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6846785fe904..aeabf80f81a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 49};
50 50
51int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
@@ -504,7 +504,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
504 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 504 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
505 INIT_LIST_HEAD(&inode->i_mapping->private_list); 505 INIT_LIST_HEAD(&inode->i_mapping->private_list);
506 info = HUGETLBFS_I(inode); 506 info = HUGETLBFS_I(inode);
507 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 507 mpol_shared_policy_init(&info->policy, NULL);
508 switch (mode & S_IFMT) { 508 switch (mode & S_IFMT) {
509 default: 509 default:
510 init_special_inode(inode, mode, dev); 510 init_special_inode(inode, mode, dev);
diff --git a/fs/inode.c b/fs/inode.c
index 27ee1af50d02..bf6478130424 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
495 struct inode * inode = NULL; 495 struct inode * inode = NULL;
496 496
497repeat: 497repeat:
498 hlist_for_each (node, head) { 498 hlist_for_each_entry(inode, node, head, i_hash) {
499 inode = hlist_entry(node, struct inode, i_hash);
500 if (inode->i_sb != sb) 499 if (inode->i_sb != sb)
501 continue; 500 continue;
502 if (!test(inode, data)) 501 if (!test(inode, data))
@@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
520 struct inode * inode = NULL; 519 struct inode * inode = NULL;
521 520
522repeat: 521repeat:
523 hlist_for_each (node, head) { 522 hlist_for_each_entry(inode, node, head, i_hash) {
524 inode = hlist_entry(node, struct inode, i_hash);
525 if (inode->i_ino != ino) 523 if (inode->i_ino != ino)
526 continue; 524 continue;
527 if (inode->i_sb != sb) 525 if (inode->i_sb != sb)
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 7b94a1e3c015..6676c06bb7c1 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void)
598 } 598 }
599 599
600 ih = inotify_init(&inotify_user_ops); 600 ih = inotify_init(&inotify_user_ops);
601 if (unlikely(IS_ERR(ih))) { 601 if (IS_ERR(ih)) {
602 ret = PTR_ERR(ih); 602 ret = PTR_ERR(ih);
603 goto out_free_dev; 603 goto out_free_dev;
604 } 604 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f32fbde2175e..7db32b3382d3 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -28,8 +28,8 @@
28 * 28 *
29 * Returns 0 on success, -errno on error. 29 * Returns 0 on success, -errno on error.
30 */ 30 */
31long vfs_ioctl(struct file *filp, unsigned int cmd, 31static long vfs_ioctl(struct file *filp, unsigned int cmd,
32 unsigned long arg) 32 unsigned long arg)
33{ 33{
34 int error = -ENOTTY; 34 int error = -ENOTTY;
35 35
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df1..2f0dc5a14633 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
145 } 145 }
146 de = tmpde; 146 de = tmpde;
147 } 147 }
148 /* Basic sanity check, whether name doesn't exceed dir entry */
149 if (de_len < de->name_len[0] +
150 sizeof(struct iso_directory_record)) {
151 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
152 " in block %lu of inode %lu\n", block,
153 inode->i_ino);
154 return -EIO;
155 }
148 156
149 if (first_de) { 157 if (first_de) {
150 isofs_normalize_block_and_offset(de, 158 isofs_normalize_block_and_offset(de,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index d1bdf8adb351..ccbf72faf27a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -78,29 +78,29 @@ static inline int isonum_712(char *p)
78} 78}
79static inline unsigned int isonum_721(char *p) 79static inline unsigned int isonum_721(char *p)
80{ 80{
81 return le16_to_cpu(get_unaligned((__le16 *)p)); 81 return get_unaligned_le16(p);
82} 82}
83static inline unsigned int isonum_722(char *p) 83static inline unsigned int isonum_722(char *p)
84{ 84{
85 return be16_to_cpu(get_unaligned((__le16 *)p)); 85 return get_unaligned_be16(p);
86} 86}
87static inline unsigned int isonum_723(char *p) 87static inline unsigned int isonum_723(char *p)
88{ 88{
89 /* Ignore bigendian datum due to broken mastering programs */ 89 /* Ignore bigendian datum due to broken mastering programs */
90 return le16_to_cpu(get_unaligned((__le16 *)p)); 90 return get_unaligned_le16(p);
91} 91}
92static inline unsigned int isonum_731(char *p) 92static inline unsigned int isonum_731(char *p)
93{ 93{
94 return le32_to_cpu(get_unaligned((__le32 *)p)); 94 return get_unaligned_le32(p);
95} 95}
96static inline unsigned int isonum_732(char *p) 96static inline unsigned int isonum_732(char *p)
97{ 97{
98 return be32_to_cpu(get_unaligned((__le32 *)p)); 98 return get_unaligned_be32(p);
99} 99}
100static inline unsigned int isonum_733(char *p) 100static inline unsigned int isonum_733(char *p)
101{ 101{
102 /* Ignore bigendian datum due to broken mastering programs */ 102 /* Ignore bigendian datum due to broken mastering programs */
103 return le32_to_cpu(get_unaligned((__le32 *)p)); 103 return get_unaligned_le32(p);
104} 104}
105extern int iso_date(char *, int); 105extern int iso_date(char *, int);
106 106
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29a..8299889a835e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
111 111
112 dlen = de->name_len[0]; 112 dlen = de->name_len[0];
113 dpnt = de->name; 113 dpnt = de->name;
114 /* Basic sanity check, whether name doesn't exceed dir entry */
115 if (de_len < dlen + sizeof(struct iso_directory_record)) {
116 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
117 " in block %lu of inode %lu\n", block,
118 dir->i_ino);
119 return 0;
120 }
114 121
115 if (sbi->s_rock && 122 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 123 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a38c7186c570..cd931ef1f000 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -407,22 +407,6 @@ void journal_commit_transaction(journal_t *journal)
407 jbd_debug (3, "JBD: commit phase 2\n"); 407 jbd_debug (3, "JBD: commit phase 2\n");
408 408
409 /* 409 /*
410 * First, drop modified flag: all accesses to the buffers
411 * will be tracked for a new trasaction only -bzzz
412 */
413 spin_lock(&journal->j_list_lock);
414 if (commit_transaction->t_buffers) {
415 new_jh = jh = commit_transaction->t_buffers->b_tnext;
416 do {
417 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
418 new_jh->b_modified == 0);
419 new_jh->b_modified = 0;
420 new_jh = new_jh->b_tnext;
421 } while (new_jh != jh);
422 }
423 spin_unlock(&journal->j_list_lock);
424
425 /*
426 * Now start flushing things to disk, in the order they appear 410 * Now start flushing things to disk, in the order they appear
427 * on the transaction lists. Data blocks go first. 411 * on the transaction lists. Data blocks go first.
428 */ 412 */
@@ -488,6 +472,9 @@ void journal_commit_transaction(journal_t *journal)
488 */ 472 */
489 commit_transaction->t_state = T_COMMIT; 473 commit_transaction->t_state = T_COMMIT;
490 474
475 J_ASSERT(commit_transaction->t_nr_buffers <=
476 commit_transaction->t_outstanding_credits);
477
491 descriptor = NULL; 478 descriptor = NULL;
492 bufs = 0; 479 bufs = 0;
493 while (commit_transaction->t_buffers) { 480 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0e081d5f32e8..b99c3b3654c4 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -534,7 +534,7 @@ int log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -728,7 +728,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
728 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 728 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
729 if (!journal->j_wbuf) { 729 if (!journal->j_wbuf) {
730 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 730 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
731 __FUNCTION__); 731 __func__);
732 kfree(journal); 732 kfree(journal);
733 journal = NULL; 733 journal = NULL;
734 goto out; 734 goto out;
@@ -782,7 +782,7 @@ journal_t * journal_init_inode (struct inode *inode)
782 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 782 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
783 if (!journal->j_wbuf) { 783 if (!journal->j_wbuf) {
784 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 784 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
785 __FUNCTION__); 785 __func__);
786 kfree(journal); 786 kfree(journal);
787 return NULL; 787 return NULL;
788 } 788 }
@@ -791,7 +791,7 @@ journal_t * journal_init_inode (struct inode *inode)
791 /* If that failed, give up */ 791 /* If that failed, give up */
792 if (err) { 792 if (err) {
793 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 793 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
794 __FUNCTION__); 794 __func__);
795 kfree(journal); 795 kfree(journal);
796 return NULL; 796 return NULL;
797 } 797 }
@@ -877,7 +877,7 @@ int journal_create(journal_t *journal)
877 */ 877 */
878 printk(KERN_EMERG 878 printk(KERN_EMERG
879 "%s: creation of journal on external device!\n", 879 "%s: creation of journal on external device!\n",
880 __FUNCTION__); 880 __func__);
881 BUG(); 881 BUG();
882 } 882 }
883 883
@@ -1657,7 +1657,7 @@ static struct journal_head *journal_alloc_journal_head(void)
1657 jbd_debug(1, "out of memory for journal_head\n"); 1657 jbd_debug(1, "out of memory for journal_head\n");
1658 if (time_after(jiffies, last_warning + 5*HZ)) { 1658 if (time_after(jiffies, last_warning + 5*HZ)) {
1659 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1659 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1660 __FUNCTION__); 1660 __func__);
1661 last_warning = jiffies; 1661 last_warning = jiffies;
1662 } 1662 }
1663 while (ret == NULL) { 1663 while (ret == NULL) {
@@ -1794,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1794 if (jh->b_frozen_data) { 1794 if (jh->b_frozen_data) {
1795 printk(KERN_WARNING "%s: freeing " 1795 printk(KERN_WARNING "%s: freeing "
1796 "b_frozen_data\n", 1796 "b_frozen_data\n",
1797 __FUNCTION__); 1797 __func__);
1798 jbd_free(jh->b_frozen_data, bh->b_size); 1798 jbd_free(jh->b_frozen_data, bh->b_size);
1799 } 1799 }
1800 if (jh->b_committed_data) { 1800 if (jh->b_committed_data) {
1801 printk(KERN_WARNING "%s: freeing " 1801 printk(KERN_WARNING "%s: freeing "
1802 "b_committed_data\n", 1802 "b_committed_data\n",
1803 __FUNCTION__); 1803 __func__);
1804 jbd_free(jh->b_committed_data, bh->b_size); 1804 jbd_free(jh->b_committed_data, bh->b_size);
1805 } 1805 }
1806 bh->b_private = NULL; 1806 bh->b_private = NULL;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d5f8eee7c88c..1bb43e987f4b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -138,7 +138,7 @@ repeat:
138oom: 138oom:
139 if (!journal_oom_retry) 139 if (!journal_oom_retry)
140 return -ENOMEM; 140 return -ENOMEM;
141 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 141 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
142 yield(); 142 yield();
143 goto repeat; 143 goto repeat;
144} 144}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 2c9e8f5d13aa..67ff2024c23c 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -609,6 +609,12 @@ repeat:
609 goto done; 609 goto done;
610 610
611 /* 611 /*
612 * this is the first time this transaction is touching this buffer,
613 * reset the modified flag
614 */
615 jh->b_modified = 0;
616
617 /*
612 * If there is already a copy-out version of this buffer, then we don't 618 * If there is already a copy-out version of this buffer, then we don't
613 * need to make another one 619 * need to make another one
614 */ 620 */
@@ -681,7 +687,7 @@ repeat:
681 if (!frozen_buffer) { 687 if (!frozen_buffer) {
682 printk(KERN_EMERG 688 printk(KERN_EMERG
683 "%s: OOM for frozen_buffer\n", 689 "%s: OOM for frozen_buffer\n",
684 __FUNCTION__); 690 __func__);
685 JBUFFER_TRACE(jh, "oom!"); 691 JBUFFER_TRACE(jh, "oom!");
686 error = -ENOMEM; 692 error = -ENOMEM;
687 jbd_lock_bh_state(bh); 693 jbd_lock_bh_state(bh);
@@ -820,9 +826,16 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
820 826
821 if (jh->b_transaction == NULL) { 827 if (jh->b_transaction == NULL) {
822 jh->b_transaction = transaction; 828 jh->b_transaction = transaction;
829
830 /* first access by this transaction */
831 jh->b_modified = 0;
832
823 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 833 JBUFFER_TRACE(jh, "file as BJ_Reserved");
824 __journal_file_buffer(jh, transaction, BJ_Reserved); 834 __journal_file_buffer(jh, transaction, BJ_Reserved);
825 } else if (jh->b_transaction == journal->j_committing_transaction) { 835 } else if (jh->b_transaction == journal->j_committing_transaction) {
836 /* first access by this transaction */
837 jh->b_modified = 0;
838
826 JBUFFER_TRACE(jh, "set next transaction"); 839 JBUFFER_TRACE(jh, "set next transaction");
827 jh->b_next_transaction = transaction; 840 jh->b_next_transaction = transaction;
828 } 841 }
@@ -891,7 +904,7 @@ repeat:
891 committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); 904 committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
892 if (!committed_data) { 905 if (!committed_data) {
893 printk(KERN_EMERG "%s: No memory for committed data\n", 906 printk(KERN_EMERG "%s: No memory for committed data\n",
894 __FUNCTION__); 907 __func__);
895 err = -ENOMEM; 908 err = -ENOMEM;
896 goto out; 909 goto out;
897 } 910 }
@@ -1222,6 +1235,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1222 struct journal_head *jh; 1235 struct journal_head *jh;
1223 int drop_reserve = 0; 1236 int drop_reserve = 0;
1224 int err = 0; 1237 int err = 0;
1238 int was_modified = 0;
1225 1239
1226 BUFFER_TRACE(bh, "entry"); 1240 BUFFER_TRACE(bh, "entry");
1227 1241
@@ -1240,6 +1254,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1240 goto not_jbd; 1254 goto not_jbd;
1241 } 1255 }
1242 1256
1257 /* keep track of wether or not this transaction modified us */
1258 was_modified = jh->b_modified;
1259
1243 /* 1260 /*
1244 * The buffer's going from the transaction, we must drop 1261 * The buffer's going from the transaction, we must drop
1245 * all references -bzzz 1262 * all references -bzzz
@@ -1257,7 +1274,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1257 1274
1258 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1275 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1259 1276
1260 drop_reserve = 1; 1277 /*
1278 * we only want to drop a reference if this transaction
1279 * modified the buffer
1280 */
1281 if (was_modified)
1282 drop_reserve = 1;
1261 1283
1262 /* 1284 /*
1263 * We are no longer going to journal this buffer. 1285 * We are no longer going to journal this buffer.
@@ -1297,7 +1319,13 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1297 if (jh->b_next_transaction) { 1319 if (jh->b_next_transaction) {
1298 J_ASSERT(jh->b_next_transaction == transaction); 1320 J_ASSERT(jh->b_next_transaction == transaction);
1299 jh->b_next_transaction = NULL; 1321 jh->b_next_transaction = NULL;
1300 drop_reserve = 1; 1322
1323 /*
1324 * only drop a reference if this transaction modified
1325 * the buffer
1326 */
1327 if (was_modified)
1328 drop_reserve = 1;
1301 } 1329 }
1302 } 1330 }
1303 1331
@@ -2069,7 +2097,7 @@ void __journal_refile_buffer(struct journal_head *jh)
2069 jh->b_transaction = jh->b_next_transaction; 2097 jh->b_transaction = jh->b_next_transaction;
2070 jh->b_next_transaction = NULL; 2098 jh->b_next_transaction = NULL;
2071 __journal_file_buffer(jh, jh->b_transaction, 2099 __journal_file_buffer(jh, jh->b_transaction,
2072 was_dirty ? BJ_Metadata : BJ_Reserved); 2100 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2073 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2101 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2074 2102
2075 if (was_dirty) 2103 if (was_dirty)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 568 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 569 stats.u.run.rs_blocks_logged = 0;
586 570
571 J_ASSERT(commit_transaction->t_nr_buffers <=
572 commit_transaction->t_outstanding_credits);
573
587 descriptor = NULL; 574 descriptor = NULL;
588 bufs = 0; 575 bufs = 0;
589 while (commit_transaction->t_buffers) { 576 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 954cff001df6..53632e3e8457 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -904,19 +904,10 @@ static void jbd2_stats_proc_init(journal_t *journal)
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); 905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) { 906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p; 907 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
908 p = create_proc_entry("history", S_IRUGO, 908 &jbd2_seq_history_fops, journal);
909 journal->j_proc_entry); 909 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
910 if (p) { 910 &jbd2_seq_info_fops, journal);
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 } 911 }
921} 912}
922 913
@@ -1006,13 +997,14 @@ fail:
1006 */ 997 */
1007 998
1008/** 999/**
1009 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1010 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1011 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1012 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1013 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1014 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1015 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1016 * 1008 *
1017 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1018 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1036 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1037 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1038 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1039 __FUNCTION__); 1031 __func__);
1040 kfree(journal); 1032 kfree(journal);
1041 journal = NULL; 1033 journal = NULL;
1042 goto out; 1034 goto out;
@@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1093 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1094 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1095 __FUNCTION__); 1087 __func__);
1096 kfree(journal); 1088 kfree(journal);
1097 return NULL; 1089 return NULL;
1098 } 1090 }
@@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1101 /* If that failed, give up */ 1093 /* If that failed, give up */
1102 if (err) { 1094 if (err) {
1103 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1104 __FUNCTION__); 1096 __func__);
1105 kfree(journal); 1097 kfree(journal);
1106 return NULL; 1098 return NULL;
1107 } 1099 }
@@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1187 */ 1179 */
1188 printk(KERN_EMERG 1180 printk(KERN_EMERG
1189 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1190 __FUNCTION__); 1182 __func__);
1191 BUG(); 1183 BUG();
1192 } 1184 }
1193 1185
@@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1985 1977
1986static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1987{ 1979{
1988 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1989 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1990 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1991} 1984}
1992 1985
1993/* 1986/*
@@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
2006 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2002 __func__);
2010 last_warning = jiffies; 2003 last_warning = jiffies;
2011 } 2004 }
2012 while (!ret) { 2005 while (!ret) {
@@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2143 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2144 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2145 "b_frozen_data\n", 2138 "b_frozen_data\n",
2146 __FUNCTION__); 2139 __func__);
2147 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2148 } 2141 }
2149 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2150 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2151 "b_committed_data\n", 2144 "b_committed_data\n",
2152 __FUNCTION__); 2145 __func__);
2153 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2154 } 2147 }
2155 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2314,10 +2307,12 @@ static int __init journal_init(void)
2314 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2315 2308
2316 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2317 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2318 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2319 jbd2_create_debugfs_entry(); 2315 }
2320 jbd2_create_jbd_stats_proc_entry();
2321 return ret; 2316 return ret;
2322} 2317}
2323 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of whether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index d14d5a4dc5ac..3ea36554107f 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -14,7 +14,7 @@ be fairly close.
14 alloc_sem 14 alloc_sem
15 --------- 15 ---------
16 16
17The alloc_sem is a per-filesystem semaphore, used primarily to ensure 17The alloc_sem is a per-filesystem mutex, used primarily to ensure
18contiguous allocation of space on the medium. It is automatically 18contiguous allocation of space on the medium. It is automatically
19obtained during space allocations (jffs2_reserve_space()) and freed 19obtained during space allocations (jffs2_reserve_space()) and freed
20upon write completion (jffs2_complete_reservation()). Note that 20upon write completion (jffs2_complete_reservation()). Note that
@@ -41,10 +41,10 @@ if the wbuf is currently holding any data is permitted, though.
41Ordering constraints: See f->sem. 41Ordering constraints: See f->sem.
42 42
43 43
44 File Semaphore f->sem 44 File Mutex f->sem
45 --------------------- 45 ---------------------
46 46
47This is the JFFS2-internal equivalent of the inode semaphore i->i_sem. 47This is the JFFS2-internal equivalent of the inode mutex i->i_sem.
48It protects the contents of the jffs2_inode_info private inode data, 48It protects the contents of the jffs2_inode_info private inode data,
49including the linked list of node fragments (but see the notes below on 49including the linked list of node fragments (but see the notes below on
50erase_completion_lock), etc. 50erase_completion_lock), etc.
@@ -60,14 +60,14 @@ lead to deadlock, unless we played games with unlocking the i_sem
60before calling the space allocation functions. 60before calling the space allocation functions.
61 61
62Instead of playing such games, we just have an extra internal 62Instead of playing such games, we just have an extra internal
63semaphore, which is obtained by the garbage collection code and also 63mutex, which is obtained by the garbage collection code and also
64by the normal file system code _after_ allocation of space. 64by the normal file system code _after_ allocation of space.
65 65
66Ordering constraints: 66Ordering constraints:
67 67
68 1. Never attempt to allocate space or lock alloc_sem with 68 1. Never attempt to allocate space or lock alloc_sem with
69 any f->sem held. 69 any f->sem held.
70 2. Never attempt to lock two file semaphores in one thread. 70 2. Never attempt to lock two file mutexes in one thread.
71 No ordering rules have been made for doing so. 71 No ordering rules have been made for doing so.
72 72
73 73
@@ -86,8 +86,8 @@ a simple spin_lock() rather than spin_lock_bh().
86 86
87Note that the per-inode list of physical nodes (f->nodes) is a special 87Note that the per-inode list of physical nodes (f->nodes) is a special
88case. Any changes to _valid_ nodes (i.e. ->flash_offset & 1 == 0) in 88case. Any changes to _valid_ nodes (i.e. ->flash_offset & 1 == 0) in
89the list are protected by the file semaphore f->sem. But the erase 89the list are protected by the file mutex f->sem. But the erase code
90code may remove _obsolete_ nodes from the list while holding only the 90may remove _obsolete_ nodes from the list while holding only the
91erase_completion_lock. So you can walk the list only while holding the 91erase_completion_lock. So you can walk the list only while holding the
92erase_completion_lock, and can drop the lock temporarily mid-walk as 92erase_completion_lock, and can drop the lock temporarily mid-walk as
93long as the pointer you're holding is to a _valid_ node, not an 93long as the pointer you're holding is to a _valid_ node, not an
@@ -124,10 +124,10 @@ Ordering constraints:
124 erase_free_sem 124 erase_free_sem
125 -------------- 125 --------------
126 126
127This semaphore is only used by the erase code which frees obsolete 127This mutex is only used by the erase code which frees obsolete node
128node references and the jffs2_garbage_collect_deletion_dirent() 128references and the jffs2_garbage_collect_deletion_dirent() function.
129function. The latter function on NAND flash must read _obsolete_ nodes 129The latter function on NAND flash must read _obsolete_ nodes to
130to determine whether the 'deletion dirent' under consideration can be 130determine whether the 'deletion dirent' under consideration can be
131discarded or whether it is still required to show that an inode has 131discarded or whether it is still required to show that an inode has
132been unlinked. Because reading from the flash may sleep, the 132been unlinked. Because reading from the flash may sleep, the
133erase_completion_lock cannot be held, so an alternative, more 133erase_completion_lock cannot be held, so an alternative, more
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 722a6b682951..c5e1450d79f9 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
46 46
47 47
48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
49 struct jffs2_inode_cache *ic) 49 struct jffs2_inode_cache *ic)
50{ 50{
51 struct jffs2_full_dirent *fd; 51 struct jffs2_full_dirent *fd;
52 52
@@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
68 continue; 68 continue;
69 } 69 }
70 70
71 if (child_ic->nlink++ && fd->type == DT_DIR) { 71 if (fd->type == DT_DIR) {
72 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", 72 if (child_ic->pino_nlink) {
73 fd->name, fd->ino, ic->ino); 73 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
74 /* TODO: What do we do about it? */ 74 fd->name, fd->ino, ic->ino);
75 } 75 /* TODO: What do we do about it? */
76 } else {
77 child_ic->pino_nlink = ic->ino;
78 }
79 } else
80 child_ic->pino_nlink++;
81
76 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); 82 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
77 /* Can't free scan_dents so far. We might need them in pass 2 */ 83 /* Can't free scan_dents so far. We might need them in pass 2 */
78 } 84 }
@@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
125 dbg_fsbuild("pass 2 starting\n"); 131 dbg_fsbuild("pass 2 starting\n");
126 132
127 for_each_inode(i, c, ic) { 133 for_each_inode(i, c, ic) {
128 if (ic->nlink) 134 if (ic->pino_nlink)
129 continue; 135 continue;
130 136
131 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); 137 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
@@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
232 /* Reduce nlink of the child. If it's now zero, stick it on the 238 /* Reduce nlink of the child. If it's now zero, stick it on the
233 dead_fds list to be cleaned up later. Else just free the fd */ 239 dead_fds list to be cleaned up later. Else just free the fd */
234 240
235 child_ic->nlink--; 241 if (fd->type == DT_DIR)
242 child_ic->pino_nlink = 0;
243 else
244 child_ic->pino_nlink--;
236 245
237 if (!child_ic->nlink) { 246 if (!child_ic->pino_nlink) {
238 dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", 247 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
239 fd->ino, fd->name); 248 fd->ino, fd->name);
240 fd->next = *dead_fds; 249 fd->next = *dead_fds;
241 *dead_fds = fd; 250 *dead_fds = fd;
242 } else { 251 } else {
243 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", 252 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
244 fd->ino, fd->name, child_ic->nlink); 253 fd->ino, fd->name, child_ic->pino_nlink);
245 jffs2_free_full_dirent(fd); 254 jffs2_free_full_dirent(fd);
246 } 255 }
247 } 256 }
@@ -345,6 +354,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
345 INIT_LIST_HEAD(&c->dirty_list); 354 INIT_LIST_HEAD(&c->dirty_list);
346 INIT_LIST_HEAD(&c->erasable_list); 355 INIT_LIST_HEAD(&c->erasable_list);
347 INIT_LIST_HEAD(&c->erasing_list); 356 INIT_LIST_HEAD(&c->erasing_list);
357 INIT_LIST_HEAD(&c->erase_checking_list);
348 INIT_LIST_HEAD(&c->erase_pending_list); 358 INIT_LIST_HEAD(&c->erase_pending_list);
349 INIT_LIST_HEAD(&c->erasable_pending_wbuf_list); 359 INIT_LIST_HEAD(&c->erasable_pending_wbuf_list);
350 INIT_LIST_HEAD(&c->erase_complete_list); 360 INIT_LIST_HEAD(&c->erase_complete_list);
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 3a32c64ed497..5544d31c066b 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -62,9 +62,9 @@ __jffs2_dbg_acct_sanity_check(struct jffs2_sb_info *c,
62void 62void
63__jffs2_dbg_fragtree_paranoia_check(struct jffs2_inode_info *f) 63__jffs2_dbg_fragtree_paranoia_check(struct jffs2_inode_info *f)
64{ 64{
65 down(&f->sem); 65 mutex_lock(&f->sem);
66 __jffs2_dbg_fragtree_paranoia_check_nolock(f); 66 __jffs2_dbg_fragtree_paranoia_check_nolock(f);
67 up(&f->sem); 67 mutex_unlock(&f->sem);
68} 68}
69 69
70void 70void
@@ -153,6 +153,139 @@ __jffs2_dbg_prewrite_paranoia_check(struct jffs2_sb_info *c,
153 kfree(buf); 153 kfree(buf);
154} 154}
155 155
156void __jffs2_dbg_superblock_counts(struct jffs2_sb_info *c)
157{
158 struct jffs2_eraseblock *jeb;
159 uint32_t free = 0, dirty = 0, used = 0, wasted = 0,
160 erasing = 0, bad = 0, unchecked = 0;
161 int nr_counted = 0;
162 int dump = 0;
163
164 if (c->gcblock) {
165 nr_counted++;
166 free += c->gcblock->free_size;
167 dirty += c->gcblock->dirty_size;
168 used += c->gcblock->used_size;
169 wasted += c->gcblock->wasted_size;
170 unchecked += c->gcblock->unchecked_size;
171 }
172 if (c->nextblock) {
173 nr_counted++;
174 free += c->nextblock->free_size;
175 dirty += c->nextblock->dirty_size;
176 used += c->nextblock->used_size;
177 wasted += c->nextblock->wasted_size;
178 unchecked += c->nextblock->unchecked_size;
179 }
180 list_for_each_entry(jeb, &c->clean_list, list) {
181 nr_counted++;
182 free += jeb->free_size;
183 dirty += jeb->dirty_size;
184 used += jeb->used_size;
185 wasted += jeb->wasted_size;
186 unchecked += jeb->unchecked_size;
187 }
188 list_for_each_entry(jeb, &c->very_dirty_list, list) {
189 nr_counted++;
190 free += jeb->free_size;
191 dirty += jeb->dirty_size;
192 used += jeb->used_size;
193 wasted += jeb->wasted_size;
194 unchecked += jeb->unchecked_size;
195 }
196 list_for_each_entry(jeb, &c->dirty_list, list) {
197 nr_counted++;
198 free += jeb->free_size;
199 dirty += jeb->dirty_size;
200 used += jeb->used_size;
201 wasted += jeb->wasted_size;
202 unchecked += jeb->unchecked_size;
203 }
204 list_for_each_entry(jeb, &c->erasable_list, list) {
205 nr_counted++;
206 free += jeb->free_size;
207 dirty += jeb->dirty_size;
208 used += jeb->used_size;
209 wasted += jeb->wasted_size;
210 unchecked += jeb->unchecked_size;
211 }
212 list_for_each_entry(jeb, &c->erasable_pending_wbuf_list, list) {
213 nr_counted++;
214 free += jeb->free_size;
215 dirty += jeb->dirty_size;
216 used += jeb->used_size;
217 wasted += jeb->wasted_size;
218 unchecked += jeb->unchecked_size;
219 }
220 list_for_each_entry(jeb, &c->erase_pending_list, list) {
221 nr_counted++;
222 free += jeb->free_size;
223 dirty += jeb->dirty_size;
224 used += jeb->used_size;
225 wasted += jeb->wasted_size;
226 unchecked += jeb->unchecked_size;
227 }
228 list_for_each_entry(jeb, &c->free_list, list) {
229 nr_counted++;
230 free += jeb->free_size;
231 dirty += jeb->dirty_size;
232 used += jeb->used_size;
233 wasted += jeb->wasted_size;
234 unchecked += jeb->unchecked_size;
235 }
236 list_for_each_entry(jeb, &c->bad_used_list, list) {
237 nr_counted++;
238 free += jeb->free_size;
239 dirty += jeb->dirty_size;
240 used += jeb->used_size;
241 wasted += jeb->wasted_size;
242 unchecked += jeb->unchecked_size;
243 }
244
245 list_for_each_entry(jeb, &c->erasing_list, list) {
246 nr_counted++;
247 erasing += c->sector_size;
248 }
249 list_for_each_entry(jeb, &c->erase_checking_list, list) {
250 nr_counted++;
251 erasing += c->sector_size;
252 }
253 list_for_each_entry(jeb, &c->erase_complete_list, list) {
254 nr_counted++;
255 erasing += c->sector_size;
256 }
257 list_for_each_entry(jeb, &c->bad_list, list) {
258 nr_counted++;
259 bad += c->sector_size;
260 }
261
262#define check(sz) \
263 if (sz != c->sz##_size) { \
264 printk(KERN_WARNING #sz "_size mismatch counted 0x%x, c->" #sz "_size 0x%x\n", \
265 sz, c->sz##_size); \
266 dump = 1; \
267 }
268 check(free);
269 check(dirty);
270 check(used);
271 check(wasted);
272 check(unchecked);
273 check(bad);
274 check(erasing);
275#undef check
276
277 if (nr_counted != c->nr_blocks) {
278 printk(KERN_WARNING "%s counted only 0x%x blocks of 0x%x. Where are the others?\n",
279 __func__, nr_counted, c->nr_blocks);
280 dump = 1;
281 }
282
283 if (dump) {
284 __jffs2_dbg_dump_block_lists_nolock(c);
285 BUG();
286 }
287}
288
156/* 289/*
157 * Check the space accounting and node_ref list correctness for the JFFS2 erasable block 'jeb'. 290 * Check the space accounting and node_ref list correctness for the JFFS2 erasable block 'jeb'.
158 */ 291 */
@@ -229,6 +362,9 @@ __jffs2_dbg_acct_paranoia_check_nolock(struct jffs2_sb_info *c,
229 } 362 }
230#endif 363#endif
231 364
365 if (!(c->flags & (JFFS2_SB_FLAG_BUILDING|JFFS2_SB_FLAG_SCANNING)))
366 __jffs2_dbg_superblock_counts(c);
367
232 return; 368 return;
233 369
234error: 370error:
@@ -268,7 +404,10 @@ __jffs2_dbg_dump_node_refs_nolock(struct jffs2_sb_info *c,
268 404
269 printk(JFFS2_DBG); 405 printk(JFFS2_DBG);
270 for (ref = jeb->first_node; ; ref = ref_next(ref)) { 406 for (ref = jeb->first_node; ; ref = ref_next(ref)) {
271 printk("%#08x(%#x)", ref_offset(ref), ref->__totlen); 407 printk("%#08x", ref_offset(ref));
408#ifdef TEST_TOTLEN
409 printk("(%x)", ref->__totlen);
410#endif
272 if (ref_next(ref)) 411 if (ref_next(ref))
273 printk("->"); 412 printk("->");
274 else 413 else
@@ -447,6 +586,21 @@ __jffs2_dbg_dump_block_lists_nolock(struct jffs2_sb_info *c)
447 } 586 }
448 } 587 }
449 } 588 }
589 if (list_empty(&c->erase_checking_list)) {
590 printk(JFFS2_DBG "erase_checking_list: empty\n");
591 } else {
592 struct list_head *this;
593
594 list_for_each(this, &c->erase_checking_list) {
595 struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list);
596
597 if (!(jeb->used_size == 0 && jeb->dirty_size == 0 && jeb->wasted_size == 0)) {
598 printk(JFFS2_DBG "erase_checking_list: %#08x (used %#08x, dirty %#08x, wasted %#08x, unchecked %#08x, free %#08x)\n",
599 jeb->offset, jeb->used_size, jeb->dirty_size, jeb->wasted_size,
600 jeb->unchecked_size, jeb->free_size);
601 }
602 }
603 }
450 604
451 if (list_empty(&c->erase_pending_list)) { 605 if (list_empty(&c->erase_pending_list)) {
452 printk(JFFS2_DBG "erase_pending_list: empty\n"); 606 printk(JFFS2_DBG "erase_pending_list: empty\n");
@@ -532,9 +686,9 @@ __jffs2_dbg_dump_block_lists_nolock(struct jffs2_sb_info *c)
532void 686void
533__jffs2_dbg_dump_fragtree(struct jffs2_inode_info *f) 687__jffs2_dbg_dump_fragtree(struct jffs2_inode_info *f)
534{ 688{
535 down(&f->sem); 689 mutex_lock(&f->sem);
536 jffs2_dbg_dump_fragtree_nolock(f); 690 jffs2_dbg_dump_fragtree_nolock(f);
537 up(&f->sem); 691 mutex_unlock(&f->sem);
538} 692}
539 693
540void 694void
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 4130adabd76e..a113ecc3bafe 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -38,6 +38,7 @@
38 38
39#if CONFIG_JFFS2_FS_DEBUG > 1 39#if CONFIG_JFFS2_FS_DEBUG > 1
40#define JFFS2_DBG_FRAGTREE2_MESSAGES 40#define JFFS2_DBG_FRAGTREE2_MESSAGES
41#define JFFS2_DBG_READINODE2_MESSAGES
41#define JFFS2_DBG_MEMALLOC_MESSAGES 42#define JFFS2_DBG_MEMALLOC_MESSAGES
42#endif 43#endif
43 44
@@ -81,28 +82,28 @@
81 do { \ 82 do { \
82 printk(JFFS2_ERR_MSG_PREFIX \ 83 printk(JFFS2_ERR_MSG_PREFIX \
83 " (%d) %s: " fmt, task_pid_nr(current), \ 84 " (%d) %s: " fmt, task_pid_nr(current), \
84 __FUNCTION__ , ##__VA_ARGS__); \ 85 __func__ , ##__VA_ARGS__); \
85 } while(0) 86 } while(0)
86 87
87#define JFFS2_WARNING(fmt, ...) \ 88#define JFFS2_WARNING(fmt, ...) \
88 do { \ 89 do { \
89 printk(JFFS2_WARN_MSG_PREFIX \ 90 printk(JFFS2_WARN_MSG_PREFIX \
90 " (%d) %s: " fmt, task_pid_nr(current), \ 91 " (%d) %s: " fmt, task_pid_nr(current), \
91 __FUNCTION__ , ##__VA_ARGS__); \ 92 __func__ , ##__VA_ARGS__); \
92 } while(0) 93 } while(0)
93 94
94#define JFFS2_NOTICE(fmt, ...) \ 95#define JFFS2_NOTICE(fmt, ...) \
95 do { \ 96 do { \
96 printk(JFFS2_NOTICE_MSG_PREFIX \ 97 printk(JFFS2_NOTICE_MSG_PREFIX \
97 " (%d) %s: " fmt, task_pid_nr(current), \ 98 " (%d) %s: " fmt, task_pid_nr(current), \
98 __FUNCTION__ , ##__VA_ARGS__); \ 99 __func__ , ##__VA_ARGS__); \
99 } while(0) 100 } while(0)
100 101
101#define JFFS2_DEBUG(fmt, ...) \ 102#define JFFS2_DEBUG(fmt, ...) \
102 do { \ 103 do { \
103 printk(JFFS2_DBG_MSG_PREFIX \ 104 printk(JFFS2_DBG_MSG_PREFIX \
104 " (%d) %s: " fmt, task_pid_nr(current), \ 105 " (%d) %s: " fmt, task_pid_nr(current), \
105 __FUNCTION__ , ##__VA_ARGS__); \ 106 __func__ , ##__VA_ARGS__); \
106 } while(0) 107 } while(0)
107 108
108/* 109/*
@@ -115,6 +116,11 @@
115#else 116#else
116#define dbg_readinode(fmt, ...) 117#define dbg_readinode(fmt, ...)
117#endif 118#endif
119#ifdef JFFS2_DBG_READINODE2_MESSAGES
120#define dbg_readinode2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
121#else
122#define dbg_readinode2(fmt, ...)
123#endif
118 124
119/* Fragtree build debugging messages */ 125/* Fragtree build debugging messages */
120#ifdef JFFS2_DBG_FRAGTREE_MESSAGES 126#ifdef JFFS2_DBG_FRAGTREE_MESSAGES
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index f948f7e6ec82..c0c141f6fde1 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -86,7 +86,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
86 dir_f = JFFS2_INODE_INFO(dir_i); 86 dir_f = JFFS2_INODE_INFO(dir_i);
87 c = JFFS2_SB_INFO(dir_i->i_sb); 87 c = JFFS2_SB_INFO(dir_i->i_sb);
88 88
89 down(&dir_f->sem); 89 mutex_lock(&dir_f->sem);
90 90
91 /* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */ 91 /* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */
92 for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) { 92 for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) {
@@ -99,7 +99,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
99 } 99 }
100 if (fd) 100 if (fd)
101 ino = fd->ino; 101 ino = fd->ino;
102 up(&dir_f->sem); 102 mutex_unlock(&dir_f->sem);
103 if (ino) { 103 if (ino) {
104 inode = jffs2_iget(dir_i->i_sb, ino); 104 inode = jffs2_iget(dir_i->i_sb, ino);
105 if (IS_ERR(inode)) { 105 if (IS_ERR(inode)) {
@@ -146,7 +146,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
146 } 146 }
147 147
148 curofs=1; 148 curofs=1;
149 down(&f->sem); 149 mutex_lock(&f->sem);
150 for (fd = f->dents; fd; fd = fd->next) { 150 for (fd = f->dents; fd; fd = fd->next) {
151 151
152 curofs++; 152 curofs++;
@@ -166,7 +166,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
166 break; 166 break;
167 offset++; 167 offset++;
168 } 168 }
169 up(&f->sem); 169 mutex_unlock(&f->sem);
170 out: 170 out:
171 filp->f_pos = offset; 171 filp->f_pos = offset;
172 return 0; 172 return 0;
@@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
208 f = JFFS2_INODE_INFO(inode); 208 f = JFFS2_INODE_INFO(inode);
209 dir_f = JFFS2_INODE_INFO(dir_i); 209 dir_f = JFFS2_INODE_INFO(dir_i);
210 210
211 /* jffs2_do_create() will want to lock it, _after_ reserving
212 space and taking c->alloc_sem. If we keep it locked here,
213 lockdep gets unhappy (although it's a false positive;
214 nothing else will be looking at this inode yet so there's
215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem);
217
211 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri,
212 dentry->d_name.name, dentry->d_name.len); 219 dentry->d_name.name, dentry->d_name.len);
213 if (ret) 220 if (ret)
@@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
219 d_instantiate(dentry, inode); 226 d_instantiate(dentry, inode);
220 227
221 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 228 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
222 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 229 inode->i_ino, inode->i_mode, inode->i_nlink,
230 f->inocache->pino_nlink, inode->i_mapping->nrpages));
223 return 0; 231 return 0;
224 232
225 fail: 233 fail:
@@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
243 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 251 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
244 dentry->d_name.len, dead_f, now); 252 dentry->d_name.len, dead_f, now);
245 if (dead_f->inocache) 253 if (dead_f->inocache)
246 dentry->d_inode->i_nlink = dead_f->inocache->nlink; 254 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
247 if (!ret) 255 if (!ret)
248 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 256 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
249 return ret; 257 return ret;
@@ -275,9 +283,9 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
275 ret = jffs2_do_link(c, dir_f, f->inocache->ino, type, dentry->d_name.name, dentry->d_name.len, now); 283 ret = jffs2_do_link(c, dir_f, f->inocache->ino, type, dentry->d_name.name, dentry->d_name.len, now);
276 284
277 if (!ret) { 285 if (!ret) {
278 down(&f->sem); 286 mutex_lock(&f->sem);
279 old_dentry->d_inode->i_nlink = ++f->inocache->nlink; 287 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
280 up(&f->sem); 288 mutex_unlock(&f->sem);
281 d_instantiate(dentry, old_dentry->d_inode); 289 d_instantiate(dentry, old_dentry->d_inode);
282 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 290 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
283 atomic_inc(&old_dentry->d_inode->i_count); 291 atomic_inc(&old_dentry->d_inode->i_count);
@@ -351,7 +359,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
351 359
352 if (IS_ERR(fn)) { 360 if (IS_ERR(fn)) {
353 /* Eeek. Wave bye bye */ 361 /* Eeek. Wave bye bye */
354 up(&f->sem); 362 mutex_unlock(&f->sem);
355 jffs2_complete_reservation(c); 363 jffs2_complete_reservation(c);
356 jffs2_clear_inode(inode); 364 jffs2_clear_inode(inode);
357 return PTR_ERR(fn); 365 return PTR_ERR(fn);
@@ -361,7 +369,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
361 f->target = kmalloc(targetlen + 1, GFP_KERNEL); 369 f->target = kmalloc(targetlen + 1, GFP_KERNEL);
362 if (!f->target) { 370 if (!f->target) {
363 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1); 371 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
364 up(&f->sem); 372 mutex_unlock(&f->sem);
365 jffs2_complete_reservation(c); 373 jffs2_complete_reservation(c);
366 jffs2_clear_inode(inode); 374 jffs2_clear_inode(inode);
367 return -ENOMEM; 375 return -ENOMEM;
@@ -374,7 +382,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
374 obsoleted by the first data write 382 obsoleted by the first data write
375 */ 383 */
376 f->metadata = fn; 384 f->metadata = fn;
377 up(&f->sem); 385 mutex_unlock(&f->sem);
378 386
379 jffs2_complete_reservation(c); 387 jffs2_complete_reservation(c);
380 388
@@ -406,7 +414,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
406 } 414 }
407 415
408 dir_f = JFFS2_INODE_INFO(dir_i); 416 dir_f = JFFS2_INODE_INFO(dir_i);
409 down(&dir_f->sem); 417 mutex_lock(&dir_f->sem);
410 418
411 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 419 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
412 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 420 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
@@ -429,7 +437,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
429 as if it were the final unlink() */ 437 as if it were the final unlink() */
430 jffs2_complete_reservation(c); 438 jffs2_complete_reservation(c);
431 jffs2_free_raw_dirent(rd); 439 jffs2_free_raw_dirent(rd);
432 up(&dir_f->sem); 440 mutex_unlock(&dir_f->sem);
433 jffs2_clear_inode(inode); 441 jffs2_clear_inode(inode);
434 return PTR_ERR(fd); 442 return PTR_ERR(fd);
435 } 443 }
@@ -442,7 +450,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
442 one if necessary. */ 450 one if necessary. */
443 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 451 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
444 452
445 up(&dir_f->sem); 453 mutex_unlock(&dir_f->sem);
446 jffs2_complete_reservation(c); 454 jffs2_complete_reservation(c);
447 455
448 d_instantiate(dentry, inode); 456 d_instantiate(dentry, inode);
@@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
493 501
494 inode->i_op = &jffs2_dir_inode_operations; 502 inode->i_op = &jffs2_dir_inode_operations;
495 inode->i_fop = &jffs2_dir_operations; 503 inode->i_fop = &jffs2_dir_operations;
496 /* Directories get nlink 2 at start */
497 inode->i_nlink = 2;
498 504
499 f = JFFS2_INODE_INFO(inode); 505 f = JFFS2_INODE_INFO(inode);
500 506
507 /* Directories get nlink 2 at start */
508 inode->i_nlink = 2;
509 /* but ic->pino_nlink is the parent ino# */
510 f->inocache->pino_nlink = dir_i->i_ino;
511
501 ri->data_crc = cpu_to_je32(0); 512 ri->data_crc = cpu_to_je32(0);
502 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 513 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
503 514
@@ -507,7 +518,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
507 518
508 if (IS_ERR(fn)) { 519 if (IS_ERR(fn)) {
509 /* Eeek. Wave bye bye */ 520 /* Eeek. Wave bye bye */
510 up(&f->sem); 521 mutex_unlock(&f->sem);
511 jffs2_complete_reservation(c); 522 jffs2_complete_reservation(c);
512 jffs2_clear_inode(inode); 523 jffs2_clear_inode(inode);
513 return PTR_ERR(fn); 524 return PTR_ERR(fn);
@@ -516,7 +527,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
516 obsoleted by the first data write 527 obsoleted by the first data write
517 */ 528 */
518 f->metadata = fn; 529 f->metadata = fn;
519 up(&f->sem); 530 mutex_unlock(&f->sem);
520 531
521 jffs2_complete_reservation(c); 532 jffs2_complete_reservation(c);
522 533
@@ -548,7 +559,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
548 } 559 }
549 560
550 dir_f = JFFS2_INODE_INFO(dir_i); 561 dir_f = JFFS2_INODE_INFO(dir_i);
551 down(&dir_f->sem); 562 mutex_lock(&dir_f->sem);
552 563
553 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 564 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
554 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 565 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
@@ -571,7 +582,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
571 as if it were the final unlink() */ 582 as if it were the final unlink() */
572 jffs2_complete_reservation(c); 583 jffs2_complete_reservation(c);
573 jffs2_free_raw_dirent(rd); 584 jffs2_free_raw_dirent(rd);
574 up(&dir_f->sem); 585 mutex_unlock(&dir_f->sem);
575 jffs2_clear_inode(inode); 586 jffs2_clear_inode(inode);
576 return PTR_ERR(fd); 587 return PTR_ERR(fd);
577 } 588 }
@@ -585,7 +596,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
585 one if necessary. */ 596 one if necessary. */
586 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 597 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
587 598
588 up(&dir_f->sem); 599 mutex_unlock(&dir_f->sem);
589 jffs2_complete_reservation(c); 600 jffs2_complete_reservation(c);
590 601
591 d_instantiate(dentry, inode); 602 d_instantiate(dentry, inode);
@@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
594 605
595static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) 606static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
596{ 607{
608 struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
609 struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
597 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); 610 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
598 struct jffs2_full_dirent *fd; 611 struct jffs2_full_dirent *fd;
599 int ret; 612 int ret;
613 uint32_t now = get_seconds();
600 614
601 for (fd = f->dents ; fd; fd = fd->next) { 615 for (fd = f->dents ; fd; fd = fd->next) {
602 if (fd->ino) 616 if (fd->ino)
603 return -ENOTEMPTY; 617 return -ENOTEMPTY;
604 } 618 }
605 ret = jffs2_unlink(dir_i, dentry); 619
606 if (!ret) 620 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
621 dentry->d_name.len, f, now);
622 if (!ret) {
623 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
624 clear_nlink(dentry->d_inode);
607 drop_nlink(dir_i); 625 drop_nlink(dir_i);
626 }
608 return ret; 627 return ret;
609} 628}
610 629
@@ -673,7 +692,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
673 692
674 if (IS_ERR(fn)) { 693 if (IS_ERR(fn)) {
675 /* Eeek. Wave bye bye */ 694 /* Eeek. Wave bye bye */
676 up(&f->sem); 695 mutex_unlock(&f->sem);
677 jffs2_complete_reservation(c); 696 jffs2_complete_reservation(c);
678 jffs2_clear_inode(inode); 697 jffs2_clear_inode(inode);
679 return PTR_ERR(fn); 698 return PTR_ERR(fn);
@@ -682,7 +701,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
682 obsoleted by the first data write 701 obsoleted by the first data write
683 */ 702 */
684 f->metadata = fn; 703 f->metadata = fn;
685 up(&f->sem); 704 mutex_unlock(&f->sem);
686 705
687 jffs2_complete_reservation(c); 706 jffs2_complete_reservation(c);
688 707
@@ -714,7 +733,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
714 } 733 }
715 734
716 dir_f = JFFS2_INODE_INFO(dir_i); 735 dir_f = JFFS2_INODE_INFO(dir_i);
717 down(&dir_f->sem); 736 mutex_lock(&dir_f->sem);
718 737
719 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 738 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
720 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 739 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
@@ -740,7 +759,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
740 as if it were the final unlink() */ 759 as if it were the final unlink() */
741 jffs2_complete_reservation(c); 760 jffs2_complete_reservation(c);
742 jffs2_free_raw_dirent(rd); 761 jffs2_free_raw_dirent(rd);
743 up(&dir_f->sem); 762 mutex_unlock(&dir_f->sem);
744 jffs2_clear_inode(inode); 763 jffs2_clear_inode(inode);
745 return PTR_ERR(fd); 764 return PTR_ERR(fd);
746 } 765 }
@@ -753,7 +772,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
753 one if necessary. */ 772 one if necessary. */
754 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 773 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
755 774
756 up(&dir_f->sem); 775 mutex_unlock(&dir_f->sem);
757 jffs2_complete_reservation(c); 776 jffs2_complete_reservation(c);
758 777
759 d_instantiate(dentry, inode); 778 d_instantiate(dentry, inode);
@@ -780,14 +799,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
780 if (S_ISDIR(new_dentry->d_inode->i_mode)) { 799 if (S_ISDIR(new_dentry->d_inode->i_mode)) {
781 struct jffs2_full_dirent *fd; 800 struct jffs2_full_dirent *fd;
782 801
783 down(&victim_f->sem); 802 mutex_lock(&victim_f->sem);
784 for (fd = victim_f->dents; fd; fd = fd->next) { 803 for (fd = victim_f->dents; fd; fd = fd->next) {
785 if (fd->ino) { 804 if (fd->ino) {
786 up(&victim_f->sem); 805 mutex_unlock(&victim_f->sem);
787 return -ENOTEMPTY; 806 return -ENOTEMPTY;
788 } 807 }
789 } 808 }
790 up(&victim_f->sem); 809 mutex_unlock(&victim_f->sem);
791 } 810 }
792 } 811 }
793 812
@@ -816,9 +835,12 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
816 /* Don't oops if the victim was a dirent pointing to an 835 /* Don't oops if the victim was a dirent pointing to an
817 inode which didn't exist. */ 836 inode which didn't exist. */
818 if (victim_f->inocache) { 837 if (victim_f->inocache) {
819 down(&victim_f->sem); 838 mutex_lock(&victim_f->sem);
820 victim_f->inocache->nlink--; 839 if (S_ISDIR(new_dentry->d_inode->i_mode))
821 up(&victim_f->sem); 840 victim_f->inocache->pino_nlink = 0;
841 else
842 victim_f->inocache->pino_nlink--;
843 mutex_unlock(&victim_f->sem);
822 } 844 }
823 } 845 }
824 846
@@ -836,11 +858,11 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
836 if (ret) { 858 if (ret) {
837 /* Oh shit. We really ought to make a single node which can do both atomically */ 859 /* Oh shit. We really ought to make a single node which can do both atomically */
838 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); 860 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
839 down(&f->sem); 861 mutex_lock(&f->sem);
840 inc_nlink(old_dentry->d_inode); 862 inc_nlink(old_dentry->d_inode);
841 if (f->inocache) 863 if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode))
842 f->inocache->nlink++; 864 f->inocache->pino_nlink++;
843 up(&f->sem); 865 mutex_unlock(&f->sem);
844 866
845 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 867 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
846 /* Might as well let the VFS know */ 868 /* Might as well let the VFS know */
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a1db9180633f..dddb2a6c9e2c 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -50,14 +50,14 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
50 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL); 50 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL);
51 if (!instr) { 51 if (!instr) {
52 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 52 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
53 down(&c->erase_free_sem); 53 mutex_lock(&c->erase_free_sem);
54 spin_lock(&c->erase_completion_lock); 54 spin_lock(&c->erase_completion_lock);
55 list_move(&jeb->list, &c->erase_pending_list); 55 list_move(&jeb->list, &c->erase_pending_list);
56 c->erasing_size -= c->sector_size; 56 c->erasing_size -= c->sector_size;
57 c->dirty_size += c->sector_size; 57 c->dirty_size += c->sector_size;
58 jeb->dirty_size = c->sector_size; 58 jeb->dirty_size = c->sector_size;
59 spin_unlock(&c->erase_completion_lock); 59 spin_unlock(&c->erase_completion_lock);
60 up(&c->erase_free_sem); 60 mutex_unlock(&c->erase_free_sem);
61 return; 61 return;
62 } 62 }
63 63
@@ -84,14 +84,14 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
84 if (ret == -ENOMEM || ret == -EAGAIN) { 84 if (ret == -ENOMEM || ret == -EAGAIN) {
85 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
87 down(&c->erase_free_sem); 87 mutex_lock(&c->erase_free_sem);
88 spin_lock(&c->erase_completion_lock); 88 spin_lock(&c->erase_completion_lock);
89 list_move(&jeb->list, &c->erase_pending_list); 89 list_move(&jeb->list, &c->erase_pending_list);
90 c->erasing_size -= c->sector_size; 90 c->erasing_size -= c->sector_size;
91 c->dirty_size += c->sector_size; 91 c->dirty_size += c->sector_size;
92 jeb->dirty_size = c->sector_size; 92 jeb->dirty_size = c->sector_size;
93 spin_unlock(&c->erase_completion_lock); 93 spin_unlock(&c->erase_completion_lock);
94 up(&c->erase_free_sem); 94 mutex_unlock(&c->erase_free_sem);
95 return; 95 return;
96 } 96 }
97 97
@@ -107,7 +107,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
107{ 107{
108 struct jffs2_eraseblock *jeb; 108 struct jffs2_eraseblock *jeb;
109 109
110 down(&c->erase_free_sem); 110 mutex_lock(&c->erase_free_sem);
111 111
112 spin_lock(&c->erase_completion_lock); 112 spin_lock(&c->erase_completion_lock);
113 113
@@ -116,9 +116,9 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
116 116
117 if (!list_empty(&c->erase_complete_list)) { 117 if (!list_empty(&c->erase_complete_list)) {
118 jeb = list_entry(c->erase_complete_list.next, struct jffs2_eraseblock, list); 118 jeb = list_entry(c->erase_complete_list.next, struct jffs2_eraseblock, list);
119 list_del(&jeb->list); 119 list_move(&jeb->list, &c->erase_checking_list);
120 spin_unlock(&c->erase_completion_lock); 120 spin_unlock(&c->erase_completion_lock);
121 up(&c->erase_free_sem); 121 mutex_unlock(&c->erase_free_sem);
122 jffs2_mark_erased_block(c, jeb); 122 jffs2_mark_erased_block(c, jeb);
123 123
124 if (!--count) { 124 if (!--count) {
@@ -139,7 +139,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
139 jffs2_free_jeb_node_refs(c, jeb); 139 jffs2_free_jeb_node_refs(c, jeb);
140 list_add(&jeb->list, &c->erasing_list); 140 list_add(&jeb->list, &c->erasing_list);
141 spin_unlock(&c->erase_completion_lock); 141 spin_unlock(&c->erase_completion_lock);
142 up(&c->erase_free_sem); 142 mutex_unlock(&c->erase_free_sem);
143 143
144 jffs2_erase_block(c, jeb); 144 jffs2_erase_block(c, jeb);
145 145
@@ -149,12 +149,12 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
149 149
150 /* Be nice */ 150 /* Be nice */
151 yield(); 151 yield();
152 down(&c->erase_free_sem); 152 mutex_lock(&c->erase_free_sem);
153 spin_lock(&c->erase_completion_lock); 153 spin_lock(&c->erase_completion_lock);
154 } 154 }
155 155
156 spin_unlock(&c->erase_completion_lock); 156 spin_unlock(&c->erase_completion_lock);
157 up(&c->erase_free_sem); 157 mutex_unlock(&c->erase_free_sem);
158 done: 158 done:
159 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n")); 159 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n"));
160} 160}
@@ -162,11 +162,11 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
162static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 162static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
163{ 163{
164 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 164 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
165 down(&c->erase_free_sem); 165 mutex_lock(&c->erase_free_sem);
166 spin_lock(&c->erase_completion_lock); 166 spin_lock(&c->erase_completion_lock);
167 list_move_tail(&jeb->list, &c->erase_complete_list); 167 list_move_tail(&jeb->list, &c->erase_complete_list);
168 spin_unlock(&c->erase_completion_lock); 168 spin_unlock(&c->erase_completion_lock);
169 up(&c->erase_free_sem); 169 mutex_unlock(&c->erase_free_sem);
170 /* Ensure that kupdated calls us again to mark them clean */ 170 /* Ensure that kupdated calls us again to mark them clean */
171 jffs2_erase_pending_trigger(c); 171 jffs2_erase_pending_trigger(c);
172} 172}
@@ -180,26 +180,26 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
180 failed too many times. */ 180 failed too many times. */
181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
182 /* We'd like to give this block another try. */ 182 /* We'd like to give this block another try. */
183 down(&c->erase_free_sem); 183 mutex_lock(&c->erase_free_sem);
184 spin_lock(&c->erase_completion_lock); 184 spin_lock(&c->erase_completion_lock);
185 list_move(&jeb->list, &c->erase_pending_list); 185 list_move(&jeb->list, &c->erase_pending_list);
186 c->erasing_size -= c->sector_size; 186 c->erasing_size -= c->sector_size;
187 c->dirty_size += c->sector_size; 187 c->dirty_size += c->sector_size;
188 jeb->dirty_size = c->sector_size; 188 jeb->dirty_size = c->sector_size;
189 spin_unlock(&c->erase_completion_lock); 189 spin_unlock(&c->erase_completion_lock);
190 up(&c->erase_free_sem); 190 mutex_unlock(&c->erase_free_sem);
191 return; 191 return;
192 } 192 }
193 } 193 }
194 194
195 down(&c->erase_free_sem); 195 mutex_lock(&c->erase_free_sem);
196 spin_lock(&c->erase_completion_lock); 196 spin_lock(&c->erase_completion_lock);
197 c->erasing_size -= c->sector_size; 197 c->erasing_size -= c->sector_size;
198 c->bad_size += c->sector_size; 198 c->bad_size += c->sector_size;
199 list_move(&jeb->list, &c->bad_list); 199 list_move(&jeb->list, &c->bad_list);
200 c->nr_erasing_blocks--; 200 c->nr_erasing_blocks--;
201 spin_unlock(&c->erase_completion_lock); 201 spin_unlock(&c->erase_completion_lock);
202 up(&c->erase_free_sem); 202 mutex_unlock(&c->erase_free_sem);
203 wake_up(&c->erase_wait); 203 wake_up(&c->erase_wait);
204} 204}
205 205
@@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
294 break; 294 break;
295#endif 295#endif
296 default: 296 default:
297 if (ic->nodes == (void *)ic && ic->nlink == 0) 297 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
298 jffs2_del_ino_cache(c, ic); 298 jffs2_del_ino_cache(c, ic);
299 } 299 }
300} 300}
@@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
332 if (c->mtd->point) { 332 if (c->mtd->point) {
333 unsigned long *wordebuf; 333 unsigned long *wordebuf;
334 334
335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); 335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
336 &retlen, &ebuf, NULL);
336 if (ret) { 337 if (ret) {
337 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 338 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
338 goto do_flash_read; 339 goto do_flash_read;
@@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
340 if (retlen < c->sector_size) { 341 if (retlen < c->sector_size) {
341 /* Don't muck about if it won't let us point to the whole erase sector */ 342 /* Don't muck about if it won't let us point to the whole erase sector */
342 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 343 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
343 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); 344 c->mtd->unpoint(c->mtd, jeb->offset, retlen);
344 goto do_flash_read; 345 goto do_flash_read;
345 } 346 }
346 wordebuf = ebuf-sizeof(*wordebuf); 347 wordebuf = ebuf-sizeof(*wordebuf);
@@ -349,10 +350,12 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
349 if (*++wordebuf != ~0) 350 if (*++wordebuf != ~0)
350 break; 351 break;
351 } while(--retlen); 352 } while(--retlen);
352 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); 353 c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
353 if (retlen) 354 if (retlen) {
354 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 355 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
355 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 356 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
357 return -EIO;
358 }
356 return 0; 359 return 0;
357 } 360 }
358 do_flash_read: 361 do_flash_read:
@@ -373,10 +376,12 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
373 ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf); 376 ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf);
374 if (ret) { 377 if (ret) {
375 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); 378 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
379 ret = -EIO;
376 goto fail; 380 goto fail;
377 } 381 }
378 if (retlen != readlen) { 382 if (retlen != readlen) {
379 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen); 383 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen);
384 ret = -EIO;
380 goto fail; 385 goto fail;
381 } 386 }
382 for (i=0; i<readlen; i += sizeof(unsigned long)) { 387 for (i=0; i<readlen; i += sizeof(unsigned long)) {
@@ -385,6 +390,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
385 if (*datum + 1) { 390 if (*datum + 1) {
386 *bad_offset += i; 391 *bad_offset += i;
387 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset); 392 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset);
393 ret = -EIO;
388 goto fail; 394 goto fail;
389 } 395 }
390 } 396 }
@@ -419,9 +425,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
419 if (jffs2_write_nand_cleanmarker(c, jeb)) 425 if (jffs2_write_nand_cleanmarker(c, jeb))
420 goto filebad; 426 goto filebad;
421 } 427 }
422
423 /* Everything else got zeroed before the erase */
424 jeb->free_size = c->sector_size;
425 } else { 428 } else {
426 429
427 struct kvec vecs[1]; 430 struct kvec vecs[1];
@@ -449,48 +452,50 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
449 452
450 goto filebad; 453 goto filebad;
451 } 454 }
452
453 /* Everything else got zeroed before the erase */
454 jeb->free_size = c->sector_size;
455 /* FIXME Special case for cleanmarker in empty block */
456 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL);
457 } 455 }
456 /* Everything else got zeroed before the erase */
457 jeb->free_size = c->sector_size;
458 458
459 down(&c->erase_free_sem); 459 mutex_lock(&c->erase_free_sem);
460 spin_lock(&c->erase_completion_lock); 460 spin_lock(&c->erase_completion_lock);
461
461 c->erasing_size -= c->sector_size; 462 c->erasing_size -= c->sector_size;
462 c->free_size += jeb->free_size; 463 c->free_size += c->sector_size;
463 c->used_size += jeb->used_size;
464 464
465 jffs2_dbg_acct_sanity_check_nolock(c,jeb); 465 /* Account for cleanmarker now, if it's in-band */
466 jffs2_dbg_acct_paranoia_check_nolock(c, jeb); 466 if (c->cleanmarker_size && !jffs2_cleanmarker_oob(c))
467 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL);
467 468
468 list_add_tail(&jeb->list, &c->free_list); 469 list_move_tail(&jeb->list, &c->free_list);
469 c->nr_erasing_blocks--; 470 c->nr_erasing_blocks--;
470 c->nr_free_blocks++; 471 c->nr_free_blocks++;
472
473 jffs2_dbg_acct_sanity_check_nolock(c, jeb);
474 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
475
471 spin_unlock(&c->erase_completion_lock); 476 spin_unlock(&c->erase_completion_lock);
472 up(&c->erase_free_sem); 477 mutex_unlock(&c->erase_free_sem);
473 wake_up(&c->erase_wait); 478 wake_up(&c->erase_wait);
474 return; 479 return;
475 480
476filebad: 481filebad:
477 down(&c->erase_free_sem); 482 mutex_lock(&c->erase_free_sem);
478 spin_lock(&c->erase_completion_lock); 483 spin_lock(&c->erase_completion_lock);
479 /* Stick it on a list (any list) so erase_failed can take it 484 /* Stick it on a list (any list) so erase_failed can take it
480 right off again. Silly, but shouldn't happen often. */ 485 right off again. Silly, but shouldn't happen often. */
481 list_add(&jeb->list, &c->erasing_list); 486 list_move(&jeb->list, &c->erasing_list);
482 spin_unlock(&c->erase_completion_lock); 487 spin_unlock(&c->erase_completion_lock);
483 up(&c->erase_free_sem); 488 mutex_unlock(&c->erase_free_sem);
484 jffs2_erase_failed(c, jeb, bad_offset); 489 jffs2_erase_failed(c, jeb, bad_offset);
485 return; 490 return;
486 491
487refile: 492refile:
488 /* Stick it back on the list from whence it came and come back later */ 493 /* Stick it back on the list from whence it came and come back later */
489 jffs2_erase_pending_trigger(c); 494 jffs2_erase_pending_trigger(c);
490 down(&c->erase_free_sem); 495 mutex_lock(&c->erase_free_sem);
491 spin_lock(&c->erase_completion_lock); 496 spin_lock(&c->erase_completion_lock);
492 list_add(&jeb->list, &c->erase_complete_list); 497 list_move(&jeb->list, &c->erase_complete_list);
493 spin_unlock(&c->erase_completion_lock); 498 spin_unlock(&c->erase_completion_lock);
494 up(&c->erase_free_sem); 499 mutex_unlock(&c->erase_free_sem);
495 return; 500 return;
496} 501}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index dcc2734e0b5d..5e920343b2c5 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -115,9 +115,9 @@ static int jffs2_readpage (struct file *filp, struct page *pg)
115 struct jffs2_inode_info *f = JFFS2_INODE_INFO(pg->mapping->host); 115 struct jffs2_inode_info *f = JFFS2_INODE_INFO(pg->mapping->host);
116 int ret; 116 int ret;
117 117
118 down(&f->sem); 118 mutex_lock(&f->sem);
119 ret = jffs2_do_readpage_unlock(pg->mapping->host, pg); 119 ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);
120 up(&f->sem); 120 mutex_unlock(&f->sem);
121 return ret; 121 return ret;
122} 122}
123 123
@@ -154,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
154 if (ret) 154 if (ret)
155 goto out_page; 155 goto out_page;
156 156
157 down(&f->sem); 157 mutex_lock(&f->sem);
158 memset(&ri, 0, sizeof(ri)); 158 memset(&ri, 0, sizeof(ri));
159 159
160 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 160 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -181,7 +181,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
181 if (IS_ERR(fn)) { 181 if (IS_ERR(fn)) {
182 ret = PTR_ERR(fn); 182 ret = PTR_ERR(fn);
183 jffs2_complete_reservation(c); 183 jffs2_complete_reservation(c);
184 up(&f->sem); 184 mutex_unlock(&f->sem);
185 goto out_page; 185 goto out_page;
186 } 186 }
187 ret = jffs2_add_full_dnode_to_inode(c, f, fn); 187 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -195,12 +195,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
195 jffs2_mark_node_obsolete(c, fn->raw); 195 jffs2_mark_node_obsolete(c, fn->raw);
196 jffs2_free_full_dnode(fn); 196 jffs2_free_full_dnode(fn);
197 jffs2_complete_reservation(c); 197 jffs2_complete_reservation(c);
198 up(&f->sem); 198 mutex_unlock(&f->sem);
199 goto out_page; 199 goto out_page;
200 } 200 }
201 jffs2_complete_reservation(c); 201 jffs2_complete_reservation(c);
202 inode->i_size = pageofs; 202 inode->i_size = pageofs;
203 up(&f->sem); 203 mutex_unlock(&f->sem);
204 } 204 }
205 205
206 /* 206 /*
@@ -209,9 +209,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
209 * case of a short-copy. 209 * case of a short-copy.
210 */ 210 */
211 if (!PageUptodate(pg)) { 211 if (!PageUptodate(pg)) {
212 down(&f->sem); 212 mutex_lock(&f->sem);
213 ret = jffs2_do_readpage_nolock(inode, pg); 213 ret = jffs2_do_readpage_nolock(inode, pg);
214 up(&f->sem); 214 mutex_unlock(&f->sem);
215 if (ret) 215 if (ret)
216 goto out_page; 216 goto out_page;
217 } 217 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index e26ea78c7892..086c43830221 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -36,6 +36,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
36 unsigned int ivalid; 36 unsigned int ivalid;
37 uint32_t alloclen; 37 uint32_t alloclen;
38 int ret; 38 int ret;
39 int alloc_type = ALLOC_NORMAL;
39 40
40 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino)); 41 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino));
41 42
@@ -50,20 +51,20 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
50 mdata = (char *)&dev; 51 mdata = (char *)&dev;
51 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen)); 52 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of kdev_t\n", mdatalen));
52 } else if (S_ISLNK(inode->i_mode)) { 53 } else if (S_ISLNK(inode->i_mode)) {
53 down(&f->sem); 54 mutex_lock(&f->sem);
54 mdatalen = f->metadata->size; 55 mdatalen = f->metadata->size;
55 mdata = kmalloc(f->metadata->size, GFP_USER); 56 mdata = kmalloc(f->metadata->size, GFP_USER);
56 if (!mdata) { 57 if (!mdata) {
57 up(&f->sem); 58 mutex_unlock(&f->sem);
58 return -ENOMEM; 59 return -ENOMEM;
59 } 60 }
60 ret = jffs2_read_dnode(c, f, f->metadata, mdata, 0, mdatalen); 61 ret = jffs2_read_dnode(c, f, f->metadata, mdata, 0, mdatalen);
61 if (ret) { 62 if (ret) {
62 up(&f->sem); 63 mutex_unlock(&f->sem);
63 kfree(mdata); 64 kfree(mdata);
64 return ret; 65 return ret;
65 } 66 }
66 up(&f->sem); 67 mutex_unlock(&f->sem);
67 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen)); 68 D1(printk(KERN_DEBUG "jffs2_setattr(): Writing %d bytes of symlink target\n", mdatalen));
68 } 69 }
69 70
@@ -82,7 +83,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
82 kfree(mdata); 83 kfree(mdata);
83 return ret; 84 return ret;
84 } 85 }
85 down(&f->sem); 86 mutex_lock(&f->sem);
86 ivalid = iattr->ia_valid; 87 ivalid = iattr->ia_valid;
87 88
88 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 89 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -115,6 +116,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
115 ri->compr = JFFS2_COMPR_ZERO; 116 ri->compr = JFFS2_COMPR_ZERO;
116 ri->dsize = cpu_to_je32(iattr->ia_size - inode->i_size); 117 ri->dsize = cpu_to_je32(iattr->ia_size - inode->i_size);
117 ri->offset = cpu_to_je32(inode->i_size); 118 ri->offset = cpu_to_je32(inode->i_size);
119 } else if (ivalid & ATTR_SIZE && !iattr->ia_size) {
120 /* For truncate-to-zero, treat it as deletion because
121 it'll always be obsoleting all previous nodes */
122 alloc_type = ALLOC_DELETION;
118 } 123 }
119 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 124 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
120 if (mdatalen) 125 if (mdatalen)
@@ -122,14 +127,14 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
122 else 127 else
123 ri->data_crc = cpu_to_je32(0); 128 ri->data_crc = cpu_to_je32(0);
124 129
125 new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, ALLOC_NORMAL); 130 new_metadata = jffs2_write_dnode(c, f, ri, mdata, mdatalen, alloc_type);
126 if (S_ISLNK(inode->i_mode)) 131 if (S_ISLNK(inode->i_mode))
127 kfree(mdata); 132 kfree(mdata);
128 133
129 if (IS_ERR(new_metadata)) { 134 if (IS_ERR(new_metadata)) {
130 jffs2_complete_reservation(c); 135 jffs2_complete_reservation(c);
131 jffs2_free_raw_inode(ri); 136 jffs2_free_raw_inode(ri);
132 up(&f->sem); 137 mutex_unlock(&f->sem);
133 return PTR_ERR(new_metadata); 138 return PTR_ERR(new_metadata);
134 } 139 }
135 /* It worked. Update the inode */ 140 /* It worked. Update the inode */
@@ -149,6 +154,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
149 if (ivalid & ATTR_SIZE && inode->i_size < iattr->ia_size) { 154 if (ivalid & ATTR_SIZE && inode->i_size < iattr->ia_size) {
150 jffs2_add_full_dnode_to_inode(c, f, new_metadata); 155 jffs2_add_full_dnode_to_inode(c, f, new_metadata);
151 inode->i_size = iattr->ia_size; 156 inode->i_size = iattr->ia_size;
157 inode->i_blocks = (inode->i_size + 511) >> 9;
152 f->metadata = NULL; 158 f->metadata = NULL;
153 } else { 159 } else {
154 f->metadata = new_metadata; 160 f->metadata = new_metadata;
@@ -159,7 +165,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
159 } 165 }
160 jffs2_free_raw_inode(ri); 166 jffs2_free_raw_inode(ri);
161 167
162 up(&f->sem); 168 mutex_unlock(&f->sem);
163 jffs2_complete_reservation(c); 169 jffs2_complete_reservation(c);
164 170
165 /* We have to do the vmtruncate() without f->sem held, since 171 /* We have to do the vmtruncate() without f->sem held, since
@@ -167,8 +173,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
167 We are protected from a simultaneous write() extending i_size 173 We are protected from a simultaneous write() extending i_size
168 back past iattr->ia_size, because do_truncate() holds the 174 back past iattr->ia_size, because do_truncate() holds the
169 generic inode semaphore. */ 175 generic inode semaphore. */
170 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) 176 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
171 vmtruncate(inode, iattr->ia_size); 177 vmtruncate(inode, iattr->ia_size);
178 inode->i_blocks = (inode->i_size + 511) >> 9;
179 }
172 180
173 return 0; 181 return 0;
174} 182}
@@ -248,12 +256,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
248 c = JFFS2_SB_INFO(inode->i_sb); 256 c = JFFS2_SB_INFO(inode->i_sb);
249 257
250 jffs2_init_inode_info(f); 258 jffs2_init_inode_info(f);
251 down(&f->sem); 259 mutex_lock(&f->sem);
252 260
253 ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node); 261 ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node);
254 262
255 if (ret) { 263 if (ret) {
256 up(&f->sem); 264 mutex_unlock(&f->sem);
257 iget_failed(inode); 265 iget_failed(inode);
258 return ERR_PTR(ret); 266 return ERR_PTR(ret);
259 } 267 }
@@ -265,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
265 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
266 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
267 275
268 inode->i_nlink = f->inocache->nlink; 276 inode->i_nlink = f->inocache->pino_nlink;
269 277
270 inode->i_blocks = (inode->i_size + 511) >> 9; 278 inode->i_blocks = (inode->i_size + 511) >> 9;
271 279
@@ -278,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
278 case S_IFDIR: 286 case S_IFDIR:
279 { 287 {
280 struct jffs2_full_dirent *fd; 288 struct jffs2_full_dirent *fd;
289 inode->i_nlink = 2; /* parent and '.' */
281 290
282 for (fd=f->dents; fd; fd = fd->next) { 291 for (fd=f->dents; fd; fd = fd->next) {
283 if (fd->type == DT_DIR && fd->ino) 292 if (fd->type == DT_DIR && fd->ino)
284 inc_nlink(inode); 293 inc_nlink(inode);
285 } 294 }
286 /* and '..' */
287 inc_nlink(inode);
288 /* Root dir gets i_nlink 3 for some reason */ 295 /* Root dir gets i_nlink 3 for some reason */
289 if (inode->i_ino == 1) 296 if (inode->i_ino == 1)
290 inc_nlink(inode); 297 inc_nlink(inode);
@@ -330,7 +337,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
330 printk(KERN_WARNING "jffs2_read_inode(): Bogus imode %o for ino %lu\n", inode->i_mode, (unsigned long)inode->i_ino); 337 printk(KERN_WARNING "jffs2_read_inode(): Bogus imode %o for ino %lu\n", inode->i_mode, (unsigned long)inode->i_ino);
331 } 338 }
332 339
333 up(&f->sem); 340 mutex_unlock(&f->sem);
334 341
335 D1(printk(KERN_DEBUG "jffs2_read_inode() returning\n")); 342 D1(printk(KERN_DEBUG "jffs2_read_inode() returning\n"));
336 unlock_new_inode(inode); 343 unlock_new_inode(inode);
@@ -339,7 +346,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
339error_io: 346error_io:
340 ret = -EIO; 347 ret = -EIO;
341error: 348error:
342 up(&f->sem); 349 mutex_unlock(&f->sem);
343 jffs2_do_clear_inode(c, f); 350 jffs2_do_clear_inode(c, f);
344 iget_failed(inode); 351 iget_failed(inode);
345 return ERR_PTR(ret); 352 return ERR_PTR(ret);
@@ -380,9 +387,9 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
380 Flush the writebuffer, if neccecary, else we loose it */ 387 Flush the writebuffer, if neccecary, else we loose it */
381 if (!(sb->s_flags & MS_RDONLY)) { 388 if (!(sb->s_flags & MS_RDONLY)) {
382 jffs2_stop_garbage_collect_thread(c); 389 jffs2_stop_garbage_collect_thread(c);
383 down(&c->alloc_sem); 390 mutex_lock(&c->alloc_sem);
384 jffs2_flush_wbuf_pad(c); 391 jffs2_flush_wbuf_pad(c);
385 up(&c->alloc_sem); 392 mutex_unlock(&c->alloc_sem);
386 } 393 }
387 394
388 if (!(*flags & MS_RDONLY)) 395 if (!(*flags & MS_RDONLY))
@@ -429,7 +436,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
429 436
430 f = JFFS2_INODE_INFO(inode); 437 f = JFFS2_INODE_INFO(inode);
431 jffs2_init_inode_info(f); 438 jffs2_init_inode_info(f);
432 down(&f->sem); 439 mutex_lock(&f->sem);
433 440
434 memset(ri, 0, sizeof(*ri)); 441 memset(ri, 0, sizeof(*ri));
435 /* Set OS-specific defaults for new inodes */ 442 /* Set OS-specific defaults for new inodes */
@@ -578,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c,
578} 585}
579 586
580struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 587struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
581 int inum, int nlink) 588 int inum, int unlinked)
582{ 589{
583 struct inode *inode; 590 struct inode *inode;
584 struct jffs2_inode_cache *ic; 591 struct jffs2_inode_cache *ic;
585 if (!nlink) { 592
593 if (unlinked) {
586 /* The inode has zero nlink but its nodes weren't yet marked 594 /* The inode has zero nlink but its nodes weren't yet marked
587 obsolete. This has to be because we're still waiting for 595 obsolete. This has to be because we're still waiting for
588 the final (close() and) iput() to happen. 596 the final (close() and) iput() to happen.
@@ -630,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
630 return ERR_CAST(inode); 638 return ERR_CAST(inode);
631 } 639 }
632 if (is_bad_inode(inode)) { 640 if (is_bad_inode(inode)) {
633 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", 641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n",
634 inum, nlink); 642 inum, unlinked);
635 /* NB. This will happen again. We need to do something appropriate here. */ 643 /* NB. This will happen again. We need to do something appropriate here. */
636 iput(inode); 644 iput(inode);
637 return ERR_PTR(-EIO); 645 return ERR_PTR(-EIO);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 32ff0373aa04..090c556ffed2 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -126,7 +126,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
126 int ret = 0, inum, nlink; 126 int ret = 0, inum, nlink;
127 int xattr = 0; 127 int xattr = 0;
128 128
129 if (down_interruptible(&c->alloc_sem)) 129 if (mutex_lock_interruptible(&c->alloc_sem))
130 return -EINTR; 130 return -EINTR;
131 131
132 for (;;) { 132 for (;;) {
@@ -143,7 +143,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
143 c->unchecked_size); 143 c->unchecked_size);
144 jffs2_dbg_dump_block_lists_nolock(c); 144 jffs2_dbg_dump_block_lists_nolock(c);
145 spin_unlock(&c->erase_completion_lock); 145 spin_unlock(&c->erase_completion_lock);
146 up(&c->alloc_sem); 146 mutex_unlock(&c->alloc_sem);
147 return -ENOSPC; 147 return -ENOSPC;
148 } 148 }
149 149
@@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
161 continue; 161 continue;
162 } 162 }
163 163
164 if (!ic->nlink) { 164 if (!ic->pino_nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166 ic->ino)); 166 ic->ino));
167 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic); 168 jffs2_xattr_delete_inode(c, ic);
@@ -190,7 +190,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
190 made no progress in this case, but that should be OK */ 190 made no progress in this case, but that should be OK */
191 c->checked_ino--; 191 c->checked_ino--;
192 192
193 up(&c->alloc_sem); 193 mutex_unlock(&c->alloc_sem);
194 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); 194 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 return 0; 195 return 0;
196 196
@@ -210,7 +210,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
210 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino); 210 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 211
212 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT); 212 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 up(&c->alloc_sem); 213 mutex_unlock(&c->alloc_sem);
214 return ret; 214 return ret;
215 } 215 }
216 216
@@ -221,9 +221,15 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
221 jeb = jffs2_find_gc_block(c); 221 jeb = jffs2_find_gc_block(c);
222 222
223 if (!jeb) { 223 if (!jeb) {
224 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n")); 224 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
225 if (!list_empty(&c->erase_pending_list)) {
226 spin_unlock(&c->erase_completion_lock);
227 mutex_unlock(&c->alloc_sem);
228 return -EAGAIN;
229 }
230 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
225 spin_unlock(&c->erase_completion_lock); 231 spin_unlock(&c->erase_completion_lock);
226 up(&c->alloc_sem); 232 mutex_unlock(&c->alloc_sem);
227 return -EIO; 233 return -EIO;
228 } 234 }
229 235
@@ -232,7 +238,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
232 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size)); 238 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
233 239
234 if (!jeb->used_size) { 240 if (!jeb->used_size) {
235 up(&c->alloc_sem); 241 mutex_unlock(&c->alloc_sem);
236 goto eraseit; 242 goto eraseit;
237 } 243 }
238 244
@@ -248,7 +254,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
248 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size); 254 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
249 jeb->gc_node = raw; 255 jeb->gc_node = raw;
250 spin_unlock(&c->erase_completion_lock); 256 spin_unlock(&c->erase_completion_lock);
251 up(&c->alloc_sem); 257 mutex_unlock(&c->alloc_sem);
252 BUG(); 258 BUG();
253 } 259 }
254 } 260 }
@@ -266,7 +272,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
266 /* Just mark it obsolete */ 272 /* Just mark it obsolete */
267 jffs2_mark_node_obsolete(c, raw); 273 jffs2_mark_node_obsolete(c, raw);
268 } 274 }
269 up(&c->alloc_sem); 275 mutex_unlock(&c->alloc_sem);
270 goto eraseit_lock; 276 goto eraseit_lock;
271 } 277 }
272 278
@@ -334,7 +340,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
334 */ 340 */
335 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n", 341 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
336 ic->ino, ic->state); 342 ic->ino, ic->state);
337 up(&c->alloc_sem); 343 mutex_unlock(&c->alloc_sem);
338 spin_unlock(&c->inocache_lock); 344 spin_unlock(&c->inocache_lock);
339 BUG(); 345 BUG();
340 346
@@ -345,7 +351,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
345 the alloc_sem() (for marking nodes invalid) so we must 351 the alloc_sem() (for marking nodes invalid) so we must
346 drop the alloc_sem before sleeping. */ 352 drop the alloc_sem before sleeping. */
347 353
348 up(&c->alloc_sem); 354 mutex_unlock(&c->alloc_sem);
349 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n", 355 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
350 ic->ino, ic->state)); 356 ic->ino, ic->state));
351 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock); 357 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
@@ -392,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
392 it's vaguely possible. */ 398 it's vaguely possible. */
393 399
394 inum = ic->ino; 400 inum = ic->ino;
395 nlink = ic->nlink; 401 nlink = ic->pino_nlink;
396 spin_unlock(&c->inocache_lock); 402 spin_unlock(&c->inocache_lock);
397 403
398 f = jffs2_gc_fetch_inode(c, inum, nlink); 404 f = jffs2_gc_fetch_inode(c, inum, !nlink);
399 if (IS_ERR(f)) { 405 if (IS_ERR(f)) {
400 ret = PTR_ERR(f); 406 ret = PTR_ERR(f);
401 goto release_sem; 407 goto release_sem;
@@ -416,7 +422,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
416 ret = -ENOSPC; 422 ret = -ENOSPC;
417 } 423 }
418 release_sem: 424 release_sem:
419 up(&c->alloc_sem); 425 mutex_unlock(&c->alloc_sem);
420 426
421 eraseit_lock: 427 eraseit_lock:
422 /* If we've finished this block, start it erasing */ 428 /* If we've finished this block, start it erasing */
@@ -445,7 +451,7 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era
445 uint32_t start = 0, end = 0, nrfrags = 0; 451 uint32_t start = 0, end = 0, nrfrags = 0;
446 int ret = 0; 452 int ret = 0;
447 453
448 down(&f->sem); 454 mutex_lock(&f->sem);
449 455
450 /* Now we have the lock for this inode. Check that it's still the one at the head 456 /* Now we have the lock for this inode. Check that it's still the one at the head
451 of the list. */ 457 of the list. */
@@ -525,7 +531,7 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era
525 } 531 }
526 } 532 }
527 upnout: 533 upnout:
528 up(&f->sem); 534 mutex_unlock(&f->sem);
529 535
530 return ret; 536 return ret;
531} 537}
@@ -846,7 +852,7 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
846 /* Prevent the erase code from nicking the obsolete node refs while 852 /* Prevent the erase code from nicking the obsolete node refs while
847 we're looking at them. I really don't like this extra lock but 853 we're looking at them. I really don't like this extra lock but
848 can't see any alternative. Suggestions on a postcard to... */ 854 can't see any alternative. Suggestions on a postcard to... */
849 down(&c->erase_free_sem); 855 mutex_lock(&c->erase_free_sem);
850 856
851 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) { 857 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
852 858
@@ -899,7 +905,7 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
899 /* OK. The name really does match. There really is still an older node on 905 /* OK. The name really does match. There really is still an older node on
900 the flash which our deletion dirent obsoletes. So we have to write out 906 the flash which our deletion dirent obsoletes. So we have to write out
901 a new deletion dirent to replace it */ 907 a new deletion dirent to replace it */
902 up(&c->erase_free_sem); 908 mutex_unlock(&c->erase_free_sem);
903 909
904 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n", 910 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
905 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino))); 911 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
@@ -908,7 +914,7 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
908 return jffs2_garbage_collect_dirent(c, jeb, f, fd); 914 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
909 } 915 }
910 916
911 up(&c->erase_free_sem); 917 mutex_unlock(&c->erase_free_sem);
912 kfree(rd); 918 kfree(rd);
913 } 919 }
914 920
@@ -1081,7 +1087,7 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
1081 return 0; 1087 return 0;
1082} 1088}
1083 1089
1084static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 1090static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1085 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn, 1091 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1086 uint32_t start, uint32_t end) 1092 uint32_t start, uint32_t end)
1087{ 1093{
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index f4d525b0ea53..e2177210f621 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -10,6 +10,7 @@
10 */ 10 */
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include "nodelist.h"
13 14
14int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 15int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
15 unsigned long arg) 16 unsigned long arg)
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index a841f4973a74..31559f45fdde 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -15,7 +15,7 @@
15#include <linux/version.h> 15#include <linux/version.h>
16#include <linux/rbtree.h> 16#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
18#include <linux/semaphore.h> 18#include <linux/mutex.h>
19 19
20struct jffs2_inode_info { 20struct jffs2_inode_info {
21 /* We need an internal mutex similar to inode->i_mutex. 21 /* We need an internal mutex similar to inode->i_mutex.
@@ -24,7 +24,7 @@ struct jffs2_inode_info {
24 before letting GC proceed. Or we'd have to put ugliness 24 before letting GC proceed. Or we'd have to put ugliness
25 into the GC code so it didn't attempt to obtain the i_mutex 25 into the GC code so it didn't attempt to obtain the i_mutex
26 for the inode(s) which are already locked */ 26 for the inode(s) which are already locked */
27 struct semaphore sem; 27 struct mutex sem;
28 28
29 /* The highest (datanode) version number used for this ino */ 29 /* The highest (datanode) version number used for this ino */
30 uint32_t highest_version; 30 uint32_t highest_version;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 18fca2b9e531..85ef6dbb1be7 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -16,7 +16,7 @@
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/completion.h> 18#include <linux/completion.h>
19#include <linux/semaphore.h> 19#include <linux/mutex.h>
20#include <linux/timer.h> 20#include <linux/timer.h>
21#include <linux/wait.h> 21#include <linux/wait.h>
22#include <linux/list.h> 22#include <linux/list.h>
@@ -44,7 +44,7 @@ struct jffs2_sb_info {
44 struct completion gc_thread_start; /* GC thread start completion */ 44 struct completion gc_thread_start; /* GC thread start completion */
45 struct completion gc_thread_exit; /* GC thread exit completion port */ 45 struct completion gc_thread_exit; /* GC thread exit completion port */
46 46
47 struct semaphore alloc_sem; /* Used to protect all the following 47 struct mutex alloc_sem; /* Used to protect all the following
48 fields, and also to protect against 48 fields, and also to protect against
49 out-of-order writing of nodes. And GC. */ 49 out-of-order writing of nodes. And GC. */
50 uint32_t cleanmarker_size; /* Size of an _inline_ CLEANMARKER 50 uint32_t cleanmarker_size; /* Size of an _inline_ CLEANMARKER
@@ -87,6 +87,7 @@ struct jffs2_sb_info {
87 struct list_head erasable_list; /* Blocks which are completely dirty, and need erasing */ 87 struct list_head erasable_list; /* Blocks which are completely dirty, and need erasing */
88 struct list_head erasable_pending_wbuf_list; /* Blocks which need erasing but only after the current wbuf is flushed */ 88 struct list_head erasable_pending_wbuf_list; /* Blocks which need erasing but only after the current wbuf is flushed */
89 struct list_head erasing_list; /* Blocks which are currently erasing */ 89 struct list_head erasing_list; /* Blocks which are currently erasing */
90 struct list_head erase_checking_list; /* Blocks which are being checked and marked */
90 struct list_head erase_pending_list; /* Blocks which need erasing now */ 91 struct list_head erase_pending_list; /* Blocks which need erasing now */
91 struct list_head erase_complete_list; /* Blocks which are erased and need the clean marker written to them */ 92 struct list_head erase_complete_list; /* Blocks which are erased and need the clean marker written to them */
92 struct list_head free_list; /* Blocks which are free and ready to be used */ 93 struct list_head free_list; /* Blocks which are free and ready to be used */
@@ -104,7 +105,7 @@ struct jffs2_sb_info {
104 /* Sem to allow jffs2_garbage_collect_deletion_dirent to 105 /* Sem to allow jffs2_garbage_collect_deletion_dirent to
105 drop the erase_completion_lock while it's holding a pointer 106 drop the erase_completion_lock while it's holding a pointer
106 to an obsoleted node. I don't like this. Alternatives welcomed. */ 107 to an obsoleted node. I don't like this. Alternatives welcomed. */
107 struct semaphore erase_free_sem; 108 struct mutex erase_free_sem;
108 109
109 uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */ 110 uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */
110 111
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index ec1aae9e695e..1750445556c3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -87,7 +87,7 @@ struct jffs2_raw_node_ref
87 xattr_ref or xattr_datum instead. The common part of those structures 87 xattr_ref or xattr_datum instead. The common part of those structures
88 has NULL in the first word. See jffs2_raw_ref_to_ic() below */ 88 has NULL in the first word. See jffs2_raw_ref_to_ic() below */
89 uint32_t flash_offset; 89 uint32_t flash_offset;
90#define TEST_TOTLEN 90#undef TEST_TOTLEN
91#ifdef TEST_TOTLEN 91#ifdef TEST_TOTLEN
92 uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */ 92 uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */
93#endif 93#endif
@@ -177,7 +177,10 @@ struct jffs2_inode_cache {
177#ifdef CONFIG_JFFS2_FS_XATTR 177#ifdef CONFIG_JFFS2_FS_XATTR
178 struct jffs2_xattr_ref *xref; 178 struct jffs2_xattr_ref *xref;
179#endif 179#endif
180 int nlink; 180 uint32_t pino_nlink; /* Directories store parent inode
181 here; other inodes store nlink.
182 Zero always means that it's
183 completely unlinked. */
181}; 184};
182 185
183/* Inode states for 'state' above. We need the 'GC' state to prevent 186/* Inode states for 'state' above. We need the 'GC' state to prevent
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index a0313fa8748e..a9bf9603c1ba 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -48,7 +48,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
48 minsize = PAD(minsize); 48 minsize = PAD(minsize);
49 49
50 D1(printk(KERN_DEBUG "jffs2_reserve_space(): Requested 0x%x bytes\n", minsize)); 50 D1(printk(KERN_DEBUG "jffs2_reserve_space(): Requested 0x%x bytes\n", minsize));
51 down(&c->alloc_sem); 51 mutex_lock(&c->alloc_sem);
52 52
53 D1(printk(KERN_DEBUG "jffs2_reserve_space(): alloc sem got\n")); 53 D1(printk(KERN_DEBUG "jffs2_reserve_space(): alloc sem got\n"));
54 54
@@ -57,7 +57,6 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
57 /* this needs a little more thought (true <tglx> :)) */ 57 /* this needs a little more thought (true <tglx> :)) */
58 while(ret == -EAGAIN) { 58 while(ret == -EAGAIN) {
59 while(c->nr_free_blocks + c->nr_erasing_blocks < blocksneeded) { 59 while(c->nr_free_blocks + c->nr_erasing_blocks < blocksneeded) {
60 int ret;
61 uint32_t dirty, avail; 60 uint32_t dirty, avail;
62 61
63 /* calculate real dirty size 62 /* calculate real dirty size
@@ -82,7 +81,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
82 dirty, c->unchecked_size, c->sector_size)); 81 dirty, c->unchecked_size, c->sector_size));
83 82
84 spin_unlock(&c->erase_completion_lock); 83 spin_unlock(&c->erase_completion_lock);
85 up(&c->alloc_sem); 84 mutex_unlock(&c->alloc_sem);
86 return -ENOSPC; 85 return -ENOSPC;
87 } 86 }
88 87
@@ -105,11 +104,11 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
105 D1(printk(KERN_DEBUG "max. available size 0x%08x < blocksneeded * sector_size 0x%08x, returning -ENOSPC\n", 104 D1(printk(KERN_DEBUG "max. available size 0x%08x < blocksneeded * sector_size 0x%08x, returning -ENOSPC\n",
106 avail, blocksneeded * c->sector_size)); 105 avail, blocksneeded * c->sector_size));
107 spin_unlock(&c->erase_completion_lock); 106 spin_unlock(&c->erase_completion_lock);
108 up(&c->alloc_sem); 107 mutex_unlock(&c->alloc_sem);
109 return -ENOSPC; 108 return -ENOSPC;
110 } 109 }
111 110
112 up(&c->alloc_sem); 111 mutex_unlock(&c->alloc_sem);
113 112
114 D1(printk(KERN_DEBUG "Triggering GC pass. nr_free_blocks %d, nr_erasing_blocks %d, free_size 0x%08x, dirty_size 0x%08x, wasted_size 0x%08x, used_size 0x%08x, erasing_size 0x%08x, bad_size 0x%08x (total 0x%08x of 0x%08x)\n", 113 D1(printk(KERN_DEBUG "Triggering GC pass. nr_free_blocks %d, nr_erasing_blocks %d, free_size 0x%08x, dirty_size 0x%08x, wasted_size 0x%08x, used_size 0x%08x, erasing_size 0x%08x, bad_size 0x%08x (total 0x%08x of 0x%08x)\n",
115 c->nr_free_blocks, c->nr_erasing_blocks, c->free_size, c->dirty_size, c->wasted_size, c->used_size, c->erasing_size, c->bad_size, 114 c->nr_free_blocks, c->nr_erasing_blocks, c->free_size, c->dirty_size, c->wasted_size, c->used_size, c->erasing_size, c->bad_size,
@@ -117,7 +116,10 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
117 spin_unlock(&c->erase_completion_lock); 116 spin_unlock(&c->erase_completion_lock);
118 117
119 ret = jffs2_garbage_collect_pass(c); 118 ret = jffs2_garbage_collect_pass(c);
120 if (ret) 119
120 if (ret == -EAGAIN)
121 jffs2_erase_pending_blocks(c, 1);
122 else if (ret)
121 return ret; 123 return ret;
122 124
123 cond_resched(); 125 cond_resched();
@@ -125,7 +127,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
125 if (signal_pending(current)) 127 if (signal_pending(current))
126 return -EINTR; 128 return -EINTR;
127 129
128 down(&c->alloc_sem); 130 mutex_lock(&c->alloc_sem);
129 spin_lock(&c->erase_completion_lock); 131 spin_lock(&c->erase_completion_lock);
130 } 132 }
131 133
@@ -138,7 +140,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
138 if (!ret) 140 if (!ret)
139 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); 141 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
140 if (ret) 142 if (ret)
141 up(&c->alloc_sem); 143 mutex_unlock(&c->alloc_sem);
142 return ret; 144 return ret;
143} 145}
144 146
@@ -463,7 +465,7 @@ void jffs2_complete_reservation(struct jffs2_sb_info *c)
463{ 465{
464 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n")); 466 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n"));
465 jffs2_garbage_collect_trigger(c); 467 jffs2_garbage_collect_trigger(c);
466 up(&c->alloc_sem); 468 mutex_unlock(&c->alloc_sem);
467} 469}
468 470
469static inline int on_list(struct list_head *obj, struct list_head *head) 471static inline int on_list(struct list_head *obj, struct list_head *head)
@@ -512,7 +514,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
512 any jffs2_raw_node_refs. So we don't need to stop erases from 514 any jffs2_raw_node_refs. So we don't need to stop erases from
513 happening, or protect against people holding an obsolete 515 happening, or protect against people holding an obsolete
514 jffs2_raw_node_ref without the erase_completion_lock. */ 516 jffs2_raw_node_ref without the erase_completion_lock. */
515 down(&c->erase_free_sem); 517 mutex_lock(&c->erase_free_sem);
516 } 518 }
517 519
518 spin_lock(&c->erase_completion_lock); 520 spin_lock(&c->erase_completion_lock);
@@ -707,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
707 break; 709 break;
708#endif 710#endif
709 default: 711 default:
710 if (ic->nodes == (void *)ic && ic->nlink == 0) 712 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
711 jffs2_del_ino_cache(c, ic); 713 jffs2_del_ino_cache(c, ic);
712 break; 714 break;
713 } 715 }
@@ -715,7 +717,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
715 } 717 }
716 718
717 out_erase_sem: 719 out_erase_sem:
718 up(&c->erase_free_sem); 720 mutex_unlock(&c->erase_free_sem);
719} 721}
720 722
721int jffs2_thread_should_wake(struct jffs2_sb_info *c) 723int jffs2_thread_should_wake(struct jffs2_sb_info *c)
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 1b10d2594092..2cc866cf134f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
187void jffs2_gc_release_inode(struct jffs2_sb_info *c, 187void jffs2_gc_release_inode(struct jffs2_sb_info *c,
188 struct jffs2_inode_info *f); 188 struct jffs2_inode_info *f);
189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
190 int inum, int nlink); 190 int inum, int unlinked);
191 191
192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, 192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
193 struct jffs2_inode_info *f, 193 struct jffs2_inode_info *f,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index e512a93d6249..6ca08ad887c0 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), 63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
64 * adding and jffs2_flash_read_end() interface. */ 64 * adding and jffs2_flash_read_end() interface. */
65 if (c->mtd->point) { 65 if (c->mtd->point) {
66 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 66 err = c->mtd->point(c->mtd, ofs, len, &retlen,
67 (void **)&buffer, NULL);
67 if (!err && retlen < len) { 68 if (!err && retlen < len) {
68 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
69 c->mtd->unpoint(c->mtd, buffer, ofs, retlen); 70 c->mtd->unpoint(c->mtd, ofs, retlen);
70 } else if (err) 71 } else if (err)
71 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
72 else 73 else
@@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
100 kfree(buffer); 101 kfree(buffer);
101#ifndef __ECOS 102#ifndef __ECOS
102 else 103 else
103 c->mtd->unpoint(c->mtd, buffer, ofs, len); 104 c->mtd->unpoint(c->mtd, ofs, len);
104#endif 105#endif
105 106
106 if (crc != tn->data_crc) { 107 if (crc != tn->data_crc) {
@@ -136,7 +137,7 @@ free_out:
136 kfree(buffer); 137 kfree(buffer);
137#ifndef __ECOS 138#ifndef __ECOS
138 else 139 else
139 c->mtd->unpoint(c->mtd, buffer, ofs, len); 140 c->mtd->unpoint(c->mtd, ofs, len);
140#endif 141#endif
141 return err; 142 return err;
142} 143}
@@ -825,8 +826,9 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
825 else // normal case... 826 else // normal case...
826 tn->fn->size = je32_to_cpu(rd->dsize); 827 tn->fn->size = je32_to_cpu(rd->dsize);
827 828
828 dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n", 829 dbg_readinode2("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
829 ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize); 830 ref_offset(ref), je32_to_cpu(rd->version),
831 je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
830 832
831 ret = jffs2_add_tn_to_tree(c, rii, tn); 833 ret = jffs2_add_tn_to_tree(c, rii, tn);
832 834
@@ -836,13 +838,13 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
836 jffs2_free_tmp_dnode_info(tn); 838 jffs2_free_tmp_dnode_info(tn);
837 return ret; 839 return ret;
838 } 840 }
839#ifdef JFFS2_DBG_READINODE_MESSAGES 841#ifdef JFFS2_DBG_READINODE2_MESSAGES
840 dbg_readinode("After adding ver %d:\n", je32_to_cpu(rd->version)); 842 dbg_readinode2("After adding ver %d:\n", je32_to_cpu(rd->version));
841 tn = tn_first(&rii->tn_root); 843 tn = tn_first(&rii->tn_root);
842 while (tn) { 844 while (tn) {
843 dbg_readinode("%p: v %d r 0x%x-0x%x ov %d\n", 845 dbg_readinode2("%p: v %d r 0x%x-0x%x ov %d\n",
844 tn, tn->version, tn->fn->ofs, 846 tn, tn->version, tn->fn->ofs,
845 tn->fn->ofs+tn->fn->size, tn->overlapped); 847 tn->fn->ofs+tn->fn->size, tn->overlapped);
846 tn = tn_next(tn); 848 tn = tn_next(tn);
847 } 849 }
848#endif 850#endif
@@ -1122,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1122 size_t retlen; 1124 size_t retlen;
1123 int ret; 1125 int ret;
1124 1126
1125 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1127 dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino,
1128 f->inocache->pino_nlink);
1126 1129
1127 memset(&rii, 0, sizeof(rii)); 1130 memset(&rii, 0, sizeof(rii));
1128 1131
@@ -1193,7 +1196,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1193 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n", 1196 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
1194 ret, retlen, sizeof(*latest_node)); 1197 ret, retlen, sizeof(*latest_node));
1195 /* FIXME: If this fails, there seems to be a memory leak. Find it. */ 1198 /* FIXME: If this fails, there seems to be a memory leak. Find it. */
1196 up(&f->sem); 1199 mutex_unlock(&f->sem);
1197 jffs2_do_clear_inode(c, f); 1200 jffs2_do_clear_inode(c, f);
1198 return ret?ret:-EIO; 1201 return ret?ret:-EIO;
1199 } 1202 }
@@ -1202,7 +1205,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1202 if (crc != je32_to_cpu(latest_node->node_crc)) { 1205 if (crc != je32_to_cpu(latest_node->node_crc)) {
1203 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n", 1206 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
1204 f->inocache->ino, ref_offset(rii.latest_ref)); 1207 f->inocache->ino, ref_offset(rii.latest_ref));
1205 up(&f->sem); 1208 mutex_unlock(&f->sem);
1206 jffs2_do_clear_inode(c, f); 1209 jffs2_do_clear_inode(c, f);
1207 return -EIO; 1210 return -EIO;
1208 } 1211 }
@@ -1242,7 +1245,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1242 f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL); 1245 f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
1243 if (!f->target) { 1246 if (!f->target) {
1244 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize)); 1247 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
1245 up(&f->sem); 1248 mutex_unlock(&f->sem);
1246 jffs2_do_clear_inode(c, f); 1249 jffs2_do_clear_inode(c, f);
1247 return -ENOMEM; 1250 return -ENOMEM;
1248 } 1251 }
@@ -1255,7 +1258,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1255 ret = -EIO; 1258 ret = -EIO;
1256 kfree(f->target); 1259 kfree(f->target);
1257 f->target = NULL; 1260 f->target = NULL;
1258 up(&f->sem); 1261 mutex_unlock(&f->sem);
1259 jffs2_do_clear_inode(c, f); 1262 jffs2_do_clear_inode(c, f);
1260 return -ret; 1263 return -ret;
1261 } 1264 }
@@ -1273,14 +1276,14 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1273 if (f->metadata) { 1276 if (f->metadata) {
1274 JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n", 1277 JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
1275 f->inocache->ino, jemode_to_cpu(latest_node->mode)); 1278 f->inocache->ino, jemode_to_cpu(latest_node->mode));
1276 up(&f->sem); 1279 mutex_unlock(&f->sem);
1277 jffs2_do_clear_inode(c, f); 1280 jffs2_do_clear_inode(c, f);
1278 return -EIO; 1281 return -EIO;
1279 } 1282 }
1280 if (!frag_first(&f->fragtree)) { 1283 if (!frag_first(&f->fragtree)) {
1281 JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n", 1284 JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
1282 f->inocache->ino, jemode_to_cpu(latest_node->mode)); 1285 f->inocache->ino, jemode_to_cpu(latest_node->mode));
1283 up(&f->sem); 1286 mutex_unlock(&f->sem);
1284 jffs2_do_clear_inode(c, f); 1287 jffs2_do_clear_inode(c, f);
1285 return -EIO; 1288 return -EIO;
1286 } 1289 }
@@ -1289,7 +1292,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1289 JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n", 1292 JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
1290 f->inocache->ino, jemode_to_cpu(latest_node->mode)); 1293 f->inocache->ino, jemode_to_cpu(latest_node->mode));
1291 /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */ 1294 /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
1292 up(&f->sem); 1295 mutex_unlock(&f->sem);
1293 jffs2_do_clear_inode(c, f); 1296 jffs2_do_clear_inode(c, f);
1294 return -EIO; 1297 return -EIO;
1295 } 1298 }
@@ -1357,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
1357 } 1360 }
1358 dbg_readinode("creating inocache for root inode\n"); 1361 dbg_readinode("creating inocache for root inode\n");
1359 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); 1362 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
1360 f->inocache->ino = f->inocache->nlink = 1; 1363 f->inocache->ino = f->inocache->pino_nlink = 1;
1361 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 1364 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
1362 f->inocache->state = INO_STATE_READING; 1365 f->inocache->state = INO_STATE_READING;
1363 jffs2_add_ino_cache(c, f->inocache); 1366 jffs2_add_ino_cache(c, f->inocache);
@@ -1379,12 +1382,13 @@ int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
1379 if (!f) 1382 if (!f)
1380 return -ENOMEM; 1383 return -ENOMEM;
1381 1384
1382 init_MUTEX_LOCKED(&f->sem); 1385 mutex_init(&f->sem);
1386 mutex_lock(&f->sem);
1383 f->inocache = ic; 1387 f->inocache = ic;
1384 1388
1385 ret = jffs2_do_read_inode_internal(c, f, &n); 1389 ret = jffs2_do_read_inode_internal(c, f, &n);
1386 if (!ret) { 1390 if (!ret) {
1387 up(&f->sem); 1391 mutex_unlock(&f->sem);
1388 jffs2_do_clear_inode(c, f); 1392 jffs2_do_clear_inode(c, f);
1389 } 1393 }
1390 kfree (f); 1394 kfree (f);
@@ -1398,8 +1402,8 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1398 1402
1399 jffs2_clear_acl(f); 1403 jffs2_clear_acl(f);
1400 jffs2_xattr_delete_inode(c, f->inocache); 1404 jffs2_xattr_delete_inode(c, f->inocache);
1401 down(&f->sem); 1405 mutex_lock(&f->sem);
1402 deleted = f->inocache && !f->inocache->nlink; 1406 deleted = f->inocache && !f->inocache->pino_nlink;
1403 1407
1404 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) 1408 if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
1405 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); 1409 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
@@ -1430,5 +1434,5 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1430 jffs2_del_ino_cache(c, f->inocache); 1434 jffs2_del_ino_cache(c, f->inocache);
1431 } 1435 }
1432 1436
1433 up(&f->sem); 1437 mutex_unlock(&f->sem);
1434} 1438}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 272872d27fd5..1d437de1e9a8 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
97 size_t pointlen; 97 size_t pointlen;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); 100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
101 (void **)&flashbuf, NULL);
101 if (!ret && pointlen < c->mtd->size) { 102 if (!ret && pointlen < c->mtd->size) {
102 /* Don't muck about if it won't let us point to the whole flash */ 103 /* Don't muck about if it won't let us point to the whole flash */
103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
104 c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); 105 c->mtd->unpoint(c->mtd, 0, pointlen);
105 flashbuf = NULL; 106 flashbuf = NULL;
106 } 107 }
107 if (ret) 108 if (ret)
@@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
267 kfree(flashbuf); 268 kfree(flashbuf);
268#ifndef __ECOS 269#ifndef __ECOS
269 else 270 else
270 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 271 c->mtd->unpoint(c->mtd, 0, c->mtd->size);
271#endif 272#endif
272 if (s) 273 if (s)
273 kfree(s); 274 kfree(s);
@@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
940 ic->nodes = (void *)ic; 941 ic->nodes = (void *)ic;
941 jffs2_add_ino_cache(c, ic); 942 jffs2_add_ino_cache(c, ic);
942 if (ino == 1) 943 if (ino == 1)
943 ic->nlink = 1; 944 ic->pino_nlink = 1;
944 return ic; 945 return ic;
945} 946}
946 947
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4677355996cc..7da69eae49e4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep;
31 31
32static struct inode *jffs2_alloc_inode(struct super_block *sb) 32static struct inode *jffs2_alloc_inode(struct super_block *sb)
33{ 33{
34 struct jffs2_inode_info *ei; 34 struct jffs2_inode_info *f;
35 ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); 35
36 if (!ei) 36 f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
37 if (!f)
37 return NULL; 38 return NULL;
38 return &ei->vfs_inode; 39 return &f->vfs_inode;
39} 40}
40 41
41static void jffs2_destroy_inode(struct inode *inode) 42static void jffs2_destroy_inode(struct inode *inode)
@@ -45,19 +46,19 @@ static void jffs2_destroy_inode(struct inode *inode)
45 46
46static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
47{ 48{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 49 struct jffs2_inode_info *f = foo;
49 50
50 init_MUTEX(&ei->sem); 51 mutex_init(&f->sem);
51 inode_init_once(&ei->vfs_inode); 52 inode_init_once(&f->vfs_inode);
52} 53}
53 54
54static int jffs2_sync_fs(struct super_block *sb, int wait) 55static int jffs2_sync_fs(struct super_block *sb, int wait)
55{ 56{
56 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 57 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
57 58
58 down(&c->alloc_sem); 59 mutex_lock(&c->alloc_sem);
59 jffs2_flush_wbuf_pad(c); 60 jffs2_flush_wbuf_pad(c);
60 up(&c->alloc_sem); 61 mutex_unlock(&c->alloc_sem);
61 return 0; 62 return 0;
62} 63}
63 64
@@ -95,8 +96,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
95 96
96 /* Initialize JFFS2 superblock locks, the further initialization will 97 /* Initialize JFFS2 superblock locks, the further initialization will
97 * be done later */ 98 * be done later */
98 init_MUTEX(&c->alloc_sem); 99 mutex_init(&c->alloc_sem);
99 init_MUTEX(&c->erase_free_sem); 100 mutex_init(&c->erase_free_sem);
100 init_waitqueue_head(&c->erase_wait); 101 init_waitqueue_head(&c->erase_wait);
101 init_waitqueue_head(&c->inocache_wq); 102 init_waitqueue_head(&c->inocache_wq);
102 spin_lock_init(&c->erase_completion_lock); 103 spin_lock_init(&c->erase_completion_lock);
@@ -125,9 +126,9 @@ static void jffs2_put_super (struct super_block *sb)
125 126
126 D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); 127 D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
127 128
128 down(&c->alloc_sem); 129 mutex_lock(&c->alloc_sem);
129 jffs2_flush_wbuf_pad(c); 130 jffs2_flush_wbuf_pad(c);
130 up(&c->alloc_sem); 131 mutex_unlock(&c->alloc_sem);
131 132
132 jffs2_sum_exit(c); 133 jffs2_sum_exit(c);
133 134
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index d1d4f27464ba..0e78b00035e4 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
494 /* If it's an in-core inode, then we have to adjust any 494 /* If it's an in-core inode, then we have to adjust any
495 full_dirent or full_dnode structure to point to the 495 full_dirent or full_dnode structure to point to the
496 new version instead of the old */ 496 new version instead of the old */
497 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); 497 f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink);
498 if (IS_ERR(f)) { 498 if (IS_ERR(f)) {
499 /* Should never happen; it _must_ be present */ 499 /* Should never happen; it _must_ be present */
500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", 500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
@@ -578,8 +578,8 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
578 if (!jffs2_is_writebuffered(c)) 578 if (!jffs2_is_writebuffered(c))
579 return 0; 579 return 0;
580 580
581 if (!down_trylock(&c->alloc_sem)) { 581 if (mutex_trylock(&c->alloc_sem)) {
582 up(&c->alloc_sem); 582 mutex_unlock(&c->alloc_sem);
583 printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n"); 583 printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n");
584 BUG(); 584 BUG();
585 } 585 }
@@ -702,10 +702,10 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
702 if (!c->wbuf) 702 if (!c->wbuf)
703 return 0; 703 return 0;
704 704
705 down(&c->alloc_sem); 705 mutex_lock(&c->alloc_sem);
706 if (!jffs2_wbuf_pending_for_ino(c, ino)) { 706 if (!jffs2_wbuf_pending_for_ino(c, ino)) {
707 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino)); 707 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino));
708 up(&c->alloc_sem); 708 mutex_unlock(&c->alloc_sem);
709 return 0; 709 return 0;
710 } 710 }
711 711
@@ -725,14 +725,14 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
725 } else while (old_wbuf_len && 725 } else while (old_wbuf_len &&
726 old_wbuf_ofs == c->wbuf_ofs) { 726 old_wbuf_ofs == c->wbuf_ofs) {
727 727
728 up(&c->alloc_sem); 728 mutex_unlock(&c->alloc_sem);
729 729
730 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() calls gc pass\n")); 730 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() calls gc pass\n"));
731 731
732 ret = jffs2_garbage_collect_pass(c); 732 ret = jffs2_garbage_collect_pass(c);
733 if (ret) { 733 if (ret) {
734 /* GC failed. Flush it with padding instead */ 734 /* GC failed. Flush it with padding instead */
735 down(&c->alloc_sem); 735 mutex_lock(&c->alloc_sem);
736 down_write(&c->wbuf_sem); 736 down_write(&c->wbuf_sem);
737 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING); 737 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
738 /* retry flushing wbuf in case jffs2_wbuf_recover 738 /* retry flushing wbuf in case jffs2_wbuf_recover
@@ -742,12 +742,12 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
742 up_write(&c->wbuf_sem); 742 up_write(&c->wbuf_sem);
743 break; 743 break;
744 } 744 }
745 down(&c->alloc_sem); 745 mutex_lock(&c->alloc_sem);
746 } 746 }
747 747
748 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() ends...\n")); 748 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() ends...\n"));
749 749
750 up(&c->alloc_sem); 750 mutex_unlock(&c->alloc_sem);
751 return ret; 751 return ret;
752} 752}
753 753
@@ -1236,12 +1236,24 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1236 if (!c->wbuf) 1236 if (!c->wbuf)
1237 return -ENOMEM; 1237 return -ENOMEM;
1238 1238
1239#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1240 c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1241 if (!c->wbuf_verify) {
1242 kfree(c->oobbuf);
1243 kfree(c->wbuf);
1244 return -ENOMEM;
1245 }
1246#endif
1247
1239 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size); 1248 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size);
1240 1249
1241 return 0; 1250 return 0;
1242} 1251}
1243 1252
1244void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) { 1253void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) {
1254#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1255 kfree(c->wbuf_verify);
1256#endif
1245 kfree(c->wbuf); 1257 kfree(c->wbuf);
1246} 1258}
1247 1259
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 776f13cbf2b5..ca29440e9435 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -19,7 +19,8 @@
19#include "compr.h" 19#include "compr.h"
20 20
21 21
22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) 22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
23 uint32_t mode, struct jffs2_raw_inode *ri)
23{ 24{
24 struct jffs2_inode_cache *ic; 25 struct jffs2_inode_cache *ic;
25 26
@@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
31 memset(ic, 0, sizeof(*ic)); 32 memset(ic, 0, sizeof(*ic));
32 33
33 f->inocache = ic; 34 f->inocache = ic;
34 f->inocache->nlink = 1; 35 f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */
35 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 36 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
36 f->inocache->state = INO_STATE_PRESENT; 37 f->inocache->state = INO_STATE_PRESENT;
37 38
@@ -137,12 +138,12 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
137 JFFS2_SUMMARY_INODE_SIZE); 138 JFFS2_SUMMARY_INODE_SIZE);
138 } else { 139 } else {
139 /* Locking pain */ 140 /* Locking pain */
140 up(&f->sem); 141 mutex_unlock(&f->sem);
141 jffs2_complete_reservation(c); 142 jffs2_complete_reservation(c);
142 143
143 ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &dummy, 144 ret = jffs2_reserve_space(c, sizeof(*ri) + datalen, &dummy,
144 alloc_mode, JFFS2_SUMMARY_INODE_SIZE); 145 alloc_mode, JFFS2_SUMMARY_INODE_SIZE);
145 down(&f->sem); 146 mutex_lock(&f->sem);
146 } 147 }
147 148
148 if (!ret) { 149 if (!ret) {
@@ -285,12 +286,12 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
285 JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 286 JFFS2_SUMMARY_DIRENT_SIZE(namelen));
286 } else { 287 } else {
287 /* Locking pain */ 288 /* Locking pain */
288 up(&f->sem); 289 mutex_unlock(&f->sem);
289 jffs2_complete_reservation(c); 290 jffs2_complete_reservation(c);
290 291
291 ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &dummy, 292 ret = jffs2_reserve_space(c, sizeof(*rd) + namelen, &dummy,
292 alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 293 alloc_mode, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
293 down(&f->sem); 294 mutex_lock(&f->sem);
294 } 295 }
295 296
296 if (!ret) { 297 if (!ret) {
@@ -353,7 +354,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
353 D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret)); 354 D1(printk(KERN_DEBUG "jffs2_reserve_space returned %d\n", ret));
354 break; 355 break;
355 } 356 }
356 down(&f->sem); 357 mutex_lock(&f->sem);
357 datalen = min_t(uint32_t, writelen, PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1))); 358 datalen = min_t(uint32_t, writelen, PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1)));
358 cdatalen = min_t(uint32_t, alloclen - sizeof(*ri), datalen); 359 cdatalen = min_t(uint32_t, alloclen - sizeof(*ri), datalen);
359 360
@@ -381,7 +382,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
381 382
382 if (IS_ERR(fn)) { 383 if (IS_ERR(fn)) {
383 ret = PTR_ERR(fn); 384 ret = PTR_ERR(fn);
384 up(&f->sem); 385 mutex_unlock(&f->sem);
385 jffs2_complete_reservation(c); 386 jffs2_complete_reservation(c);
386 if (!retried) { 387 if (!retried) {
387 /* Write error to be retried */ 388 /* Write error to be retried */
@@ -403,11 +404,11 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
403 jffs2_mark_node_obsolete(c, fn->raw); 404 jffs2_mark_node_obsolete(c, fn->raw);
404 jffs2_free_full_dnode(fn); 405 jffs2_free_full_dnode(fn);
405 406
406 up(&f->sem); 407 mutex_unlock(&f->sem);
407 jffs2_complete_reservation(c); 408 jffs2_complete_reservation(c);
408 break; 409 break;
409 } 410 }
410 up(&f->sem); 411 mutex_unlock(&f->sem);
411 jffs2_complete_reservation(c); 412 jffs2_complete_reservation(c);
412 if (!datalen) { 413 if (!datalen) {
413 printk(KERN_WARNING "Eep. We didn't actually write any data in jffs2_write_inode_range()\n"); 414 printk(KERN_WARNING "Eep. We didn't actually write any data in jffs2_write_inode_range()\n");
@@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
438 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, 439 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
439 JFFS2_SUMMARY_INODE_SIZE); 440 JFFS2_SUMMARY_INODE_SIZE);
440 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 441 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
441 if (ret) { 442 if (ret)
442 up(&f->sem);
443 return ret; 443 return ret;
444 } 444
445 mutex_lock(&f->sem);
445 446
446 ri->data_crc = cpu_to_je32(0); 447 ri->data_crc = cpu_to_je32(0);
447 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 448 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -454,7 +455,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
454 if (IS_ERR(fn)) { 455 if (IS_ERR(fn)) {
455 D1(printk(KERN_DEBUG "jffs2_write_dnode() failed\n")); 456 D1(printk(KERN_DEBUG "jffs2_write_dnode() failed\n"));
456 /* Eeek. Wave bye bye */ 457 /* Eeek. Wave bye bye */
457 up(&f->sem); 458 mutex_unlock(&f->sem);
458 jffs2_complete_reservation(c); 459 jffs2_complete_reservation(c);
459 return PTR_ERR(fn); 460 return PTR_ERR(fn);
460 } 461 }
@@ -463,7 +464,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
463 */ 464 */
464 f->metadata = fn; 465 f->metadata = fn;
465 466
466 up(&f->sem); 467 mutex_unlock(&f->sem);
467 jffs2_complete_reservation(c); 468 jffs2_complete_reservation(c);
468 469
469 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode); 470 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode);
@@ -489,7 +490,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
489 return -ENOMEM; 490 return -ENOMEM;
490 } 491 }
491 492
492 down(&dir_f->sem); 493 mutex_lock(&dir_f->sem);
493 494
494 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 495 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
495 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 496 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
@@ -513,7 +514,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
513 /* dirent failed to write. Delete the inode normally 514 /* dirent failed to write. Delete the inode normally
514 as if it were the final unlink() */ 515 as if it were the final unlink() */
515 jffs2_complete_reservation(c); 516 jffs2_complete_reservation(c);
516 up(&dir_f->sem); 517 mutex_unlock(&dir_f->sem);
517 return PTR_ERR(fd); 518 return PTR_ERR(fd);
518 } 519 }
519 520
@@ -522,7 +523,7 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
522 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 523 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
523 524
524 jffs2_complete_reservation(c); 525 jffs2_complete_reservation(c);
525 up(&dir_f->sem); 526 mutex_unlock(&dir_f->sem);
526 527
527 return 0; 528 return 0;
528} 529}
@@ -551,7 +552,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
551 return ret; 552 return ret;
552 } 553 }
553 554
554 down(&dir_f->sem); 555 mutex_lock(&dir_f->sem);
555 556
556 /* Build a deletion node */ 557 /* Build a deletion node */
557 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 558 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -574,21 +575,21 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
574 575
575 if (IS_ERR(fd)) { 576 if (IS_ERR(fd)) {
576 jffs2_complete_reservation(c); 577 jffs2_complete_reservation(c);
577 up(&dir_f->sem); 578 mutex_unlock(&dir_f->sem);
578 return PTR_ERR(fd); 579 return PTR_ERR(fd);
579 } 580 }
580 581
581 /* File it. This will mark the old one obsolete. */ 582 /* File it. This will mark the old one obsolete. */
582 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 583 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
583 up(&dir_f->sem); 584 mutex_unlock(&dir_f->sem);
584 } else { 585 } else {
585 struct jffs2_full_dirent *fd = dir_f->dents;
586 uint32_t nhash = full_name_hash(name, namelen); 586 uint32_t nhash = full_name_hash(name, namelen);
587 587
588 fd = dir_f->dents;
588 /* We don't actually want to reserve any space, but we do 589 /* We don't actually want to reserve any space, but we do
589 want to be holding the alloc_sem when we write to flash */ 590 want to be holding the alloc_sem when we write to flash */
590 down(&c->alloc_sem); 591 mutex_lock(&c->alloc_sem);
591 down(&dir_f->sem); 592 mutex_lock(&dir_f->sem);
592 593
593 for (fd = dir_f->dents; fd; fd = fd->next) { 594 for (fd = dir_f->dents; fd; fd = fd->next) {
594 if (fd->nhash == nhash && 595 if (fd->nhash == nhash &&
@@ -607,7 +608,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
607 break; 608 break;
608 } 609 }
609 } 610 }
610 up(&dir_f->sem); 611 mutex_unlock(&dir_f->sem);
611 } 612 }
612 613
613 /* dead_f is NULL if this was a rename not a real unlink */ 614 /* dead_f is NULL if this was a rename not a real unlink */
@@ -615,7 +616,7 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
615 pointing to an inode which didn't exist. */ 616 pointing to an inode which didn't exist. */
616 if (dead_f && dead_f->inocache) { 617 if (dead_f && dead_f->inocache) {
617 618
618 down(&dead_f->sem); 619 mutex_lock(&dead_f->sem);
619 620
620 if (S_ISDIR(OFNI_EDONI_2SFFJ(dead_f)->i_mode)) { 621 if (S_ISDIR(OFNI_EDONI_2SFFJ(dead_f)->i_mode)) {
621 while (dead_f->dents) { 622 while (dead_f->dents) {
@@ -635,11 +636,11 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
635 jffs2_mark_node_obsolete(c, fd->raw); 636 jffs2_mark_node_obsolete(c, fd->raw);
636 jffs2_free_full_dirent(fd); 637 jffs2_free_full_dirent(fd);
637 } 638 }
638 } 639 dead_f->inocache->pino_nlink = 0;
639 640 } else
640 dead_f->inocache->nlink--; 641 dead_f->inocache->pino_nlink--;
641 /* NB: Caller must set inode nlink if appropriate */ 642 /* NB: Caller must set inode nlink if appropriate */
642 up(&dead_f->sem); 643 mutex_unlock(&dead_f->sem);
643 } 644 }
644 645
645 jffs2_complete_reservation(c); 646 jffs2_complete_reservation(c);
@@ -666,7 +667,7 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint
666 return ret; 667 return ret;
667 } 668 }
668 669
669 down(&dir_f->sem); 670 mutex_lock(&dir_f->sem);
670 671
671 /* Build a deletion node */ 672 /* Build a deletion node */
672 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 673 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -691,7 +692,7 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint
691 692
692 if (IS_ERR(fd)) { 693 if (IS_ERR(fd)) {
693 jffs2_complete_reservation(c); 694 jffs2_complete_reservation(c);
694 up(&dir_f->sem); 695 mutex_unlock(&dir_f->sem);
695 return PTR_ERR(fd); 696 return PTR_ERR(fd);
696 } 697 }
697 698
@@ -699,7 +700,7 @@ int jffs2_do_link (struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint
699 jffs2_add_fd_to_list(c, fd, &dir_f->dents); 700 jffs2_add_fd_to_list(c, fd, &dir_f->dents);
700 701
701 jffs2_complete_reservation(c); 702 jffs2_complete_reservation(c);
702 up(&dir_f->sem); 703 mutex_unlock(&dir_f->sem);
703 704
704 return 0; 705 return 0;
705} 706}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index e48665984cb3..082e844ab2db 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
83{ 83{
84 /* must be called under down_write(xattr_sem) */ 84 /* must be called under down_write(xattr_sem) */
85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); 85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
86 if (xd->xname) { 86 if (xd->xname) {
87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); 87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
88 kfree(xd->xname); 88 kfree(xd->xname);
@@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
595 if (!ic || ic->nlink > 0) 595 if (!ic || ic->pino_nlink > 0)
596 return; 596 return;
597 597
598 down_write(&c->xattr_sem); 598 down_write(&c->xattr_sem);
@@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
829 ref->xd and ref->ic are not valid yet. */ 829 ref->xd and ref->ic are not valid yet. */
830 xd = jffs2_find_xattr_datum(c, ref->xid); 830 xd = jffs2_find_xattr_datum(c, ref->xid);
831 ic = jffs2_get_ino_cache(c, ref->ino); 831 ic = jffs2_get_ino_cache(c, ref->ino);
832 if (!xd || !ic || !ic->nlink) { 832 if (!xd || !ic || !ic->pino_nlink) {
833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", 833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
834 ref->ino, ref->xid, ref->xseqno); 834 ref->ino, ref->xid, ref->xseqno);
835 ref->xseqno |= XREF_DELETE_MARKER; 835 ref->xseqno |= XREF_DELETE_MARKER;
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1253 if (rc) { 1253 if (rc) {
1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1255 __FUNCTION__, rc, totlen); 1255 __func__, rc, totlen);
1256 rc = rc ? rc : -EBADFD; 1256 rc = rc ? rc : -EBADFD;
1257 goto out; 1257 goto out;
1258 } 1258 }
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 887f5759e536..bf6ab19b86ee 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -89,7 +89,7 @@ void jfs_proc_init(void)
89{ 89{
90 int i; 90 int i;
91 91
92 if (!(base = proc_mkdir("jfs", proc_root_fs))) 92 if (!(base = proc_mkdir("fs/jfs", NULL)))
93 return; 93 return;
94 base->owner = THIS_MODULE; 94 base->owner = THIS_MODULE;
95 95
@@ -109,7 +109,7 @@ void jfs_proc_clean(void)
109 if (base) { 109 if (base) {
110 for (i = 0; i < NPROCENT; i++) 110 for (i = 0; i < NPROCENT; i++)
111 remove_proc_entry(Entries[i].name, base); 111 remove_proc_entry(Entries[i].name, base);
112 remove_proc_entry("jfs", proc_root_fs); 112 remove_proc_entry("fs/jfs", NULL);
113 } 113 }
114} 114}
115 115
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b6b74a60e1eb..5df517b81f3f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -155,8 +155,6 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
155int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) 155int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
156{ 156{
157 struct nlm_rqst *call; 157 struct nlm_rqst *call;
158 sigset_t oldset;
159 unsigned long flags;
160 int status; 158 int status;
161 159
162 nlm_get_host(host); 160 nlm_get_host(host);
@@ -168,22 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
168 /* Set up the argument struct */ 166 /* Set up the argument struct */
169 nlmclnt_setlockargs(call, fl); 167 nlmclnt_setlockargs(call, fl);
170 168
171 /* Keep the old signal mask */
172 spin_lock_irqsave(&current->sighand->siglock, flags);
173 oldset = current->blocked;
174
175 /* If we're cleaning up locks because the process is exiting,
176 * perform the RPC call asynchronously. */
177 if ((IS_SETLK(cmd) || IS_SETLKW(cmd))
178 && fl->fl_type == F_UNLCK
179 && (current->flags & PF_EXITING)) {
180 sigfillset(&current->blocked); /* Mask all signals */
181 recalc_sigpending();
182
183 call->a_flags = RPC_TASK_ASYNC;
184 }
185 spin_unlock_irqrestore(&current->sighand->siglock, flags);
186
187 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 169 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
188 if (fl->fl_type != F_UNLCK) { 170 if (fl->fl_type != F_UNLCK) {
189 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; 171 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -198,11 +180,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
198 fl->fl_ops->fl_release_private(fl); 180 fl->fl_ops->fl_release_private(fl);
199 fl->fl_ops = NULL; 181 fl->fl_ops = NULL;
200 182
201 spin_lock_irqsave(&current->sighand->siglock, flags);
202 current->blocked = oldset;
203 recalc_sigpending();
204 spin_unlock_irqrestore(&current->sighand->siglock, flags);
205
206 dprintk("lockd: clnt proc returns %d\n", status); 183 dprintk("lockd: clnt proc returns %d\n", status);
207 return status; 184 return status;
208} 185}
@@ -221,6 +198,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
221 for(;;) { 198 for(;;) {
222 call = kzalloc(sizeof(*call), GFP_KERNEL); 199 call = kzalloc(sizeof(*call), GFP_KERNEL);
223 if (call != NULL) { 200 if (call != NULL) {
201 atomic_set(&call->a_count, 1);
224 locks_init_lock(&call->a_args.lock.fl); 202 locks_init_lock(&call->a_args.lock.fl);
225 locks_init_lock(&call->a_res.lock.fl); 203 locks_init_lock(&call->a_res.lock.fl);
226 call->a_host = host; 204 call->a_host = host;
@@ -237,6 +215,8 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
237 215
238void nlm_release_call(struct nlm_rqst *call) 216void nlm_release_call(struct nlm_rqst *call)
239{ 217{
218 if (!atomic_dec_and_test(&call->a_count))
219 return;
240 nlm_release_host(call->a_host); 220 nlm_release_host(call->a_host);
241 nlmclnt_release_lockargs(call); 221 nlmclnt_release_lockargs(call);
242 kfree(call); 222 kfree(call);
@@ -267,7 +247,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
267 * Generic NLM call 247 * Generic NLM call
268 */ 248 */
269static int 249static int
270nlmclnt_call(struct nlm_rqst *req, u32 proc) 250nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
271{ 251{
272 struct nlm_host *host = req->a_host; 252 struct nlm_host *host = req->a_host;
273 struct rpc_clnt *clnt; 253 struct rpc_clnt *clnt;
@@ -276,6 +256,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc)
276 struct rpc_message msg = { 256 struct rpc_message msg = {
277 .rpc_argp = argp, 257 .rpc_argp = argp,
278 .rpc_resp = resp, 258 .rpc_resp = resp,
259 .rpc_cred = cred,
279 }; 260 };
280 int status; 261 int status;
281 262
@@ -343,10 +324,16 @@ in_grace_period:
343/* 324/*
344 * Generic NLM call, async version. 325 * Generic NLM call, async version.
345 */ 326 */
346static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops) 327static struct rpc_task *__nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
347{ 328{
348 struct nlm_host *host = req->a_host; 329 struct nlm_host *host = req->a_host;
349 struct rpc_clnt *clnt; 330 struct rpc_clnt *clnt;
331 struct rpc_task_setup task_setup_data = {
332 .rpc_message = msg,
333 .callback_ops = tk_ops,
334 .callback_data = req,
335 .flags = RPC_TASK_ASYNC,
336 };
350 337
351 dprintk("lockd: call procedure %d on %s (async)\n", 338 dprintk("lockd: call procedure %d on %s (async)\n",
352 (int)proc, host->h_name); 339 (int)proc, host->h_name);
@@ -356,21 +343,36 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *
356 if (clnt == NULL) 343 if (clnt == NULL)
357 goto out_err; 344 goto out_err;
358 msg->rpc_proc = &clnt->cl_procinfo[proc]; 345 msg->rpc_proc = &clnt->cl_procinfo[proc];
346 task_setup_data.rpc_client = clnt;
359 347
360 /* bootstrap and kick off the async RPC call */ 348 /* bootstrap and kick off the async RPC call */
361 return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req); 349 return rpc_run_task(&task_setup_data);
362out_err: 350out_err:
363 tk_ops->rpc_release(req); 351 tk_ops->rpc_release(req);
364 return -ENOLCK; 352 return ERR_PTR(-ENOLCK);
365} 353}
366 354
355static int nlm_do_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
356{
357 struct rpc_task *task;
358
359 task = __nlm_async_call(req, proc, msg, tk_ops);
360 if (IS_ERR(task))
361 return PTR_ERR(task);
362 rpc_put_task(task);
363 return 0;
364}
365
366/*
367 * NLM asynchronous call.
368 */
367int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) 369int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
368{ 370{
369 struct rpc_message msg = { 371 struct rpc_message msg = {
370 .rpc_argp = &req->a_args, 372 .rpc_argp = &req->a_args,
371 .rpc_resp = &req->a_res, 373 .rpc_resp = &req->a_res,
372 }; 374 };
373 return __nlm_async_call(req, proc, &msg, tk_ops); 375 return nlm_do_async_call(req, proc, &msg, tk_ops);
374} 376}
375 377
376int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) 378int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
@@ -378,7 +380,33 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t
378 struct rpc_message msg = { 380 struct rpc_message msg = {
379 .rpc_argp = &req->a_res, 381 .rpc_argp = &req->a_res,
380 }; 382 };
381 return __nlm_async_call(req, proc, &msg, tk_ops); 383 return nlm_do_async_call(req, proc, &msg, tk_ops);
384}
385
386/*
387 * NLM client asynchronous call.
388 *
389 * Note that although the calls are asynchronous, and are therefore
390 * guaranteed to complete, we still always attempt to wait for
391 * completion in order to be able to correctly track the lock
392 * state.
393 */
394static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
395{
396 struct rpc_message msg = {
397 .rpc_argp = &req->a_args,
398 .rpc_resp = &req->a_res,
399 .rpc_cred = cred,
400 };
401 struct rpc_task *task;
402 int err;
403
404 task = __nlm_async_call(req, proc, &msg, tk_ops);
405 if (IS_ERR(task))
406 return PTR_ERR(task);
407 err = rpc_wait_for_completion_task(task);
408 rpc_put_task(task);
409 return err;
382} 410}
383 411
384/* 412/*
@@ -389,7 +417,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
389{ 417{
390 int status; 418 int status;
391 419
392 status = nlmclnt_call(req, NLMPROC_TEST); 420 status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_TEST);
393 if (status < 0) 421 if (status < 0)
394 goto out; 422 goto out;
395 423
@@ -480,10 +508,12 @@ static int do_vfs_lock(struct file_lock *fl)
480static int 508static int
481nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) 509nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
482{ 510{
511 struct rpc_cred *cred = nfs_file_cred(fl->fl_file);
483 struct nlm_host *host = req->a_host; 512 struct nlm_host *host = req->a_host;
484 struct nlm_res *resp = &req->a_res; 513 struct nlm_res *resp = &req->a_res;
485 struct nlm_wait *block = NULL; 514 struct nlm_wait *block = NULL;
486 unsigned char fl_flags = fl->fl_flags; 515 unsigned char fl_flags = fl->fl_flags;
516 unsigned char fl_type;
487 int status = -ENOLCK; 517 int status = -ENOLCK;
488 518
489 if (nsm_monitor(host) < 0) { 519 if (nsm_monitor(host) < 0) {
@@ -493,18 +523,22 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
493 } 523 }
494 fl->fl_flags |= FL_ACCESS; 524 fl->fl_flags |= FL_ACCESS;
495 status = do_vfs_lock(fl); 525 status = do_vfs_lock(fl);
526 fl->fl_flags = fl_flags;
496 if (status < 0) 527 if (status < 0)
497 goto out; 528 goto out;
498 529
499 block = nlmclnt_prepare_block(host, fl); 530 block = nlmclnt_prepare_block(host, fl);
500again: 531again:
532 /*
533 * Initialise resp->status to a valid non-zero value,
534 * since 0 == nlm_lck_granted
535 */
536 resp->status = nlm_lck_blocked;
501 for(;;) { 537 for(;;) {
502 /* Reboot protection */ 538 /* Reboot protection */
503 fl->fl_u.nfs_fl.state = host->h_state; 539 fl->fl_u.nfs_fl.state = host->h_state;
504 status = nlmclnt_call(req, NLMPROC_LOCK); 540 status = nlmclnt_call(cred, req, NLMPROC_LOCK);
505 if (status < 0) 541 if (status < 0)
506 goto out_unblock;
507 if (!req->a_args.block)
508 break; 542 break;
509 /* Did a reclaimer thread notify us of a server reboot? */ 543 /* Did a reclaimer thread notify us of a server reboot? */
510 if (resp->status == nlm_lck_denied_grace_period) 544 if (resp->status == nlm_lck_denied_grace_period)
@@ -513,15 +547,22 @@ again:
513 break; 547 break;
514 /* Wait on an NLM blocking lock */ 548 /* Wait on an NLM blocking lock */
515 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 549 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
516 /* if we were interrupted. Send a CANCEL request to the server
517 * and exit
518 */
519 if (status < 0) 550 if (status < 0)
520 goto out_unblock; 551 break;
521 if (resp->status != nlm_lck_blocked) 552 if (resp->status != nlm_lck_blocked)
522 break; 553 break;
523 } 554 }
524 555
556 /* if we were interrupted while blocking, then cancel the lock request
557 * and exit
558 */
559 if (resp->status == nlm_lck_blocked) {
560 if (!req->a_args.block)
561 goto out_unlock;
562 if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
563 goto out_unblock;
564 }
565
525 if (resp->status == nlm_granted) { 566 if (resp->status == nlm_granted) {
526 down_read(&host->h_rwsem); 567 down_read(&host->h_rwsem);
527 /* Check whether or not the server has rebooted */ 568 /* Check whether or not the server has rebooted */
@@ -530,20 +571,34 @@ again:
530 goto again; 571 goto again;
531 } 572 }
532 /* Ensure the resulting lock will get added to granted list */ 573 /* Ensure the resulting lock will get added to granted list */
533 fl->fl_flags = fl_flags | FL_SLEEP; 574 fl->fl_flags |= FL_SLEEP;
534 if (do_vfs_lock(fl) < 0) 575 if (do_vfs_lock(fl) < 0)
535 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
536 up_read(&host->h_rwsem); 577 up_read(&host->h_rwsem);
578 fl->fl_flags = fl_flags;
579 status = 0;
537 } 580 }
581 if (status < 0)
582 goto out_unlock;
538 status = nlm_stat_to_errno(resp->status); 583 status = nlm_stat_to_errno(resp->status);
539out_unblock: 584out_unblock:
540 nlmclnt_finish_block(block); 585 nlmclnt_finish_block(block);
541 /* Cancel the blocked request if it is still pending */
542 if (resp->status == nlm_lck_blocked)
543 nlmclnt_cancel(host, req->a_args.block, fl);
544out: 586out:
545 nlm_release_call(req); 587 nlm_release_call(req);
588 return status;
589out_unlock:
590 /* Fatal error: ensure that we remove the lock altogether */
591 dprintk("lockd: lock attempt ended in fatal error.\n"
592 " Attempting to unlock.\n");
593 nlmclnt_finish_block(block);
594 fl_type = fl->fl_type;
595 fl->fl_type = F_UNLCK;
596 down_read(&host->h_rwsem);
597 do_vfs_lock(fl);
598 up_read(&host->h_rwsem);
599 fl->fl_type = fl_type;
546 fl->fl_flags = fl_flags; 600 fl->fl_flags = fl_flags;
601 nlmclnt_async_call(cred, req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
547 return status; 602 return status;
548} 603}
549 604
@@ -567,8 +622,8 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
567 nlmclnt_setlockargs(req, fl); 622 nlmclnt_setlockargs(req, fl);
568 req->a_args.reclaim = 1; 623 req->a_args.reclaim = 1;
569 624
570 if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0 625 status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_LOCK);
571 && req->a_res.status == nlm_granted) 626 if (status >= 0 && req->a_res.status == nlm_granted)
572 return 0; 627 return 0;
573 628
574 printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d " 629 printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d "
@@ -598,7 +653,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
598{ 653{
599 struct nlm_host *host = req->a_host; 654 struct nlm_host *host = req->a_host;
600 struct nlm_res *resp = &req->a_res; 655 struct nlm_res *resp = &req->a_res;
601 int status = 0; 656 int status;
657 unsigned char fl_flags = fl->fl_flags;
602 658
603 /* 659 /*
604 * Note: the server is supposed to either grant us the unlock 660 * Note: the server is supposed to either grant us the unlock
@@ -607,16 +663,17 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
607 */ 663 */
608 fl->fl_flags |= FL_EXISTS; 664 fl->fl_flags |= FL_EXISTS;
609 down_read(&host->h_rwsem); 665 down_read(&host->h_rwsem);
610 if (do_vfs_lock(fl) == -ENOENT) { 666 status = do_vfs_lock(fl);
611 up_read(&host->h_rwsem); 667 up_read(&host->h_rwsem);
668 fl->fl_flags = fl_flags;
669 if (status == -ENOENT) {
670 status = 0;
612 goto out; 671 goto out;
613 } 672 }
614 up_read(&host->h_rwsem);
615
616 if (req->a_flags & RPC_TASK_ASYNC)
617 return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
618 673
619 status = nlmclnt_call(req, NLMPROC_UNLOCK); 674 atomic_inc(&req->a_count);
675 status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
676 NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
620 if (status < 0) 677 if (status < 0)
621 goto out; 678 goto out;
622 679
@@ -671,16 +728,10 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = {
671static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl) 728static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
672{ 729{
673 struct nlm_rqst *req; 730 struct nlm_rqst *req;
674 unsigned long flags; 731 int status;
675 sigset_t oldset;
676 int status;
677 732
678 /* Block all signals while setting up call */ 733 dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
679 spin_lock_irqsave(&current->sighand->siglock, flags); 734 " Attempting to cancel lock.\n");
680 oldset = current->blocked;
681 sigfillset(&current->blocked);
682 recalc_sigpending();
683 spin_unlock_irqrestore(&current->sighand->siglock, flags);
684 735
685 req = nlm_alloc_call(nlm_get_host(host)); 736 req = nlm_alloc_call(nlm_get_host(host));
686 if (!req) 737 if (!req)
@@ -690,13 +741,12 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
690 nlmclnt_setlockargs(req, fl); 741 nlmclnt_setlockargs(req, fl);
691 req->a_args.block = block; 742 req->a_args.block = block;
692 743
693 status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); 744 atomic_inc(&req->a_count);
694 745 status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
695 spin_lock_irqsave(&current->sighand->siglock, flags); 746 NLMPROC_CANCEL, &nlmclnt_cancel_ops);
696 current->blocked = oldset; 747 if (status == 0 && req->a_res.status == nlm_lck_denied)
697 recalc_sigpending(); 748 status = -ENOLCK;
698 spin_unlock_irqrestore(&current->sighand->siglock, flags); 749 nlm_release_call(req);
699
700 return status; 750 return status;
701} 751}
702 752
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f1ef49fff118..a17664c7eacc 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -19,12 +19,11 @@
19 19
20 20
21#define NLMDBG_FACILITY NLMDBG_HOSTCACHE 21#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
22#define NLM_HOST_MAX 64
23#define NLM_HOST_NRHASH 32 22#define NLM_HOST_NRHASH 32
24#define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) 23#define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1))
25#define NLM_HOST_REBIND (60 * HZ) 24#define NLM_HOST_REBIND (60 * HZ)
26#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) 25#define NLM_HOST_EXPIRE (300 * HZ)
27#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) 26#define NLM_HOST_COLLECT (120 * HZ)
28 27
29static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; 28static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
30static unsigned long next_gc; 29static unsigned long next_gc;
@@ -42,11 +41,12 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
42/* 41/*
43 * Common host lookup routine for server & client 42 * Common host lookup routine for server & client
44 */ 43 */
45static struct nlm_host * 44static struct nlm_host *nlm_lookup_host(int server,
46nlm_lookup_host(int server, const struct sockaddr_in *sin, 45 const struct sockaddr_in *sin,
47 int proto, int version, const char *hostname, 46 int proto, u32 version,
48 unsigned int hostname_len, 47 const char *hostname,
49 const struct sockaddr_in *ssin) 48 unsigned int hostname_len,
49 const struct sockaddr_in *ssin)
50{ 50{
51 struct hlist_head *chain; 51 struct hlist_head *chain;
52 struct hlist_node *pos; 52 struct hlist_node *pos;
@@ -55,7 +55,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
55 int hash; 55 int hash;
56 56
57 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT 57 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
58 ", p=%d, v=%d, my role=%s, name=%.*s)\n", 58 ", p=%d, v=%u, my role=%s, name=%.*s)\n",
59 NIPQUAD(ssin->sin_addr.s_addr), 59 NIPQUAD(ssin->sin_addr.s_addr),
60 NIPQUAD(sin->sin_addr.s_addr), proto, version, 60 NIPQUAD(sin->sin_addr.s_addr), proto, version,
61 server? "server" : "client", 61 server? "server" : "client",
@@ -142,9 +142,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
142 INIT_LIST_HEAD(&host->h_granted); 142 INIT_LIST_HEAD(&host->h_granted);
143 INIT_LIST_HEAD(&host->h_reclaim); 143 INIT_LIST_HEAD(&host->h_reclaim);
144 144
145 if (++nrhosts > NLM_HOST_MAX) 145 nrhosts++;
146 next_gc = 0;
147
148out: 146out:
149 mutex_unlock(&nlm_host_mutex); 147 mutex_unlock(&nlm_host_mutex);
150 return host; 148 return host;
@@ -175,9 +173,10 @@ nlm_destroy_host(struct nlm_host *host)
175/* 173/*
176 * Find an NLM server handle in the cache. If there is none, create it. 174 * Find an NLM server handle in the cache. If there is none, create it.
177 */ 175 */
178struct nlm_host * 176struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin,
179nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, 177 int proto, u32 version,
180 const char *hostname, unsigned int hostname_len) 178 const char *hostname,
179 unsigned int hostname_len)
181{ 180{
182 struct sockaddr_in ssin = {0}; 181 struct sockaddr_in ssin = {0};
183 182
@@ -460,7 +459,7 @@ nlm_gc_hosts(void)
460 * Manage NSM handles 459 * Manage NSM handles
461 */ 460 */
462static LIST_HEAD(nsm_handles); 461static LIST_HEAD(nsm_handles);
463static DEFINE_MUTEX(nsm_mutex); 462static DEFINE_SPINLOCK(nsm_lock);
464 463
465static struct nsm_handle * 464static struct nsm_handle *
466__nsm_find(const struct sockaddr_in *sin, 465__nsm_find(const struct sockaddr_in *sin,
@@ -468,7 +467,7 @@ __nsm_find(const struct sockaddr_in *sin,
468 int create) 467 int create)
469{ 468{
470 struct nsm_handle *nsm = NULL; 469 struct nsm_handle *nsm = NULL;
471 struct list_head *pos; 470 struct nsm_handle *pos;
472 471
473 if (!sin) 472 if (!sin)
474 return NULL; 473 return NULL;
@@ -482,38 +481,43 @@ __nsm_find(const struct sockaddr_in *sin,
482 return NULL; 481 return NULL;
483 } 482 }
484 483
485 mutex_lock(&nsm_mutex); 484retry:
486 list_for_each(pos, &nsm_handles) { 485 spin_lock(&nsm_lock);
487 nsm = list_entry(pos, struct nsm_handle, sm_link); 486 list_for_each_entry(pos, &nsm_handles, sm_link) {
488 487
489 if (hostname && nsm_use_hostnames) { 488 if (hostname && nsm_use_hostnames) {
490 if (strlen(nsm->sm_name) != hostname_len 489 if (strlen(pos->sm_name) != hostname_len
491 || memcmp(nsm->sm_name, hostname, hostname_len)) 490 || memcmp(pos->sm_name, hostname, hostname_len))
492 continue; 491 continue;
493 } else if (!nlm_cmp_addr(&nsm->sm_addr, sin)) 492 } else if (!nlm_cmp_addr(&pos->sm_addr, sin))
494 continue; 493 continue;
495 atomic_inc(&nsm->sm_count); 494 atomic_inc(&pos->sm_count);
496 goto out; 495 kfree(nsm);
496 nsm = pos;
497 goto found;
497 } 498 }
498 499 if (nsm) {
499 if (!create) { 500 list_add(&nsm->sm_link, &nsm_handles);
500 nsm = NULL; 501 goto found;
501 goto out;
502 } 502 }
503 spin_unlock(&nsm_lock);
504
505 if (!create)
506 return NULL;
503 507
504 nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); 508 nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
505 if (nsm != NULL) { 509 if (nsm == NULL)
506 nsm->sm_addr = *sin; 510 return NULL;
507 nsm->sm_name = (char *) (nsm + 1);
508 memcpy(nsm->sm_name, hostname, hostname_len);
509 nsm->sm_name[hostname_len] = '\0';
510 atomic_set(&nsm->sm_count, 1);
511 511
512 list_add(&nsm->sm_link, &nsm_handles); 512 nsm->sm_addr = *sin;
513 } 513 nsm->sm_name = (char *) (nsm + 1);
514 memcpy(nsm->sm_name, hostname, hostname_len);
515 nsm->sm_name[hostname_len] = '\0';
516 atomic_set(&nsm->sm_count, 1);
517 goto retry;
514 518
515out: 519found:
516 mutex_unlock(&nsm_mutex); 520 spin_unlock(&nsm_lock);
517 return nsm; 521 return nsm;
518} 522}
519 523
@@ -532,12 +536,9 @@ nsm_release(struct nsm_handle *nsm)
532{ 536{
533 if (!nsm) 537 if (!nsm)
534 return; 538 return;
535 if (atomic_dec_and_test(&nsm->sm_count)) { 539 if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
536 mutex_lock(&nsm_mutex); 540 list_del(&nsm->sm_link);
537 if (atomic_read(&nsm->sm_count) == 0) { 541 spin_unlock(&nsm_lock);
538 list_del(&nsm->sm_link); 542 kfree(nsm);
539 kfree(nsm);
540 }
541 mutex_unlock(&nsm_mutex);
542 } 543 }
543} 544}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 908b23fadd05..e4d563543b11 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -18,6 +18,8 @@
18 18
19#define NLMDBG_FACILITY NLMDBG_MONITOR 19#define NLMDBG_FACILITY NLMDBG_MONITOR
20 20
21#define XDR_ADDRBUF_LEN (20)
22
21static struct rpc_clnt * nsm_create(void); 23static struct rpc_clnt * nsm_create(void);
22 24
23static struct rpc_program nsm_program; 25static struct rpc_program nsm_program;
@@ -147,28 +149,55 @@ nsm_create(void)
147 149
148/* 150/*
149 * XDR functions for NSM. 151 * XDR functions for NSM.
152 *
153 * See http://www.opengroup.org/ for details on the Network
154 * Status Monitor wire protocol.
150 */ 155 */
151 156
152static __be32 * 157static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
153xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
154{ 158{
155 char buffer[20], *name; 159 size_t len = strlen(string);
156 160
157 /* 161 if (len > SM_MAXSTRLEN)
158 * Use the dotted-quad IP address of the remote host as 162 len = SM_MAXSTRLEN;
159 * identifier. Linux statd always looks up the canonical 163 return xdr_encode_opaque(p, string, len);
160 * hostname first for whatever remote hostname it receives, 164}
161 * so this works alright. 165
162 */ 166/*
163 if (nsm_use_hostnames) { 167 * "mon_name" specifies the host to be monitored.
164 name = argp->mon_name; 168 *
165 } else { 169 * Linux uses a text version of the IP address of the remote
166 sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); 170 * host as the host identifier (the "mon_name" argument).
171 *
172 * Linux statd always looks up the canonical hostname first for
173 * whatever remote hostname it receives, so this works alright.
174 */
175static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
176{
177 char buffer[XDR_ADDRBUF_LEN + 1];
178 char *name = argp->mon_name;
179
180 if (!nsm_use_hostnames) {
181 snprintf(buffer, XDR_ADDRBUF_LEN,
182 NIPQUAD_FMT, NIPQUAD(argp->addr));
167 name = buffer; 183 name = buffer;
168 } 184 }
169 if (!(p = xdr_encode_string(p, name)) 185
170 || !(p = xdr_encode_string(p, utsname()->nodename))) 186 return xdr_encode_nsm_string(p, name);
187}
188
189/*
190 * The "my_id" argument specifies the hostname and RPC procedure
191 * to be called when the status manager receives notification
192 * (via the SM_NOTIFY call) that the state of host "mon_name"
193 * has changed.
194 */
195static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
196{
197 p = xdr_encode_nsm_string(p, utsname()->nodename);
198 if (!p)
171 return ERR_PTR(-EIO); 199 return ERR_PTR(-EIO);
200
172 *p++ = htonl(argp->prog); 201 *p++ = htonl(argp->prog);
173 *p++ = htonl(argp->vers); 202 *p++ = htonl(argp->vers);
174 *p++ = htonl(argp->proc); 203 *p++ = htonl(argp->proc);
@@ -176,18 +205,48 @@ xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
176 return p; 205 return p;
177} 206}
178 207
179static int 208/*
180xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) 209 * The "mon_id" argument specifies the non-private arguments
210 * of an SM_MON or SM_UNMON call.
211 */
212static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
181{ 213{
182 p = xdr_encode_common(rqstp, p, argp); 214 p = xdr_encode_mon_name(p, argp);
183 if (IS_ERR(p)) 215 if (!p)
184 return PTR_ERR(p); 216 return ERR_PTR(-EIO);
185 217
186 /* Surprise - there may even be room for an IPv6 address now */ 218 return xdr_encode_my_id(p, argp);
219}
220
221/*
222 * The "priv" argument may contain private information required
223 * by the SM_MON call. This information will be supplied in the
224 * SM_NOTIFY call.
225 *
226 * Linux provides the raw IP address of the monitored host,
227 * left in network byte order.
228 */
229static __be32 *xdr_encode_priv(__be32 *p, struct nsm_args *argp)
230{
187 *p++ = argp->addr; 231 *p++ = argp->addr;
188 *p++ = 0; 232 *p++ = 0;
189 *p++ = 0; 233 *p++ = 0;
190 *p++ = 0; 234 *p++ = 0;
235
236 return p;
237}
238
239static int
240xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
241{
242 p = xdr_encode_mon_id(p, argp);
243 if (IS_ERR(p))
244 return PTR_ERR(p);
245
246 p = xdr_encode_priv(p, argp);
247 if (IS_ERR(p))
248 return PTR_ERR(p);
249
191 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); 250 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
192 return 0; 251 return 0;
193} 252}
@@ -195,7 +254,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
195static int 254static int
196xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) 255xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
197{ 256{
198 p = xdr_encode_common(rqstp, p, argp); 257 p = xdr_encode_mon_id(p, argp);
199 if (IS_ERR(p)) 258 if (IS_ERR(p))
200 return PTR_ERR(p); 259 return PTR_ERR(p);
201 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); 260 rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
@@ -220,9 +279,11 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
220} 279}
221 280
222#define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) 281#define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN))
223#define SM_my_id_sz (3+1+SM_my_name_sz) 282#define SM_my_id_sz (SM_my_name_sz+3)
224#define SM_mon_id_sz (1+XDR_QUADLEN(20)+SM_my_id_sz) 283#define SM_mon_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN))
225#define SM_mon_sz (SM_mon_id_sz+4) 284#define SM_mon_id_sz (SM_mon_name_sz+SM_my_id_sz)
285#define SM_priv_sz (XDR_QUADLEN(SM_PRIV_SIZE))
286#define SM_mon_sz (SM_mon_id_sz+SM_priv_sz)
226#define SM_monres_sz 2 287#define SM_monres_sz 2
227#define SM_unmonres_sz 1 288#define SM_unmonres_sz 1
228 289
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ed8bd4de941..2169af4d5455 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -25,6 +25,7 @@
25#include <linux/smp.h> 25#include <linux/smp.h>
26#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
27#include <linux/mutex.h> 27#include <linux/mutex.h>
28#include <linux/kthread.h>
28#include <linux/freezer.h> 29#include <linux/freezer.h>
29 30
30#include <linux/sunrpc/types.h> 31#include <linux/sunrpc/types.h>
@@ -48,14 +49,11 @@ EXPORT_SYMBOL(nlmsvc_ops);
48 49
49static DEFINE_MUTEX(nlmsvc_mutex); 50static DEFINE_MUTEX(nlmsvc_mutex);
50static unsigned int nlmsvc_users; 51static unsigned int nlmsvc_users;
51static pid_t nlmsvc_pid; 52static struct task_struct *nlmsvc_task;
52static struct svc_serv *nlmsvc_serv; 53static struct svc_serv *nlmsvc_serv;
53int nlmsvc_grace_period; 54int nlmsvc_grace_period;
54unsigned long nlmsvc_timeout; 55unsigned long nlmsvc_timeout;
55 56
56static DECLARE_COMPLETION(lockd_start_done);
57static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
58
59/* 57/*
60 * These can be set at insmod time (useful for NFS as root filesystem), 58 * These can be set at insmod time (useful for NFS as root filesystem),
61 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 59 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
@@ -74,7 +72,9 @@ static const unsigned long nlm_timeout_min = 3;
74static const unsigned long nlm_timeout_max = 20; 72static const unsigned long nlm_timeout_max = 20;
75static const int nlm_port_min = 0, nlm_port_max = 65535; 73static const int nlm_port_min = 0, nlm_port_max = 65535;
76 74
75#ifdef CONFIG_SYSCTL
77static struct ctl_table_header * nlm_sysctl_table; 76static struct ctl_table_header * nlm_sysctl_table;
77#endif
78 78
79static unsigned long get_lockd_grace_period(void) 79static unsigned long get_lockd_grace_period(void)
80{ 80{
@@ -111,35 +111,30 @@ static inline void clear_grace_period(void)
111/* 111/*
112 * This is the lockd kernel thread 112 * This is the lockd kernel thread
113 */ 113 */
114static void 114static int
115lockd(struct svc_rqst *rqstp) 115lockd(void *vrqstp)
116{ 116{
117 int err = 0; 117 int err = 0, preverr = 0;
118 struct svc_rqst *rqstp = vrqstp;
118 unsigned long grace_period_expire; 119 unsigned long grace_period_expire;
119 120
120 /* Lock module and set up kernel thread */ 121 /* try_to_freeze() is called from svc_recv() */
121 /* lockd_up is waiting for us to startup, so will
122 * be holding a reference to this module, so it
123 * is safe to just claim another reference
124 */
125 __module_get(THIS_MODULE);
126 lock_kernel();
127
128 /*
129 * Let our maker know we're running.
130 */
131 nlmsvc_pid = current->pid;
132 nlmsvc_serv = rqstp->rq_server;
133 complete(&lockd_start_done);
134
135 daemonize("lockd");
136 set_freezable(); 122 set_freezable();
137 123
138 /* Process request with signals blocked, but allow SIGKILL. */ 124 /* Allow SIGKILL to tell lockd to drop all of its locks */
139 allow_signal(SIGKILL); 125 allow_signal(SIGKILL);
140 126
141 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); 127 dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
142 128
129 /*
130 * FIXME: it would be nice if lockd didn't spend its entire life
131 * running under the BKL. At the very least, it would be good to
132 * have someone clarify what it's intended to protect here. I've
133 * seen some handwavy posts about posix locking needing to be
134 * done under the BKL, but it's far from clear.
135 */
136 lock_kernel();
137
143 if (!nlm_timeout) 138 if (!nlm_timeout)
144 nlm_timeout = LOCKD_DFLT_TIMEO; 139 nlm_timeout = LOCKD_DFLT_TIMEO;
145 nlmsvc_timeout = nlm_timeout * HZ; 140 nlmsvc_timeout = nlm_timeout * HZ;
@@ -148,10 +143,9 @@ lockd(struct svc_rqst *rqstp)
148 143
149 /* 144 /*
150 * The main request loop. We don't terminate until the last 145 * The main request loop. We don't terminate until the last
151 * NFS mount or NFS daemon has gone away, and we've been sent a 146 * NFS mount or NFS daemon has gone away.
152 * signal, or else another process has taken over our job.
153 */ 147 */
154 while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { 148 while (!kthread_should_stop()) {
155 long timeout = MAX_SCHEDULE_TIMEOUT; 149 long timeout = MAX_SCHEDULE_TIMEOUT;
156 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 150 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
157 151
@@ -161,6 +155,7 @@ lockd(struct svc_rqst *rqstp)
161 nlmsvc_invalidate_all(); 155 nlmsvc_invalidate_all();
162 grace_period_expire = set_grace_period(); 156 grace_period_expire = set_grace_period();
163 } 157 }
158 continue;
164 } 159 }
165 160
166 /* 161 /*
@@ -179,14 +174,20 @@ lockd(struct svc_rqst *rqstp)
179 * recvfrom routine. 174 * recvfrom routine.
180 */ 175 */
181 err = svc_recv(rqstp, timeout); 176 err = svc_recv(rqstp, timeout);
182 if (err == -EAGAIN || err == -EINTR) 177 if (err == -EAGAIN || err == -EINTR) {
178 preverr = err;
183 continue; 179 continue;
180 }
184 if (err < 0) { 181 if (err < 0) {
185 printk(KERN_WARNING 182 if (err != preverr) {
186 "lockd: terminating on error %d\n", 183 printk(KERN_WARNING "%s: unexpected error "
187 -err); 184 "from svc_recv (%d)\n", __func__, err);
188 break; 185 preverr = err;
186 }
187 schedule_timeout_interruptible(HZ);
188 continue;
189 } 189 }
190 preverr = err;
190 191
191 dprintk("lockd: request from %s\n", 192 dprintk("lockd: request from %s\n",
192 svc_print_addr(rqstp, buf, sizeof(buf))); 193 svc_print_addr(rqstp, buf, sizeof(buf)));
@@ -195,28 +196,19 @@ lockd(struct svc_rqst *rqstp)
195 } 196 }
196 197
197 flush_signals(current); 198 flush_signals(current);
199 if (nlmsvc_ops)
200 nlmsvc_invalidate_all();
201 nlm_shutdown_hosts();
198 202
199 /* 203 unlock_kernel();
200 * Check whether there's a new lockd process before 204
201 * shutting down the hosts and clearing the slot. 205 nlmsvc_task = NULL;
202 */ 206 nlmsvc_serv = NULL;
203 if (!nlmsvc_pid || current->pid == nlmsvc_pid) {
204 if (nlmsvc_ops)
205 nlmsvc_invalidate_all();
206 nlm_shutdown_hosts();
207 nlmsvc_pid = 0;
208 nlmsvc_serv = NULL;
209 } else
210 printk(KERN_DEBUG
211 "lockd: new process, skipping host shutdown\n");
212 wake_up(&lockd_exit);
213 207
214 /* Exit the RPC thread */ 208 /* Exit the RPC thread */
215 svc_exit_thread(rqstp); 209 svc_exit_thread(rqstp);
216 210
217 /* Release module */ 211 return 0;
218 unlock_kernel();
219 module_put_and_exit(0);
220} 212}
221 213
222/* 214/*
@@ -261,14 +253,15 @@ static int make_socks(struct svc_serv *serv, int proto)
261int 253int
262lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ 254lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
263{ 255{
264 struct svc_serv * serv; 256 struct svc_serv *serv;
265 int error = 0; 257 struct svc_rqst *rqstp;
258 int error = 0;
266 259
267 mutex_lock(&nlmsvc_mutex); 260 mutex_lock(&nlmsvc_mutex);
268 /* 261 /*
269 * Check whether we're already up and running. 262 * Check whether we're already up and running.
270 */ 263 */
271 if (nlmsvc_pid) { 264 if (nlmsvc_serv) {
272 if (proto) 265 if (proto)
273 error = make_socks(nlmsvc_serv, proto); 266 error = make_socks(nlmsvc_serv, proto);
274 goto out; 267 goto out;
@@ -295,13 +288,28 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
295 /* 288 /*
296 * Create the kernel thread and wait for it to start. 289 * Create the kernel thread and wait for it to start.
297 */ 290 */
298 error = svc_create_thread(lockd, serv); 291 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
299 if (error) { 292 if (IS_ERR(rqstp)) {
293 error = PTR_ERR(rqstp);
294 printk(KERN_WARNING
295 "lockd_up: svc_rqst allocation failed, error=%d\n",
296 error);
297 goto destroy_and_out;
298 }
299
300 svc_sock_update_bufs(serv);
301 nlmsvc_serv = rqstp->rq_server;
302
303 nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
304 if (IS_ERR(nlmsvc_task)) {
305 error = PTR_ERR(nlmsvc_task);
306 nlmsvc_task = NULL;
307 nlmsvc_serv = NULL;
300 printk(KERN_WARNING 308 printk(KERN_WARNING
301 "lockd_up: create thread failed, error=%d\n", error); 309 "lockd_up: kthread_run failed, error=%d\n", error);
310 svc_exit_thread(rqstp);
302 goto destroy_and_out; 311 goto destroy_and_out;
303 } 312 }
304 wait_for_completion(&lockd_start_done);
305 313
306 /* 314 /*
307 * Note: svc_serv structures have an initial use count of 1, 315 * Note: svc_serv structures have an initial use count of 1,
@@ -323,42 +331,28 @@ EXPORT_SYMBOL(lockd_up);
323void 331void
324lockd_down(void) 332lockd_down(void)
325{ 333{
326 static int warned;
327
328 mutex_lock(&nlmsvc_mutex); 334 mutex_lock(&nlmsvc_mutex);
329 if (nlmsvc_users) { 335 if (nlmsvc_users) {
330 if (--nlmsvc_users) 336 if (--nlmsvc_users)
331 goto out; 337 goto out;
332 } else 338 } else {
333 printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid); 339 printk(KERN_ERR "lockd_down: no users! task=%p\n",
334 340 nlmsvc_task);
335 if (!nlmsvc_pid) { 341 BUG();
336 if (warned++ == 0)
337 printk(KERN_WARNING "lockd_down: no lockd running.\n");
338 goto out;
339 } 342 }
340 warned = 0;
341 343
342 kill_proc(nlmsvc_pid, SIGKILL, 1); 344 if (!nlmsvc_task) {
343 /* 345 printk(KERN_ERR "lockd_down: no lockd running.\n");
344 * Wait for the lockd process to exit, but since we're holding 346 BUG();
345 * the lockd semaphore, we can't wait around forever ...
346 */
347 clear_thread_flag(TIF_SIGPENDING);
348 interruptible_sleep_on_timeout(&lockd_exit, HZ);
349 if (nlmsvc_pid) {
350 printk(KERN_WARNING
351 "lockd_down: lockd failed to exit, clearing pid\n");
352 nlmsvc_pid = 0;
353 } 347 }
354 spin_lock_irq(&current->sighand->siglock); 348 kthread_stop(nlmsvc_task);
355 recalc_sigpending();
356 spin_unlock_irq(&current->sighand->siglock);
357out: 349out:
358 mutex_unlock(&nlmsvc_mutex); 350 mutex_unlock(&nlmsvc_mutex);
359} 351}
360EXPORT_SYMBOL(lockd_down); 352EXPORT_SYMBOL(lockd_down);
361 353
354#ifdef CONFIG_SYSCTL
355
362/* 356/*
363 * Sysctl parameters (same as module parameters, different interface). 357 * Sysctl parameters (same as module parameters, different interface).
364 */ 358 */
@@ -443,6 +437,8 @@ static ctl_table nlm_sysctl_root[] = {
443 { .ctl_name = 0 } 437 { .ctl_name = 0 }
444}; 438};
445 439
440#endif /* CONFIG_SYSCTL */
441
446/* 442/*
447 * Module (and sysfs) parameters. 443 * Module (and sysfs) parameters.
448 */ 444 */
@@ -516,15 +512,21 @@ module_param(nsm_use_hostnames, bool, 0644);
516 512
517static int __init init_nlm(void) 513static int __init init_nlm(void)
518{ 514{
515#ifdef CONFIG_SYSCTL
519 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); 516 nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
520 return nlm_sysctl_table ? 0 : -ENOMEM; 517 return nlm_sysctl_table ? 0 : -ENOMEM;
518#else
519 return 0;
520#endif
521} 521}
522 522
523static void __exit exit_nlm(void) 523static void __exit exit_nlm(void)
524{ 524{
525 /* FIXME: delete all NLM clients */ 525 /* FIXME: delete all NLM clients */
526 nlm_shutdown_hosts(); 526 nlm_shutdown_hosts();
527#ifdef CONFIG_SYSCTL
527 unregister_sysctl_table(nlm_sysctl_table); 528 unregister_sysctl_table(nlm_sysctl_table);
529#endif
528} 530}
529 531
530module_init(init_nlm); 532module_init(init_nlm);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index fe9bdb4a220c..81aca859bfde 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -29,6 +29,7 @@
29#include <linux/sunrpc/svc.h> 29#include <linux/sunrpc/svc.h>
30#include <linux/lockd/nlm.h> 30#include <linux/lockd/nlm.h>
31#include <linux/lockd/lockd.h> 31#include <linux/lockd/lockd.h>
32#include <linux/kthread.h>
32 33
33#define NLMDBG_FACILITY NLMDBG_SVCLOCK 34#define NLMDBG_FACILITY NLMDBG_SVCLOCK
34 35
@@ -226,8 +227,7 @@ failed:
226} 227}
227 228
228/* 229/*
229 * Delete a block. If the lock was cancelled or the grant callback 230 * Delete a block.
230 * failed, unlock is set to 1.
231 * It is the caller's responsibility to check whether the file 231 * It is the caller's responsibility to check whether the file
232 * can be closed hereafter. 232 * can be closed hereafter.
233 */ 233 */
@@ -632,7 +632,7 @@ nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
632 block->b_flags |= B_TIMED_OUT; 632 block->b_flags |= B_TIMED_OUT;
633 if (conf) { 633 if (conf) {
634 if (block->b_fl) 634 if (block->b_fl)
635 locks_copy_lock(block->b_fl, conf); 635 __locks_copy_lock(block->b_fl, conf);
636 } 636 }
637} 637}
638 638
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
752 return; 752 return;
753 default: 753 default:
754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
755 -error, __FUNCTION__); 755 -error, __func__);
756 nlmsvc_insert_block(block, 10 * HZ); 756 nlmsvc_insert_block(block, 10 * HZ);
757 nlmsvc_release_block(block); 757 nlmsvc_release_block(block);
758 return; 758 return;
@@ -887,7 +887,7 @@ nlmsvc_retry_blocked(void)
887 unsigned long timeout = MAX_SCHEDULE_TIMEOUT; 887 unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
888 struct nlm_block *block; 888 struct nlm_block *block;
889 889
890 while (!list_empty(&nlm_blocked)) { 890 while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
891 block = list_entry(nlm_blocked.next, struct nlm_block, b_list); 891 block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
892 892
893 if (block->b_when == NLM_NEVER) 893 if (block->b_when == NLM_NEVER)
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 068886de4dda..b0ae07008700 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -71,7 +71,8 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
71 struct nlm_share *share, **shpp; 71 struct nlm_share *share, **shpp;
72 struct xdr_netobj *oh = &argp->lock.oh; 72 struct xdr_netobj *oh = &argp->lock.oh;
73 73
74 for (shpp = &file->f_shares; (share = *shpp) != 0; shpp = &share->s_next) { 74 for (shpp = &file->f_shares; (share = *shpp) != NULL;
75 shpp = &share->s_next) {
75 if (share->s_host == host && nlm_cmp_owner(share, oh)) { 76 if (share->s_host == host && nlm_cmp_owner(share, oh)) {
76 *shpp = share->s_next; 77 *shpp = share->s_next;
77 kfree(share); 78 kfree(share);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dbbefbcd6712..d1c48b539df8 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -18,6 +18,8 @@
18#include <linux/lockd/lockd.h> 18#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 19#include <linux/lockd/share.h>
20#include <linux/lockd/sm_inter.h> 20#include <linux/lockd/sm_inter.h>
21#include <linux/module.h>
22#include <linux/mount.h>
21 23
22#define NLMDBG_FACILITY NLMDBG_SVCSUBS 24#define NLMDBG_FACILITY NLMDBG_SVCSUBS
23 25
@@ -194,6 +196,12 @@ again:
194 return 0; 196 return 0;
195} 197}
196 198
199static int
200nlmsvc_always_match(void *dummy1, struct nlm_host *dummy2)
201{
202 return 1;
203}
204
197/* 205/*
198 * Inspect a single file 206 * Inspect a single file
199 */ 207 */
@@ -230,7 +238,8 @@ nlm_file_inuse(struct nlm_file *file)
230 * Loop over all files in the file table. 238 * Loop over all files in the file table.
231 */ 239 */
232static int 240static int
233nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match) 241nlm_traverse_files(void *data, nlm_host_match_fn_t match,
242 int (*is_failover_file)(void *data, struct nlm_file *file))
234{ 243{
235 struct hlist_node *pos, *next; 244 struct hlist_node *pos, *next;
236 struct nlm_file *file; 245 struct nlm_file *file;
@@ -239,12 +248,14 @@ nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
239 mutex_lock(&nlm_file_mutex); 248 mutex_lock(&nlm_file_mutex);
240 for (i = 0; i < FILE_NRHASH; i++) { 249 for (i = 0; i < FILE_NRHASH; i++) {
241 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { 250 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
251 if (is_failover_file && !is_failover_file(data, file))
252 continue;
242 file->f_count++; 253 file->f_count++;
243 mutex_unlock(&nlm_file_mutex); 254 mutex_unlock(&nlm_file_mutex);
244 255
245 /* Traverse locks, blocks and shares of this file 256 /* Traverse locks, blocks and shares of this file
246 * and update file->f_locks count */ 257 * and update file->f_locks count */
247 if (nlm_inspect_file(host, file, match)) 258 if (nlm_inspect_file(data, file, match))
248 ret = 1; 259 ret = 1;
249 260
250 mutex_lock(&nlm_file_mutex); 261 mutex_lock(&nlm_file_mutex);
@@ -303,21 +314,27 @@ nlm_release_file(struct nlm_file *file)
303 * Used by nlmsvc_invalidate_all 314 * Used by nlmsvc_invalidate_all
304 */ 315 */
305static int 316static int
306nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy) 317nlmsvc_mark_host(void *data, struct nlm_host *dummy)
307{ 318{
319 struct nlm_host *host = data;
320
308 host->h_inuse = 1; 321 host->h_inuse = 1;
309 return 0; 322 return 0;
310} 323}
311 324
312static int 325static int
313nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other) 326nlmsvc_same_host(void *data, struct nlm_host *other)
314{ 327{
328 struct nlm_host *host = data;
329
315 return host == other; 330 return host == other;
316} 331}
317 332
318static int 333static int
319nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy) 334nlmsvc_is_client(void *data, struct nlm_host *dummy)
320{ 335{
336 struct nlm_host *host = data;
337
321 if (host->h_server) { 338 if (host->h_server) {
322 /* we are destroying locks even though the client 339 /* we are destroying locks even though the client
323 * hasn't asked us too, so don't unmonitor the 340 * hasn't asked us too, so don't unmonitor the
@@ -337,7 +354,7 @@ void
337nlmsvc_mark_resources(void) 354nlmsvc_mark_resources(void)
338{ 355{
339 dprintk("lockd: nlmsvc_mark_resources\n"); 356 dprintk("lockd: nlmsvc_mark_resources\n");
340 nlm_traverse_files(NULL, nlmsvc_mark_host); 357 nlm_traverse_files(NULL, nlmsvc_mark_host, NULL);
341} 358}
342 359
343/* 360/*
@@ -348,7 +365,7 @@ nlmsvc_free_host_resources(struct nlm_host *host)
348{ 365{
349 dprintk("lockd: nlmsvc_free_host_resources\n"); 366 dprintk("lockd: nlmsvc_free_host_resources\n");
350 367
351 if (nlm_traverse_files(host, nlmsvc_same_host)) { 368 if (nlm_traverse_files(host, nlmsvc_same_host, NULL)) {
352 printk(KERN_WARNING 369 printk(KERN_WARNING
353 "lockd: couldn't remove all locks held by %s\n", 370 "lockd: couldn't remove all locks held by %s\n",
354 host->h_name); 371 host->h_name);
@@ -368,5 +385,41 @@ nlmsvc_invalidate_all(void)
368 * turn, which is about as inefficient as it gets. 385 * turn, which is about as inefficient as it gets.
369 * Now we just do it once in nlm_traverse_files. 386 * Now we just do it once in nlm_traverse_files.
370 */ 387 */
371 nlm_traverse_files(NULL, nlmsvc_is_client); 388 nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
389}
390
391static int
392nlmsvc_match_sb(void *datap, struct nlm_file *file)
393{
394 struct super_block *sb = datap;
395
396 return sb == file->f_file->f_path.mnt->mnt_sb;
397}
398
399int
400nlmsvc_unlock_all_by_sb(struct super_block *sb)
401{
402 int ret;
403
404 ret = nlm_traverse_files(sb, nlmsvc_always_match, nlmsvc_match_sb);
405 return ret ? -EIO : 0;
406}
407EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
408
409static int
410nlmsvc_match_ip(void *datap, struct nlm_host *host)
411{
412 __be32 *server_addr = datap;
413
414 return host->h_saddr.sin_addr.s_addr == *server_addr;
415}
416
417int
418nlmsvc_unlock_all_by_ip(__be32 server_addr)
419{
420 int ret;
421 ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
422 return ret ? -EIO : 0;
423
372} 424}
425EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
diff --git a/fs/locks.c b/fs/locks.c
index 592faadbcec1..663c069b59b3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
116 116
117#include <linux/capability.h> 117#include <linux/capability.h>
118#include <linux/file.h> 118#include <linux/file.h>
119#include <linux/fdtable.h>
119#include <linux/fs.h> 120#include <linux/fs.h>
120#include <linux/init.h> 121#include <linux/init.h>
121#include <linux/module.h> 122#include <linux/module.h>
@@ -224,7 +225,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
224/* 225/*
225 * Initialize a new lock from an existing file_lock structure. 226 * Initialize a new lock from an existing file_lock structure.
226 */ 227 */
227static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) 228void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
228{ 229{
229 new->fl_owner = fl->fl_owner; 230 new->fl_owner = fl->fl_owner;
230 new->fl_pid = fl->fl_pid; 231 new->fl_pid = fl->fl_pid;
@@ -236,6 +237,7 @@ static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
236 new->fl_ops = NULL; 237 new->fl_ops = NULL;
237 new->fl_lmops = NULL; 238 new->fl_lmops = NULL;
238} 239}
240EXPORT_SYMBOL(__locks_copy_lock);
239 241
240void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 242void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
241{ 243{
@@ -833,7 +835,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
833 if (!posix_locks_conflict(request, fl)) 835 if (!posix_locks_conflict(request, fl))
834 continue; 836 continue;
835 if (conflock) 837 if (conflock)
836 locks_copy_lock(conflock, fl); 838 __locks_copy_lock(conflock, fl);
837 error = -EAGAIN; 839 error = -EAGAIN;
838 if (!(request->fl_flags & FL_SLEEP)) 840 if (!(request->fl_flags & FL_SLEEP))
839 goto out; 841 goto out;
@@ -1367,18 +1369,20 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1367 1369
1368 lease = *flp; 1370 lease = *flp;
1369 1371
1370 error = -EAGAIN; 1372 if (arg != F_UNLCK) {
1371 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1373 error = -ENOMEM;
1372 goto out; 1374 new_fl = locks_alloc_lock();
1373 if ((arg == F_WRLCK) 1375 if (new_fl == NULL)
1374 && ((atomic_read(&dentry->d_count) > 1) 1376 goto out;
1375 || (atomic_read(&inode->i_count) > 1)))
1376 goto out;
1377 1377
1378 error = -ENOMEM; 1378 error = -EAGAIN;
1379 new_fl = locks_alloc_lock(); 1379 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1380 if (new_fl == NULL) 1380 goto out;
1381 goto out; 1381 if ((arg == F_WRLCK)
1382 && ((atomic_read(&dentry->d_count) > 1)
1383 || (atomic_read(&inode->i_count) > 1)))
1384 goto out;
1385 }
1382 1386
1383 /* 1387 /*
1384 * At this point, we know that if there is an exclusive 1388 * At this point, we know that if there is an exclusive
@@ -1404,6 +1408,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1404 rdlease_count++; 1408 rdlease_count++;
1405 } 1409 }
1406 1410
1411 error = -EAGAIN;
1407 if ((arg == F_RDLCK && (wrlease_count > 0)) || 1412 if ((arg == F_RDLCK && (wrlease_count > 0)) ||
1408 (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0))) 1413 (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
1409 goto out; 1414 goto out;
@@ -1490,8 +1495,7 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
1490int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1495int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1491{ 1496{
1492 struct file_lock fl, *flp = &fl; 1497 struct file_lock fl, *flp = &fl;
1493 struct dentry *dentry = filp->f_path.dentry; 1498 struct inode *inode = filp->f_path.dentry->d_inode;
1494 struct inode *inode = dentry->d_inode;
1495 int error; 1499 int error;
1496 1500
1497 locks_init_lock(&fl); 1501 locks_init_lock(&fl);
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 30f7d0ae2215..05ff4f1d7026 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
609 if (corrupt < 0) { 609 if (corrupt < 0) {
610 fat_fs_panic(new_dir->i_sb, 610 fat_fs_panic(new_dir->i_sb,
611 "%s: Filesystem corrupted (i_pos %lld)", 611 "%s: Filesystem corrupted (i_pos %lld)",
612 __FUNCTION__, sinfo.i_pos); 612 __func__, sinfo.i_pos);
613 } 613 }
614 goto out; 614 goto out;
615} 615}
@@ -653,7 +653,7 @@ static const struct inode_operations msdos_dir_inode_operations = {
653 .mkdir = msdos_mkdir, 653 .mkdir = msdos_mkdir,
654 .rmdir = msdos_rmdir, 654 .rmdir = msdos_rmdir,
655 .rename = msdos_rename, 655 .rename = msdos_rename,
656 .setattr = fat_notify_change, 656 .setattr = fat_setattr,
657 .getattr = fat_getattr, 657 .getattr = fat_getattr,
658}; 658};
659 659
diff --git a/fs/namei.c b/fs/namei.c
index e179f71bfcb0..32fd9655485b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h>
33#include <asm/namei.h> 34#include <asm/namei.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
281 if (retval) 282 if (retval)
282 return retval; 283 return retval;
283 284
285 retval = devcgroup_inode_permission(inode, mask);
286 if (retval)
287 return retval;
288
284 return security_inode_permission(inode, mask, nd); 289 return security_inode_permission(inode, mask, nd);
285} 290}
286 291
@@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2028 if (!dir->i_op || !dir->i_op->mknod) 2033 if (!dir->i_op || !dir->i_op->mknod)
2029 return -EPERM; 2034 return -EPERM;
2030 2035
2036 error = devcgroup_inode_mknod(mode, dev);
2037 if (error)
2038 return error;
2039
2031 error = security_inode_mknod(dir, dentry, mode, dev); 2040 error = security_inode_mknod(dir, dentry, mode, dev);
2032 if (error) 2041 if (error)
2033 return error; 2042 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 1bf302d0478b..4fc302c2a0e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,7 +14,6 @@
14#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/quotaops.h>
18#include <linux/acct.h> 17#include <linux/acct.h>
19#include <linux/capability.h> 18#include <linux/capability.h>
20#include <linux/cpumask.h> 19#include <linux/cpumask.h>
@@ -27,6 +26,7 @@
27#include <linux/mount.h> 26#include <linux/mount.h>
28#include <linux/ramfs.h> 27#include <linux/ramfs.h>
29#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/idr.h>
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
31#include <asm/unistd.h> 31#include <asm/unistd.h>
32#include "pnode.h" 32#include "pnode.h"
@@ -39,6 +39,8 @@
39__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); 39__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
40 40
41static int event; 41static int event;
42static DEFINE_IDA(mnt_id_ida);
43static DEFINE_IDA(mnt_group_ida);
42 44
43static struct list_head *mount_hashtable __read_mostly; 45static struct list_head *mount_hashtable __read_mostly;
44static struct kmem_cache *mnt_cache __read_mostly; 46static struct kmem_cache *mnt_cache __read_mostly;
@@ -58,10 +60,63 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
58 60
59#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) 61#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
60 62
63/* allocation is serialized by namespace_sem */
64static int mnt_alloc_id(struct vfsmount *mnt)
65{
66 int res;
67
68retry:
69 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
70 spin_lock(&vfsmount_lock);
71 res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
72 spin_unlock(&vfsmount_lock);
73 if (res == -EAGAIN)
74 goto retry;
75
76 return res;
77}
78
79static void mnt_free_id(struct vfsmount *mnt)
80{
81 spin_lock(&vfsmount_lock);
82 ida_remove(&mnt_id_ida, mnt->mnt_id);
83 spin_unlock(&vfsmount_lock);
84}
85
86/*
87 * Allocate a new peer group ID
88 *
89 * mnt_group_ida is protected by namespace_sem
90 */
91static int mnt_alloc_group_id(struct vfsmount *mnt)
92{
93 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
94 return -ENOMEM;
95
96 return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
97}
98
99/*
100 * Release a peer group ID
101 */
102void mnt_release_group_id(struct vfsmount *mnt)
103{
104 ida_remove(&mnt_group_ida, mnt->mnt_group_id);
105 mnt->mnt_group_id = 0;
106}
107
61struct vfsmount *alloc_vfsmnt(const char *name) 108struct vfsmount *alloc_vfsmnt(const char *name)
62{ 109{
63 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 110 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
64 if (mnt) { 111 if (mnt) {
112 int err;
113
114 err = mnt_alloc_id(mnt);
115 if (err) {
116 kmem_cache_free(mnt_cache, mnt);
117 return NULL;
118 }
119
65 atomic_set(&mnt->mnt_count, 1); 120 atomic_set(&mnt->mnt_count, 1);
66 INIT_LIST_HEAD(&mnt->mnt_hash); 121 INIT_LIST_HEAD(&mnt->mnt_hash);
67 INIT_LIST_HEAD(&mnt->mnt_child); 122 INIT_LIST_HEAD(&mnt->mnt_child);
@@ -353,6 +408,7 @@ EXPORT_SYMBOL(simple_set_mnt);
353void free_vfsmnt(struct vfsmount *mnt) 408void free_vfsmnt(struct vfsmount *mnt)
354{ 409{
355 kfree(mnt->mnt_devname); 410 kfree(mnt->mnt_devname);
411 mnt_free_id(mnt);
356 kmem_cache_free(mnt_cache, mnt); 412 kmem_cache_free(mnt_cache, mnt);
357} 413}
358 414
@@ -499,6 +555,17 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
499 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); 555 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
500 556
501 if (mnt) { 557 if (mnt) {
558 if (flag & (CL_SLAVE | CL_PRIVATE))
559 mnt->mnt_group_id = 0; /* not a peer of original */
560 else
561 mnt->mnt_group_id = old->mnt_group_id;
562
563 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
564 int err = mnt_alloc_group_id(mnt);
565 if (err)
566 goto out_free;
567 }
568
502 mnt->mnt_flags = old->mnt_flags; 569 mnt->mnt_flags = old->mnt_flags;
503 atomic_inc(&sb->s_active); 570 atomic_inc(&sb->s_active);
504 mnt->mnt_sb = sb; 571 mnt->mnt_sb = sb;
@@ -528,6 +595,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
528 } 595 }
529 } 596 }
530 return mnt; 597 return mnt;
598
599 out_free:
600 free_vfsmnt(mnt);
601 return NULL;
531} 602}
532 603
533static inline void __mntput(struct vfsmount *mnt) 604static inline void __mntput(struct vfsmount *mnt)
@@ -652,20 +723,21 @@ void save_mount_options(struct super_block *sb, char *options)
652} 723}
653EXPORT_SYMBOL(save_mount_options); 724EXPORT_SYMBOL(save_mount_options);
654 725
726#ifdef CONFIG_PROC_FS
655/* iterator */ 727/* iterator */
656static void *m_start(struct seq_file *m, loff_t *pos) 728static void *m_start(struct seq_file *m, loff_t *pos)
657{ 729{
658 struct mnt_namespace *n = m->private; 730 struct proc_mounts *p = m->private;
659 731
660 down_read(&namespace_sem); 732 down_read(&namespace_sem);
661 return seq_list_start(&n->list, *pos); 733 return seq_list_start(&p->ns->list, *pos);
662} 734}
663 735
664static void *m_next(struct seq_file *m, void *v, loff_t *pos) 736static void *m_next(struct seq_file *m, void *v, loff_t *pos)
665{ 737{
666 struct mnt_namespace *n = m->private; 738 struct proc_mounts *p = m->private;
667 739
668 return seq_list_next(v, &n->list, pos); 740 return seq_list_next(v, &p->ns->list, pos);
669} 741}
670 742
671static void m_stop(struct seq_file *m, void *v) 743static void m_stop(struct seq_file *m, void *v)
@@ -673,20 +745,30 @@ static void m_stop(struct seq_file *m, void *v)
673 up_read(&namespace_sem); 745 up_read(&namespace_sem);
674} 746}
675 747
676static int show_vfsmnt(struct seq_file *m, void *v) 748struct proc_fs_info {
749 int flag;
750 const char *str;
751};
752
753static void show_sb_opts(struct seq_file *m, struct super_block *sb)
677{ 754{
678 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); 755 static const struct proc_fs_info fs_info[] = {
679 int err = 0;
680 static struct proc_fs_info {
681 int flag;
682 char *str;
683 } fs_info[] = {
684 { MS_SYNCHRONOUS, ",sync" }, 756 { MS_SYNCHRONOUS, ",sync" },
685 { MS_DIRSYNC, ",dirsync" }, 757 { MS_DIRSYNC, ",dirsync" },
686 { MS_MANDLOCK, ",mand" }, 758 { MS_MANDLOCK, ",mand" },
687 { 0, NULL } 759 { 0, NULL }
688 }; 760 };
689 static struct proc_fs_info mnt_info[] = { 761 const struct proc_fs_info *fs_infop;
762
763 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
764 if (sb->s_flags & fs_infop->flag)
765 seq_puts(m, fs_infop->str);
766 }
767}
768
769static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
770{
771 static const struct proc_fs_info mnt_info[] = {
690 { MNT_NOSUID, ",nosuid" }, 772 { MNT_NOSUID, ",nosuid" },
691 { MNT_NODEV, ",nodev" }, 773 { MNT_NODEV, ",nodev" },
692 { MNT_NOEXEC, ",noexec" }, 774 { MNT_NOEXEC, ",noexec" },
@@ -695,40 +777,108 @@ static int show_vfsmnt(struct seq_file *m, void *v)
695 { MNT_RELATIME, ",relatime" }, 777 { MNT_RELATIME, ",relatime" },
696 { 0, NULL } 778 { 0, NULL }
697 }; 779 };
698 struct proc_fs_info *fs_infop; 780 const struct proc_fs_info *fs_infop;
781
782 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
783 if (mnt->mnt_flags & fs_infop->flag)
784 seq_puts(m, fs_infop->str);
785 }
786}
787
788static void show_type(struct seq_file *m, struct super_block *sb)
789{
790 mangle(m, sb->s_type->name);
791 if (sb->s_subtype && sb->s_subtype[0]) {
792 seq_putc(m, '.');
793 mangle(m, sb->s_subtype);
794 }
795}
796
797static int show_vfsmnt(struct seq_file *m, void *v)
798{
799 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
800 int err = 0;
699 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 801 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
700 802
701 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 803 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
702 seq_putc(m, ' '); 804 seq_putc(m, ' ');
703 seq_path(m, &mnt_path, " \t\n\\"); 805 seq_path(m, &mnt_path, " \t\n\\");
704 seq_putc(m, ' '); 806 seq_putc(m, ' ');
705 mangle(m, mnt->mnt_sb->s_type->name); 807 show_type(m, mnt->mnt_sb);
706 if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
707 seq_putc(m, '.');
708 mangle(m, mnt->mnt_sb->s_subtype);
709 }
710 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); 808 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
711 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { 809 show_sb_opts(m, mnt->mnt_sb);
712 if (mnt->mnt_sb->s_flags & fs_infop->flag) 810 show_mnt_opts(m, mnt);
713 seq_puts(m, fs_infop->str);
714 }
715 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
716 if (mnt->mnt_flags & fs_infop->flag)
717 seq_puts(m, fs_infop->str);
718 }
719 if (mnt->mnt_sb->s_op->show_options) 811 if (mnt->mnt_sb->s_op->show_options)
720 err = mnt->mnt_sb->s_op->show_options(m, mnt); 812 err = mnt->mnt_sb->s_op->show_options(m, mnt);
721 seq_puts(m, " 0 0\n"); 813 seq_puts(m, " 0 0\n");
722 return err; 814 return err;
723} 815}
724 816
725struct seq_operations mounts_op = { 817const struct seq_operations mounts_op = {
726 .start = m_start, 818 .start = m_start,
727 .next = m_next, 819 .next = m_next,
728 .stop = m_stop, 820 .stop = m_stop,
729 .show = show_vfsmnt 821 .show = show_vfsmnt
730}; 822};
731 823
824static int show_mountinfo(struct seq_file *m, void *v)
825{
826 struct proc_mounts *p = m->private;
827 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
828 struct super_block *sb = mnt->mnt_sb;
829 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
830 struct path root = p->root;
831 int err = 0;
832
833 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
834 MAJOR(sb->s_dev), MINOR(sb->s_dev));
835 seq_dentry(m, mnt->mnt_root, " \t\n\\");
836 seq_putc(m, ' ');
837 seq_path_root(m, &mnt_path, &root, " \t\n\\");
838 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
839 /*
840 * Mountpoint is outside root, discard that one. Ugly,
841 * but less so than trying to do that in iterator in a
842 * race-free way (due to renames).
843 */
844 return SEQ_SKIP;
845 }
846 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
847 show_mnt_opts(m, mnt);
848
849 /* Tagged fields ("foo:X" or "bar") */
850 if (IS_MNT_SHARED(mnt))
851 seq_printf(m, " shared:%i", mnt->mnt_group_id);
852 if (IS_MNT_SLAVE(mnt)) {
853 int master = mnt->mnt_master->mnt_group_id;
854 int dom = get_dominating_id(mnt, &p->root);
855 seq_printf(m, " master:%i", master);
856 if (dom && dom != master)
857 seq_printf(m, " propagate_from:%i", dom);
858 }
859 if (IS_MNT_UNBINDABLE(mnt))
860 seq_puts(m, " unbindable");
861
862 /* Filesystem specific data */
863 seq_puts(m, " - ");
864 show_type(m, sb);
865 seq_putc(m, ' ');
866 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
867 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
868 show_sb_opts(m, sb);
869 if (sb->s_op->show_options)
870 err = sb->s_op->show_options(m, mnt);
871 seq_putc(m, '\n');
872 return err;
873}
874
875const struct seq_operations mountinfo_op = {
876 .start = m_start,
877 .next = m_next,
878 .stop = m_stop,
879 .show = show_mountinfo,
880};
881
732static int show_vfsstat(struct seq_file *m, void *v) 882static int show_vfsstat(struct seq_file *m, void *v)
733{ 883{
734 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); 884 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
@@ -749,7 +899,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
749 899
750 /* file system type */ 900 /* file system type */
751 seq_puts(m, "with fstype "); 901 seq_puts(m, "with fstype ");
752 mangle(m, mnt->mnt_sb->s_type->name); 902 show_type(m, mnt->mnt_sb);
753 903
754 /* optional statistics */ 904 /* optional statistics */
755 if (mnt->mnt_sb->s_op->show_stats) { 905 if (mnt->mnt_sb->s_op->show_stats) {
@@ -761,12 +911,13 @@ static int show_vfsstat(struct seq_file *m, void *v)
761 return err; 911 return err;
762} 912}
763 913
764struct seq_operations mountstats_op = { 914const struct seq_operations mountstats_op = {
765 .start = m_start, 915 .start = m_start,
766 .next = m_next, 916 .next = m_next,
767 .stop = m_stop, 917 .stop = m_stop,
768 .show = show_vfsstat, 918 .show = show_vfsstat,
769}; 919};
920#endif /* CONFIG_PROC_FS */
770 921
771/** 922/**
772 * may_umount_tree - check if a mount tree is busy 923 * may_umount_tree - check if a mount tree is busy
@@ -909,10 +1060,11 @@ static int do_umount(struct vfsmount *mnt, int flags)
909 * about for the moment. 1060 * about for the moment.
910 */ 1061 */
911 1062
912 lock_kernel(); 1063 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
913 if (sb->s_op->umount_begin) 1064 lock_kernel();
914 sb->s_op->umount_begin(mnt, flags); 1065 sb->s_op->umount_begin(sb);
915 unlock_kernel(); 1066 unlock_kernel();
1067 }
916 1068
917 /* 1069 /*
918 * No sense to grab the lock for this test, but test itself looks 1070 * No sense to grab the lock for this test, but test itself looks
@@ -931,7 +1083,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
931 down_write(&sb->s_umount); 1083 down_write(&sb->s_umount);
932 if (!(sb->s_flags & MS_RDONLY)) { 1084 if (!(sb->s_flags & MS_RDONLY)) {
933 lock_kernel(); 1085 lock_kernel();
934 DQUOT_OFF(sb);
935 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); 1086 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
936 unlock_kernel(); 1087 unlock_kernel();
937 } 1088 }
@@ -1025,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd)
1025#endif 1176#endif
1026} 1177}
1027 1178
1028static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
1029{
1030 while (1) {
1031 if (d == dentry)
1032 return 1;
1033 if (d == NULL || d == d->d_parent)
1034 return 0;
1035 d = d->d_parent;
1036 }
1037}
1038
1039struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1179struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1040 int flag) 1180 int flag)
1041{ 1181{
@@ -1052,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1052 1192
1053 p = mnt; 1193 p = mnt;
1054 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1194 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1055 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 1195 if (!is_subdir(r->mnt_mountpoint, dentry))
1056 continue; 1196 continue;
1057 1197
1058 for (s = r; s; s = next_mnt(s, r)) { 1198 for (s = r; s; s = next_mnt(s, r)) {
@@ -1108,6 +1248,33 @@ void drop_collected_mounts(struct vfsmount *mnt)
1108 release_mounts(&umount_list); 1248 release_mounts(&umount_list);
1109} 1249}
1110 1250
1251static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1252{
1253 struct vfsmount *p;
1254
1255 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1256 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1257 mnt_release_group_id(p);
1258 }
1259}
1260
1261static int invent_group_ids(struct vfsmount *mnt, bool recurse)
1262{
1263 struct vfsmount *p;
1264
1265 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1266 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1267 int err = mnt_alloc_group_id(p);
1268 if (err) {
1269 cleanup_group_ids(mnt, p);
1270 return err;
1271 }
1272 }
1273 }
1274
1275 return 0;
1276}
1277
1111/* 1278/*
1112 * @source_mnt : mount tree to be attached 1279 * @source_mnt : mount tree to be attached
1113 * @nd : place the mount tree @source_mnt is attached 1280 * @nd : place the mount tree @source_mnt is attached
@@ -1178,9 +1345,16 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1178 struct vfsmount *dest_mnt = path->mnt; 1345 struct vfsmount *dest_mnt = path->mnt;
1179 struct dentry *dest_dentry = path->dentry; 1346 struct dentry *dest_dentry = path->dentry;
1180 struct vfsmount *child, *p; 1347 struct vfsmount *child, *p;
1348 int err;
1181 1349
1182 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) 1350 if (IS_MNT_SHARED(dest_mnt)) {
1183 return -EINVAL; 1351 err = invent_group_ids(source_mnt, true);
1352 if (err)
1353 goto out;
1354 }
1355 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
1356 if (err)
1357 goto out_cleanup_ids;
1184 1358
1185 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1186 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1203,6 +1377,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1203 } 1377 }
1204 spin_unlock(&vfsmount_lock); 1378 spin_unlock(&vfsmount_lock);
1205 return 0; 1379 return 0;
1380
1381 out_cleanup_ids:
1382 if (IS_MNT_SHARED(dest_mnt))
1383 cleanup_group_ids(source_mnt, NULL);
1384 out:
1385 return err;
1206} 1386}
1207 1387
1208static int graft_tree(struct vfsmount *mnt, struct path *path) 1388static int graft_tree(struct vfsmount *mnt, struct path *path)
@@ -1243,6 +1423,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1243 struct vfsmount *m, *mnt = nd->path.mnt; 1423 struct vfsmount *m, *mnt = nd->path.mnt;
1244 int recurse = flag & MS_REC; 1424 int recurse = flag & MS_REC;
1245 int type = flag & ~MS_REC; 1425 int type = flag & ~MS_REC;
1426 int err = 0;
1246 1427
1247 if (!capable(CAP_SYS_ADMIN)) 1428 if (!capable(CAP_SYS_ADMIN))
1248 return -EPERM; 1429 return -EPERM;
@@ -1251,12 +1432,20 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1251 return -EINVAL; 1432 return -EINVAL;
1252 1433
1253 down_write(&namespace_sem); 1434 down_write(&namespace_sem);
1435 if (type == MS_SHARED) {
1436 err = invent_group_ids(mnt, recurse);
1437 if (err)
1438 goto out_unlock;
1439 }
1440
1254 spin_lock(&vfsmount_lock); 1441 spin_lock(&vfsmount_lock);
1255 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1442 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1256 change_mnt_propagation(m, type); 1443 change_mnt_propagation(m, type);
1257 spin_unlock(&vfsmount_lock); 1444 spin_unlock(&vfsmount_lock);
1445
1446 out_unlock:
1258 up_write(&namespace_sem); 1447 up_write(&namespace_sem);
1259 return 0; 1448 return err;
1260} 1449}
1261 1450
1262/* 1451/*
@@ -2140,10 +2329,10 @@ void __init mnt_init(void)
2140 err = sysfs_init(); 2329 err = sysfs_init();
2141 if (err) 2330 if (err)
2142 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2143 __FUNCTION__, err); 2332 __func__, err);
2144 fs_kobj = kobject_create_and_add("fs", NULL); 2333 fs_kobj = kobject_create_and_add("fs", NULL);
2145 if (!fs_kobj) 2334 if (!fs_kobj)
2146 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); 2335 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2147 init_rootfs(); 2336 init_rootfs();
2148 init_mount_tree(); 2337 init_mount_tree();
2149} 2338}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fbbb9f7afa1a..2e5ab1204dec 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -107,12 +107,6 @@ static const struct super_operations ncp_sops =
107 .show_options = ncp_show_options, 107 .show_options = ncp_show_options,
108}; 108};
109 109
110extern struct dentry_operations ncp_root_dentry_operations;
111#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
112extern const struct address_space_operations ncp_symlink_aops;
113extern int ncp_symlink(struct inode*, struct dentry*, const char*);
114#endif
115
116/* 110/*
117 * Fill in the ncpfs-specific information in the inode. 111 * Fill in the ncpfs-specific information in the inode.
118 */ 112 */
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index ad8f167e54bc..3a97c95e1ca2 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -389,11 +389,11 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
389 struct dentry* dentry = inode->i_sb->s_root; 389 struct dentry* dentry = inode->i_sb->s_root;
390 390
391 if (dentry) { 391 if (dentry) {
392 struct inode* inode = dentry->d_inode; 392 struct inode* s_inode = dentry->d_inode;
393 393
394 if (inode) { 394 if (s_inode) {
395 sr.volNumber = NCP_FINFO(inode)->volNumber; 395 sr.volNumber = NCP_FINFO(s_inode)->volNumber;
396 sr.dirEntNum = NCP_FINFO(inode)->dirEntNum; 396 sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum;
397 sr.namespace = server->name_space[sr.volNumber]; 397 sr.namespace = server->name_space[sr.volNumber];
398 } else 398 } else
399 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 399 DPRINTK("ncpfs: s_root->d_inode==NULL\n");
@@ -439,12 +439,12 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
439 dentry = inode->i_sb->s_root; 439 dentry = inode->i_sb->s_root;
440 server->root_setuped = 1; 440 server->root_setuped = 1;
441 if (dentry) { 441 if (dentry) {
442 struct inode* inode = dentry->d_inode; 442 struct inode* s_inode = dentry->d_inode;
443 443
444 if (inode) { 444 if (inode) {
445 NCP_FINFO(inode)->volNumber = vnum; 445 NCP_FINFO(s_inode)->volNumber = vnum;
446 NCP_FINFO(inode)->dirEntNum = de; 446 NCP_FINFO(s_inode)->dirEntNum = de;
447 NCP_FINFO(inode)->DosDirNum = dosde; 447 NCP_FINFO(s_inode)->DosDirNum = dosde;
448 } else 448 } else
449 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 449 DPRINTK("ncpfs: s_root->d_inode==NULL\n");
450 } else 450 } else
@@ -519,7 +519,6 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
519 } 519 }
520 { 520 {
521 struct ncp_lock_ioctl rqdata; 521 struct ncp_lock_ioctl rqdata;
522 int result;
523 522
524 if (copy_from_user(&rqdata, argp, sizeof(rqdata))) 523 if (copy_from_user(&rqdata, argp, sizeof(rqdata)))
525 return -EFAULT; 524 return -EFAULT;
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index df6d60bdfcd3..97645f112114 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction
102} 102}
103 103
104static inline char * 104static inline char *
105 ncp_reply_data(struct ncp_server *server, int offset) 105ncp_reply_data(struct ncp_server *server, int offset)
106{ 106{
107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]); 107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
108} 108}
109 109
110static inline __u8 BVAL(void* data) 110static inline u8 BVAL(void *data)
111{ 111{
112 return get_unaligned((__u8*)data); 112 return *(u8 *)data;
113} 113}
114 114
115static __u8 115static u8 ncp_reply_byte(struct ncp_server *server, int offset)
116 ncp_reply_byte(struct ncp_server *server, int offset)
117{ 116{
118 return get_unaligned((__u8 *) ncp_reply_data(server, offset)); 117 return *(u8 *)ncp_reply_data(server, offset);
119} 118}
120 119
121static inline __u16 WVAL_LH(void* data) 120static inline u16 WVAL_LH(void *data)
122{ 121{
123 return le16_to_cpu(get_unaligned((__le16*)data)); 122 return get_unaligned_le16(data);
124} 123}
125 124
126static __u16 125static u16
127 ncp_reply_le16(struct ncp_server *server, int offset) 126ncp_reply_le16(struct ncp_server *server, int offset)
128{ 127{
129 return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); 128 return get_unaligned_le16(ncp_reply_data(server, offset));
130} 129}
131 130
132static __u16 131static u16
133 ncp_reply_be16(struct ncp_server *server, int offset) 132ncp_reply_be16(struct ncp_server *server, int offset)
134{ 133{
135 return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); 134 return get_unaligned_be16(ncp_reply_data(server, offset));
136} 135}
137 136
138static inline __u32 DVAL_LH(void* data) 137static inline u32 DVAL_LH(void *data)
139{ 138{
140 return le32_to_cpu(get_unaligned((__le32*)data)); 139 return get_unaligned_le32(data);
141} 140}
142 141
143static __le32 142static __le32
144 ncp_reply_dword(struct ncp_server *server, int offset) 143ncp_reply_dword(struct ncp_server *server, int offset)
145{ 144{
146 return get_unaligned((__le32 *) ncp_reply_data(server, offset)); 145 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147} 146}
148 147
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { 148static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
@@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1006 result = ncp_request2(server, 72, bounce, bufsize); 1005 result = ncp_request2(server, 72, bounce, bufsize);
1007 ncp_unlock_server(server); 1006 ncp_unlock_server(server);
1008 if (!result) { 1007 if (!result) {
1009 int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + 1008 int len = get_unaligned_be16((char *)bounce +
1010 sizeof(struct ncp_reply_header)))); 1009 sizeof(struct ncp_reply_header));
1011 result = -EIO; 1010 result = -EIO;
1012 if (len <= to_read) { 1011 if (len <= to_read) {
1013 char* source; 1012 char* source;
diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c
index 749a18d33599..7c0b5c21e6cf 100644
--- a/fs/ncpfs/ncpsign_kernel.c
+++ b/fs/ncpfs/ncpsign_kernel.c
@@ -55,7 +55,7 @@ static void nwsign(char *r_data1, char *r_data2, char *outdata) {
55 unsigned int w0,w1,w2,w3; 55 unsigned int w0,w1,w2,w3;
56 static int rbit[4]={0, 2, 1, 3}; 56 static int rbit[4]={0, 2, 1, 3};
57#ifdef __i386__ 57#ifdef __i386__
58 unsigned int *data2=(int *)r_data2; 58 unsigned int *data2=(unsigned int *)r_data2;
59#else 59#else
60 unsigned int data2[16]; 60 unsigned int data2[16];
61 for (i=0;i<16;i++) 61 for (i=0;i<16;i++)
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index df0f41e09885..ac6170c594a3 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -5,7 +5,7 @@
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o 9 write.o namespace.o mount_clnt.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
@@ -14,5 +14,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
14 delegation.o idmap.o \ 14 delegation.o idmap.o \
15 callback.o callback_xdr.o callback_proc.o \ 15 callback.o callback_xdr.o callback_proc.o \
16 nfs4namespace.o 16 nfs4namespace.o
17nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 17nfs-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 66648dd92d97..5606ae3d72d3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -15,6 +15,7 @@
15#include <linux/nfs_fs.h> 15#include <linux/nfs_fs.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/freezer.h> 17#include <linux/freezer.h>
18#include <linux/kthread.h>
18 19
19#include <net/inet_sock.h> 20#include <net/inet_sock.h>
20 21
@@ -27,9 +28,7 @@
27struct nfs_callback_data { 28struct nfs_callback_data {
28 unsigned int users; 29 unsigned int users;
29 struct svc_serv *serv; 30 struct svc_serv *serv;
30 pid_t pid; 31 struct task_struct *task;
31 struct completion started;
32 struct completion stopped;
33}; 32};
34 33
35static struct nfs_callback_data nfs_callback_info; 34static struct nfs_callback_data nfs_callback_info;
@@ -57,48 +56,44 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
57/* 56/*
58 * This is the callback kernel thread. 57 * This is the callback kernel thread.
59 */ 58 */
60static void nfs_callback_svc(struct svc_rqst *rqstp) 59static int
60nfs_callback_svc(void *vrqstp)
61{ 61{
62 int err; 62 int err, preverr = 0;
63 struct svc_rqst *rqstp = vrqstp;
63 64
64 __module_get(THIS_MODULE);
65 lock_kernel();
66
67 nfs_callback_info.pid = current->pid;
68 daemonize("nfsv4-svc");
69 /* Process request with signals blocked, but allow SIGKILL. */
70 allow_signal(SIGKILL);
71 set_freezable(); 65 set_freezable();
72 66
73 complete(&nfs_callback_info.started); 67 /*
74 68 * FIXME: do we really need to run this under the BKL? If so, please
75 for(;;) { 69 * add a comment about what it's intended to protect.
76 if (signalled()) { 70 */
77 if (nfs_callback_info.users == 0) 71 lock_kernel();
78 break; 72 while (!kthread_should_stop()) {
79 flush_signals(current);
80 }
81 /* 73 /*
82 * Listen for a request on the socket 74 * Listen for a request on the socket
83 */ 75 */
84 err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); 76 err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
85 if (err == -EAGAIN || err == -EINTR) 77 if (err == -EAGAIN || err == -EINTR) {
78 preverr = err;
86 continue; 79 continue;
80 }
87 if (err < 0) { 81 if (err < 0) {
88 printk(KERN_WARNING 82 if (err != preverr) {
89 "%s: terminating on error %d\n", 83 printk(KERN_WARNING "%s: unexpected error "
90 __FUNCTION__, -err); 84 "from svc_recv (%d)\n", __func__, err);
91 break; 85 preverr = err;
86 }
87 schedule_timeout_uninterruptible(HZ);
88 continue;
92 } 89 }
90 preverr = err;
93 svc_process(rqstp); 91 svc_process(rqstp);
94 } 92 }
95
96 flush_signals(current);
97 svc_exit_thread(rqstp);
98 nfs_callback_info.pid = 0;
99 complete(&nfs_callback_info.stopped);
100 unlock_kernel(); 93 unlock_kernel();
101 module_put_and_exit(0); 94 nfs_callback_info.task = NULL;
95 svc_exit_thread(rqstp);
96 return 0;
102} 97}
103 98
104/* 99/*
@@ -107,14 +102,13 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
107int nfs_callback_up(void) 102int nfs_callback_up(void)
108{ 103{
109 struct svc_serv *serv = NULL; 104 struct svc_serv *serv = NULL;
105 struct svc_rqst *rqstp;
110 int ret = 0; 106 int ret = 0;
111 107
112 lock_kernel(); 108 lock_kernel();
113 mutex_lock(&nfs_callback_mutex); 109 mutex_lock(&nfs_callback_mutex);
114 if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) 110 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
115 goto out; 111 goto out;
116 init_completion(&nfs_callback_info.started);
117 init_completion(&nfs_callback_info.stopped);
118 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); 112 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
119 ret = -ENOMEM; 113 ret = -ENOMEM;
120 if (!serv) 114 if (!serv)
@@ -127,15 +121,28 @@ int nfs_callback_up(void)
127 nfs_callback_tcpport = ret; 121 nfs_callback_tcpport = ret;
128 dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); 122 dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
129 123
130 ret = svc_create_thread(nfs_callback_svc, serv); 124 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
131 if (ret < 0) 125 if (IS_ERR(rqstp)) {
126 ret = PTR_ERR(rqstp);
132 goto out_err; 127 goto out_err;
128 }
129
130 svc_sock_update_bufs(serv);
133 nfs_callback_info.serv = serv; 131 nfs_callback_info.serv = serv;
134 wait_for_completion(&nfs_callback_info.started); 132
133 nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp,
134 "nfsv4-svc");
135 if (IS_ERR(nfs_callback_info.task)) {
136 ret = PTR_ERR(nfs_callback_info.task);
137 nfs_callback_info.serv = NULL;
138 nfs_callback_info.task = NULL;
139 svc_exit_thread(rqstp);
140 goto out_err;
141 }
135out: 142out:
136 /* 143 /*
137 * svc_create creates the svc_serv with sv_nrthreads == 1, and then 144 * svc_create creates the svc_serv with sv_nrthreads == 1, and then
138 * svc_create_thread increments that. So we need to call svc_destroy 145 * svc_prepare_thread increments that. So we need to call svc_destroy
139 * on both success and failure so that the refcount is 1 when the 146 * on both success and failure so that the refcount is 1 when the
140 * thread exits. 147 * thread exits.
141 */ 148 */
@@ -152,19 +159,15 @@ out_err:
152} 159}
153 160
154/* 161/*
155 * Kill the server process if it is not already up. 162 * Kill the server process if it is not already down.
156 */ 163 */
157void nfs_callback_down(void) 164void nfs_callback_down(void)
158{ 165{
159 lock_kernel(); 166 lock_kernel();
160 mutex_lock(&nfs_callback_mutex); 167 mutex_lock(&nfs_callback_mutex);
161 nfs_callback_info.users--; 168 nfs_callback_info.users--;
162 do { 169 if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL)
163 if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) 170 kthread_stop(nfs_callback_info.task);
164 break;
165 if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
166 break;
167 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
168 mutex_unlock(&nfs_callback_mutex); 171 mutex_unlock(&nfs_callback_mutex);
169 unlock_kernel(); 172 unlock_kernel();
170} 173}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c5c0175898f6..89ac5bb0401c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -112,6 +112,7 @@ struct nfs_client_initdata {
112static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) 112static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
113{ 113{
114 struct nfs_client *clp; 114 struct nfs_client *clp;
115 struct rpc_cred *cred;
115 116
116 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) 117 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
117 goto error_0; 118 goto error_0;
@@ -150,6 +151,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
150 clp->cl_boot_time = CURRENT_TIME; 151 clp->cl_boot_time = CURRENT_TIME;
151 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; 152 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
152#endif 153#endif
154 cred = rpc_lookup_machine_cred();
155 if (!IS_ERR(cred))
156 clp->cl_machine_cred = cred;
153 157
154 return clp; 158 return clp;
155 159
@@ -170,6 +174,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
170 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); 174 BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
171 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) 175 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
172 nfs_idmap_delete(clp); 176 nfs_idmap_delete(clp);
177
178 rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
173#endif 179#endif
174} 180}
175 181
@@ -189,6 +195,9 @@ static void nfs_free_client(struct nfs_client *clp)
189 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 195 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
190 nfs_callback_down(); 196 nfs_callback_down();
191 197
198 if (clp->cl_machine_cred != NULL)
199 put_rpccred(clp->cl_machine_cred);
200
192 kfree(clp->cl_hostname); 201 kfree(clp->cl_hostname);
193 kfree(clp); 202 kfree(clp);
194 203
@@ -680,10 +689,22 @@ static int nfs_init_server(struct nfs_server *server,
680 if (error < 0) 689 if (error < 0)
681 goto error; 690 goto error;
682 691
692 server->port = data->nfs_server.port;
693
683 error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); 694 error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
684 if (error < 0) 695 if (error < 0)
685 goto error; 696 goto error;
686 697
698 /* Preserve the values of mount_server-related mount options */
699 if (data->mount_server.addrlen) {
700 memcpy(&server->mountd_address, &data->mount_server.address,
701 data->mount_server.addrlen);
702 server->mountd_addrlen = data->mount_server.addrlen;
703 }
704 server->mountd_version = data->mount_server.version;
705 server->mountd_port = data->mount_server.port;
706 server->mountd_protocol = data->mount_server.protocol;
707
687 server->namelen = data->namlen; 708 server->namelen = data->namlen;
688 /* Create a client RPC handle for the NFSv3 ACL management interface */ 709 /* Create a client RPC handle for the NFSv3 ACL management interface */
689 nfs_init_server_aclclient(server); 710 nfs_init_server_aclclient(server);
@@ -1062,6 +1083,8 @@ static int nfs4_init_server(struct nfs_server *server,
1062 server->acdirmin = data->acdirmin * HZ; 1083 server->acdirmin = data->acdirmin * HZ;
1063 server->acdirmax = data->acdirmax * HZ; 1084 server->acdirmax = data->acdirmax * HZ;
1064 1085
1086 server->port = data->nfs_server.port;
1087
1065 error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); 1088 error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
1066 1089
1067error: 1090error:
@@ -1298,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = {
1298 .read = seq_read, 1321 .read = seq_read,
1299 .llseek = seq_lseek, 1322 .llseek = seq_lseek,
1300 .release = seq_release, 1323 .release = seq_release,
1324 .owner = THIS_MODULE,
1301}; 1325};
1302 1326
1303static int nfs_volume_list_open(struct inode *inode, struct file *file); 1327static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@ -1318,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = {
1318 .read = seq_read, 1342 .read = seq_read,
1319 .llseek = seq_lseek, 1343 .llseek = seq_lseek,
1320 .release = seq_release, 1344 .release = seq_release,
1345 .owner = THIS_MODULE,
1321}; 1346};
1322 1347
1323/* 1348/*
@@ -1477,33 +1502,29 @@ int __init nfs_fs_proc_init(void)
1477{ 1502{
1478 struct proc_dir_entry *p; 1503 struct proc_dir_entry *p;
1479 1504
1480 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); 1505 proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
1481 if (!proc_fs_nfs) 1506 if (!proc_fs_nfs)
1482 goto error_0; 1507 goto error_0;
1483 1508
1484 proc_fs_nfs->owner = THIS_MODULE; 1509 proc_fs_nfs->owner = THIS_MODULE;
1485 1510
1486 /* a file of servers with which we're dealing */ 1511 /* a file of servers with which we're dealing */
1487 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); 1512 p = proc_create("servers", S_IFREG|S_IRUGO,
1513 proc_fs_nfs, &nfs_server_list_fops);
1488 if (!p) 1514 if (!p)
1489 goto error_1; 1515 goto error_1;
1490 1516
1491 p->proc_fops = &nfs_server_list_fops;
1492 p->owner = THIS_MODULE;
1493
1494 /* a file of volumes that we have mounted */ 1517 /* a file of volumes that we have mounted */
1495 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); 1518 p = proc_create("volumes", S_IFREG|S_IRUGO,
1519 proc_fs_nfs, &nfs_volume_list_fops);
1496 if (!p) 1520 if (!p)
1497 goto error_2; 1521 goto error_2;
1498
1499 p->proc_fops = &nfs_volume_list_fops;
1500 p->owner = THIS_MODULE;
1501 return 0; 1522 return 0;
1502 1523
1503error_2: 1524error_2:
1504 remove_proc_entry("servers", proc_fs_nfs); 1525 remove_proc_entry("servers", proc_fs_nfs);
1505error_1: 1526error_1:
1506 remove_proc_entry("nfsfs", proc_root_fs); 1527 remove_proc_entry("fs/nfsfs", NULL);
1507error_0: 1528error_0:
1508 return -ENOMEM; 1529 return -ENOMEM;
1509} 1530}
@@ -1515,7 +1536,7 @@ void nfs_fs_proc_exit(void)
1515{ 1536{
1516 remove_proc_entry("volumes", proc_fs_nfs); 1537 remove_proc_entry("volumes", proc_fs_nfs);
1517 remove_proc_entry("servers", proc_fs_nfs); 1538 remove_proc_entry("servers", proc_fs_nfs);
1518 remove_proc_entry("nfsfs", proc_root_fs); 1539 remove_proc_entry("fs/nfsfs", NULL);
1519} 1540}
1520 1541
1521#endif /* CONFIG_PROC_FS */ 1542#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9e30ac2798d..f288b3ecab4a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1967,7 +1967,7 @@ force_lookup:
1967 if (!NFS_PROTO(inode)->access) 1967 if (!NFS_PROTO(inode)->access)
1968 goto out_notsup; 1968 goto out_notsup;
1969 1969
1970 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); 1970 cred = rpc_lookup_cred();
1971 if (!IS_ERR(cred)) { 1971 if (!IS_ERR(cred)) {
1972 res = nfs_do_access(inode, cred, mask); 1972 res = nfs_do_access(inode, cred, mask);
1973 put_rpccred(cred); 1973 put_rpccred(cred);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 16844f98f50e..4757a2b326a1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -229,14 +229,20 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
229static void nfs_direct_read_result(struct rpc_task *task, void *calldata) 229static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
230{ 230{
231 struct nfs_read_data *data = calldata; 231 struct nfs_read_data *data = calldata;
232 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
233 232
234 if (nfs_readpage_result(task, data) != 0) 233 nfs_readpage_result(task, data);
235 return; 234}
235
236static void nfs_direct_read_release(void *calldata)
237{
238
239 struct nfs_read_data *data = calldata;
240 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
241 int status = data->task.tk_status;
236 242
237 spin_lock(&dreq->lock); 243 spin_lock(&dreq->lock);
238 if (unlikely(task->tk_status < 0)) { 244 if (unlikely(status < 0)) {
239 dreq->error = task->tk_status; 245 dreq->error = status;
240 spin_unlock(&dreq->lock); 246 spin_unlock(&dreq->lock);
241 } else { 247 } else {
242 dreq->count += data->res.count; 248 dreq->count += data->res.count;
@@ -249,11 +255,12 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
249 255
250 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
251 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata);
252} 259}
253 260
254static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
255 .rpc_call_done = nfs_direct_read_result, 262 .rpc_call_done = nfs_direct_read_result,
256 .rpc_release = nfs_readdata_release, 263 .rpc_release = nfs_direct_read_release,
257}; 264};
258 265
259/* 266/*
@@ -280,6 +287,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
280 .rpc_client = NFS_CLIENT(inode), 287 .rpc_client = NFS_CLIENT(inode),
281 .rpc_message = &msg, 288 .rpc_message = &msg,
282 .callback_ops = &nfs_read_direct_ops, 289 .callback_ops = &nfs_read_direct_ops,
290 .workqueue = nfsiod_workqueue,
283 .flags = RPC_TASK_ASYNC, 291 .flags = RPC_TASK_ASYNC,
284 }; 292 };
285 unsigned int pgbase; 293 unsigned int pgbase;
@@ -323,7 +331,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
323 data->inode = inode; 331 data->inode = inode;
324 data->cred = msg.rpc_cred; 332 data->cred = msg.rpc_cred;
325 data->args.fh = NFS_FH(inode); 333 data->args.fh = NFS_FH(inode);
326 data->args.context = ctx; 334 data->args.context = get_nfs_open_context(ctx);
327 data->args.offset = pos; 335 data->args.offset = pos;
328 data->args.pgbase = pgbase; 336 data->args.pgbase = pgbase;
329 data->args.pages = data->pagevec; 337 data->args.pages = data->pagevec;
@@ -339,8 +347,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
339 NFS_PROTO(inode)->read_setup(data, &msg); 347 NFS_PROTO(inode)->read_setup(data, &msg);
340 348
341 task = rpc_run_task(&task_setup_data); 349 task = rpc_run_task(&task_setup_data);
342 if (!IS_ERR(task)) 350 if (IS_ERR(task))
343 rpc_put_task(task); 351 break;
352 rpc_put_task(task);
344 353
345 dprintk("NFS: %5u initiated direct read call " 354 dprintk("NFS: %5u initiated direct read call "
346 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 355 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@ -446,6 +455,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
446 struct rpc_task_setup task_setup_data = { 455 struct rpc_task_setup task_setup_data = {
447 .rpc_client = NFS_CLIENT(inode), 456 .rpc_client = NFS_CLIENT(inode),
448 .callback_ops = &nfs_write_direct_ops, 457 .callback_ops = &nfs_write_direct_ops,
458 .workqueue = nfsiod_workqueue,
449 .flags = RPC_TASK_ASYNC, 459 .flags = RPC_TASK_ASYNC,
450 }; 460 };
451 461
@@ -499,27 +509,34 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
499static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) 509static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
500{ 510{
501 struct nfs_write_data *data = calldata; 511 struct nfs_write_data *data = calldata;
502 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
503 512
504 /* Call the NFS version-specific code */ 513 /* Call the NFS version-specific code */
505 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 514 NFS_PROTO(data->inode)->commit_done(task, data);
506 return; 515}
507 if (unlikely(task->tk_status < 0)) { 516
517static void nfs_direct_commit_release(void *calldata)
518{
519 struct nfs_write_data *data = calldata;
520 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
521 int status = data->task.tk_status;
522
523 if (status < 0) {
508 dprintk("NFS: %5u commit failed with error %d.\n", 524 dprintk("NFS: %5u commit failed with error %d.\n",
509 task->tk_pid, task->tk_status); 525 data->task.tk_pid, status);
510 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 526 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
511 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 527 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
512 dprintk("NFS: %5u commit verify failed\n", task->tk_pid); 528 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
513 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 529 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
514 } 530 }
515 531
516 dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); 532 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
517 nfs_direct_write_complete(dreq, data->inode); 533 nfs_direct_write_complete(dreq, data->inode);
534 nfs_commitdata_release(calldata);
518} 535}
519 536
520static const struct rpc_call_ops nfs_commit_direct_ops = { 537static const struct rpc_call_ops nfs_commit_direct_ops = {
521 .rpc_call_done = nfs_direct_commit_result, 538 .rpc_call_done = nfs_direct_commit_result,
522 .rpc_release = nfs_commit_release, 539 .rpc_release = nfs_direct_commit_release,
523}; 540};
524 541
525static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 542static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -537,6 +554,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
537 .rpc_message = &msg, 554 .rpc_message = &msg,
538 .callback_ops = &nfs_commit_direct_ops, 555 .callback_ops = &nfs_commit_direct_ops,
539 .callback_data = data, 556 .callback_data = data,
557 .workqueue = nfsiod_workqueue,
540 .flags = RPC_TASK_ASYNC, 558 .flags = RPC_TASK_ASYNC,
541 }; 559 };
542 560
@@ -546,6 +564,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
546 data->args.fh = NFS_FH(data->inode); 564 data->args.fh = NFS_FH(data->inode);
547 data->args.offset = 0; 565 data->args.offset = 0;
548 data->args.count = 0; 566 data->args.count = 0;
567 data->args.context = get_nfs_open_context(dreq->ctx);
549 data->res.count = 0; 568 data->res.count = 0;
550 data->res.fattr = &data->fattr; 569 data->res.fattr = &data->fattr;
551 data->res.verf = &data->verf; 570 data->res.verf = &data->verf;
@@ -585,7 +604,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
585 604
586static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 605static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
587{ 606{
588 dreq->commit_data = nfs_commit_alloc(); 607 dreq->commit_data = nfs_commitdata_alloc();
589 if (dreq->commit_data != NULL) 608 if (dreq->commit_data != NULL)
590 dreq->commit_data->req = (struct nfs_page *) dreq; 609 dreq->commit_data->req = (struct nfs_page *) dreq;
591} 610}
@@ -606,11 +625,20 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
606static void nfs_direct_write_result(struct rpc_task *task, void *calldata) 625static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
607{ 626{
608 struct nfs_write_data *data = calldata; 627 struct nfs_write_data *data = calldata;
609 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
610 int status = task->tk_status;
611 628
612 if (nfs_writeback_done(task, data) != 0) 629 if (nfs_writeback_done(task, data) != 0)
613 return; 630 return;
631}
632
633/*
634 * NB: Return the value of the first error return code. Subsequent
635 * errors after the first one are ignored.
636 */
637static void nfs_direct_write_release(void *calldata)
638{
639 struct nfs_write_data *data = calldata;
640 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
641 int status = data->task.tk_status;
614 642
615 spin_lock(&dreq->lock); 643 spin_lock(&dreq->lock);
616 644
@@ -632,23 +660,13 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
632 break; 660 break;
633 case NFS_ODIRECT_DO_COMMIT: 661 case NFS_ODIRECT_DO_COMMIT:
634 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { 662 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
635 dprintk("NFS: %5u write verify failed\n", task->tk_pid); 663 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
636 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 664 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
637 } 665 }
638 } 666 }
639 } 667 }
640out_unlock: 668out_unlock:
641 spin_unlock(&dreq->lock); 669 spin_unlock(&dreq->lock);
642}
643
644/*
645 * NB: Return the value of the first error return code. Subsequent
646 * errors after the first one are ignored.
647 */
648static void nfs_direct_write_release(void *calldata)
649{
650 struct nfs_write_data *data = calldata;
651 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
652 670
653 if (put_dreq(dreq)) 671 if (put_dreq(dreq))
654 nfs_direct_write_complete(dreq, data->inode); 672 nfs_direct_write_complete(dreq, data->inode);
@@ -682,6 +700,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
682 .rpc_client = NFS_CLIENT(inode), 700 .rpc_client = NFS_CLIENT(inode),
683 .rpc_message = &msg, 701 .rpc_message = &msg,
684 .callback_ops = &nfs_write_direct_ops, 702 .callback_ops = &nfs_write_direct_ops,
703 .workqueue = nfsiod_workqueue,
685 .flags = RPC_TASK_ASYNC, 704 .flags = RPC_TASK_ASYNC,
686 }; 705 };
687 size_t wsize = NFS_SERVER(inode)->wsize; 706 size_t wsize = NFS_SERVER(inode)->wsize;
@@ -728,7 +747,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
728 data->inode = inode; 747 data->inode = inode;
729 data->cred = msg.rpc_cred; 748 data->cred = msg.rpc_cred;
730 data->args.fh = NFS_FH(inode); 749 data->args.fh = NFS_FH(inode);
731 data->args.context = ctx; 750 data->args.context = get_nfs_open_context(ctx);
732 data->args.offset = pos; 751 data->args.offset = pos;
733 data->args.pgbase = pgbase; 752 data->args.pgbase = pgbase;
734 data->args.pages = data->pagevec; 753 data->args.pages = data->pagevec;
@@ -745,8 +764,9 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
745 NFS_PROTO(inode)->write_setup(data, &msg); 764 NFS_PROTO(inode)->write_setup(data, &msg);
746 765
747 task = rpc_run_task(&task_setup_data); 766 task = rpc_run_task(&task_setup_data);
748 if (!IS_ERR(task)) 767 if (IS_ERR(task))
749 rpc_put_task(task); 768 break;
769 rpc_put_task(task);
750 770
751 dprintk("NFS: %5u initiated direct write call " 771 dprintk("NFS: %5u initiated direct write call "
752 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 772 "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5d2e9d9a4e28..3536b01164f9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -238,10 +238,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
238 ssize_t result; 238 ssize_t result;
239 size_t count = iov_length(iov, nr_segs); 239 size_t count = iov_length(iov, nr_segs);
240 240
241#ifdef CONFIG_NFS_DIRECTIO
242 if (iocb->ki_filp->f_flags & O_DIRECT) 241 if (iocb->ki_filp->f_flags & O_DIRECT)
243 return nfs_file_direct_read(iocb, iov, nr_segs, pos); 242 return nfs_file_direct_read(iocb, iov, nr_segs, pos);
244#endif
245 243
246 dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", 244 dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
247 dentry->d_parent->d_name.name, dentry->d_name.name, 245 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -387,9 +385,7 @@ const struct address_space_operations nfs_file_aops = {
387 .write_end = nfs_write_end, 385 .write_end = nfs_write_end,
388 .invalidatepage = nfs_invalidate_page, 386 .invalidatepage = nfs_invalidate_page,
389 .releasepage = nfs_release_page, 387 .releasepage = nfs_release_page,
390#ifdef CONFIG_NFS_DIRECTIO
391 .direct_IO = nfs_direct_IO, 388 .direct_IO = nfs_direct_IO,
392#endif
393 .launder_page = nfs_launder_page, 389 .launder_page = nfs_launder_page,
394}; 390};
395 391
@@ -447,10 +443,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
447 ssize_t result; 443 ssize_t result;
448 size_t count = iov_length(iov, nr_segs); 444 size_t count = iov_length(iov, nr_segs);
449 445
450#ifdef CONFIG_NFS_DIRECTIO
451 if (iocb->ki_filp->f_flags & O_DIRECT) 446 if (iocb->ki_filp->f_flags & O_DIRECT)
452 return nfs_file_direct_write(iocb, iov, nr_segs, pos); 447 return nfs_file_direct_write(iocb, iov, nr_segs, pos);
453#endif
454 448
455 dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", 449 dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
456 dentry->d_parent->d_name.name, dentry->d_name.name, 450 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -576,17 +570,9 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
576 570
577 lock_kernel(); 571 lock_kernel();
578 /* Use local locking if mounted with "-onolock" */ 572 /* Use local locking if mounted with "-onolock" */
579 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) { 573 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
580 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 574 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
581 /* If we were signalled we still need to ensure that 575 else
582 * we clean up any state on the server. We therefore
583 * record the lock call as having succeeded in order to
584 * ensure that locks_remove_posix() cleans it out when
585 * the process exits.
586 */
587 if (status == -EINTR || status == -ERESTARTSYS)
588 do_vfs_lock(filp, fl);
589 } else
590 status = do_vfs_lock(filp, fl); 576 status = do_vfs_lock(filp, fl);
591 unlock_kernel(); 577 unlock_kernel();
592 if (status < 0) 578 if (status < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f88d7c77ac9..5cb3345eb694 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -523,8 +523,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
523 523
524static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) 524static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
525{ 525{
526 struct inode *inode = ctx->path.dentry->d_inode; 526 struct inode *inode;
527 527
528 if (ctx == NULL)
529 return;
530
531 inode = ctx->path.dentry->d_inode;
528 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) 532 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
529 return; 533 return;
530 list_del(&ctx->list); 534 list_del(&ctx->list);
@@ -610,7 +614,7 @@ int nfs_open(struct inode *inode, struct file *filp)
610 struct nfs_open_context *ctx; 614 struct nfs_open_context *ctx;
611 struct rpc_cred *cred; 615 struct rpc_cred *cred;
612 616
613 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); 617 cred = rpc_lookup_cred();
614 if (IS_ERR(cred)) 618 if (IS_ERR(cred))
615 return PTR_ERR(cred); 619 return PTR_ERR(cred);
616 ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); 620 ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
@@ -1218,6 +1222,36 @@ static void nfs_destroy_inodecache(void)
1218 kmem_cache_destroy(nfs_inode_cachep); 1222 kmem_cache_destroy(nfs_inode_cachep);
1219} 1223}
1220 1224
1225struct workqueue_struct *nfsiod_workqueue;
1226
1227/*
1228 * start up the nfsiod workqueue
1229 */
1230static int nfsiod_start(void)
1231{
1232 struct workqueue_struct *wq;
1233 dprintk("RPC: creating workqueue nfsiod\n");
1234 wq = create_singlethread_workqueue("nfsiod");
1235 if (wq == NULL)
1236 return -ENOMEM;
1237 nfsiod_workqueue = wq;
1238 return 0;
1239}
1240
1241/*
1242 * Destroy the nfsiod workqueue
1243 */
1244static void nfsiod_stop(void)
1245{
1246 struct workqueue_struct *wq;
1247
1248 wq = nfsiod_workqueue;
1249 if (wq == NULL)
1250 return;
1251 nfsiod_workqueue = NULL;
1252 destroy_workqueue(wq);
1253}
1254
1221/* 1255/*
1222 * Initialize NFS 1256 * Initialize NFS
1223 */ 1257 */
@@ -1225,6 +1259,10 @@ static int __init init_nfs_fs(void)
1225{ 1259{
1226 int err; 1260 int err;
1227 1261
1262 err = nfsiod_start();
1263 if (err)
1264 goto out6;
1265
1228 err = nfs_fs_proc_init(); 1266 err = nfs_fs_proc_init();
1229 if (err) 1267 if (err)
1230 goto out5; 1268 goto out5;
@@ -1271,6 +1309,8 @@ out3:
1271out4: 1309out4:
1272 nfs_fs_proc_exit(); 1310 nfs_fs_proc_exit();
1273out5: 1311out5:
1312 nfsiod_stop();
1313out6:
1274 return err; 1314 return err;
1275} 1315}
1276 1316
@@ -1286,6 +1326,7 @@ static void __exit exit_nfs_fs(void)
1286#endif 1326#endif
1287 unregister_nfs_fs(); 1327 unregister_nfs_fs();
1288 nfs_fs_proc_exit(); 1328 nfs_fs_proc_exit();
1329 nfsiod_stop();
1289} 1330}
1290 1331
1291/* Not quite true; I just maintain it */ 1332/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 931992763e68..04ae867dddba 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -46,9 +46,9 @@ struct nfs_parsed_mount_data {
46 struct sockaddr_storage address; 46 struct sockaddr_storage address;
47 size_t addrlen; 47 size_t addrlen;
48 char *hostname; 48 char *hostname;
49 unsigned int version; 49 u32 version;
50 unsigned short port; 50 unsigned short port;
51 int protocol; 51 unsigned short protocol;
52 } mount_server; 52 } mount_server;
53 53
54 struct { 54 struct {
@@ -56,7 +56,8 @@ struct nfs_parsed_mount_data {
56 size_t addrlen; 56 size_t addrlen;
57 char *hostname; 57 char *hostname;
58 char *export_path; 58 char *export_path;
59 int protocol; 59 unsigned short port;
60 unsigned short protocol;
60 } nfs_server; 61 } nfs_server;
61 62
62 struct security_mnt_opts lsm_opts; 63 struct security_mnt_opts lsm_opts;
@@ -115,13 +116,8 @@ extern void nfs_destroy_readpagecache(void);
115extern int __init nfs_init_writepagecache(void); 116extern int __init nfs_init_writepagecache(void);
116extern void nfs_destroy_writepagecache(void); 117extern void nfs_destroy_writepagecache(void);
117 118
118#ifdef CONFIG_NFS_DIRECTIO
119extern int __init nfs_init_directcache(void); 119extern int __init nfs_init_directcache(void);
120extern void nfs_destroy_directcache(void); 120extern void nfs_destroy_directcache(void);
121#else
122#define nfs_init_directcache() (0)
123#define nfs_destroy_directcache() do {} while(0)
124#endif
125 121
126/* nfs2xdr.c */ 122/* nfs2xdr.c */
127extern int nfs_stat_to_errno(int); 123extern int nfs_stat_to_errno(int);
@@ -146,6 +142,7 @@ extern struct rpc_procinfo nfs4_procedures[];
146extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 142extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
147 143
148/* inode.c */ 144/* inode.c */
145extern struct workqueue_struct *nfsiod_workqueue;
149extern struct inode *nfs_alloc_inode(struct super_block *sb); 146extern struct inode *nfs_alloc_inode(struct super_block *sb);
150extern void nfs_destroy_inode(struct inode *); 147extern void nfs_destroy_inode(struct inode *);
151extern int nfs_write_inode(struct inode *,int); 148extern int nfs_write_inode(struct inode *,int);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 607f6eb9cdb5..af4d0f1e402c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -20,7 +20,7 @@
20 20
21static void nfs_expire_automounts(struct work_struct *work); 21static void nfs_expire_automounts(struct work_struct *work);
22 22
23LIST_HEAD(nfs_automount_list); 23static LIST_HEAD(nfs_automount_list);
24static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 24static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
25int nfs_mountpoint_expiry_timeout = 500 * HZ; 25int nfs_mountpoint_expiry_timeout = 500 * HZ;
26 26
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f7ea675e0c5..28bab67d1519 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -267,7 +267,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
267 int status; 267 int status;
268 268
269 if ((status = ntohl(*p++))) 269 if ((status = ntohl(*p++)))
270 return -nfs_stat_to_errno(status); 270 return nfs_stat_to_errno(status);
271 p = xdr_decode_fattr(p, res->fattr); 271 p = xdr_decode_fattr(p, res->fattr);
272 272
273 count = ntohl(*p++); 273 count = ntohl(*p++);
@@ -428,11 +428,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
428 size_t hdrlen; 428 size_t hdrlen;
429 unsigned int pglen, recvd; 429 unsigned int pglen, recvd;
430 u32 len; 430 u32 len;
431 int status, nr; 431 int status, nr = 0;
432 __be32 *end, *entry, *kaddr; 432 __be32 *end, *entry, *kaddr;
433 433
434 if ((status = ntohl(*p++))) 434 if ((status = ntohl(*p++)))
435 return -nfs_stat_to_errno(status); 435 return nfs_stat_to_errno(status);
436 436
437 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 437 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
438 if (iov->iov_len < hdrlen) { 438 if (iov->iov_len < hdrlen) {
@@ -452,7 +452,12 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
452 kaddr = p = kmap_atomic(*page, KM_USER0); 452 kaddr = p = kmap_atomic(*page, KM_USER0);
453 end = (__be32 *)((char *)p + pglen); 453 end = (__be32 *)((char *)p + pglen);
454 entry = p; 454 entry = p;
455 for (nr = 0; *p++; nr++) { 455
456 /* Make sure the packet actually has a value_follows and EOF entry */
457 if ((entry + 1) > end)
458 goto short_pkt;
459
460 for (; *p++; nr++) {
456 if (p + 2 > end) 461 if (p + 2 > end)
457 goto short_pkt; 462 goto short_pkt;
458 p++; /* fileid */ 463 p++; /* fileid */
@@ -467,18 +472,32 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
467 goto short_pkt; 472 goto short_pkt;
468 entry = p; 473 entry = p;
469 } 474 }
470 if (!nr && (entry[0] != 0 || entry[1] == 0)) 475
471 goto short_pkt; 476 /*
477 * Apparently some server sends responses that are a valid size, but
478 * contain no entries, and have value_follows==0 and EOF==0. For
479 * those, just set the EOF marker.
480 */
481 if (!nr && entry[1] == 0) {
482 dprintk("NFS: readdir reply truncated!\n");
483 entry[1] = 1;
484 }
472 out: 485 out:
473 kunmap_atomic(kaddr, KM_USER0); 486 kunmap_atomic(kaddr, KM_USER0);
474 return nr; 487 return nr;
475 short_pkt: 488 short_pkt:
489 /*
490 * When we get a short packet there are 2 possibilities. We can
491 * return an error, or fix up the response to look like a valid
492 * response and return what we have so far. If there are no
493 * entries and the packet was short, then return -EIO. If there
494 * are valid entries in the response, return them and pretend that
495 * the call was successful, but incomplete. The caller can retry the
496 * readdir starting at the last cookie.
497 */
476 entry[0] = entry[1] = 0; 498 entry[0] = entry[1] = 0;
477 /* truncate listing ? */ 499 if (!nr)
478 if (!nr) { 500 nr = -errno_NFSERR_IO;
479 dprintk("NFS: readdir reply truncated!\n");
480 entry[1] = 1;
481 }
482 goto out; 501 goto out;
483err_unmap: 502err_unmap:
484 nr = -errno_NFSERR_IO; 503 nr = -errno_NFSERR_IO;
@@ -518,7 +537,7 @@ nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
518 int status; 537 int status;
519 538
520 if ((status = ntohl(*p++)) != 0) 539 if ((status = ntohl(*p++)) != 0)
521 status = -nfs_stat_to_errno(status); 540 status = nfs_stat_to_errno(status);
522 return status; 541 return status;
523} 542}
524 543
@@ -532,7 +551,7 @@ nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
532 int status; 551 int status;
533 552
534 if ((status = ntohl(*p++))) 553 if ((status = ntohl(*p++)))
535 return -nfs_stat_to_errno(status); 554 return nfs_stat_to_errno(status);
536 xdr_decode_fattr(p, fattr); 555 xdr_decode_fattr(p, fattr);
537 return 0; 556 return 0;
538} 557}
@@ -547,7 +566,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
547 int status; 566 int status;
548 567
549 if ((status = ntohl(*p++))) 568 if ((status = ntohl(*p++)))
550 return -nfs_stat_to_errno(status); 569 return nfs_stat_to_errno(status);
551 p = xdr_decode_fhandle(p, res->fh); 570 p = xdr_decode_fhandle(p, res->fh);
552 xdr_decode_fattr(p, res->fattr); 571 xdr_decode_fattr(p, res->fattr);
553 return 0; 572 return 0;
@@ -585,7 +604,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
585 int status; 604 int status;
586 605
587 if ((status = ntohl(*p++))) 606 if ((status = ntohl(*p++)))
588 return -nfs_stat_to_errno(status); 607 return nfs_stat_to_errno(status);
589 /* Convert length of symlink */ 608 /* Convert length of symlink */
590 len = ntohl(*p++); 609 len = ntohl(*p++);
591 if (len >= rcvbuf->page_len) { 610 if (len >= rcvbuf->page_len) {
@@ -634,7 +653,7 @@ nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
634 int status; 653 int status;
635 654
636 if ((status = ntohl(*p++))) 655 if ((status = ntohl(*p++)))
637 return -nfs_stat_to_errno(status); 656 return nfs_stat_to_errno(status);
638 657
639 res->tsize = ntohl(*p++); 658 res->tsize = ntohl(*p++);
640 res->bsize = ntohl(*p++); 659 res->bsize = ntohl(*p++);
@@ -653,39 +672,39 @@ static struct {
653 int errno; 672 int errno;
654} nfs_errtbl[] = { 673} nfs_errtbl[] = {
655 { NFS_OK, 0 }, 674 { NFS_OK, 0 },
656 { NFSERR_PERM, EPERM }, 675 { NFSERR_PERM, -EPERM },
657 { NFSERR_NOENT, ENOENT }, 676 { NFSERR_NOENT, -ENOENT },
658 { NFSERR_IO, errno_NFSERR_IO }, 677 { NFSERR_IO, -errno_NFSERR_IO},
659 { NFSERR_NXIO, ENXIO }, 678 { NFSERR_NXIO, -ENXIO },
660/* { NFSERR_EAGAIN, EAGAIN }, */ 679/* { NFSERR_EAGAIN, -EAGAIN }, */
661 { NFSERR_ACCES, EACCES }, 680 { NFSERR_ACCES, -EACCES },
662 { NFSERR_EXIST, EEXIST }, 681 { NFSERR_EXIST, -EEXIST },
663 { NFSERR_XDEV, EXDEV }, 682 { NFSERR_XDEV, -EXDEV },
664 { NFSERR_NODEV, ENODEV }, 683 { NFSERR_NODEV, -ENODEV },
665 { NFSERR_NOTDIR, ENOTDIR }, 684 { NFSERR_NOTDIR, -ENOTDIR },
666 { NFSERR_ISDIR, EISDIR }, 685 { NFSERR_ISDIR, -EISDIR },
667 { NFSERR_INVAL, EINVAL }, 686 { NFSERR_INVAL, -EINVAL },
668 { NFSERR_FBIG, EFBIG }, 687 { NFSERR_FBIG, -EFBIG },
669 { NFSERR_NOSPC, ENOSPC }, 688 { NFSERR_NOSPC, -ENOSPC },
670 { NFSERR_ROFS, EROFS }, 689 { NFSERR_ROFS, -EROFS },
671 { NFSERR_MLINK, EMLINK }, 690 { NFSERR_MLINK, -EMLINK },
672 { NFSERR_NAMETOOLONG, ENAMETOOLONG }, 691 { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
673 { NFSERR_NOTEMPTY, ENOTEMPTY }, 692 { NFSERR_NOTEMPTY, -ENOTEMPTY },
674 { NFSERR_DQUOT, EDQUOT }, 693 { NFSERR_DQUOT, -EDQUOT },
675 { NFSERR_STALE, ESTALE }, 694 { NFSERR_STALE, -ESTALE },
676 { NFSERR_REMOTE, EREMOTE }, 695 { NFSERR_REMOTE, -EREMOTE },
677#ifdef EWFLUSH 696#ifdef EWFLUSH
678 { NFSERR_WFLUSH, EWFLUSH }, 697 { NFSERR_WFLUSH, -EWFLUSH },
679#endif 698#endif
680 { NFSERR_BADHANDLE, EBADHANDLE }, 699 { NFSERR_BADHANDLE, -EBADHANDLE },
681 { NFSERR_NOT_SYNC, ENOTSYNC }, 700 { NFSERR_NOT_SYNC, -ENOTSYNC },
682 { NFSERR_BAD_COOKIE, EBADCOOKIE }, 701 { NFSERR_BAD_COOKIE, -EBADCOOKIE },
683 { NFSERR_NOTSUPP, ENOTSUPP }, 702 { NFSERR_NOTSUPP, -ENOTSUPP },
684 { NFSERR_TOOSMALL, ETOOSMALL }, 703 { NFSERR_TOOSMALL, -ETOOSMALL },
685 { NFSERR_SERVERFAULT, ESERVERFAULT }, 704 { NFSERR_SERVERFAULT, -ESERVERFAULT },
686 { NFSERR_BADTYPE, EBADTYPE }, 705 { NFSERR_BADTYPE, -EBADTYPE },
687 { NFSERR_JUKEBOX, EJUKEBOX }, 706 { NFSERR_JUKEBOX, -EJUKEBOX },
688 { -1, EIO } 707 { -1, -EIO }
689}; 708};
690 709
691/* 710/*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 3917e2fa4e40..11cdddec1432 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -508,14 +508,14 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
508 struct page **page; 508 struct page **page;
509 size_t hdrlen; 509 size_t hdrlen;
510 u32 len, recvd, pglen; 510 u32 len, recvd, pglen;
511 int status, nr; 511 int status, nr = 0;
512 __be32 *entry, *end, *kaddr; 512 __be32 *entry, *end, *kaddr;
513 513
514 status = ntohl(*p++); 514 status = ntohl(*p++);
515 /* Decode post_op_attrs */ 515 /* Decode post_op_attrs */
516 p = xdr_decode_post_op_attr(p, res->dir_attr); 516 p = xdr_decode_post_op_attr(p, res->dir_attr);
517 if (status) 517 if (status)
518 return -nfs_stat_to_errno(status); 518 return nfs_stat_to_errno(status);
519 /* Decode verifier cookie */ 519 /* Decode verifier cookie */
520 if (res->verf) { 520 if (res->verf) {
521 res->verf[0] = *p++; 521 res->verf[0] = *p++;
@@ -542,7 +542,12 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
542 kaddr = p = kmap_atomic(*page, KM_USER0); 542 kaddr = p = kmap_atomic(*page, KM_USER0);
543 end = (__be32 *)((char *)p + pglen); 543 end = (__be32 *)((char *)p + pglen);
544 entry = p; 544 entry = p;
545 for (nr = 0; *p++; nr++) { 545
546 /* Make sure the packet actually has a value_follows and EOF entry */
547 if ((entry + 1) > end)
548 goto short_pkt;
549
550 for (; *p++; nr++) {
546 if (p + 3 > end) 551 if (p + 3 > end)
547 goto short_pkt; 552 goto short_pkt;
548 p += 2; /* inode # */ 553 p += 2; /* inode # */
@@ -581,18 +586,32 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
581 goto short_pkt; 586 goto short_pkt;
582 entry = p; 587 entry = p;
583 } 588 }
584 if (!nr && (entry[0] != 0 || entry[1] == 0)) 589
585 goto short_pkt; 590 /*
591 * Apparently some server sends responses that are a valid size, but
592 * contain no entries, and have value_follows==0 and EOF==0. For
593 * those, just set the EOF marker.
594 */
595 if (!nr && entry[1] == 0) {
596 dprintk("NFS: readdir reply truncated!\n");
597 entry[1] = 1;
598 }
586 out: 599 out:
587 kunmap_atomic(kaddr, KM_USER0); 600 kunmap_atomic(kaddr, KM_USER0);
588 return nr; 601 return nr;
589 short_pkt: 602 short_pkt:
603 /*
604 * When we get a short packet there are 2 possibilities. We can
605 * return an error, or fix up the response to look like a valid
606 * response and return what we have so far. If there are no
607 * entries and the packet was short, then return -EIO. If there
608 * are valid entries in the response, return them and pretend that
609 * the call was successful, but incomplete. The caller can retry the
610 * readdir starting at the last cookie.
611 */
590 entry[0] = entry[1] = 0; 612 entry[0] = entry[1] = 0;
591 /* truncate listing ? */ 613 if (!nr)
592 if (!nr) { 614 nr = -errno_NFSERR_IO;
593 dprintk("NFS: readdir reply truncated!\n");
594 entry[1] = 1;
595 }
596 goto out; 615 goto out;
597err_unmap: 616err_unmap:
598 nr = -errno_NFSERR_IO; 617 nr = -errno_NFSERR_IO;
@@ -732,7 +751,7 @@ nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
732 int status; 751 int status;
733 752
734 if ((status = ntohl(*p++))) 753 if ((status = ntohl(*p++)))
735 return -nfs_stat_to_errno(status); 754 return nfs_stat_to_errno(status);
736 xdr_decode_fattr(p, fattr); 755 xdr_decode_fattr(p, fattr);
737 return 0; 756 return 0;
738} 757}
@@ -747,7 +766,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
747 int status; 766 int status;
748 767
749 if ((status = ntohl(*p++))) 768 if ((status = ntohl(*p++)))
750 status = -nfs_stat_to_errno(status); 769 status = nfs_stat_to_errno(status);
751 xdr_decode_wcc_data(p, fattr); 770 xdr_decode_wcc_data(p, fattr);
752 return status; 771 return status;
753} 772}
@@ -767,7 +786,7 @@ nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
767 int status; 786 int status;
768 787
769 if ((status = ntohl(*p++))) { 788 if ((status = ntohl(*p++))) {
770 status = -nfs_stat_to_errno(status); 789 status = nfs_stat_to_errno(status);
771 } else { 790 } else {
772 if (!(p = xdr_decode_fhandle(p, res->fh))) 791 if (!(p = xdr_decode_fhandle(p, res->fh)))
773 return -errno_NFSERR_IO; 792 return -errno_NFSERR_IO;
@@ -787,7 +806,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
787 806
788 p = xdr_decode_post_op_attr(p, res->fattr); 807 p = xdr_decode_post_op_attr(p, res->fattr);
789 if (status) 808 if (status)
790 return -nfs_stat_to_errno(status); 809 return nfs_stat_to_errno(status);
791 res->access = ntohl(*p++); 810 res->access = ntohl(*p++);
792 return 0; 811 return 0;
793} 812}
@@ -824,7 +843,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
824 p = xdr_decode_post_op_attr(p, fattr); 843 p = xdr_decode_post_op_attr(p, fattr);
825 844
826 if (status != 0) 845 if (status != 0)
827 return -nfs_stat_to_errno(status); 846 return nfs_stat_to_errno(status);
828 847
829 /* Convert length of symlink */ 848 /* Convert length of symlink */
830 len = ntohl(*p++); 849 len = ntohl(*p++);
@@ -872,7 +891,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
872 p = xdr_decode_post_op_attr(p, res->fattr); 891 p = xdr_decode_post_op_attr(p, res->fattr);
873 892
874 if (status != 0) 893 if (status != 0)
875 return -nfs_stat_to_errno(status); 894 return nfs_stat_to_errno(status);
876 895
877 /* Decode reply count and EOF flag. NFSv3 is somewhat redundant 896 /* Decode reply count and EOF flag. NFSv3 is somewhat redundant
878 * in that it puts the count both in the res struct and in the 897 * in that it puts the count both in the res struct and in the
@@ -922,7 +941,7 @@ nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
922 p = xdr_decode_wcc_data(p, res->fattr); 941 p = xdr_decode_wcc_data(p, res->fattr);
923 942
924 if (status != 0) 943 if (status != 0)
925 return -nfs_stat_to_errno(status); 944 return nfs_stat_to_errno(status);
926 945
927 res->count = ntohl(*p++); 946 res->count = ntohl(*p++);
928 res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); 947 res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
@@ -953,7 +972,7 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
953 res->fattr->valid = 0; 972 res->fattr->valid = 0;
954 } 973 }
955 } else { 974 } else {
956 status = -nfs_stat_to_errno(status); 975 status = nfs_stat_to_errno(status);
957 } 976 }
958 p = xdr_decode_wcc_data(p, res->dir_attr); 977 p = xdr_decode_wcc_data(p, res->dir_attr);
959 return status; 978 return status;
@@ -968,7 +987,7 @@ nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res)
968 int status; 987 int status;
969 988
970 if ((status = ntohl(*p++)) != 0) 989 if ((status = ntohl(*p++)) != 0)
971 status = -nfs_stat_to_errno(status); 990 status = nfs_stat_to_errno(status);
972 p = xdr_decode_wcc_data(p, res->fromattr); 991 p = xdr_decode_wcc_data(p, res->fromattr);
973 p = xdr_decode_wcc_data(p, res->toattr); 992 p = xdr_decode_wcc_data(p, res->toattr);
974 return status; 993 return status;
@@ -983,7 +1002,7 @@ nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
983 int status; 1002 int status;
984 1003
985 if ((status = ntohl(*p++)) != 0) 1004 if ((status = ntohl(*p++)) != 0)
986 status = -nfs_stat_to_errno(status); 1005 status = nfs_stat_to_errno(status);
987 p = xdr_decode_post_op_attr(p, res->fattr); 1006 p = xdr_decode_post_op_attr(p, res->fattr);
988 p = xdr_decode_wcc_data(p, res->dir_attr); 1007 p = xdr_decode_wcc_data(p, res->dir_attr);
989 return status; 1008 return status;
@@ -1001,7 +1020,7 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
1001 1020
1002 p = xdr_decode_post_op_attr(p, res->fattr); 1021 p = xdr_decode_post_op_attr(p, res->fattr);
1003 if (status != 0) 1022 if (status != 0)
1004 return -nfs_stat_to_errno(status); 1023 return nfs_stat_to_errno(status);
1005 1024
1006 p = xdr_decode_hyper(p, &res->tbytes); 1025 p = xdr_decode_hyper(p, &res->tbytes);
1007 p = xdr_decode_hyper(p, &res->fbytes); 1026 p = xdr_decode_hyper(p, &res->fbytes);
@@ -1026,7 +1045,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
1026 1045
1027 p = xdr_decode_post_op_attr(p, res->fattr); 1046 p = xdr_decode_post_op_attr(p, res->fattr);
1028 if (status != 0) 1047 if (status != 0)
1029 return -nfs_stat_to_errno(status); 1048 return nfs_stat_to_errno(status);
1030 1049
1031 res->rtmax = ntohl(*p++); 1050 res->rtmax = ntohl(*p++);
1032 res->rtpref = ntohl(*p++); 1051 res->rtpref = ntohl(*p++);
@@ -1054,7 +1073,7 @@ nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
1054 1073
1055 p = xdr_decode_post_op_attr(p, res->fattr); 1074 p = xdr_decode_post_op_attr(p, res->fattr);
1056 if (status != 0) 1075 if (status != 0)
1057 return -nfs_stat_to_errno(status); 1076 return nfs_stat_to_errno(status);
1058 res->max_link = ntohl(*p++); 1077 res->max_link = ntohl(*p++);
1059 res->max_namelen = ntohl(*p++); 1078 res->max_namelen = ntohl(*p++);
1060 1079
@@ -1073,7 +1092,7 @@ nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
1073 status = ntohl(*p++); 1092 status = ntohl(*p++);
1074 p = xdr_decode_wcc_data(p, res->fattr); 1093 p = xdr_decode_wcc_data(p, res->fattr);
1075 if (status != 0) 1094 if (status != 0)
1076 return -nfs_stat_to_errno(status); 1095 return nfs_stat_to_errno(status);
1077 1096
1078 res->verf->verifier[0] = *p++; 1097 res->verf->verifier[0] = *p++;
1079 res->verf->verifier[1] = *p++; 1098 res->verf->verifier[1] = *p++;
@@ -1095,7 +1114,7 @@ nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
1095 int err, base; 1114 int err, base;
1096 1115
1097 if (status != 0) 1116 if (status != 0)
1098 return -nfs_stat_to_errno(status); 1117 return nfs_stat_to_errno(status);
1099 p = xdr_decode_post_op_attr(p, res->fattr); 1118 p = xdr_decode_post_op_attr(p, res->fattr);
1100 res->mask = ntohl(*p++); 1119 res->mask = ntohl(*p++);
1101 if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 1120 if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -1122,7 +1141,7 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
1122 int status = ntohl(*p++); 1141 int status = ntohl(*p++);
1123 1142
1124 if (status) 1143 if (status)
1125 return -nfs_stat_to_errno(status); 1144 return nfs_stat_to_errno(status);
1126 xdr_decode_post_op_attr(p, fattr); 1145 xdr_decode_post_op_attr(p, fattr);
1127 return 0; 1146 return 0;
1128} 1147}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ce07862c2fb..dbc09271af02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
51 51
52#include "nfs4_fs.h" 52#include "nfs4_fs.h"
53#include "delegation.h" 53#include "delegation.h"
54#include "internal.h"
54#include "iostat.h" 55#include "iostat.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_PROC 57#define NFSDBG_FACILITY NFSDBG_PROC
@@ -239,6 +240,8 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
239{ 240{
240 p->o_res.f_attr = &p->f_attr; 241 p->o_res.f_attr = &p->f_attr;
241 p->o_res.dir_attr = &p->dir_attr; 242 p->o_res.dir_attr = &p->dir_attr;
243 p->o_res.seqid = p->o_arg.seqid;
244 p->c_res.seqid = p->c_arg.seqid;
242 p->o_res.server = p->o_arg.server; 245 p->o_res.server = p->o_arg.server;
243 nfs_fattr_init(&p->f_attr); 246 nfs_fattr_init(&p->f_attr);
244 nfs_fattr_init(&p->dir_attr); 247 nfs_fattr_init(&p->dir_attr);
@@ -729,7 +732,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
729 renew_lease(data->o_res.server, data->timestamp); 732 renew_lease(data->o_res.server, data->timestamp);
730 data->rpc_done = 1; 733 data->rpc_done = 1;
731 } 734 }
732 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
733} 735}
734 736
735static void nfs4_open_confirm_release(void *calldata) 737static void nfs4_open_confirm_release(void *calldata)
@@ -773,6 +775,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
773 .rpc_message = &msg, 775 .rpc_message = &msg,
774 .callback_ops = &nfs4_open_confirm_ops, 776 .callback_ops = &nfs4_open_confirm_ops,
775 .callback_data = data, 777 .callback_data = data,
778 .workqueue = nfsiod_workqueue,
776 .flags = RPC_TASK_ASYNC, 779 .flags = RPC_TASK_ASYNC,
777 }; 780 };
778 int status; 781 int status;
@@ -858,7 +861,6 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
858 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) 861 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
859 nfs_confirm_seqid(&data->owner->so_seqid, 0); 862 nfs_confirm_seqid(&data->owner->so_seqid, 0);
860 } 863 }
861 nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
862 data->rpc_done = 1; 864 data->rpc_done = 1;
863} 865}
864 866
@@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
910 .rpc_message = &msg, 912 .rpc_message = &msg,
911 .callback_ops = &nfs4_open_ops, 913 .callback_ops = &nfs4_open_ops,
912 .callback_data = data, 914 .callback_data = data,
915 .workqueue = nfsiod_workqueue,
913 .flags = RPC_TASK_ASYNC, 916 .flags = RPC_TASK_ASYNC,
914 }; 917 };
915 int status; 918 int status;
@@ -979,11 +982,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
979 if (IS_ERR(opendata)) 982 if (IS_ERR(opendata))
980 return PTR_ERR(opendata); 983 return PTR_ERR(opendata);
981 ret = nfs4_open_recover(opendata, state); 984 ret = nfs4_open_recover(opendata, state);
982 if (ret == -ESTALE) { 985 if (ret == -ESTALE)
983 /* Invalidate the state owner so we don't ever use it again */
984 nfs4_drop_state_owner(state->owner);
985 d_drop(ctx->path.dentry); 986 d_drop(ctx->path.dentry);
986 }
987 nfs4_opendata_put(opendata); 987 nfs4_opendata_put(opendata);
988 return ret; 988 return ret;
989} 989}
@@ -1226,7 +1226,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1226 /* hmm. we are done with the inode, and in the process of freeing 1226 /* hmm. we are done with the inode, and in the process of freeing
1227 * the state_owner. we keep this around to process errors 1227 * the state_owner. we keep this around to process errors
1228 */ 1228 */
1229 nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
1230 switch (task->tk_status) { 1229 switch (task->tk_status) {
1231 case 0: 1230 case 0:
1232 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 1231 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
@@ -1315,6 +1314,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1315 .rpc_client = server->client, 1314 .rpc_client = server->client,
1316 .rpc_message = &msg, 1315 .rpc_message = &msg,
1317 .callback_ops = &nfs4_close_ops, 1316 .callback_ops = &nfs4_close_ops,
1317 .workqueue = nfsiod_workqueue,
1318 .flags = RPC_TASK_ASYNC, 1318 .flags = RPC_TASK_ASYNC,
1319 }; 1319 };
1320 int status = -ENOMEM; 1320 int status = -ENOMEM;
@@ -1332,6 +1332,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1332 goto out_free_calldata; 1332 goto out_free_calldata;
1333 calldata->arg.bitmask = server->attr_bitmask; 1333 calldata->arg.bitmask = server->attr_bitmask;
1334 calldata->res.fattr = &calldata->fattr; 1334 calldata->res.fattr = &calldata->fattr;
1335 calldata->res.seqid = calldata->arg.seqid;
1335 calldata->res.server = server; 1336 calldata->res.server = server;
1336 calldata->path.mnt = mntget(path->mnt); 1337 calldata->path.mnt = mntget(path->mnt);
1337 calldata->path.dentry = dget(path->dentry); 1338 calldata->path.dentry = dget(path->dentry);
@@ -1404,7 +1405,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1404 BUG_ON(nd->intent.open.flags & O_CREAT); 1405 BUG_ON(nd->intent.open.flags & O_CREAT);
1405 } 1406 }
1406 1407
1407 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1408 cred = rpc_lookup_cred();
1408 if (IS_ERR(cred)) 1409 if (IS_ERR(cred))
1409 return (struct dentry *)cred; 1410 return (struct dentry *)cred;
1410 parent = dentry->d_parent; 1411 parent = dentry->d_parent;
@@ -1439,7 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1439 struct rpc_cred *cred; 1440 struct rpc_cred *cred;
1440 struct nfs4_state *state; 1441 struct nfs4_state *state;
1441 1442
1442 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1443 cred = rpc_lookup_cred();
1443 if (IS_ERR(cred)) 1444 if (IS_ERR(cred))
1444 return PTR_ERR(cred); 1445 return PTR_ERR(cred);
1445 state = nfs4_do_open(dir, &path, openflags, NULL, cred); 1446 state = nfs4_do_open(dir, &path, openflags, NULL, cred);
@@ -1656,7 +1657,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1656 1657
1657 nfs_fattr_init(fattr); 1658 nfs_fattr_init(fattr);
1658 1659
1659 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); 1660 cred = rpc_lookup_cred();
1660 if (IS_ERR(cred)) 1661 if (IS_ERR(cred))
1661 return PTR_ERR(cred); 1662 return PTR_ERR(cred);
1662 1663
@@ -1892,7 +1893,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1892 struct rpc_cred *cred; 1893 struct rpc_cred *cred;
1893 int status = 0; 1894 int status = 0;
1894 1895
1895 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 1896 cred = rpc_lookup_cred();
1896 if (IS_ERR(cred)) { 1897 if (IS_ERR(cred)) {
1897 status = PTR_ERR(cred); 1898 status = PTR_ERR(cred);
1898 goto out; 1899 goto out;
@@ -2761,10 +2762,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2761 case -NFS4ERR_STALE_CLIENTID: 2762 case -NFS4ERR_STALE_CLIENTID:
2762 case -NFS4ERR_STALE_STATEID: 2763 case -NFS4ERR_STALE_STATEID:
2763 case -NFS4ERR_EXPIRED: 2764 case -NFS4ERR_EXPIRED:
2764 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); 2765 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
2765 nfs4_schedule_state_recovery(clp); 2766 nfs4_schedule_state_recovery(clp);
2766 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) 2767 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
2767 rpc_wake_up_task(task); 2768 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
2768 task->tk_status = 0; 2769 task->tk_status = 0;
2769 return -EAGAIN; 2770 return -EAGAIN;
2770 case -NFS4ERR_DELAY: 2771 case -NFS4ERR_DELAY:
@@ -2884,7 +2885,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
2884 RPC_DISPLAY_ADDR), 2885 RPC_DISPLAY_ADDR),
2885 rpc_peeraddr2str(clp->cl_rpcclient, 2886 rpc_peeraddr2str(clp->cl_rpcclient,
2886 RPC_DISPLAY_PROTO), 2887 RPC_DISPLAY_PROTO),
2887 cred->cr_ops->cr_name, 2888 clp->cl_rpcclient->cl_auth->au_ops->au_name,
2888 clp->cl_id_uniquifier); 2889 clp->cl_id_uniquifier);
2889 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 2890 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
2890 sizeof(setclientid.sc_netid), 2891 sizeof(setclientid.sc_netid),
@@ -3158,6 +3159,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3158 p->arg.fh = NFS_FH(inode); 3159 p->arg.fh = NFS_FH(inode);
3159 p->arg.fl = &p->fl; 3160 p->arg.fl = &p->fl;
3160 p->arg.seqid = seqid; 3161 p->arg.seqid = seqid;
3162 p->res.seqid = seqid;
3161 p->arg.stateid = &lsp->ls_stateid; 3163 p->arg.stateid = &lsp->ls_stateid;
3162 p->lsp = lsp; 3164 p->lsp = lsp;
3163 atomic_inc(&lsp->ls_count); 3165 atomic_inc(&lsp->ls_count);
@@ -3183,7 +3185,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3183 3185
3184 if (RPC_ASSASSINATED(task)) 3186 if (RPC_ASSASSINATED(task))
3185 return; 3187 return;
3186 nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid);
3187 switch (task->tk_status) { 3188 switch (task->tk_status) {
3188 case 0: 3189 case 0:
3189 memcpy(calldata->lsp->ls_stateid.data, 3190 memcpy(calldata->lsp->ls_stateid.data,
@@ -3235,6 +3236,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3235 .rpc_client = NFS_CLIENT(lsp->ls_state->inode), 3236 .rpc_client = NFS_CLIENT(lsp->ls_state->inode),
3236 .rpc_message = &msg, 3237 .rpc_message = &msg,
3237 .callback_ops = &nfs4_locku_ops, 3238 .callback_ops = &nfs4_locku_ops,
3239 .workqueue = nfsiod_workqueue,
3238 .flags = RPC_TASK_ASYNC, 3240 .flags = RPC_TASK_ASYNC,
3239 }; 3241 };
3240 3242
@@ -3261,6 +3263,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3261 struct nfs4_lock_state *lsp; 3263 struct nfs4_lock_state *lsp;
3262 struct rpc_task *task; 3264 struct rpc_task *task;
3263 int status = 0; 3265 int status = 0;
3266 unsigned char fl_flags = request->fl_flags;
3264 3267
3265 status = nfs4_set_lock_state(state, request); 3268 status = nfs4_set_lock_state(state, request);
3266 /* Unlock _before_ we do the RPC call */ 3269 /* Unlock _before_ we do the RPC call */
@@ -3284,6 +3287,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3284 status = nfs4_wait_for_completion_rpc_task(task); 3287 status = nfs4_wait_for_completion_rpc_task(task);
3285 rpc_put_task(task); 3288 rpc_put_task(task);
3286out: 3289out:
3290 request->fl_flags = fl_flags;
3287 return status; 3291 return status;
3288} 3292}
3289 3293
@@ -3320,6 +3324,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3320 p->arg.lock_stateid = &lsp->ls_stateid; 3324 p->arg.lock_stateid = &lsp->ls_stateid;
3321 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3325 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3322 p->arg.lock_owner.id = lsp->ls_id.id; 3326 p->arg.lock_owner.id = lsp->ls_id.id;
3327 p->res.lock_seqid = p->arg.lock_seqid;
3323 p->lsp = lsp; 3328 p->lsp = lsp;
3324 atomic_inc(&lsp->ls_count); 3329 atomic_inc(&lsp->ls_count);
3325 p->ctx = get_nfs_open_context(ctx); 3330 p->ctx = get_nfs_open_context(ctx);
@@ -3346,6 +3351,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3346 return; 3351 return;
3347 data->arg.open_stateid = &state->stateid; 3352 data->arg.open_stateid = &state->stateid;
3348 data->arg.new_lock_owner = 1; 3353 data->arg.new_lock_owner = 1;
3354 data->res.open_seqid = data->arg.open_seqid;
3349 } else 3355 } else
3350 data->arg.new_lock_owner = 0; 3356 data->arg.new_lock_owner = 0;
3351 data->timestamp = jiffies; 3357 data->timestamp = jiffies;
@@ -3363,7 +3369,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3363 if (RPC_ASSASSINATED(task)) 3369 if (RPC_ASSASSINATED(task))
3364 goto out; 3370 goto out;
3365 if (data->arg.new_lock_owner != 0) { 3371 if (data->arg.new_lock_owner != 0) {
3366 nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid);
3367 if (data->rpc_status == 0) 3372 if (data->rpc_status == 0)
3368 nfs_confirm_seqid(&data->lsp->ls_seqid, 0); 3373 nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
3369 else 3374 else
@@ -3375,7 +3380,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3375 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 3380 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
3376 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); 3381 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
3377 } 3382 }
3378 nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
3379out: 3383out:
3380 dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status); 3384 dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status);
3381} 3385}
@@ -3419,6 +3423,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
3419 .rpc_client = NFS_CLIENT(state->inode), 3423 .rpc_client = NFS_CLIENT(state->inode),
3420 .rpc_message = &msg, 3424 .rpc_message = &msg,
3421 .callback_ops = &nfs4_lock_ops, 3425 .callback_ops = &nfs4_lock_ops,
3426 .workqueue = nfsiod_workqueue,
3422 .flags = RPC_TASK_ASYNC, 3427 .flags = RPC_TASK_ASYNC,
3423 }; 3428 };
3424 int ret; 3429 int ret;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b962397004c1..46eb624e4f16 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,6 +71,29 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
71 return status; 71 return status;
72} 72}
73 73
74static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp)
75{
76 struct rpc_cred *cred = NULL;
77
78 spin_lock(&clp->cl_lock);
79 if (clp->cl_machine_cred != NULL)
80 cred = get_rpccred(clp->cl_machine_cred);
81 spin_unlock(&clp->cl_lock);
82 return cred;
83}
84
85static void nfs4_clear_machine_cred(struct nfs_client *clp)
86{
87 struct rpc_cred *cred;
88
89 spin_lock(&clp->cl_lock);
90 cred = clp->cl_machine_cred;
91 clp->cl_machine_cred = NULL;
92 spin_unlock(&clp->cl_lock);
93 if (cred != NULL)
94 put_rpccred(cred);
95}
96
74struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 97struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
75{ 98{
76 struct nfs4_state_owner *sp; 99 struct nfs4_state_owner *sp;
@@ -91,13 +114,18 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
91{ 114{
92 struct nfs4_state_owner *sp; 115 struct nfs4_state_owner *sp;
93 struct rb_node *pos; 116 struct rb_node *pos;
117 struct rpc_cred *cred;
94 118
119 cred = nfs4_get_machine_cred(clp);
120 if (cred != NULL)
121 goto out;
95 pos = rb_first(&clp->cl_state_owners); 122 pos = rb_first(&clp->cl_state_owners);
96 if (pos != NULL) { 123 if (pos != NULL) {
97 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 124 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
98 return get_rpccred(sp->so_cred); 125 cred = get_rpccred(sp->so_cred);
99 } 126 }
100 return NULL; 127out:
128 return cred;
101} 129}
102 130
103static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, 131static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
@@ -292,8 +320,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
292 spin_unlock(&clp->cl_lock); 320 spin_unlock(&clp->cl_lock);
293 if (sp == new) 321 if (sp == new)
294 get_rpccred(cred); 322 get_rpccred(cred);
295 else 323 else {
324 rpc_destroy_wait_queue(&new->so_sequence.wait);
296 kfree(new); 325 kfree(new);
326 }
297 return sp; 327 return sp;
298} 328}
299 329
@@ -310,6 +340,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
310 return; 340 return;
311 nfs4_remove_state_owner(clp, sp); 341 nfs4_remove_state_owner(clp, sp);
312 spin_unlock(&clp->cl_lock); 342 spin_unlock(&clp->cl_lock);
343 rpc_destroy_wait_queue(&sp->so_sequence.wait);
313 put_rpccred(cred); 344 put_rpccred(cred);
314 kfree(sp); 345 kfree(sp);
315} 346}
@@ -529,6 +560,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
529 spin_lock(&clp->cl_lock); 560 spin_lock(&clp->cl_lock);
530 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); 561 nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
531 spin_unlock(&clp->cl_lock); 562 spin_unlock(&clp->cl_lock);
563 rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
532 kfree(lsp); 564 kfree(lsp);
533} 565}
534 566
@@ -731,7 +763,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
731 list_add_tail(&seqid->list, &sequence->list); 763 list_add_tail(&seqid->list, &sequence->list);
732 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) 764 if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
733 goto unlock; 765 goto unlock;
734 rpc_sleep_on(&sequence->wait, task, NULL, NULL); 766 rpc_sleep_on(&sequence->wait, task, NULL);
735 status = -EAGAIN; 767 status = -EAGAIN;
736unlock: 768unlock:
737 spin_unlock(&sequence->lock); 769 spin_unlock(&sequence->lock);
@@ -920,10 +952,10 @@ restart_loop:
920 if (cred != NULL) { 952 if (cred != NULL) {
921 /* Yes there are: try to renew the old lease */ 953 /* Yes there are: try to renew the old lease */
922 status = nfs4_proc_renew(clp, cred); 954 status = nfs4_proc_renew(clp, cred);
955 put_rpccred(cred);
923 switch (status) { 956 switch (status) {
924 case 0: 957 case 0:
925 case -NFS4ERR_CB_PATH_DOWN: 958 case -NFS4ERR_CB_PATH_DOWN:
926 put_rpccred(cred);
927 goto out; 959 goto out;
928 case -NFS4ERR_STALE_CLIENTID: 960 case -NFS4ERR_STALE_CLIENTID:
929 case -NFS4ERR_LEASE_MOVED: 961 case -NFS4ERR_LEASE_MOVED:
@@ -932,14 +964,19 @@ restart_loop:
932 } else { 964 } else {
933 /* "reboot" to ensure we clear all state on the server */ 965 /* "reboot" to ensure we clear all state on the server */
934 clp->cl_boot_time = CURRENT_TIME; 966 clp->cl_boot_time = CURRENT_TIME;
935 cred = nfs4_get_setclientid_cred(clp);
936 } 967 }
937 /* We're going to have to re-establish a clientid */ 968 /* We're going to have to re-establish a clientid */
938 nfs4_state_mark_reclaim(clp); 969 nfs4_state_mark_reclaim(clp);
939 status = -ENOENT; 970 status = -ENOENT;
971 cred = nfs4_get_setclientid_cred(clp);
940 if (cred != NULL) { 972 if (cred != NULL) {
941 status = nfs4_init_client(clp, cred); 973 status = nfs4_init_client(clp, cred);
942 put_rpccred(cred); 974 put_rpccred(cred);
975 /* Handle case where the user hasn't set up machine creds */
976 if (status == -EACCES && cred == clp->cl_machine_cred) {
977 nfs4_clear_machine_cred(clp);
978 goto restart_loop;
979 }
943 } 980 }
944 if (status) 981 if (status)
945 goto out_error; 982 goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db1ed9c46ede..5a2d64927b35 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -110,7 +110,7 @@ static int nfs4_stat_to_errno(int);
110#define decode_savefh_maxsz (op_decode_hdr_maxsz) 110#define decode_savefh_maxsz (op_decode_hdr_maxsz)
111#define encode_restorefh_maxsz (op_encode_hdr_maxsz) 111#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
112#define decode_restorefh_maxsz (op_decode_hdr_maxsz) 112#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
113#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) 113#define encode_fsinfo_maxsz (encode_getattr_maxsz)
114#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 114#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11)
115#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 115#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
116#define decode_renew_maxsz (op_decode_hdr_maxsz) 116#define decode_renew_maxsz (op_decode_hdr_maxsz)
@@ -1191,8 +1191,8 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1191 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1191 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1192 WRITE32(attrs[0] & readdir->bitmask[0]); 1192 WRITE32(attrs[0] & readdir->bitmask[0]);
1193 WRITE32(attrs[1] & readdir->bitmask[1]); 1193 WRITE32(attrs[1] & readdir->bitmask[1]);
1194 dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n", 1194 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1195 __FUNCTION__, 1195 __func__,
1196 (unsigned long long)readdir->cookie, 1196 (unsigned long long)readdir->cookie,
1197 ((u32 *)readdir->verifier.data)[0], 1197 ((u32 *)readdir->verifier.data)[0],
1198 ((u32 *)readdir->verifier.data)[1], 1198 ((u32 *)readdir->verifier.data)[1],
@@ -2241,7 +2241,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2241 } 2241 }
2242 READ32(nfserr); 2242 READ32(nfserr);
2243 if (nfserr != NFS_OK) 2243 if (nfserr != NFS_OK)
2244 return -nfs4_stat_to_errno(nfserr); 2244 return nfs4_stat_to_errno(nfserr);
2245 return 0; 2245 return 0;
2246} 2246}
2247 2247
@@ -2291,7 +2291,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
2291 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; 2291 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
2292 } else 2292 } else
2293 bitmask[0] = bitmask[1] = 0; 2293 bitmask[0] = bitmask[1] = 0;
2294 dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]); 2294 dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
2295 return 0; 2295 return 0;
2296} 2296}
2297 2297
@@ -3005,6 +3005,8 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
3005 int status; 3005 int status;
3006 3006
3007 status = decode_op_hdr(xdr, OP_CLOSE); 3007 status = decode_op_hdr(xdr, OP_CLOSE);
3008 if (status != -EIO)
3009 nfs_increment_open_seqid(status, res->seqid);
3008 if (status) 3010 if (status)
3009 return status; 3011 return status;
3010 READ_BUF(NFS4_STATEID_SIZE); 3012 READ_BUF(NFS4_STATEID_SIZE);
@@ -3296,11 +3298,17 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3296 int status; 3298 int status;
3297 3299
3298 status = decode_op_hdr(xdr, OP_LOCK); 3300 status = decode_op_hdr(xdr, OP_LOCK);
3301 if (status == -EIO)
3302 goto out;
3299 if (status == 0) { 3303 if (status == 0) {
3300 READ_BUF(NFS4_STATEID_SIZE); 3304 READ_BUF(NFS4_STATEID_SIZE);
3301 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3305 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3302 } else if (status == -NFS4ERR_DENIED) 3306 } else if (status == -NFS4ERR_DENIED)
3303 return decode_lock_denied(xdr, NULL); 3307 status = decode_lock_denied(xdr, NULL);
3308 if (res->open_seqid != NULL)
3309 nfs_increment_open_seqid(status, res->open_seqid);
3310 nfs_increment_lock_seqid(status, res->lock_seqid);
3311out:
3304 return status; 3312 return status;
3305} 3313}
3306 3314
@@ -3319,6 +3327,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3319 int status; 3327 int status;
3320 3328
3321 status = decode_op_hdr(xdr, OP_LOCKU); 3329 status = decode_op_hdr(xdr, OP_LOCKU);
3330 if (status != -EIO)
3331 nfs_increment_lock_seqid(status, res->seqid);
3322 if (status == 0) { 3332 if (status == 0) {
3323 READ_BUF(NFS4_STATEID_SIZE); 3333 READ_BUF(NFS4_STATEID_SIZE);
3324 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3334 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
@@ -3384,6 +3394,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3384 int status; 3394 int status;
3385 3395
3386 status = decode_op_hdr(xdr, OP_OPEN); 3396 status = decode_op_hdr(xdr, OP_OPEN);
3397 if (status != -EIO)
3398 nfs_increment_open_seqid(status, res->seqid);
3387 if (status) 3399 if (status)
3388 return status; 3400 return status;
3389 READ_BUF(NFS4_STATEID_SIZE); 3401 READ_BUF(NFS4_STATEID_SIZE);
@@ -3416,6 +3428,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre
3416 int status; 3428 int status;
3417 3429
3418 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 3430 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3431 if (status != -EIO)
3432 nfs_increment_open_seqid(status, res->seqid);
3419 if (status) 3433 if (status)
3420 return status; 3434 return status;
3421 READ_BUF(NFS4_STATEID_SIZE); 3435 READ_BUF(NFS4_STATEID_SIZE);
@@ -3429,6 +3443,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re
3429 int status; 3443 int status;
3430 3444
3431 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 3445 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3446 if (status != -EIO)
3447 nfs_increment_open_seqid(status, res->seqid);
3432 if (status) 3448 if (status)
3433 return status; 3449 return status;
3434 READ_BUF(NFS4_STATEID_SIZE); 3450 READ_BUF(NFS4_STATEID_SIZE);
@@ -3481,7 +3497,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3481 size_t hdrlen; 3497 size_t hdrlen;
3482 u32 recvd, pglen = rcvbuf->page_len; 3498 u32 recvd, pglen = rcvbuf->page_len;
3483 __be32 *end, *entry, *p, *kaddr; 3499 __be32 *end, *entry, *p, *kaddr;
3484 unsigned int nr; 3500 unsigned int nr = 0;
3485 int status; 3501 int status;
3486 3502
3487 status = decode_op_hdr(xdr, OP_READDIR); 3503 status = decode_op_hdr(xdr, OP_READDIR);
@@ -3489,8 +3505,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3489 return status; 3505 return status;
3490 READ_BUF(8); 3506 READ_BUF(8);
3491 COPYMEM(readdir->verifier.data, 8); 3507 COPYMEM(readdir->verifier.data, 8);
3492 dprintk("%s: verifier = 0x%x%x\n", 3508 dprintk("%s: verifier = %08x:%08x\n",
3493 __FUNCTION__, 3509 __func__,
3494 ((u32 *)readdir->verifier.data)[0], 3510 ((u32 *)readdir->verifier.data)[0],
3495 ((u32 *)readdir->verifier.data)[1]); 3511 ((u32 *)readdir->verifier.data)[1]);
3496 3512
@@ -3505,7 +3521,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3505 kaddr = p = kmap_atomic(page, KM_USER0); 3521 kaddr = p = kmap_atomic(page, KM_USER0);
3506 end = p + ((pglen + readdir->pgbase) >> 2); 3522 end = p + ((pglen + readdir->pgbase) >> 2);
3507 entry = p; 3523 entry = p;
3508 for (nr = 0; *p++; nr++) { 3524
3525 /* Make sure the packet actually has a value_follows and EOF entry */
3526 if ((entry + 1) > end)
3527 goto short_pkt;
3528
3529 for (; *p++; nr++) {
3509 u32 len, attrlen, xlen; 3530 u32 len, attrlen, xlen;
3510 if (end - p < 3) 3531 if (end - p < 3)
3511 goto short_pkt; 3532 goto short_pkt;
@@ -3532,20 +3553,32 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3532 p += attrlen; /* attributes */ 3553 p += attrlen; /* attributes */
3533 entry = p; 3554 entry = p;
3534 } 3555 }
3535 if (!nr && (entry[0] != 0 || entry[1] == 0)) 3556 /*
3536 goto short_pkt; 3557 * Apparently some server sends responses that are a valid size, but
3558 * contain no entries, and have value_follows==0 and EOF==0. For
3559 * those, just set the EOF marker.
3560 */
3561 if (!nr && entry[1] == 0) {
3562 dprintk("NFS: readdir reply truncated!\n");
3563 entry[1] = 1;
3564 }
3537out: 3565out:
3538 kunmap_atomic(kaddr, KM_USER0); 3566 kunmap_atomic(kaddr, KM_USER0);
3539 return 0; 3567 return 0;
3540short_pkt: 3568short_pkt:
3569 /*
3570 * When we get a short packet there are 2 possibilities. We can
3571 * return an error, or fix up the response to look like a valid
3572 * response and return what we have so far. If there are no
3573 * entries and the packet was short, then return -EIO. If there
3574 * are valid entries in the response, return them and pretend that
3575 * the call was successful, but incomplete. The caller can retry the
3576 * readdir starting at the last cookie.
3577 */
3541 dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); 3578 dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
3542 entry[0] = entry[1] = 0; 3579 entry[0] = entry[1] = 0;
3543 /* truncate listing ? */ 3580 if (nr)
3544 if (!nr) { 3581 goto out;
3545 dprintk("NFS: readdir reply truncated!\n");
3546 entry[1] = 1;
3547 }
3548 goto out;
3549err_unmap: 3582err_unmap:
3550 kunmap_atomic(kaddr, KM_USER0); 3583 kunmap_atomic(kaddr, KM_USER0);
3551 return -errno_NFSERR_IO; 3584 return -errno_NFSERR_IO;
@@ -3727,7 +3760,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3727 READ_BUF(len); 3760 READ_BUF(len);
3728 return -NFSERR_CLID_INUSE; 3761 return -NFSERR_CLID_INUSE;
3729 } else 3762 } else
3730 return -nfs4_stat_to_errno(nfserr); 3763 return nfs4_stat_to_errno(nfserr);
3731 3764
3732 return 0; 3765 return 0;
3733} 3766}
@@ -4389,7 +4422,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
4389 if (!status) 4422 if (!status)
4390 status = decode_fsinfo(&xdr, fsinfo); 4423 status = decode_fsinfo(&xdr, fsinfo);
4391 if (!status) 4424 if (!status)
4392 status = -nfs4_stat_to_errno(hdr.status); 4425 status = nfs4_stat_to_errno(hdr.status);
4393 return status; 4426 return status;
4394} 4427}
4395 4428
@@ -4479,7 +4512,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
4479 if (!status) 4512 if (!status)
4480 status = decode_setclientid(&xdr, clp); 4513 status = decode_setclientid(&xdr, clp);
4481 if (!status) 4514 if (!status)
4482 status = -nfs4_stat_to_errno(hdr.status); 4515 status = nfs4_stat_to_errno(hdr.status);
4483 return status; 4516 return status;
4484} 4517}
4485 4518
@@ -4501,7 +4534,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
4501 if (!status) 4534 if (!status)
4502 status = decode_fsinfo(&xdr, fsinfo); 4535 status = decode_fsinfo(&xdr, fsinfo);
4503 if (!status) 4536 if (!status)
4504 status = -nfs4_stat_to_errno(hdr.status); 4537 status = nfs4_stat_to_errno(hdr.status);
4505 return status; 4538 return status;
4506} 4539}
4507 4540
@@ -4611,42 +4644,42 @@ static struct {
4611 int errno; 4644 int errno;
4612} nfs_errtbl[] = { 4645} nfs_errtbl[] = {
4613 { NFS4_OK, 0 }, 4646 { NFS4_OK, 0 },
4614 { NFS4ERR_PERM, EPERM }, 4647 { NFS4ERR_PERM, -EPERM },
4615 { NFS4ERR_NOENT, ENOENT }, 4648 { NFS4ERR_NOENT, -ENOENT },
4616 { NFS4ERR_IO, errno_NFSERR_IO }, 4649 { NFS4ERR_IO, -errno_NFSERR_IO},
4617 { NFS4ERR_NXIO, ENXIO }, 4650 { NFS4ERR_NXIO, -ENXIO },
4618 { NFS4ERR_ACCESS, EACCES }, 4651 { NFS4ERR_ACCESS, -EACCES },
4619 { NFS4ERR_EXIST, EEXIST }, 4652 { NFS4ERR_EXIST, -EEXIST },
4620 { NFS4ERR_XDEV, EXDEV }, 4653 { NFS4ERR_XDEV, -EXDEV },
4621 { NFS4ERR_NOTDIR, ENOTDIR }, 4654 { NFS4ERR_NOTDIR, -ENOTDIR },
4622 { NFS4ERR_ISDIR, EISDIR }, 4655 { NFS4ERR_ISDIR, -EISDIR },
4623 { NFS4ERR_INVAL, EINVAL }, 4656 { NFS4ERR_INVAL, -EINVAL },
4624 { NFS4ERR_FBIG, EFBIG }, 4657 { NFS4ERR_FBIG, -EFBIG },
4625 { NFS4ERR_NOSPC, ENOSPC }, 4658 { NFS4ERR_NOSPC, -ENOSPC },
4626 { NFS4ERR_ROFS, EROFS }, 4659 { NFS4ERR_ROFS, -EROFS },
4627 { NFS4ERR_MLINK, EMLINK }, 4660 { NFS4ERR_MLINK, -EMLINK },
4628 { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, 4661 { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
4629 { NFS4ERR_NOTEMPTY, ENOTEMPTY }, 4662 { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
4630 { NFS4ERR_DQUOT, EDQUOT }, 4663 { NFS4ERR_DQUOT, -EDQUOT },
4631 { NFS4ERR_STALE, ESTALE }, 4664 { NFS4ERR_STALE, -ESTALE },
4632 { NFS4ERR_BADHANDLE, EBADHANDLE }, 4665 { NFS4ERR_BADHANDLE, -EBADHANDLE },
4633 { NFS4ERR_BADOWNER, EINVAL }, 4666 { NFS4ERR_BADOWNER, -EINVAL },
4634 { NFS4ERR_BADNAME, EINVAL }, 4667 { NFS4ERR_BADNAME, -EINVAL },
4635 { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, 4668 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
4636 { NFS4ERR_NOTSUPP, ENOTSUPP }, 4669 { NFS4ERR_NOTSUPP, -ENOTSUPP },
4637 { NFS4ERR_TOOSMALL, ETOOSMALL }, 4670 { NFS4ERR_TOOSMALL, -ETOOSMALL },
4638 { NFS4ERR_SERVERFAULT, ESERVERFAULT }, 4671 { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
4639 { NFS4ERR_BADTYPE, EBADTYPE }, 4672 { NFS4ERR_BADTYPE, -EBADTYPE },
4640 { NFS4ERR_LOCKED, EAGAIN }, 4673 { NFS4ERR_LOCKED, -EAGAIN },
4641 { NFS4ERR_RESOURCE, EREMOTEIO }, 4674 { NFS4ERR_RESOURCE, -EREMOTEIO },
4642 { NFS4ERR_SYMLINK, ELOOP }, 4675 { NFS4ERR_SYMLINK, -ELOOP },
4643 { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, 4676 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
4644 { NFS4ERR_DEADLOCK, EDEADLK }, 4677 { NFS4ERR_DEADLOCK, -EDEADLK },
4645 { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs 4678 { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
4646 * to be handled by a 4679 * to be handled by a
4647 * middle-layer. 4680 * middle-layer.
4648 */ 4681 */
4649 { -1, EIO } 4682 { -1, -EIO }
4650}; 4683};
4651 4684
4652/* 4685/*
@@ -4663,14 +4696,14 @@ nfs4_stat_to_errno(int stat)
4663 } 4696 }
4664 if (stat <= 10000 || stat > 10100) { 4697 if (stat <= 10000 || stat > 10100) {
4665 /* The server is looney tunes. */ 4698 /* The server is looney tunes. */
4666 return ESERVERFAULT; 4699 return -ESERVERFAULT;
4667 } 4700 }
4668 /* If we cannot translate the error, the recovery routines should 4701 /* If we cannot translate the error, the recovery routines should
4669 * handle it. 4702 * handle it.
4670 * Note: remaining NFSv4 error codes have values > 10000, so should 4703 * Note: remaining NFSv4 error codes have values > 10000, so should
4671 * not conflict with native Linux error codes. 4704 * not conflict with native Linux error codes.
4672 */ 4705 */
4673 return stat; 4706 return -stat;
4674} 4707}
4675 4708
4676#define PROC(proc, argtype, restype) \ 4709#define PROC(proc, argtype, restype) \
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5a70be589bbe..16f57e0af999 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -58,22 +58,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
58 return p; 58 return p;
59} 59}
60 60
61static void nfs_readdata_rcu_free(struct rcu_head *head) 61static void nfs_readdata_free(struct nfs_read_data *p)
62{ 62{
63 struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
64 if (p && (p->pagevec != &p->page_array[0])) 63 if (p && (p->pagevec != &p->page_array[0]))
65 kfree(p->pagevec); 64 kfree(p->pagevec);
66 mempool_free(p, nfs_rdata_mempool); 65 mempool_free(p, nfs_rdata_mempool);
67} 66}
68 67
69static void nfs_readdata_free(struct nfs_read_data *rdata)
70{
71 call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
72}
73
74void nfs_readdata_release(void *data) 68void nfs_readdata_release(void *data)
75{ 69{
76 nfs_readdata_free(data); 70 struct nfs_read_data *rdata = data;
71
72 put_nfs_open_context(rdata->args.context);
73 nfs_readdata_free(rdata);
77} 74}
78 75
79static 76static
@@ -156,7 +153,7 @@ static void nfs_readpage_release(struct nfs_page *req)
156/* 153/*
157 * Set up the NFS read request struct 154 * Set up the NFS read request struct
158 */ 155 */
159static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 156static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
160 const struct rpc_call_ops *call_ops, 157 const struct rpc_call_ops *call_ops,
161 unsigned int count, unsigned int offset) 158 unsigned int count, unsigned int offset)
162{ 159{
@@ -174,6 +171,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
174 .rpc_message = &msg, 171 .rpc_message = &msg,
175 .callback_ops = call_ops, 172 .callback_ops = call_ops,
176 .callback_data = data, 173 .callback_data = data,
174 .workqueue = nfsiod_workqueue,
177 .flags = RPC_TASK_ASYNC | swap_flags, 175 .flags = RPC_TASK_ASYNC | swap_flags,
178 }; 176 };
179 177
@@ -186,7 +184,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
186 data->args.pgbase = req->wb_pgbase + offset; 184 data->args.pgbase = req->wb_pgbase + offset;
187 data->args.pages = data->pagevec; 185 data->args.pages = data->pagevec;
188 data->args.count = count; 186 data->args.count = count;
189 data->args.context = req->wb_context; 187 data->args.context = get_nfs_open_context(req->wb_context);
190 188
191 data->res.fattr = &data->fattr; 189 data->res.fattr = &data->fattr;
192 data->res.count = count; 190 data->res.count = count;
@@ -204,8 +202,10 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
204 (unsigned long long)data->args.offset); 202 (unsigned long long)data->args.offset);
205 203
206 task = rpc_run_task(&task_setup_data); 204 task = rpc_run_task(&task_setup_data);
207 if (!IS_ERR(task)) 205 if (IS_ERR(task))
208 rpc_put_task(task); 206 return PTR_ERR(task);
207 rpc_put_task(task);
208 return 0;
209} 209}
210 210
211static void 211static void
@@ -242,6 +242,7 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
242 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 242 size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
243 unsigned int offset; 243 unsigned int offset;
244 int requests = 0; 244 int requests = 0;
245 int ret = 0;
245 LIST_HEAD(list); 246 LIST_HEAD(list);
246 247
247 nfs_list_remove_request(req); 248 nfs_list_remove_request(req);
@@ -253,7 +254,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
253 data = nfs_readdata_alloc(1); 254 data = nfs_readdata_alloc(1);
254 if (!data) 255 if (!data)
255 goto out_bad; 256 goto out_bad;
256 INIT_LIST_HEAD(&data->pages);
257 list_add(&data->pages, &list); 257 list_add(&data->pages, &list);
258 requests++; 258 requests++;
259 nbytes -= len; 259 nbytes -= len;
@@ -264,6 +264,8 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
264 offset = 0; 264 offset = 0;
265 nbytes = count; 265 nbytes = count;
266 do { 266 do {
267 int ret2;
268
267 data = list_entry(list.next, struct nfs_read_data, pages); 269 data = list_entry(list.next, struct nfs_read_data, pages);
268 list_del_init(&data->pages); 270 list_del_init(&data->pages);
269 271
@@ -271,13 +273,15 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
271 273
272 if (nbytes < rsize) 274 if (nbytes < rsize)
273 rsize = nbytes; 275 rsize = nbytes;
274 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 276 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
275 rsize, offset); 277 rsize, offset);
278 if (ret == 0)
279 ret = ret2;
276 offset += rsize; 280 offset += rsize;
277 nbytes -= rsize; 281 nbytes -= rsize;
278 } while (nbytes != 0); 282 } while (nbytes != 0);
279 283
280 return 0; 284 return ret;
281 285
282out_bad: 286out_bad:
283 while (!list_empty(&list)) { 287 while (!list_empty(&list)) {
@@ -295,12 +299,12 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
295 struct nfs_page *req; 299 struct nfs_page *req;
296 struct page **pages; 300 struct page **pages;
297 struct nfs_read_data *data; 301 struct nfs_read_data *data;
302 int ret = -ENOMEM;
298 303
299 data = nfs_readdata_alloc(npages); 304 data = nfs_readdata_alloc(npages);
300 if (!data) 305 if (!data)
301 goto out_bad; 306 goto out_bad;
302 307
303 INIT_LIST_HEAD(&data->pages);
304 pages = data->pagevec; 308 pages = data->pagevec;
305 while (!list_empty(head)) { 309 while (!list_empty(head)) {
306 req = nfs_list_entry(head->next); 310 req = nfs_list_entry(head->next);
@@ -311,11 +315,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
311 } 315 }
312 req = nfs_list_entry(data->pages.next); 316 req = nfs_list_entry(data->pages.next);
313 317
314 nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 318 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
315 return 0;
316out_bad: 319out_bad:
317 nfs_async_read_error(head); 320 nfs_async_read_error(head);
318 return -ENOMEM; 321 return ret;
319} 322}
320 323
321/* 324/*
@@ -342,26 +345,25 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
342 return 0; 345 return 0;
343} 346}
344 347
345static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 348static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
346{ 349{
347 struct nfs_readargs *argp = &data->args; 350 struct nfs_readargs *argp = &data->args;
348 struct nfs_readres *resp = &data->res; 351 struct nfs_readres *resp = &data->res;
349 352
350 if (resp->eof || resp->count == argp->count) 353 if (resp->eof || resp->count == argp->count)
351 return 0; 354 return;
352 355
353 /* This is a short read! */ 356 /* This is a short read! */
354 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 357 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
355 /* Has the server at least made some progress? */ 358 /* Has the server at least made some progress? */
356 if (resp->count == 0) 359 if (resp->count == 0)
357 return 0; 360 return;
358 361
359 /* Yes, so retry the read at the end of the data */ 362 /* Yes, so retry the read at the end of the data */
360 argp->offset += resp->count; 363 argp->offset += resp->count;
361 argp->pgbase += resp->count; 364 argp->pgbase += resp->count;
362 argp->count -= resp->count; 365 argp->count -= resp->count;
363 rpc_restart_call(task); 366 rpc_restart_call(task);
364 return -EAGAIN;
365} 367}
366 368
367/* 369/*
@@ -370,29 +372,37 @@ static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
370static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) 372static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
371{ 373{
372 struct nfs_read_data *data = calldata; 374 struct nfs_read_data *data = calldata;
373 struct nfs_page *req = data->req;
374 struct page *page = req->wb_page;
375 375
376 if (nfs_readpage_result(task, data) != 0) 376 if (nfs_readpage_result(task, data) != 0)
377 return; 377 return;
378 if (task->tk_status < 0)
379 return;
378 380
379 if (likely(task->tk_status >= 0)) { 381 nfs_readpage_truncate_uninitialised_page(data);
380 nfs_readpage_truncate_uninitialised_page(data); 382 nfs_readpage_retry(task, data);
381 if (nfs_readpage_retry(task, data) != 0) 383}
382 return; 384
383 } 385static void nfs_readpage_release_partial(void *calldata)
384 if (unlikely(task->tk_status < 0)) 386{
387 struct nfs_read_data *data = calldata;
388 struct nfs_page *req = data->req;
389 struct page *page = req->wb_page;
390 int status = data->task.tk_status;
391
392 if (status < 0)
385 SetPageError(page); 393 SetPageError(page);
394
386 if (atomic_dec_and_test(&req->wb_complete)) { 395 if (atomic_dec_and_test(&req->wb_complete)) {
387 if (!PageError(page)) 396 if (!PageError(page))
388 SetPageUptodate(page); 397 SetPageUptodate(page);
389 nfs_readpage_release(req); 398 nfs_readpage_release(req);
390 } 399 }
400 nfs_readdata_release(calldata);
391} 401}
392 402
393static const struct rpc_call_ops nfs_read_partial_ops = { 403static const struct rpc_call_ops nfs_read_partial_ops = {
394 .rpc_call_done = nfs_readpage_result_partial, 404 .rpc_call_done = nfs_readpage_result_partial,
395 .rpc_release = nfs_readdata_release, 405 .rpc_release = nfs_readpage_release_partial,
396}; 406};
397 407
398static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) 408static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
@@ -427,29 +437,35 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
427 437
428 if (nfs_readpage_result(task, data) != 0) 438 if (nfs_readpage_result(task, data) != 0)
429 return; 439 return;
440 if (task->tk_status < 0)
441 return;
430 /* 442 /*
431 * Note: nfs_readpage_retry may change the values of 443 * Note: nfs_readpage_retry may change the values of
432 * data->args. In the multi-page case, we therefore need 444 * data->args. In the multi-page case, we therefore need
433 * to ensure that we call nfs_readpage_set_pages_uptodate() 445 * to ensure that we call nfs_readpage_set_pages_uptodate()
434 * first. 446 * first.
435 */ 447 */
436 if (likely(task->tk_status >= 0)) { 448 nfs_readpage_truncate_uninitialised_page(data);
437 nfs_readpage_truncate_uninitialised_page(data); 449 nfs_readpage_set_pages_uptodate(data);
438 nfs_readpage_set_pages_uptodate(data); 450 nfs_readpage_retry(task, data);
439 if (nfs_readpage_retry(task, data) != 0) 451}
440 return; 452
441 } 453static void nfs_readpage_release_full(void *calldata)
454{
455 struct nfs_read_data *data = calldata;
456
442 while (!list_empty(&data->pages)) { 457 while (!list_empty(&data->pages)) {
443 struct nfs_page *req = nfs_list_entry(data->pages.next); 458 struct nfs_page *req = nfs_list_entry(data->pages.next);
444 459
445 nfs_list_remove_request(req); 460 nfs_list_remove_request(req);
446 nfs_readpage_release(req); 461 nfs_readpage_release(req);
447 } 462 }
463 nfs_readdata_release(calldata);
448} 464}
449 465
450static const struct rpc_call_ops nfs_read_full_ops = { 466static const struct rpc_call_ops nfs_read_full_ops = {
451 .rpc_call_done = nfs_readpage_result_full, 467 .rpc_call_done = nfs_readpage_result_full,
452 .rpc_release = nfs_readdata_release, 468 .rpc_release = nfs_readpage_release_full,
453}; 469};
454 470
455/* 471/*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9219024f31a..7226a506f3ca 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -198,7 +198,7 @@ static match_table_t nfs_secflavor_tokens = {
198}; 198};
199 199
200 200
201static void nfs_umount_begin(struct vfsmount *, int); 201static void nfs_umount_begin(struct super_block *);
202static int nfs_statfs(struct dentry *, struct kstatfs *); 202static int nfs_statfs(struct dentry *, struct kstatfs *);
203static int nfs_show_options(struct seq_file *, struct vfsmount *); 203static int nfs_show_options(struct seq_file *, struct vfsmount *);
204static int nfs_show_stats(struct seq_file *, struct vfsmount *); 204static int nfs_show_stats(struct seq_file *, struct vfsmount *);
@@ -441,10 +441,52 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
441 return sec_flavours[i].str; 441 return sec_flavours[i].str;
442} 442}
443 443
444static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
445 int showdefaults)
446{
447 struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
448
449 switch (sap->sa_family) {
450 case AF_INET: {
451 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
452 seq_printf(m, ",mountaddr=" NIPQUAD_FMT,
453 NIPQUAD(sin->sin_addr.s_addr));
454 break;
455 }
456 case AF_INET6: {
457 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
458 seq_printf(m, ",mountaddr=" NIP6_FMT,
459 NIP6(sin6->sin6_addr));
460 break;
461 }
462 default:
463 if (showdefaults)
464 seq_printf(m, ",mountaddr=unspecified");
465 }
466
467 if (nfss->mountd_version || showdefaults)
468 seq_printf(m, ",mountvers=%u", nfss->mountd_version);
469 if (nfss->mountd_port || showdefaults)
470 seq_printf(m, ",mountport=%u", nfss->mountd_port);
471
472 switch (nfss->mountd_protocol) {
473 case IPPROTO_UDP:
474 seq_printf(m, ",mountproto=udp");
475 break;
476 case IPPROTO_TCP:
477 seq_printf(m, ",mountproto=tcp");
478 break;
479 default:
480 if (showdefaults)
481 seq_printf(m, ",mountproto=auto");
482 }
483}
484
444/* 485/*
445 * Describe the mount options in force on this server representation 486 * Describe the mount options in force on this server representation
446 */ 487 */
447static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) 488static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
489 int showdefaults)
448{ 490{
449 static const struct proc_nfs_info { 491 static const struct proc_nfs_info {
450 int flag; 492 int flag;
@@ -452,6 +494,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
452 const char *nostr; 494 const char *nostr;
453 } nfs_info[] = { 495 } nfs_info[] = {
454 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 496 { NFS_MOUNT_SOFT, ",soft", ",hard" },
497 { NFS_MOUNT_INTR, ",intr", ",nointr" },
498 { NFS_MOUNT_POSIX, ",posix", "" },
455 { NFS_MOUNT_NOCTO, ",nocto", "" }, 499 { NFS_MOUNT_NOCTO, ",nocto", "" },
456 { NFS_MOUNT_NOAC, ",noac", "" }, 500 { NFS_MOUNT_NOAC, ",noac", "" },
457 { NFS_MOUNT_NONLM, ",nolock", "" }, 501 { NFS_MOUNT_NONLM, ",nolock", "" },
@@ -462,18 +506,22 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
462 }; 506 };
463 const struct proc_nfs_info *nfs_infop; 507 const struct proc_nfs_info *nfs_infop;
464 struct nfs_client *clp = nfss->nfs_client; 508 struct nfs_client *clp = nfss->nfs_client;
465 509 u32 version = clp->rpc_ops->version;
466 seq_printf(m, ",vers=%d", clp->rpc_ops->version); 510
467 seq_printf(m, ",rsize=%d", nfss->rsize); 511 seq_printf(m, ",vers=%u", version);
468 seq_printf(m, ",wsize=%d", nfss->wsize); 512 seq_printf(m, ",rsize=%u", nfss->rsize);
513 seq_printf(m, ",wsize=%u", nfss->wsize);
514 if (nfss->bsize != 0)
515 seq_printf(m, ",bsize=%u", nfss->bsize);
516 seq_printf(m, ",namlen=%u", nfss->namelen);
469 if (nfss->acregmin != 3*HZ || showdefaults) 517 if (nfss->acregmin != 3*HZ || showdefaults)
470 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); 518 seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ);
471 if (nfss->acregmax != 60*HZ || showdefaults) 519 if (nfss->acregmax != 60*HZ || showdefaults)
472 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); 520 seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ);
473 if (nfss->acdirmin != 30*HZ || showdefaults) 521 if (nfss->acdirmin != 30*HZ || showdefaults)
474 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); 522 seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
475 if (nfss->acdirmax != 60*HZ || showdefaults) 523 if (nfss->acdirmax != 60*HZ || showdefaults)
476 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); 524 seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
477 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { 525 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
478 if (nfss->flags & nfs_infop->flag) 526 if (nfss->flags & nfs_infop->flag)
479 seq_puts(m, nfs_infop->str); 527 seq_puts(m, nfs_infop->str);
@@ -482,9 +530,24 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
482 } 530 }
483 seq_printf(m, ",proto=%s", 531 seq_printf(m, ",proto=%s",
484 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); 532 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
533 if (version == 4) {
534 if (nfss->port != NFS_PORT)
535 seq_printf(m, ",port=%u", nfss->port);
536 } else
537 if (nfss->port)
538 seq_printf(m, ",port=%u", nfss->port);
539
485 seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); 540 seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
486 seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); 541 seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
487 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); 542 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
543
544 if (version != 4)
545 nfs_show_mountd_options(m, nfss, showdefaults);
546
547#ifdef CONFIG_NFS_V4
548 if (clp->rpc_ops->version == 4)
549 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
550#endif
488} 551}
489 552
490/* 553/*
@@ -529,10 +592,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
529 592
530 seq_printf(m, "\n\tcaps:\t"); 593 seq_printf(m, "\n\tcaps:\t");
531 seq_printf(m, "caps=0x%x", nfss->caps); 594 seq_printf(m, "caps=0x%x", nfss->caps);
532 seq_printf(m, ",wtmult=%d", nfss->wtmult); 595 seq_printf(m, ",wtmult=%u", nfss->wtmult);
533 seq_printf(m, ",dtsize=%d", nfss->dtsize); 596 seq_printf(m, ",dtsize=%u", nfss->dtsize);
534 seq_printf(m, ",bsize=%d", nfss->bsize); 597 seq_printf(m, ",bsize=%u", nfss->bsize);
535 seq_printf(m, ",namelen=%d", nfss->namelen); 598 seq_printf(m, ",namlen=%u", nfss->namelen);
536 599
537#ifdef CONFIG_NFS_V4 600#ifdef CONFIG_NFS_V4
538 if (nfss->nfs_client->rpc_ops->version == 4) { 601 if (nfss->nfs_client->rpc_ops->version == 4) {
@@ -546,9 +609,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
546 /* 609 /*
547 * Display security flavor in effect for this mount 610 * Display security flavor in effect for this mount
548 */ 611 */
549 seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); 612 seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor);
550 if (auth->au_flavor) 613 if (auth->au_flavor)
551 seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); 614 seq_printf(m, ",pseudoflavor=%u", auth->au_flavor);
552 615
553 /* 616 /*
554 * Display superblock I/O counters 617 * Display superblock I/O counters
@@ -584,13 +647,11 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
584 * Begin unmount by attempting to remove all automounted mountpoints we added 647 * Begin unmount by attempting to remove all automounted mountpoints we added
585 * in response to xdev traversals and referrals 648 * in response to xdev traversals and referrals
586 */ 649 */
587static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 650static void nfs_umount_begin(struct super_block *sb)
588{ 651{
589 struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); 652 struct nfs_server *server = NFS_SB(sb);
590 struct rpc_clnt *rpc; 653 struct rpc_clnt *rpc;
591 654
592 if (!(flags & MNT_FORCE))
593 return;
594 /* -EIO all pending I/O */ 655 /* -EIO all pending I/O */
595 rpc = server->client_acl; 656 rpc = server->client_acl;
596 if (!IS_ERR(rpc)) 657 if (!IS_ERR(rpc))
@@ -683,7 +744,6 @@ static int nfs_parse_mount_options(char *raw,
683 struct nfs_parsed_mount_data *mnt) 744 struct nfs_parsed_mount_data *mnt)
684{ 745{
685 char *p, *string, *secdata; 746 char *p, *string, *secdata;
686 unsigned short port = 0;
687 int rc; 747 int rc;
688 748
689 if (!raw) { 749 if (!raw) {
@@ -798,7 +858,7 @@ static int nfs_parse_mount_options(char *raw,
798 return 0; 858 return 0;
799 if (option < 0 || option > 65535) 859 if (option < 0 || option > 65535)
800 return 0; 860 return 0;
801 port = option; 861 mnt->nfs_server.port = option;
802 break; 862 break;
803 case Opt_rsize: 863 case Opt_rsize:
804 if (match_int(args, &mnt->rsize)) 864 if (match_int(args, &mnt->rsize))
@@ -1048,7 +1108,8 @@ static int nfs_parse_mount_options(char *raw,
1048 } 1108 }
1049 } 1109 }
1050 1110
1051 nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port); 1111 nfs_set_port((struct sockaddr *)&mnt->nfs_server.address,
1112 mnt->nfs_server.port);
1052 1113
1053 return 1; 1114 return 1;
1054 1115
@@ -1169,7 +1230,9 @@ static int nfs_validate_mount_data(void *options,
1169 args->acregmax = 60; 1230 args->acregmax = 60;
1170 args->acdirmin = 30; 1231 args->acdirmin = 30;
1171 args->acdirmax = 60; 1232 args->acdirmax = 60;
1233 args->mount_server.port = 0; /* autobind unless user sets port */
1172 args->mount_server.protocol = XPRT_TRANSPORT_UDP; 1234 args->mount_server.protocol = XPRT_TRANSPORT_UDP;
1235 args->nfs_server.port = 0; /* autobind unless user sets port */
1173 args->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1236 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1174 1237
1175 switch (data->version) { 1238 switch (data->version) {
@@ -1208,7 +1271,6 @@ static int nfs_validate_mount_data(void *options,
1208 args->flags = data->flags; 1271 args->flags = data->flags;
1209 args->rsize = data->rsize; 1272 args->rsize = data->rsize;
1210 args->wsize = data->wsize; 1273 args->wsize = data->wsize;
1211 args->flags = data->flags;
1212 args->timeo = data->timeo; 1274 args->timeo = data->timeo;
1213 args->retrans = data->retrans; 1275 args->retrans = data->retrans;
1214 args->acregmin = data->acregmin; 1276 args->acregmin = data->acregmin;
@@ -1230,6 +1292,8 @@ static int nfs_validate_mount_data(void *options,
1230 args->namlen = data->namlen; 1292 args->namlen = data->namlen;
1231 args->bsize = data->bsize; 1293 args->bsize = data->bsize;
1232 args->auth_flavors[0] = data->pseudoflavor; 1294 args->auth_flavors[0] = data->pseudoflavor;
1295 if (!args->nfs_server.hostname)
1296 goto out_nomem;
1233 1297
1234 /* 1298 /*
1235 * The legacy version 6 binary mount data from userspace has a 1299 * The legacy version 6 binary mount data from userspace has a
@@ -1276,6 +1340,8 @@ static int nfs_validate_mount_data(void *options,
1276 len = c - dev_name; 1340 len = c - dev_name;
1277 /* N.B. caller will free nfs_server.hostname in all cases */ 1341 /* N.B. caller will free nfs_server.hostname in all cases */
1278 args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); 1342 args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
1343 if (!args->nfs_server.hostname)
1344 goto out_nomem;
1279 1345
1280 c++; 1346 c++;
1281 if (strlen(c) > NFS_MAXPATHLEN) 1347 if (strlen(c) > NFS_MAXPATHLEN)
@@ -1319,6 +1385,10 @@ out_v3_not_compiled:
1319 return -EPROTONOSUPPORT; 1385 return -EPROTONOSUPPORT;
1320#endif /* !CONFIG_NFS_V3 */ 1386#endif /* !CONFIG_NFS_V3 */
1321 1387
1388out_nomem:
1389 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
1390 return -ENOMEM;
1391
1322out_no_address: 1392out_no_address:
1323 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); 1393 dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
1324 return -EINVAL; 1394 return -EINVAL;
@@ -1505,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
1505 return nfs_compare_mount_options(sb, server, mntflags); 1575 return nfs_compare_mount_options(sb, server, mntflags);
1506} 1576}
1507 1577
1578static int nfs_bdi_register(struct nfs_server *server)
1579{
1580 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
1581}
1582
1508static int nfs_get_sb(struct file_system_type *fs_type, 1583static int nfs_get_sb(struct file_system_type *fs_type,
1509 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1584 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1510{ 1585{
@@ -1547,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1547 if (s->s_fs_info != server) { 1622 if (s->s_fs_info != server) {
1548 nfs_free_server(server); 1623 nfs_free_server(server);
1549 server = NULL; 1624 server = NULL;
1625 } else {
1626 error = nfs_bdi_register(server);
1627 if (error)
1628 goto error_splat_super;
1550 } 1629 }
1551 1630
1552 if (!s->s_root) { 1631 if (!s->s_root) {
@@ -1594,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
1594{ 1673{
1595 struct nfs_server *server = NFS_SB(s); 1674 struct nfs_server *server = NFS_SB(s);
1596 1675
1676 bdi_unregister(&server->backing_dev_info);
1597 kill_anon_super(s); 1677 kill_anon_super(s);
1598 nfs_free_server(server); 1678 nfs_free_server(server);
1599} 1679}
@@ -1638,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1638 if (s->s_fs_info != server) { 1718 if (s->s_fs_info != server) {
1639 nfs_free_server(server); 1719 nfs_free_server(server);
1640 server = NULL; 1720 server = NULL;
1721 } else {
1722 error = nfs_bdi_register(server);
1723 if (error)
1724 goto error_splat_super;
1641 } 1725 }
1642 1726
1643 if (!s->s_root) { 1727 if (!s->s_root) {
@@ -1706,28 +1790,6 @@ static void nfs4_fill_super(struct super_block *sb)
1706} 1790}
1707 1791
1708/* 1792/*
1709 * If the user didn't specify a port, set the port number to
1710 * the NFS version 4 default port.
1711 */
1712static void nfs4_default_port(struct sockaddr *sap)
1713{
1714 switch (sap->sa_family) {
1715 case AF_INET: {
1716 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
1717 if (ap->sin_port == 0)
1718 ap->sin_port = htons(NFS_PORT);
1719 break;
1720 }
1721 case AF_INET6: {
1722 struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
1723 if (ap->sin6_port == 0)
1724 ap->sin6_port = htons(NFS_PORT);
1725 break;
1726 }
1727 }
1728}
1729
1730/*
1731 * Validate NFSv4 mount options 1793 * Validate NFSv4 mount options
1732 */ 1794 */
1733static int nfs4_validate_mount_data(void *options, 1795static int nfs4_validate_mount_data(void *options,
@@ -1751,6 +1813,7 @@ static int nfs4_validate_mount_data(void *options,
1751 args->acregmax = 60; 1813 args->acregmax = 60;
1752 args->acdirmin = 30; 1814 args->acdirmin = 30;
1753 args->acdirmax = 60; 1815 args->acdirmax = 60;
1816 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */
1754 args->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1817 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1755 1818
1756 switch (data->version) { 1819 switch (data->version) {
@@ -1767,9 +1830,6 @@ static int nfs4_validate_mount_data(void *options,
1767 &args->nfs_server.address)) 1830 &args->nfs_server.address))
1768 goto out_no_address; 1831 goto out_no_address;
1769 1832
1770 nfs4_default_port((struct sockaddr *)
1771 &args->nfs_server.address);
1772
1773 switch (data->auth_flavourlen) { 1833 switch (data->auth_flavourlen) {
1774 case 0: 1834 case 0:
1775 args->auth_flavors[0] = RPC_AUTH_UNIX; 1835 args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1827,9 +1887,6 @@ static int nfs4_validate_mount_data(void *options,
1827 &args->nfs_server.address)) 1887 &args->nfs_server.address))
1828 return -EINVAL; 1888 return -EINVAL;
1829 1889
1830 nfs4_default_port((struct sockaddr *)
1831 &args->nfs_server.address);
1832
1833 switch (args->auth_flavor_len) { 1890 switch (args->auth_flavor_len) {
1834 case 0: 1891 case 0:
1835 args->auth_flavors[0] = RPC_AUTH_UNIX; 1892 args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1852,12 +1909,16 @@ static int nfs4_validate_mount_data(void *options,
1852 return -ENAMETOOLONG; 1909 return -ENAMETOOLONG;
1853 /* N.B. caller will free nfs_server.hostname in all cases */ 1910 /* N.B. caller will free nfs_server.hostname in all cases */
1854 args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); 1911 args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
1912 if (!args->nfs_server.hostname)
1913 goto out_nomem;
1855 1914
1856 c++; /* step over the ':' */ 1915 c++; /* step over the ':' */
1857 len = strlen(c); 1916 len = strlen(c);
1858 if (len > NFS4_MAXPATHLEN) 1917 if (len > NFS4_MAXPATHLEN)
1859 return -ENAMETOOLONG; 1918 return -ENAMETOOLONG;
1860 args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); 1919 args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
1920 if (!args->nfs_server.export_path)
1921 goto out_nomem;
1861 1922
1862 dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); 1923 dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
1863 1924
@@ -1879,6 +1940,10 @@ out_inval_auth:
1879 data->auth_flavourlen); 1940 data->auth_flavourlen);
1880 return -EINVAL; 1941 return -EINVAL;
1881 1942
1943out_nomem:
1944 dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n");
1945 return -ENOMEM;
1946
1882out_no_address: 1947out_no_address:
1883 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); 1948 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
1884 return -EINVAL; 1949 return -EINVAL;
@@ -1933,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1933 if (s->s_fs_info != server) { 1998 if (s->s_fs_info != server) {
1934 nfs_free_server(server); 1999 nfs_free_server(server);
1935 server = NULL; 2000 server = NULL;
2001 } else {
2002 error = nfs_bdi_register(server);
2003 if (error)
2004 goto error_splat_super;
1936 } 2005 }
1937 2006
1938 if (!s->s_root) { 2007 if (!s->s_root) {
@@ -2019,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2019 if (s->s_fs_info != server) { 2088 if (s->s_fs_info != server) {
2020 nfs_free_server(server); 2089 nfs_free_server(server);
2021 server = NULL; 2090 server = NULL;
2091 } else {
2092 error = nfs_bdi_register(server);
2093 if (error)
2094 goto error_splat_super;
2022 } 2095 }
2023 2096
2024 if (!s->s_root) { 2097 if (!s->s_root) {
@@ -2098,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2098 if (s->s_fs_info != server) { 2171 if (s->s_fs_info != server) {
2099 nfs_free_server(server); 2172 nfs_free_server(server);
2100 server = NULL; 2173 server = NULL;
2174 } else {
2175 error = nfs_bdi_register(server);
2176 if (error)
2177 goto error_splat_super;
2101 } 2178 }
2102 2179
2103 if (!s->s_root) { 2180 if (!s->s_root) {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 83e865a16ad1..412738dbfbc7 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -10,7 +10,6 @@
10 * nfs symlink handling code 10 * nfs symlink handling code
11 */ 11 */
12 12
13#define NFS_NEED_XDR_TYPES
14#include <linux/time.h> 13#include <linux/time.h>
15#include <linux/errno.h> 14#include <linux/errno.h>
16#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 757415363422..3adf8b266461 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -234,7 +234,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
234 if (data == NULL) 234 if (data == NULL)
235 goto out; 235 goto out;
236 236
237 data->cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); 237 data->cred = rpc_lookup_cred();
238 if (IS_ERR(data->cred)) { 238 if (IS_ERR(data->cred)) {
239 status = PTR_ERR(data->cred); 239 status = PTR_ERR(data->cred);
240 goto out_free; 240 goto out_free;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bed63416a55b..1ade11d1ba07 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,7 +48,7 @@ static struct kmem_cache *nfs_wdata_cachep;
48static mempool_t *nfs_wdata_mempool; 48static mempool_t *nfs_wdata_mempool;
49static mempool_t *nfs_commit_mempool; 49static mempool_t *nfs_commit_mempool;
50 50
51struct nfs_write_data *nfs_commit_alloc(void) 51struct nfs_write_data *nfs_commitdata_alloc(void)
52{ 52{
53 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); 53 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
54 54
@@ -59,19 +59,13 @@ struct nfs_write_data *nfs_commit_alloc(void)
59 return p; 59 return p;
60} 60}
61 61
62static void nfs_commit_rcu_free(struct rcu_head *head) 62void nfs_commit_free(struct nfs_write_data *p)
63{ 63{
64 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
65 if (p && (p->pagevec != &p->page_array[0])) 64 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 65 kfree(p->pagevec);
67 mempool_free(p, nfs_commit_mempool); 66 mempool_free(p, nfs_commit_mempool);
68} 67}
69 68
70void nfs_commit_free(struct nfs_write_data *wdata)
71{
72 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
73}
74
75struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 69struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
76{ 70{
77 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 71 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
@@ -93,21 +87,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
93 return p; 87 return p;
94} 88}
95 89
96static void nfs_writedata_rcu_free(struct rcu_head *head) 90static void nfs_writedata_free(struct nfs_write_data *p)
97{ 91{
98 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
99 if (p && (p->pagevec != &p->page_array[0])) 92 if (p && (p->pagevec != &p->page_array[0]))
100 kfree(p->pagevec); 93 kfree(p->pagevec);
101 mempool_free(p, nfs_wdata_mempool); 94 mempool_free(p, nfs_wdata_mempool);
102} 95}
103 96
104static void nfs_writedata_free(struct nfs_write_data *wdata) 97void nfs_writedata_release(void *data)
105{ 98{
106 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); 99 struct nfs_write_data *wdata = data;
107}
108 100
109void nfs_writedata_release(void *wdata) 101 put_nfs_open_context(wdata->args.context);
110{
111 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
112} 103}
113 104
@@ -291,8 +282,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
291 spin_unlock(&inode->i_lock); 282 spin_unlock(&inode->i_lock);
292 if (!nfs_pageio_add_request(pgio, req)) { 283 if (!nfs_pageio_add_request(pgio, req)) {
293 nfs_redirty_request(req); 284 nfs_redirty_request(req);
294 nfs_end_page_writeback(page);
295 nfs_clear_page_tag_locked(req);
296 return pgio->pg_error; 285 return pgio->pg_error;
297 } 286 }
298 return 0; 287 return 0;
@@ -366,15 +355,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
366/* 355/*
367 * Insert a write request into an inode 356 * Insert a write request into an inode
368 */ 357 */
369static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 358static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
370{ 359{
371 struct nfs_inode *nfsi = NFS_I(inode); 360 struct nfs_inode *nfsi = NFS_I(inode);
372 int error; 361 int error;
373 362
374 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 363 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
375 BUG_ON(error == -EEXIST); 364 BUG_ON(error);
376 if (error)
377 return error;
378 if (!nfsi->npages) { 365 if (!nfsi->npages) {
379 igrab(inode); 366 igrab(inode);
380 if (nfs_have_delegation(inode, FMODE_WRITE)) 367 if (nfs_have_delegation(inode, FMODE_WRITE))
@@ -384,8 +371,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
384 set_page_private(req->wb_page, (unsigned long)req); 371 set_page_private(req->wb_page, (unsigned long)req);
385 nfsi->npages++; 372 nfsi->npages++;
386 kref_get(&req->wb_kref); 373 kref_get(&req->wb_kref);
387 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); 374 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
388 return 0; 375 NFS_PAGE_TAG_LOCKED);
389} 376}
390 377
391/* 378/*
@@ -413,7 +400,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
413} 400}
414 401
415static void 402static void
416nfs_redirty_request(struct nfs_page *req) 403nfs_mark_request_dirty(struct nfs_page *req)
417{ 404{
418 __set_page_dirty_nobuffers(req->wb_page); 405 __set_page_dirty_nobuffers(req->wb_page);
419} 406}
@@ -467,7 +454,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
467 return 1; 454 return 1;
468 } 455 }
469 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 456 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
470 nfs_redirty_request(req); 457 nfs_mark_request_dirty(req);
471 return 1; 458 return 1;
472 } 459 }
473 return 0; 460 return 0;
@@ -597,6 +584,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
597 /* Loop over all inode entries and see if we find 584 /* Loop over all inode entries and see if we find
598 * A request for the page we wish to update 585 * A request for the page we wish to update
599 */ 586 */
587 if (new) {
588 if (radix_tree_preload(GFP_NOFS)) {
589 nfs_release_request(new);
590 return ERR_PTR(-ENOMEM);
591 }
592 }
593
600 spin_lock(&inode->i_lock); 594 spin_lock(&inode->i_lock);
601 req = nfs_page_find_request_locked(page); 595 req = nfs_page_find_request_locked(page);
602 if (req) { 596 if (req) {
@@ -607,28 +601,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
607 error = nfs_wait_on_request(req); 601 error = nfs_wait_on_request(req);
608 nfs_release_request(req); 602 nfs_release_request(req);
609 if (error < 0) { 603 if (error < 0) {
610 if (new) 604 if (new) {
605 radix_tree_preload_end();
611 nfs_release_request(new); 606 nfs_release_request(new);
607 }
612 return ERR_PTR(error); 608 return ERR_PTR(error);
613 } 609 }
614 continue; 610 continue;
615 } 611 }
616 spin_unlock(&inode->i_lock); 612 spin_unlock(&inode->i_lock);
617 if (new) 613 if (new) {
614 radix_tree_preload_end();
618 nfs_release_request(new); 615 nfs_release_request(new);
616 }
619 break; 617 break;
620 } 618 }
621 619
622 if (new) { 620 if (new) {
623 int error;
624 nfs_lock_request_dontget(new); 621 nfs_lock_request_dontget(new);
625 error = nfs_inode_add_request(inode, new); 622 nfs_inode_add_request(inode, new);
626 if (error) {
627 spin_unlock(&inode->i_lock);
628 nfs_unlock_request(new);
629 return ERR_PTR(error);
630 }
631 spin_unlock(&inode->i_lock); 623 spin_unlock(&inode->i_lock);
624 radix_tree_preload_end();
632 req = new; 625 req = new;
633 goto zero_page; 626 goto zero_page;
634 } 627 }
@@ -785,7 +778,7 @@ static int flush_task_priority(int how)
785/* 778/*
786 * Set up the argument/result storage required for the RPC call. 779 * Set up the argument/result storage required for the RPC call.
787 */ 780 */
788static void nfs_write_rpcsetup(struct nfs_page *req, 781static int nfs_write_rpcsetup(struct nfs_page *req,
789 struct nfs_write_data *data, 782 struct nfs_write_data *data,
790 const struct rpc_call_ops *call_ops, 783 const struct rpc_call_ops *call_ops,
791 unsigned int count, unsigned int offset, 784 unsigned int count, unsigned int offset,
@@ -806,6 +799,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
806 .rpc_message = &msg, 799 .rpc_message = &msg,
807 .callback_ops = call_ops, 800 .callback_ops = call_ops,
808 .callback_data = data, 801 .callback_data = data,
802 .workqueue = nfsiod_workqueue,
809 .flags = flags, 803 .flags = flags,
810 .priority = priority, 804 .priority = priority,
811 }; 805 };
@@ -822,7 +816,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
822 data->args.pgbase = req->wb_pgbase + offset; 816 data->args.pgbase = req->wb_pgbase + offset;
823 data->args.pages = data->pagevec; 817 data->args.pages = data->pagevec;
824 data->args.count = count; 818 data->args.count = count;
825 data->args.context = req->wb_context; 819 data->args.context = get_nfs_open_context(req->wb_context);
826 data->args.stable = NFS_UNSTABLE; 820 data->args.stable = NFS_UNSTABLE;
827 if (how & FLUSH_STABLE) { 821 if (how & FLUSH_STABLE) {
828 data->args.stable = NFS_DATA_SYNC; 822 data->args.stable = NFS_DATA_SYNC;
@@ -847,8 +841,21 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
847 (unsigned long long)data->args.offset); 841 (unsigned long long)data->args.offset);
848 842
849 task = rpc_run_task(&task_setup_data); 843 task = rpc_run_task(&task_setup_data);
850 if (!IS_ERR(task)) 844 if (IS_ERR(task))
851 rpc_put_task(task); 845 return PTR_ERR(task);
846 rpc_put_task(task);
847 return 0;
848}
849
850/* If a nfs_flush_* function fails, it should remove reqs from @head and
851 * call this on each, which will prepare them to be retried on next
852 * writeback using standard nfs.
853 */
854static void nfs_redirty_request(struct nfs_page *req)
855{
856 nfs_mark_request_dirty(req);
857 nfs_end_page_writeback(req->wb_page);
858 nfs_clear_page_tag_locked(req);
852} 859}
853 860
854/* 861/*
@@ -863,6 +870,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
863 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 870 size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
864 unsigned int offset; 871 unsigned int offset;
865 int requests = 0; 872 int requests = 0;
873 int ret = 0;
866 LIST_HEAD(list); 874 LIST_HEAD(list);
867 875
868 nfs_list_remove_request(req); 876 nfs_list_remove_request(req);
@@ -884,6 +892,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
884 offset = 0; 892 offset = 0;
885 nbytes = count; 893 nbytes = count;
886 do { 894 do {
895 int ret2;
896
887 data = list_entry(list.next, struct nfs_write_data, pages); 897 data = list_entry(list.next, struct nfs_write_data, pages);
888 list_del_init(&data->pages); 898 list_del_init(&data->pages);
889 899
@@ -891,13 +901,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
891 901
892 if (nbytes < wsize) 902 if (nbytes < wsize)
893 wsize = nbytes; 903 wsize = nbytes;
894 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 904 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
895 wsize, offset, how); 905 wsize, offset, how);
906 if (ret == 0)
907 ret = ret2;
896 offset += wsize; 908 offset += wsize;
897 nbytes -= wsize; 909 nbytes -= wsize;
898 } while (nbytes != 0); 910 } while (nbytes != 0);
899 911
900 return 0; 912 return ret;
901 913
902out_bad: 914out_bad:
903 while (!list_empty(&list)) { 915 while (!list_empty(&list)) {
@@ -906,8 +918,6 @@ out_bad:
906 nfs_writedata_release(data); 918 nfs_writedata_release(data);
907 } 919 }
908 nfs_redirty_request(req); 920 nfs_redirty_request(req);
909 nfs_end_page_writeback(req->wb_page);
910 nfs_clear_page_tag_locked(req);
911 return -ENOMEM; 921 return -ENOMEM;
912} 922}
913 923
@@ -940,16 +950,12 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
940 req = nfs_list_entry(data->pages.next); 950 req = nfs_list_entry(data->pages.next);
941 951
942 /* Set up the argument struct */ 952 /* Set up the argument struct */
943 nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 953 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
944
945 return 0;
946 out_bad: 954 out_bad:
947 while (!list_empty(head)) { 955 while (!list_empty(head)) {
948 req = nfs_list_entry(head->next); 956 req = nfs_list_entry(head->next);
949 nfs_list_remove_request(req); 957 nfs_list_remove_request(req);
950 nfs_redirty_request(req); 958 nfs_redirty_request(req);
951 nfs_end_page_writeback(req->wb_page);
952 nfs_clear_page_tag_locked(req);
953 } 959 }
954 return -ENOMEM; 960 return -ENOMEM;
955} 961}
@@ -972,7 +978,6 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
972{ 978{
973 struct nfs_write_data *data = calldata; 979 struct nfs_write_data *data = calldata;
974 struct nfs_page *req = data->req; 980 struct nfs_page *req = data->req;
975 struct page *page = req->wb_page;
976 981
977 dprintk("NFS: write (%s/%Ld %d@%Ld)", 982 dprintk("NFS: write (%s/%Ld %d@%Ld)",
978 req->wb_context->path.dentry->d_inode->i_sb->s_id, 983 req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -980,13 +985,20 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
980 req->wb_bytes, 985 req->wb_bytes,
981 (long long)req_offset(req)); 986 (long long)req_offset(req));
982 987
983 if (nfs_writeback_done(task, data) != 0) 988 nfs_writeback_done(task, data);
984 return; 989}
985 990
986 if (task->tk_status < 0) { 991static void nfs_writeback_release_partial(void *calldata)
992{
993 struct nfs_write_data *data = calldata;
994 struct nfs_page *req = data->req;
995 struct page *page = req->wb_page;
996 int status = data->task.tk_status;
997
998 if (status < 0) {
987 nfs_set_pageerror(page); 999 nfs_set_pageerror(page);
988 nfs_context_set_write_error(req->wb_context, task->tk_status); 1000 nfs_context_set_write_error(req->wb_context, status);
989 dprintk(", error = %d\n", task->tk_status); 1001 dprintk(", error = %d\n", status);
990 goto out; 1002 goto out;
991 } 1003 }
992 1004
@@ -1011,11 +1023,12 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1011out: 1023out:
1012 if (atomic_dec_and_test(&req->wb_complete)) 1024 if (atomic_dec_and_test(&req->wb_complete))
1013 nfs_writepage_release(req); 1025 nfs_writepage_release(req);
1026 nfs_writedata_release(calldata);
1014} 1027}
1015 1028
1016static const struct rpc_call_ops nfs_write_partial_ops = { 1029static const struct rpc_call_ops nfs_write_partial_ops = {
1017 .rpc_call_done = nfs_writeback_done_partial, 1030 .rpc_call_done = nfs_writeback_done_partial,
1018 .rpc_release = nfs_writedata_release, 1031 .rpc_release = nfs_writeback_release_partial,
1019}; 1032};
1020 1033
1021/* 1034/*
@@ -1028,17 +1041,21 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
1028static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) 1041static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1029{ 1042{
1030 struct nfs_write_data *data = calldata; 1043 struct nfs_write_data *data = calldata;
1031 struct nfs_page *req;
1032 struct page *page;
1033 1044
1034 if (nfs_writeback_done(task, data) != 0) 1045 nfs_writeback_done(task, data);
1035 return; 1046}
1047
1048static void nfs_writeback_release_full(void *calldata)
1049{
1050 struct nfs_write_data *data = calldata;
1051 int status = data->task.tk_status;
1036 1052
1037 /* Update attributes as result of writeback. */ 1053 /* Update attributes as result of writeback. */
1038 while (!list_empty(&data->pages)) { 1054 while (!list_empty(&data->pages)) {
1039 req = nfs_list_entry(data->pages.next); 1055 struct nfs_page *req = nfs_list_entry(data->pages.next);
1056 struct page *page = req->wb_page;
1057
1040 nfs_list_remove_request(req); 1058 nfs_list_remove_request(req);
1041 page = req->wb_page;
1042 1059
1043 dprintk("NFS: write (%s/%Ld %d@%Ld)", 1060 dprintk("NFS: write (%s/%Ld %d@%Ld)",
1044 req->wb_context->path.dentry->d_inode->i_sb->s_id, 1061 req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -1046,10 +1063,10 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1046 req->wb_bytes, 1063 req->wb_bytes,
1047 (long long)req_offset(req)); 1064 (long long)req_offset(req));
1048 1065
1049 if (task->tk_status < 0) { 1066 if (status < 0) {
1050 nfs_set_pageerror(page); 1067 nfs_set_pageerror(page);
1051 nfs_context_set_write_error(req->wb_context, task->tk_status); 1068 nfs_context_set_write_error(req->wb_context, status);
1052 dprintk(", error = %d\n", task->tk_status); 1069 dprintk(", error = %d\n", status);
1053 goto remove_request; 1070 goto remove_request;
1054 } 1071 }
1055 1072
@@ -1069,11 +1086,12 @@ remove_request:
1069 next: 1086 next:
1070 nfs_clear_page_tag_locked(req); 1087 nfs_clear_page_tag_locked(req);
1071 } 1088 }
1089 nfs_writedata_release(calldata);
1072} 1090}
1073 1091
1074static const struct rpc_call_ops nfs_write_full_ops = { 1092static const struct rpc_call_ops nfs_write_full_ops = {
1075 .rpc_call_done = nfs_writeback_done_full, 1093 .rpc_call_done = nfs_writeback_done_full,
1076 .rpc_release = nfs_writedata_release, 1094 .rpc_release = nfs_writeback_release_full,
1077}; 1095};
1078 1096
1079 1097
@@ -1159,15 +1177,18 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1159 1177
1160 1178
1161#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1179#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1162void nfs_commit_release(void *wdata) 1180void nfs_commitdata_release(void *data)
1163{ 1181{
1182 struct nfs_write_data *wdata = data;
1183
1184 put_nfs_open_context(wdata->args.context);
1164 nfs_commit_free(wdata); 1185 nfs_commit_free(wdata);
1165} 1186}
1166 1187
1167/* 1188/*
1168 * Set up the argument/result storage required for the RPC call. 1189 * Set up the argument/result storage required for the RPC call.
1169 */ 1190 */
1170static void nfs_commit_rpcsetup(struct list_head *head, 1191static int nfs_commit_rpcsetup(struct list_head *head,
1171 struct nfs_write_data *data, 1192 struct nfs_write_data *data,
1172 int how) 1193 int how)
1173{ 1194{
@@ -1187,6 +1208,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1187 .rpc_message = &msg, 1208 .rpc_message = &msg,
1188 .callback_ops = &nfs_commit_ops, 1209 .callback_ops = &nfs_commit_ops,
1189 .callback_data = data, 1210 .callback_data = data,
1211 .workqueue = nfsiod_workqueue,
1190 .flags = flags, 1212 .flags = flags,
1191 .priority = priority, 1213 .priority = priority,
1192 }; 1214 };
@@ -1203,6 +1225,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1203 /* Note: we always request a commit of the entire inode */ 1225 /* Note: we always request a commit of the entire inode */
1204 data->args.offset = 0; 1226 data->args.offset = 0;
1205 data->args.count = 0; 1227 data->args.count = 0;
1228 data->args.context = get_nfs_open_context(first->wb_context);
1206 data->res.count = 0; 1229 data->res.count = 0;
1207 data->res.fattr = &data->fattr; 1230 data->res.fattr = &data->fattr;
1208 data->res.verf = &data->verf; 1231 data->res.verf = &data->verf;
@@ -1214,8 +1237,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1214 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1237 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1215 1238
1216 task = rpc_run_task(&task_setup_data); 1239 task = rpc_run_task(&task_setup_data);
1217 if (!IS_ERR(task)) 1240 if (IS_ERR(task))
1218 rpc_put_task(task); 1241 return PTR_ERR(task);
1242 rpc_put_task(task);
1243 return 0;
1219} 1244}
1220 1245
1221/* 1246/*
@@ -1227,15 +1252,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1227 struct nfs_write_data *data; 1252 struct nfs_write_data *data;
1228 struct nfs_page *req; 1253 struct nfs_page *req;
1229 1254
1230 data = nfs_commit_alloc(); 1255 data = nfs_commitdata_alloc();
1231 1256
1232 if (!data) 1257 if (!data)
1233 goto out_bad; 1258 goto out_bad;
1234 1259
1235 /* Set up the argument struct */ 1260 /* Set up the argument struct */
1236 nfs_commit_rpcsetup(head, data, how); 1261 return nfs_commit_rpcsetup(head, data, how);
1237
1238 return 0;
1239 out_bad: 1262 out_bad:
1240 while (!list_empty(head)) { 1263 while (!list_empty(head)) {
1241 req = nfs_list_entry(head->next); 1264 req = nfs_list_entry(head->next);
@@ -1255,7 +1278,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1255static void nfs_commit_done(struct rpc_task *task, void *calldata) 1278static void nfs_commit_done(struct rpc_task *task, void *calldata)
1256{ 1279{
1257 struct nfs_write_data *data = calldata; 1280 struct nfs_write_data *data = calldata;
1258 struct nfs_page *req;
1259 1281
1260 dprintk("NFS: %5u nfs_commit_done (status %d)\n", 1282 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1261 task->tk_pid, task->tk_status); 1283 task->tk_pid, task->tk_status);
@@ -1263,6 +1285,13 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1263 /* Call the NFS version-specific code */ 1285 /* Call the NFS version-specific code */
1264 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 1286 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
1265 return; 1287 return;
1288}
1289
1290static void nfs_commit_release(void *calldata)
1291{
1292 struct nfs_write_data *data = calldata;
1293 struct nfs_page *req;
1294 int status = data->task.tk_status;
1266 1295
1267 while (!list_empty(&data->pages)) { 1296 while (!list_empty(&data->pages)) {
1268 req = nfs_list_entry(data->pages.next); 1297 req = nfs_list_entry(data->pages.next);
@@ -1277,10 +1306,10 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1277 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 1306 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
1278 req->wb_bytes, 1307 req->wb_bytes,
1279 (long long)req_offset(req)); 1308 (long long)req_offset(req));
1280 if (task->tk_status < 0) { 1309 if (status < 0) {
1281 nfs_context_set_write_error(req->wb_context, task->tk_status); 1310 nfs_context_set_write_error(req->wb_context, status);
1282 nfs_inode_remove_request(req); 1311 nfs_inode_remove_request(req);
1283 dprintk(", error = %d\n", task->tk_status); 1312 dprintk(", error = %d\n", status);
1284 goto next; 1313 goto next;
1285 } 1314 }
1286 1315
@@ -1297,10 +1326,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1297 } 1326 }
1298 /* We have a mismatch. Write the page again */ 1327 /* We have a mismatch. Write the page again */
1299 dprintk(" mismatch\n"); 1328 dprintk(" mismatch\n");
1300 nfs_redirty_request(req); 1329 nfs_mark_request_dirty(req);
1301 next: 1330 next:
1302 nfs_clear_page_tag_locked(req); 1331 nfs_clear_page_tag_locked(req);
1303 } 1332 }
1333 nfs_commitdata_release(calldata);
1304} 1334}
1305 1335
1306static const struct rpc_call_ops nfs_commit_ops = { 1336static const struct rpc_call_ops nfs_commit_ops = {
@@ -1487,18 +1517,19 @@ static int nfs_wb_page_priority(struct inode *inode, struct page *page,
1487 }; 1517 };
1488 int ret; 1518 int ret;
1489 1519
1490 BUG_ON(!PageLocked(page)); 1520 do {
1491 if (clear_page_dirty_for_io(page)) { 1521 if (clear_page_dirty_for_io(page)) {
1492 ret = nfs_writepage_locked(page, &wbc); 1522 ret = nfs_writepage_locked(page, &wbc);
1523 if (ret < 0)
1524 goto out_error;
1525 } else if (!PagePrivate(page))
1526 break;
1527 ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
1493 if (ret < 0) 1528 if (ret < 0)
1494 goto out; 1529 goto out_error;
1495 } 1530 } while (PagePrivate(page));
1496 if (!PagePrivate(page)) 1531 return 0;
1497 return 0; 1532out_error:
1498 ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
1499 if (ret >= 0)
1500 return 0;
1501out:
1502 __mark_inode_dirty(inode, I_DIRTY_PAGES); 1533 __mark_inode_dirty(inode, I_DIRTY_PAGES);
1503 return ret; 1534 return ret;
1504} 1535}
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index d13403e33622..294992e9bf69 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -10,6 +10,7 @@
10#include <linux/sunrpc/svcauth.h> 10#include <linux/sunrpc/svcauth.h>
11#include <linux/nfsd/nfsd.h> 11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/export.h> 12#include <linux/nfsd/export.h>
13#include "auth.h"
13 14
14int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) 15int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
15{ 16{
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8a6f7c924c75..33bfcf09db46 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -35,6 +35,7 @@
35#include <linux/lockd/bind.h> 35#include <linux/lockd/bind.h>
36#include <linux/sunrpc/msg_prot.h> 36#include <linux/sunrpc/msg_prot.h>
37#include <linux/sunrpc/gss_api.h> 37#include <linux/sunrpc/gss_api.h>
38#include <net/ipv6.h>
38 39
39#define NFSDDBG_FACILITY NFSDDBG_EXPORT 40#define NFSDDBG_FACILITY NFSDDBG_EXPORT
40 41
@@ -1548,6 +1549,7 @@ exp_addclient(struct nfsctl_client *ncp)
1548{ 1549{
1549 struct auth_domain *dom; 1550 struct auth_domain *dom;
1550 int i, err; 1551 int i, err;
1552 struct in6_addr addr6;
1551 1553
1552 /* First, consistency check. */ 1554 /* First, consistency check. */
1553 err = -EINVAL; 1555 err = -EINVAL;
@@ -1566,9 +1568,10 @@ exp_addclient(struct nfsctl_client *ncp)
1566 goto out_unlock; 1568 goto out_unlock;
1567 1569
1568 /* Insert client into hashtable. */ 1570 /* Insert client into hashtable. */
1569 for (i = 0; i < ncp->cl_naddr; i++) 1571 for (i = 0; i < ncp->cl_naddr; i++) {
1570 auth_unix_add_addr(ncp->cl_addrlist[i], dom); 1572 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
1571 1573 auth_unix_add_addr(&addr6, dom);
1574 }
1572 auth_unix_forget_old(dom); 1575 auth_unix_forget_old(dom);
1573 auth_domain_put(dom); 1576 auth_domain_put(dom);
1574 1577
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index aae2b29ae2c9..0b3ffa9840c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
104} while (0) 104} while (0)
105#define RESERVE_SPACE(nbytes) do { \ 105#define RESERVE_SPACE(nbytes) do { \
106 p = xdr_reserve_space(xdr, nbytes); \ 106 p = xdr_reserve_space(xdr, nbytes); \
107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ 107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
108 BUG_ON(!p); \ 108 BUG_ON(!p); \
109} while (0) 109} while (0)
110 110
@@ -134,7 +134,7 @@ xdr_error: \
134 p = xdr_inline_decode(xdr, nbytes); \ 134 p = xdr_inline_decode(xdr, nbytes); \
135 if (!p) { \ 135 if (!p) { \
136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ 136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
137 __FUNCTION__, __LINE__); \ 137 __func__, __LINE__); \
138 return -EIO; \ 138 return -EIO; \
139 } \ 139 } \
140} while (0) 140} while (0)
@@ -344,6 +344,21 @@ static struct rpc_version * nfs_cb_version[] = {
344 &nfs_cb_version4, 344 &nfs_cb_version4,
345}; 345};
346 346
347static struct rpc_program cb_program;
348
349static struct rpc_stat cb_stats = {
350 .program = &cb_program
351};
352
353#define NFS4_CALLBACK 0x40000000
354static struct rpc_program cb_program = {
355 .name = "nfs4_cb",
356 .number = NFS4_CALLBACK,
357 .nrvers = ARRAY_SIZE(nfs_cb_version),
358 .version = nfs_cb_version,
359 .stats = &cb_stats,
360};
361
347/* Reference counting, callback cleanup, etc., all look racy as heck. 362/* Reference counting, callback cleanup, etc., all look racy as heck.
348 * And why is cb_set an atomic? */ 363 * And why is cb_set an atomic? */
349 364
@@ -358,13 +373,12 @@ static int do_probe_callback(void *data)
358 .to_maxval = (NFSD_LEASE_TIME/2) * HZ, 373 .to_maxval = (NFSD_LEASE_TIME/2) * HZ,
359 .to_exponential = 1, 374 .to_exponential = 1,
360 }; 375 };
361 struct rpc_program * program = &cb->cb_program;
362 struct rpc_create_args args = { 376 struct rpc_create_args args = {
363 .protocol = IPPROTO_TCP, 377 .protocol = IPPROTO_TCP,
364 .address = (struct sockaddr *)&addr, 378 .address = (struct sockaddr *)&addr,
365 .addrsize = sizeof(addr), 379 .addrsize = sizeof(addr),
366 .timeout = &timeparms, 380 .timeout = &timeparms,
367 .program = program, 381 .program = &cb_program,
368 .version = nfs_cb_version[1]->number, 382 .version = nfs_cb_version[1]->number,
369 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ 383 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
370 .flags = (RPC_CLNT_CREATE_NOPING), 384 .flags = (RPC_CLNT_CREATE_NOPING),
@@ -382,16 +396,8 @@ static int do_probe_callback(void *data)
382 addr.sin_port = htons(cb->cb_port); 396 addr.sin_port = htons(cb->cb_port);
383 addr.sin_addr.s_addr = htonl(cb->cb_addr); 397 addr.sin_addr.s_addr = htonl(cb->cb_addr);
384 398
385 /* Initialize rpc_program */
386 program->name = "nfs4_cb";
387 program->number = cb->cb_prog;
388 program->nrvers = ARRAY_SIZE(nfs_cb_version);
389 program->version = nfs_cb_version;
390 program->stats = &cb->cb_stat;
391
392 /* Initialize rpc_stat */ 399 /* Initialize rpc_stat */
393 memset(program->stats, 0, sizeof(cb->cb_stat)); 400 memset(args.program->stats, 0, sizeof(struct rpc_stat));
394 program->stats->program = program;
395 401
396 /* Create RPC client */ 402 /* Create RPC client */
397 client = rpc_create(&args); 403 client = rpc_create(&args);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 996bd88b75ba..5b398421b051 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -202,7 +202,7 @@ static struct cache_detail idtoname_cache = {
202 .alloc = ent_alloc, 202 .alloc = ent_alloc,
203}; 203};
204 204
205int 205static int
206idtoname_parse(struct cache_detail *cd, char *buf, int buflen) 206idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
207{ 207{
208 struct ent ent, *res; 208 struct ent ent, *res;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 81a75f3081f4..8799b8708188 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1639,6 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1639 locks_init_lock(&fl); 1639 locks_init_lock(&fl);
1640 fl.fl_lmops = &nfsd_lease_mng_ops; 1640 fl.fl_lmops = &nfsd_lease_mng_ops;
1641 fl.fl_flags = FL_LEASE; 1641 fl.fl_flags = FL_LEASE;
1642 fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
1642 fl.fl_end = OFFSET_MAX; 1643 fl.fl_end = OFFSET_MAX;
1643 fl.fl_owner = (fl_owner_t)dp; 1644 fl.fl_owner = (fl_owner_t)dp;
1644 fl.fl_file = stp->st_vfs_file; 1645 fl.fl_file = stp->st_vfs_file;
@@ -1647,8 +1648,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1647 /* vfs_setlease checks to see if delegation should be handed out. 1648 /* vfs_setlease checks to see if delegation should be handed out.
1648 * the lock_manager callbacks fl_mylease and fl_change are used 1649 * the lock_manager callbacks fl_mylease and fl_change are used
1649 */ 1650 */
1650 if ((status = vfs_setlease(stp->st_vfs_file, 1651 if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) {
1651 flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
1652 dprintk("NFSD: setlease failed [%d], no delegation\n", status); 1652 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
1653 unhash_delegation(dp); 1653 unhash_delegation(dp);
1654 flag = NFS4_OPEN_DELEGATE_NONE; 1654 flag = NFS4_OPEN_DELEGATE_NONE;
@@ -1763,10 +1763,6 @@ out:
1763 return status; 1763 return status;
1764} 1764}
1765 1765
1766static struct workqueue_struct *laundry_wq;
1767static void laundromat_main(struct work_struct *);
1768static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
1769
1770__be32 1766__be32
1771nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1767nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1772 clientid_t *clid) 1768 clientid_t *clid)
@@ -1874,7 +1870,11 @@ nfs4_laundromat(void)
1874 return clientid_val; 1870 return clientid_val;
1875} 1871}
1876 1872
1877void 1873static struct workqueue_struct *laundry_wq;
1874static void laundromat_main(struct work_struct *);
1875static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
1876
1877static void
1878laundromat_main(struct work_struct *not_used) 1878laundromat_main(struct work_struct *not_used)
1879{ 1879{
1880 time_t t; 1880 time_t t;
@@ -1975,6 +1975,26 @@ io_during_grace_disallowed(struct inode *inode, int flags)
1975 && mandatory_lock(inode); 1975 && mandatory_lock(inode);
1976} 1976}
1977 1977
1978static int check_stateid_generation(stateid_t *in, stateid_t *ref)
1979{
1980 /* If the client sends us a stateid from the future, it's buggy: */
1981 if (in->si_generation > ref->si_generation)
1982 return nfserr_bad_stateid;
1983 /*
1984 * The following, however, can happen. For example, if the
1985 * client sends an open and some IO at the same time, the open
1986 * may bump si_generation while the IO is still in flight.
1987 * Thanks to hard links and renames, the client never knows what
1988 * file an open will affect. So it could avoid that situation
1989 * only by serializing all opens and IO from the same open
1990 * owner. To recover from the old_stateid error, the client
1991 * will just have to retry the IO:
1992 */
1993 if (in->si_generation < ref->si_generation)
1994 return nfserr_old_stateid;
1995 return nfs_ok;
1996}
1997
1978/* 1998/*
1979* Checks for stateid operations 1999* Checks for stateid operations
1980*/ 2000*/
@@ -2023,12 +2043,8 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
2023 goto out; 2043 goto out;
2024 stidp = &stp->st_stateid; 2044 stidp = &stp->st_stateid;
2025 } 2045 }
2026 if (stateid->si_generation > stidp->si_generation) 2046 status = check_stateid_generation(stateid, stidp);
2027 goto out; 2047 if (status)
2028
2029 /* OLD STATEID */
2030 status = nfserr_old_stateid;
2031 if (stateid->si_generation < stidp->si_generation)
2032 goto out; 2048 goto out;
2033 if (stp) { 2049 if (stp) {
2034 if ((status = nfs4_check_openmode(stp,flags))) 2050 if ((status = nfs4_check_openmode(stp,flags)))
@@ -2036,7 +2052,7 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
2036 renew_client(stp->st_stateowner->so_client); 2052 renew_client(stp->st_stateowner->so_client);
2037 if (filpp) 2053 if (filpp)
2038 *filpp = stp->st_vfs_file; 2054 *filpp = stp->st_vfs_file;
2039 } else if (dp) { 2055 } else {
2040 if ((status = nfs4_check_delegmode(dp, flags))) 2056 if ((status = nfs4_check_delegmode(dp, flags)))
2041 goto out; 2057 goto out;
2042 renew_client(dp->dl_client); 2058 renew_client(dp->dl_client);
@@ -2065,6 +2081,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2065{ 2081{
2066 struct nfs4_stateid *stp; 2082 struct nfs4_stateid *stp;
2067 struct nfs4_stateowner *sop; 2083 struct nfs4_stateowner *sop;
2084 __be32 status;
2068 2085
2069 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2086 dprintk("NFSD: preprocess_seqid_op: seqid=%d "
2070 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2087 "stateid = (%08x/%08x/%08x/%08x)\n", seqid,
@@ -2127,7 +2144,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2127 } 2144 }
2128 } 2145 }
2129 2146
2130 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { 2147 if (nfs4_check_fh(current_fh, stp)) {
2131 dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); 2148 dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
2132 return nfserr_bad_stateid; 2149 return nfserr_bad_stateid;
2133 } 2150 }
@@ -2150,15 +2167,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2150 " confirmed yet!\n"); 2167 " confirmed yet!\n");
2151 return nfserr_bad_stateid; 2168 return nfserr_bad_stateid;
2152 } 2169 }
2153 if (stateid->si_generation > stp->st_stateid.si_generation) { 2170 status = check_stateid_generation(stateid, &stp->st_stateid);
2154 dprintk("NFSD: preprocess_seqid_op: future stateid?!\n"); 2171 if (status)
2155 return nfserr_bad_stateid; 2172 return status;
2156 }
2157
2158 if (stateid->si_generation < stp->st_stateid.si_generation) {
2159 dprintk("NFSD: preprocess_seqid_op: old stateid!\n");
2160 return nfserr_old_stateid;
2161 }
2162 renew_client(sop->so_client); 2173 renew_client(sop->so_client);
2163 return nfs_ok; 2174 return nfs_ok;
2164 2175
@@ -2194,7 +2205,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2194 2205
2195 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2206 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2196 oc->oc_seqid, &oc->oc_req_stateid, 2207 oc->oc_seqid, &oc->oc_req_stateid,
2197 CHECK_FH | CONFIRM | OPEN_STATE, 2208 CONFIRM | OPEN_STATE,
2198 &oc->oc_stateowner, &stp, NULL))) 2209 &oc->oc_stateowner, &stp, NULL)))
2199 goto out; 2210 goto out;
2200 2211
@@ -2265,7 +2276,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
2265 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2276 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2266 od->od_seqid, 2277 od->od_seqid,
2267 &od->od_stateid, 2278 &od->od_stateid,
2268 CHECK_FH | OPEN_STATE, 2279 OPEN_STATE,
2269 &od->od_stateowner, &stp, NULL))) 2280 &od->od_stateowner, &stp, NULL)))
2270 goto out; 2281 goto out;
2271 2282
@@ -2318,7 +2329,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2318 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2329 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2319 close->cl_seqid, 2330 close->cl_seqid,
2320 &close->cl_stateid, 2331 &close->cl_stateid,
2321 CHECK_FH | OPEN_STATE | CLOSE_STATE, 2332 OPEN_STATE | CLOSE_STATE,
2322 &close->cl_stateowner, &stp, NULL))) 2333 &close->cl_stateowner, &stp, NULL)))
2323 goto out; 2334 goto out;
2324 status = nfs_ok; 2335 status = nfs_ok;
@@ -2623,7 +2634,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2623 status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2634 status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2624 lock->lk_new_open_seqid, 2635 lock->lk_new_open_seqid,
2625 &lock->lk_new_open_stateid, 2636 &lock->lk_new_open_stateid,
2626 CHECK_FH | OPEN_STATE, 2637 OPEN_STATE,
2627 &lock->lk_replay_owner, &open_stp, 2638 &lock->lk_replay_owner, &open_stp,
2628 lock); 2639 lock);
2629 if (status) 2640 if (status)
@@ -2650,7 +2661,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2650 status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2661 status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2651 lock->lk_old_lock_seqid, 2662 lock->lk_old_lock_seqid,
2652 &lock->lk_old_lock_stateid, 2663 &lock->lk_old_lock_stateid,
2653 CHECK_FH | LOCK_STATE, 2664 LOCK_STATE,
2654 &lock->lk_replay_owner, &lock_stp, lock); 2665 &lock->lk_replay_owner, &lock_stp, lock);
2655 if (status) 2666 if (status)
2656 goto out; 2667 goto out;
@@ -2701,9 +2712,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2701 * Note: locks.c uses the BKL to protect the inode's lock list. 2712 * Note: locks.c uses the BKL to protect the inode's lock list.
2702 */ 2713 */
2703 2714
2704 /* XXX?: Just to divert the locks_release_private at the start of
2705 * locks_copy_lock: */
2706 locks_init_lock(&conflock);
2707 err = vfs_lock_file(filp, cmd, &file_lock, &conflock); 2715 err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
2708 switch (-err) { 2716 switch (-err) {
2709 case 0: /* success! */ 2717 case 0: /* success! */
@@ -2847,7 +2855,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2847 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, 2855 if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh,
2848 locku->lu_seqid, 2856 locku->lu_seqid,
2849 &locku->lu_stateid, 2857 &locku->lu_stateid,
2850 CHECK_FH | LOCK_STATE, 2858 LOCK_STATE,
2851 &locku->lu_stateowner, &stp, NULL))) 2859 &locku->lu_stateowner, &stp, NULL)))
2852 goto out; 2860 goto out;
2853 2861
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0e6a179eccaf..c513bbdf2d36 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -376,20 +376,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
376 goto xdr_error; 376 goto xdr_error;
377 } 377 }
378 } 378 }
379 if (bmval[1] & FATTR4_WORD1_TIME_METADATA) {
380 /* We require the high 32 bits of 'seconds' to be 0, and we ignore
381 all 32 bits of 'nseconds'. */
382 READ_BUF(12);
383 len += 12;
384 READ32(dummy32);
385 if (dummy32)
386 return nfserr_inval;
387 READ32(iattr->ia_ctime.tv_sec);
388 READ32(iattr->ia_ctime.tv_nsec);
389 if (iattr->ia_ctime.tv_nsec >= (u32)1000000000)
390 return nfserr_inval;
391 iattr->ia_valid |= ATTR_CTIME;
392 }
393 if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { 379 if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
394 READ_BUF(4); 380 READ_BUF(4);
395 len += 4; 381 len += 4;
@@ -1867,6 +1853,15 @@ out_serverfault:
1867 goto out; 1853 goto out;
1868} 1854}
1869 1855
1856static inline int attributes_need_mount(u32 *bmval)
1857{
1858 if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
1859 return 1;
1860 if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID)
1861 return 1;
1862 return 0;
1863}
1864
1870static __be32 1865static __be32
1871nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, 1866nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
1872 const char *name, int namlen, __be32 *p, int *buflen) 1867 const char *name, int namlen, __be32 *p, int *buflen)
@@ -1888,9 +1883,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
1888 * we will not follow the cross mount and will fill the attribtutes 1883 * we will not follow the cross mount and will fill the attribtutes
1889 * directly from the mountpoint dentry. 1884 * directly from the mountpoint dentry.
1890 */ 1885 */
1891 if (d_mountpoint(dentry) && 1886 if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval))
1892 (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 &&
1893 (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0)
1894 ignore_crossmnt = 1; 1887 ignore_crossmnt = 1;
1895 else if (d_mountpoint(dentry)) { 1888 else if (d_mountpoint(dentry)) {
1896 int err; 1889 int err;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8516137cdbb0..5ac00c4fee91 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/inet.h>
25#include <linux/string.h> 26#include <linux/string.h>
26#include <linux/smp_lock.h> 27#include <linux/smp_lock.h>
27#include <linux/ctype.h> 28#include <linux/ctype.h>
@@ -35,8 +36,10 @@
35#include <linux/nfsd/cache.h> 36#include <linux/nfsd/cache.h>
36#include <linux/nfsd/xdr.h> 37#include <linux/nfsd/xdr.h>
37#include <linux/nfsd/syscall.h> 38#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h>
38 40
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <net/ipv6.h>
40 43
41/* 44/*
42 * We have a single directory with 9 nodes in it. 45 * We have a single directory with 9 nodes in it.
@@ -52,6 +55,8 @@ enum {
52 NFSD_Getfs, 55 NFSD_Getfs,
53 NFSD_List, 56 NFSD_List,
54 NFSD_Fh, 57 NFSD_Fh,
58 NFSD_FO_UnlockIP,
59 NFSD_FO_UnlockFS,
55 NFSD_Threads, 60 NFSD_Threads,
56 NFSD_Pool_Threads, 61 NFSD_Pool_Threads,
57 NFSD_Versions, 62 NFSD_Versions,
@@ -88,6 +93,9 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
88static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 93static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
89#endif 94#endif
90 95
96static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size);
97static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size);
98
91static ssize_t (*write_op[])(struct file *, char *, size_t) = { 99static ssize_t (*write_op[])(struct file *, char *, size_t) = {
92 [NFSD_Svc] = write_svc, 100 [NFSD_Svc] = write_svc,
93 [NFSD_Add] = write_add, 101 [NFSD_Add] = write_add,
@@ -97,6 +105,8 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
97 [NFSD_Getfd] = write_getfd, 105 [NFSD_Getfd] = write_getfd,
98 [NFSD_Getfs] = write_getfs, 106 [NFSD_Getfs] = write_getfs,
99 [NFSD_Fh] = write_filehandle, 107 [NFSD_Fh] = write_filehandle,
108 [NFSD_FO_UnlockIP] = failover_unlock_ip,
109 [NFSD_FO_UnlockFS] = failover_unlock_fs,
100 [NFSD_Threads] = write_threads, 110 [NFSD_Threads] = write_threads,
101 [NFSD_Pool_Threads] = write_pool_threads, 111 [NFSD_Pool_Threads] = write_pool_threads,
102 [NFSD_Versions] = write_versions, 112 [NFSD_Versions] = write_versions,
@@ -149,7 +159,6 @@ static const struct file_operations transaction_ops = {
149 .release = simple_transaction_release, 159 .release = simple_transaction_release,
150}; 160};
151 161
152extern struct seq_operations nfs_exports_op;
153static int exports_open(struct inode *inode, struct file *file) 162static int exports_open(struct inode *inode, struct file *file)
154{ 163{
155 return seq_open(file, &nfs_exports_op); 164 return seq_open(file, &nfs_exports_op);
@@ -160,6 +169,7 @@ static const struct file_operations exports_operations = {
160 .read = seq_read, 169 .read = seq_read,
161 .llseek = seq_lseek, 170 .llseek = seq_lseek,
162 .release = seq_release, 171 .release = seq_release,
172 .owner = THIS_MODULE,
163}; 173};
164 174
165/*----------------------------------------------------------------------------*/ 175/*----------------------------------------------------------------------------*/
@@ -222,6 +232,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
222 struct auth_domain *clp; 232 struct auth_domain *clp;
223 int err = 0; 233 int err = 0;
224 struct knfsd_fh *res; 234 struct knfsd_fh *res;
235 struct in6_addr in6;
225 236
226 if (size < sizeof(*data)) 237 if (size < sizeof(*data))
227 return -EINVAL; 238 return -EINVAL;
@@ -236,7 +247,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
236 res = (struct knfsd_fh*)buf; 247 res = (struct knfsd_fh*)buf;
237 248
238 exp_readlock(); 249 exp_readlock();
239 if (!(clp = auth_unix_lookup(sin->sin_addr))) 250
251 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
252
253 clp = auth_unix_lookup(&in6);
254 if (!clp)
240 err = -EPERM; 255 err = -EPERM;
241 else { 256 else {
242 err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); 257 err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
@@ -257,6 +272,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
257 int err = 0; 272 int err = 0;
258 struct knfsd_fh fh; 273 struct knfsd_fh fh;
259 char *res; 274 char *res;
275 struct in6_addr in6;
260 276
261 if (size < sizeof(*data)) 277 if (size < sizeof(*data))
262 return -EINVAL; 278 return -EINVAL;
@@ -271,7 +287,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
271 res = buf; 287 res = buf;
272 sin = (struct sockaddr_in *)&data->gd_addr; 288 sin = (struct sockaddr_in *)&data->gd_addr;
273 exp_readlock(); 289 exp_readlock();
274 if (!(clp = auth_unix_lookup(sin->sin_addr))) 290
291 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
292
293 clp = auth_unix_lookup(&in6);
294 if (!clp)
275 err = -EPERM; 295 err = -EPERM;
276 else { 296 else {
277 err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); 297 err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE);
@@ -288,6 +308,58 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
288 return err; 308 return err;
289} 309}
290 310
311static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
312{
313 __be32 server_ip;
314 char *fo_path, c;
315 int b1, b2, b3, b4;
316
317 /* sanity check */
318 if (size == 0)
319 return -EINVAL;
320
321 if (buf[size-1] != '\n')
322 return -EINVAL;
323
324 fo_path = buf;
325 if (qword_get(&buf, fo_path, size) < 0)
326 return -EINVAL;
327
328 /* get ipv4 address */
329 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
330 return -EINVAL;
331 server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
332
333 return nlmsvc_unlock_all_by_ip(server_ip);
334}
335
336static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
337{
338 struct nameidata nd;
339 char *fo_path;
340 int error;
341
342 /* sanity check */
343 if (size == 0)
344 return -EINVAL;
345
346 if (buf[size-1] != '\n')
347 return -EINVAL;
348
349 fo_path = buf;
350 if (qword_get(&buf, fo_path, size) < 0)
351 return -EINVAL;
352
353 error = path_lookup(fo_path, 0, &nd);
354 if (error)
355 return error;
356
357 error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb);
358
359 path_put(&nd.path);
360 return error;
361}
362
291static ssize_t write_filehandle(struct file *file, char *buf, size_t size) 363static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
292{ 364{
293 /* request is: 365 /* request is:
@@ -347,8 +419,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
347 return mesg - buf; 419 return mesg - buf;
348} 420}
349 421
350extern int nfsd_nrthreads(void);
351
352static ssize_t write_threads(struct file *file, char *buf, size_t size) 422static ssize_t write_threads(struct file *file, char *buf, size_t size)
353{ 423{
354 /* if size > 0, look for a number of threads and call nfsd_svc 424 /* if size > 0, look for a number of threads and call nfsd_svc
@@ -371,10 +441,6 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
371 return strlen(buf); 441 return strlen(buf);
372} 442}
373 443
374extern int nfsd_nrpools(void);
375extern int nfsd_get_nrthreads(int n, int *);
376extern int nfsd_set_nrthreads(int n, int *);
377
378static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) 444static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
379{ 445{
380 /* if size > 0, look for an array of number of threads per node 446 /* if size > 0, look for an array of number of threads per node
@@ -696,6 +762,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
696 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 762 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
697 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 763 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
698 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 764 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
765 [NFSD_FO_UnlockIP] = {"unlock_ip",
766 &transaction_ops, S_IWUSR|S_IRUSR},
767 [NFSD_FO_UnlockFS] = {"unlock_filesystem",
768 &transaction_ops, S_IWUSR|S_IRUSR},
699 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, 769 [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
700 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, 770 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
701 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, 771 [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -732,10 +802,9 @@ static int create_proc_exports_entry(void)
732 entry = proc_mkdir("fs/nfs", NULL); 802 entry = proc_mkdir("fs/nfs", NULL);
733 if (!entry) 803 if (!entry)
734 return -ENOMEM; 804 return -ENOMEM;
735 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 805 entry = proc_create("exports", 0, entry, &exports_operations);
736 if (!entry) 806 if (!entry)
737 return -ENOMEM; 807 return -ENOMEM;
738 entry->proc_fops = &exports_operations;
739 return 0; 808 return 0;
740} 809}
741#else /* CONFIG_PROC_FS */ 810#else /* CONFIG_PROC_FS */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3e6b3f41ee1f..100ae5641162 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -113,6 +113,124 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
113} 113}
114 114
115/* 115/*
116 * Use the given filehandle to look up the corresponding export and
117 * dentry. On success, the results are used to set fh_export and
118 * fh_dentry.
119 */
120static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
121{
122 struct knfsd_fh *fh = &fhp->fh_handle;
123 struct fid *fid = NULL, sfid;
124 struct svc_export *exp;
125 struct dentry *dentry;
126 int fileid_type;
127 int data_left = fh->fh_size/4;
128 __be32 error;
129
130 error = nfserr_stale;
131 if (rqstp->rq_vers > 2)
132 error = nfserr_badhandle;
133 if (rqstp->rq_vers == 4 && fh->fh_size == 0)
134 return nfserr_nofilehandle;
135
136 if (fh->fh_version == 1) {
137 int len;
138
139 if (--data_left < 0)
140 return error;
141 if (fh->fh_auth_type != 0)
142 return error;
143 len = key_len(fh->fh_fsid_type) / 4;
144 if (len == 0)
145 return error;
146 if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
147 /* deprecated, convert to type 3 */
148 len = key_len(FSID_ENCODE_DEV)/4;
149 fh->fh_fsid_type = FSID_ENCODE_DEV;
150 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
151 fh->fh_fsid[1] = fh->fh_fsid[2];
152 }
153 data_left -= len;
154 if (data_left < 0)
155 return error;
156 exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
157 fid = (struct fid *)(fh->fh_auth + len);
158 } else {
159 __u32 tfh[2];
160 dev_t xdev;
161 ino_t xino;
162
163 if (fh->fh_size != NFS_FHSIZE)
164 return error;
165 /* assume old filehandle format */
166 xdev = old_decode_dev(fh->ofh_xdev);
167 xino = u32_to_ino_t(fh->ofh_xino);
168 mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
169 exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
170 }
171
172 error = nfserr_stale;
173 if (PTR_ERR(exp) == -ENOENT)
174 return error;
175
176 if (IS_ERR(exp))
177 return nfserrno(PTR_ERR(exp));
178
179 error = nfsd_setuser_and_check_port(rqstp, exp);
180 if (error)
181 goto out;
182
183 /*
184 * Look up the dentry using the NFS file handle.
185 */
186 error = nfserr_stale;
187 if (rqstp->rq_vers > 2)
188 error = nfserr_badhandle;
189
190 if (fh->fh_version != 1) {
191 sfid.i32.ino = fh->ofh_ino;
192 sfid.i32.gen = fh->ofh_generation;
193 sfid.i32.parent_ino = fh->ofh_dirino;
194 fid = &sfid;
195 data_left = 3;
196 if (fh->ofh_dirino == 0)
197 fileid_type = FILEID_INO32_GEN;
198 else
199 fileid_type = FILEID_INO32_GEN_PARENT;
200 } else
201 fileid_type = fh->fh_fileid_type;
202
203 if (fileid_type == FILEID_ROOT)
204 dentry = dget(exp->ex_path.dentry);
205 else {
206 dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
207 data_left, fileid_type,
208 nfsd_acceptable, exp);
209 }
210 if (dentry == NULL)
211 goto out;
212 if (IS_ERR(dentry)) {
213 if (PTR_ERR(dentry) != -EINVAL)
214 error = nfserrno(PTR_ERR(dentry));
215 goto out;
216 }
217
218 if (S_ISDIR(dentry->d_inode->i_mode) &&
219 (dentry->d_flags & DCACHE_DISCONNECTED)) {
220 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
221 dentry->d_parent->d_name.name, dentry->d_name.name);
222 }
223
224 fhp->fh_dentry = dentry;
225 fhp->fh_export = exp;
226 nfsd_nr_verified++;
227 return 0;
228out:
229 exp_put(exp);
230 return error;
231}
232
233/*
116 * Perform sanity checks on the dentry in a client's file handle. 234 * Perform sanity checks on the dentry in a client's file handle.
117 * 235 *
118 * Note that the file handle dentry may need to be freed even after 236 * Note that the file handle dentry may need to be freed even after
@@ -124,115 +242,18 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
124__be32 242__be32
125fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) 243fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
126{ 244{
127 struct knfsd_fh *fh = &fhp->fh_handle; 245 struct svc_export *exp;
128 struct svc_export *exp = NULL;
129 struct dentry *dentry; 246 struct dentry *dentry;
130 __be32 error = 0; 247 __be32 error;
131 248
132 dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); 249 dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
133 250
134 if (!fhp->fh_dentry) { 251 if (!fhp->fh_dentry) {
135 struct fid *fid = NULL, sfid; 252 error = nfsd_set_fh_dentry(rqstp, fhp);
136 int fileid_type;
137 int data_left = fh->fh_size/4;
138
139 error = nfserr_stale;
140 if (rqstp->rq_vers > 2)
141 error = nfserr_badhandle;
142 if (rqstp->rq_vers == 4 && fh->fh_size == 0)
143 return nfserr_nofilehandle;
144
145 if (fh->fh_version == 1) {
146 int len;
147 if (--data_left<0) goto out;
148 switch (fh->fh_auth_type) {
149 case 0: break;
150 default: goto out;
151 }
152 len = key_len(fh->fh_fsid_type) / 4;
153 if (len == 0) goto out;
154 if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
155 /* deprecated, convert to type 3 */
156 len = key_len(FSID_ENCODE_DEV)/4;
157 fh->fh_fsid_type = FSID_ENCODE_DEV;
158 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
159 fh->fh_fsid[1] = fh->fh_fsid[2];
160 }
161 if ((data_left -= len)<0) goto out;
162 exp = rqst_exp_find(rqstp, fh->fh_fsid_type,
163 fh->fh_auth);
164 fid = (struct fid *)(fh->fh_auth + len);
165 } else {
166 __u32 tfh[2];
167 dev_t xdev;
168 ino_t xino;
169 if (fh->fh_size != NFS_FHSIZE)
170 goto out;
171 /* assume old filehandle format */
172 xdev = old_decode_dev(fh->ofh_xdev);
173 xino = u32_to_ino_t(fh->ofh_xino);
174 mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
175 exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
176 }
177
178 error = nfserr_stale;
179 if (PTR_ERR(exp) == -ENOENT)
180 goto out;
181
182 if (IS_ERR(exp)) {
183 error = nfserrno(PTR_ERR(exp));
184 goto out;
185 }
186
187 error = nfsd_setuser_and_check_port(rqstp, exp);
188 if (error) 253 if (error)
189 goto out; 254 goto out;
190 255 dentry = fhp->fh_dentry;
191 /* 256 exp = fhp->fh_export;
192 * Look up the dentry using the NFS file handle.
193 */
194 error = nfserr_stale;
195 if (rqstp->rq_vers > 2)
196 error = nfserr_badhandle;
197
198 if (fh->fh_version != 1) {
199 sfid.i32.ino = fh->ofh_ino;
200 sfid.i32.gen = fh->ofh_generation;
201 sfid.i32.parent_ino = fh->ofh_dirino;
202 fid = &sfid;
203 data_left = 3;
204 if (fh->ofh_dirino == 0)
205 fileid_type = FILEID_INO32_GEN;
206 else
207 fileid_type = FILEID_INO32_GEN_PARENT;
208 } else
209 fileid_type = fh->fh_fileid_type;
210
211 if (fileid_type == FILEID_ROOT)
212 dentry = dget(exp->ex_path.dentry);
213 else {
214 dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
215 data_left, fileid_type,
216 nfsd_acceptable, exp);
217 }
218 if (dentry == NULL)
219 goto out;
220 if (IS_ERR(dentry)) {
221 if (PTR_ERR(dentry) != -EINVAL)
222 error = nfserrno(PTR_ERR(dentry));
223 goto out;
224 }
225
226 if (S_ISDIR(dentry->d_inode->i_mode) &&
227 (dentry->d_flags & DCACHE_DISCONNECTED)) {
228 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
229 dentry->d_parent->d_name.name, dentry->d_name.name);
230 }
231
232 fhp->fh_dentry = dentry;
233 fhp->fh_export = exp;
234 nfsd_nr_verified++;
235 cache_get(&exp->h);
236 } else { 257 } else {
237 /* 258 /*
238 * just rechecking permissions 259 * just rechecking permissions
@@ -242,7 +263,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
242 dprintk("nfsd: fh_verify - just checking\n"); 263 dprintk("nfsd: fh_verify - just checking\n");
243 dentry = fhp->fh_dentry; 264 dentry = fhp->fh_dentry;
244 exp = fhp->fh_export; 265 exp = fhp->fh_export;
245 cache_get(&exp->h);
246 /* 266 /*
247 * Set user creds for this exportpoint; necessary even 267 * Set user creds for this exportpoint; necessary even
248 * in the "just checking" case because this may be a 268 * in the "just checking" case because this may be a
@@ -281,8 +301,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
281 access, ntohl(error)); 301 access, ntohl(error));
282 } 302 }
283out: 303out:
284 if (exp && !IS_ERR(exp))
285 exp_put(exp);
286 if (error == nfserr_stale) 304 if (error == nfserr_stale)
287 nfsdstats.fh_stale++; 305 nfsdstats.fh_stale++;
288 return error; 306 return error;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9647b0f7bc0c..941041f4b136 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -244,7 +244,6 @@ static int nfsd_init_socks(int port)
244 if (error < 0) 244 if (error < 0)
245 return error; 245 return error;
246 246
247#ifdef CONFIG_NFSD_TCP
248 error = lockd_up(IPPROTO_TCP); 247 error = lockd_up(IPPROTO_TCP);
249 if (error >= 0) { 248 if (error >= 0) {
250 error = svc_create_xprt(nfsd_serv, "tcp", port, 249 error = svc_create_xprt(nfsd_serv, "tcp", port,
@@ -254,7 +253,6 @@ static int nfsd_init_socks(int port)
254 } 253 }
255 if (error < 0) 254 if (error < 0)
256 return error; 255 return error;
257#endif
258 return 0; 256 return 0;
259} 257}
260 258
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 304bf5f643c9..a3a291f771f4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -264,7 +264,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
264 struct inode *inode; 264 struct inode *inode;
265 int accmode = MAY_SATTR; 265 int accmode = MAY_SATTR;
266 int ftype = 0; 266 int ftype = 0;
267 int imode;
268 __be32 err; 267 __be32 err;
269 int host_err; 268 int host_err;
270 int size_change = 0; 269 int size_change = 0;
@@ -360,25 +359,25 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
360 DQUOT_INIT(inode); 359 DQUOT_INIT(inode);
361 } 360 }
362 361
363 imode = inode->i_mode; 362 /* sanitize the mode change */
364 if (iap->ia_valid & ATTR_MODE) { 363 if (iap->ia_valid & ATTR_MODE) {
365 iap->ia_mode &= S_IALLUGO; 364 iap->ia_mode &= S_IALLUGO;
366 imode = iap->ia_mode |= (imode & ~S_IALLUGO); 365 iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
367 /* if changing uid/gid revoke setuid/setgid in mode */ 366 }
368 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { 367
369 iap->ia_valid |= ATTR_KILL_PRIV; 368 /* Revoke setuid/setgid on chown */
369 if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
370 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) {
371 iap->ia_valid |= ATTR_KILL_PRIV;
372 if (iap->ia_valid & ATTR_MODE) {
373 /* we're setting mode too, just clear the s*id bits */
370 iap->ia_mode &= ~S_ISUID; 374 iap->ia_mode &= ~S_ISUID;
375 if (iap->ia_mode & S_IXGRP)
376 iap->ia_mode &= ~S_ISGID;
377 } else {
378 /* set ATTR_KILL_* bits and let VFS handle it */
379 iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
371 } 380 }
372 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
373 iap->ia_mode &= ~S_ISGID;
374 } else {
375 /*
376 * Revoke setuid/setgid bit on chown/chgrp
377 */
378 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
379 iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
380 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
381 iap->ia_valid |= ATTR_KILL_SGID;
382 } 381 }
383 382
384 /* Change the attributes. */ 383 /* Change the attributes. */
@@ -988,7 +987,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
988 * flushing the data to disk is handled separately below. 987 * flushing the data to disk is handled separately below.
989 */ 988 */
990 989
991 if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ 990 if (!file->f_op->fsync) {/* COMMIT3 cannot work */
992 stable = 2; 991 stable = 2;
993 *stablep = 2; /* FILE_SYNC */ 992 *stablep = 2; /* FILE_SYNC */
994 } 993 }
@@ -1152,7 +1151,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1152} 1151}
1153#endif /* CONFIG_NFSD_V3 */ 1152#endif /* CONFIG_NFSD_V3 */
1154 1153
1155__be32 1154static __be32
1156nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, 1155nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1157 struct iattr *iap) 1156 struct iattr *iap)
1158{ 1157{
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d127..5e6724c1afd1 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
45extern void __ntfs_debug (const char *file, int line, const char *function, 45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 47#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) 48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 49
50extern void ntfs_debug_dump_runlist(const runlist_element *rl); 50extern void ntfs_debug_dump_runlist(const runlist_element *rl);
51 51
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
58 58
59extern void __ntfs_warning(const char *function, const struct super_block *sb, 59extern void __ntfs_warning(const char *function, const struct super_block *sb,
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
61#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) 61#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
62 62
63extern void __ntfs_error(const char *function, const struct super_block *sb, 63extern void __ntfs_error(const char *function, const struct super_block *sb,
64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
65#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) 65#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
66 66
67#endif /* _LINUX_NTFS_DEBUG_H */ 67#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2ad5c8b104b9..790defb847e7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1191 if (size) { 1191 if (size) {
1192 page = ntfs_map_page(mftbmp_mapping, 1192 page = ntfs_map_page(mftbmp_mapping,
1193 ofs >> PAGE_CACHE_SHIFT); 1193 ofs >> PAGE_CACHE_SHIFT);
1194 if (unlikely(IS_ERR(page))) { 1194 if (IS_ERR(page)) {
1195 ntfs_error(vol->sb, "Failed to read mft " 1195 ntfs_error(vol->sb, "Failed to read mft "
1196 "bitmap, aborting."); 1196 "bitmap, aborting.");
1197 return PTR_ERR(page); 1197 return PTR_ERR(page);
@@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2118 } 2118 }
2119 /* Read, map, and pin the page containing the mft record. */ 2119 /* Read, map, and pin the page containing the mft record. */
2120 page = ntfs_map_page(mft_vi->i_mapping, index); 2120 page = ntfs_map_page(mft_vi->i_mapping, index);
2121 if (unlikely(IS_ERR(page))) { 2121 if (IS_ERR(page)) {
2122 ntfs_error(vol->sb, "Failed to map page containing mft record " 2122 ntfs_error(vol->sb, "Failed to map page containing mft record "
2123 "to format 0x%llx.", (long long)mft_no); 2123 "to format 0x%llx.", (long long)mft_no);
2124 return PTR_ERR(page); 2124 return PTR_ERR(page);
@@ -2519,7 +2519,7 @@ mft_rec_already_initialized:
2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2520 /* Read, map, and pin the page containing the mft record. */ 2520 /* Read, map, and pin the page containing the mft record. */
2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index); 2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index);
2522 if (unlikely(IS_ERR(page))) { 2522 if (IS_ERR(page)) {
2523 ntfs_error(vol->sb, "Failed to map page containing allocated " 2523 ntfs_error(vol->sb, "Failed to map page containing allocated "
2524 "mft record 0x%llx.", (long long)bit); 2524 "mft record 0x%llx.", (long long)bit);
2525 err = PTR_ERR(page); 2525 err = PTR_ERR(page);
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 98429fd68499..bc702dab5d1f 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -65,7 +65,7 @@ int o2cb_sys_init(void)
65{ 65{
66 int ret; 66 int ret;
67 67
68 o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); 68 o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
69 if (!o2cb_kset) 69 if (!o2cb_kset)
70 return -ENOMEM; 70 return -ENOMEM;
71 71
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5f6d858770a2..1b81dcba175d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -44,7 +44,8 @@
44#define MLOG_MASK_PREFIX ML_DLM 44#define MLOG_MASK_PREFIX ML_DLM
45#include "cluster/masklog.h" 45#include "cluster/masklog.h"
46 46
47int stringify_lockname(const char *lockname, int locklen, char *buf, int len); 47static int stringify_lockname(const char *lockname, int locklen, char *buf,
48 int len);
48 49
49void dlm_print_one_lock_resource(struct dlm_lock_resource *res) 50void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
50{ 51{
@@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname);
251 * 252 *
252 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. 253 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
253 */ 254 */
254int stringify_lockname(const char *lockname, int locklen, char *buf, int len) 255static int stringify_lockname(const char *lockname, int locklen, char *buf,
256 int len)
255{ 257{
256 int out = 0; 258 int out = 0;
257 __be64 inode_blkno_be; 259 __be64 inode_blkno_be;
@@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref)
368 kfree(dc); 370 kfree(dc);
369} 371}
370 372
371void dlm_debug_put(struct dlm_debug_ctxt *dc) 373static void dlm_debug_put(struct dlm_debug_ctxt *dc)
372{ 374{
373 if (dc) 375 if (dc)
374 kref_put(&dc->debug_refcnt, dlm_debug_free); 376 kref_put(&dc->debug_refcnt, dlm_debug_free);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524c..e48aba698b77 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
327 327
328static struct backing_dev_info dlmfs_backing_dev_info = { 328static struct backing_dev_info dlmfs_backing_dev_info = {
329 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
330 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
331}; 331};
332 332
333static struct inode *dlmfs_get_root_inode(struct super_block *sb) 333static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9154c82d3258..57e0d30cde98 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1048 mlog_entry("(0x%p, '%.*s')\n", dentry, 1048 mlog_entry("(0x%p, '%.*s')\n", dentry,
1049 dentry->d_name.len, dentry->d_name.name); 1049 dentry->d_name.len, dentry->d_name.name);
1050 1050
1051 /* ensuring we don't even attempt to truncate a symlink */
1052 if (S_ISLNK(inode->i_mode))
1053 attr->ia_valid &= ~ATTR_SIZE;
1054
1051 if (attr->ia_valid & ATTR_MODE) 1055 if (attr->ia_valid & ATTR_MODE)
1052 mlog(0, "mode change: %d\n", attr->ia_mode); 1056 mlog(0, "mode change: %d\n", attr->ia_mode);
1053 if (attr->ia_valid & ATTR_UID) 1057 if (attr->ia_valid & ATTR_UID)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ce0dc147602a..be774bdc8b36 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
260 bh = osb->local_alloc_bh; 260 bh = osb->local_alloc_bh;
261 alloc = (struct ocfs2_dinode *) bh->b_data; 261 alloc = (struct ocfs2_dinode *) bh->b_data;
262 262
263 alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); 263 alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
264 if (!alloc_copy) { 264 if (!alloc_copy) {
265 status = -ENOMEM; 265 status = -ENOMEM;
266 goto out_commit; 266 goto out_commit;
@@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
931 * local alloc shutdown won't try to double free main bitmap 931 * local alloc shutdown won't try to double free main bitmap
932 * bits. Make a copy so the sync function knows which bits to 932 * bits. Make a copy so the sync function knows which bits to
933 * free. */ 933 * free. */
934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); 934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
935 if (!alloc_copy) { 935 if (!alloc_copy) {
936 status = -ENOMEM; 936 status = -ENOMEM;
937 mlog_errno(status); 937 mlog_errno(status);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index ac1d74c63bf5..bbd1667aa7d3 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node)
385 return 0; 385 return 0;
386} 386}
387 387
388struct ocfs2_stack_operations o2cb_stack_ops = { 388static struct ocfs2_stack_operations o2cb_stack_ops = {
389 .connect = o2cb_cluster_connect, 389 .connect = o2cb_cluster_connect,
390 .disconnect = o2cb_cluster_disconnect, 390 .disconnect = o2cb_cluster_disconnect,
391 .hangup = o2cb_cluster_hangup, 391 .hangup = o2cb_cluster_hangup,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 7428663f9cbb..b503772cd0ec 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = {
635 .owner = THIS_MODULE, 635 .owner = THIS_MODULE,
636}; 636};
637 637
638struct miscdevice ocfs2_control_device = { 638static struct miscdevice ocfs2_control_device = {
639 .minor = MISC_DYNAMIC_MINOR, 639 .minor = MISC_DYNAMIC_MINOR,
640 .name = "ocfs2_control", 640 .name = "ocfs2_control",
641 .fops = &ocfs2_control_fops, 641 .fops = &ocfs2_control_fops,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 7134007ba22f..ba9dbb51d25b 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
167 .readlink = page_readlink, 167 .readlink = page_readlink,
168 .follow_link = ocfs2_follow_link, 168 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 169 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr,
170}; 171};
171const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
172 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
173 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
174 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr,
175}; 177};
diff --git a/fs/open.c b/fs/open.c
index b70e7666bb2c..a1450086e92f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h>
10#include <linux/quotaops.h> 11#include <linux/quotaops.h>
11#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
12#include <linux/module.h> 13#include <linux/module.h>
@@ -837,7 +838,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
837 if (f->f_flags & O_DIRECT) { 838 if (f->f_flags & O_DIRECT) {
838 if (!f->f_mapping->a_ops || 839 if (!f->f_mapping->a_ops ||
839 ((!f->f_mapping->a_ops->direct_IO) && 840 ((!f->f_mapping->a_ops->direct_IO) &&
840 (!f->f_mapping->a_ops->get_xip_page))) { 841 (!f->f_mapping->a_ops->get_xip_mem))) {
841 fput(f); 842 fput(f);
842 f = ERR_PTR(-EINVAL); 843 f = ERR_PTR(-EINVAL);
843 } 844 }
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e3473..0fdda2e8a4cc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
41#ifndef CONFIG_LDM_DEBUG 41#ifndef CONFIG_LDM_DEBUG
42#define ldm_debug(...) do {} while (0) 42#define ldm_debug(...) do {} while (0)
43#else 43#else
44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) 44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
45#endif 45#endif
46 46
47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) 47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) 48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) 49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
50 50
51__attribute__ ((format (printf, 3, 4))) 51__attribute__ ((format (printf, 3, 4)))
52static void _ldm_printk (const char *level, const char *function, 52static void _ldm_printk (const char *level, const char *function,
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 5567ec0d03a3..796511886f28 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -18,7 +18,7 @@
18 * 18 *
19 * Re-organised Feb 1998 Russell King 19 * Re-organised Feb 1998 Russell King
20 */ 20 */
21 21#include <linux/msdos_fs.h>
22 22
23#include "check.h" 23#include "check.h"
24#include "msdos.h" 24#include "msdos.h"
@@ -419,6 +419,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
419 Sector sect; 419 Sector sect;
420 unsigned char *data; 420 unsigned char *data;
421 struct partition *p; 421 struct partition *p;
422 struct fat_boot_sector *fb;
422 int slot; 423 int slot;
423 424
424 data = read_dev_sector(bdev, 0, &sect); 425 data = read_dev_sector(bdev, 0, &sect);
@@ -444,8 +445,21 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
444 p = (struct partition *) (data + 0x1be); 445 p = (struct partition *) (data + 0x1be);
445 for (slot = 1; slot <= 4; slot++, p++) { 446 for (slot = 1; slot <= 4; slot++, p++) {
446 if (p->boot_ind != 0 && p->boot_ind != 0x80) { 447 if (p->boot_ind != 0 && p->boot_ind != 0x80) {
447 put_dev_sector(sect); 448 /*
448 return 0; 449 * Even without a valid boot inidicator value
450 * its still possible this is valid FAT filesystem
451 * without a partition table.
452 */
453 fb = (struct fat_boot_sector *) data;
454 if (slot == 1 && fb->reserved && fb->fats
455 && fat_valid_media(fb->media)) {
456 printk("\n");
457 put_dev_sector(sect);
458 return 1;
459 } else {
460 put_dev_sector(sect);
461 return 0;
462 }
449 } 463 }
450 } 464 }
451 465
diff --git a/fs/pipe.c b/fs/pipe.c
index f73492b6817e..3499f9ff6316 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1076,6 +1076,23 @@ int do_pipe(int *fd)
1076} 1076}
1077 1077
1078/* 1078/*
1079 * sys_pipe() is the normal C calling standard for creating
1080 * a pipe. It's not the way Unix traditionally does this, though.
1081 */
1082asmlinkage long __weak sys_pipe(int __user *fildes)
1083{
1084 int fd[2];
1085 int error;
1086
1087 error = do_pipe(fd);
1088 if (!error) {
1089 if (copy_to_user(fildes, fd, sizeof(fd)))
1090 error = -EFAULT;
1091 }
1092 return error;
1093}
1094
1095/*
1079 * pipefs should _never_ be mounted by userland - too much of security hassle, 1096 * pipefs should _never_ be mounted by userland - too much of security hassle,
1080 * no real gain from having the whole whorehouse mounted. So we don't need 1097 * no real gain from having the whole whorehouse mounted. So we don't need
1081 * any operations on the root directory. However, we need a non-trivial 1098 * any operations on the root directory. However, we need a non-trivial
diff --git a/fs/pnode.c b/fs/pnode.c
index f968e35d9785..8d5f392ec3d3 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -28,6 +28,57 @@ static inline struct vfsmount *next_slave(struct vfsmount *p)
28 return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); 28 return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
29} 29}
30 30
31/*
32 * Return true if path is reachable from root
33 *
34 * namespace_sem is held, and mnt is attached
35 */
36static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry,
37 const struct path *root)
38{
39 while (mnt != root->mnt && mnt->mnt_parent != mnt) {
40 dentry = mnt->mnt_mountpoint;
41 mnt = mnt->mnt_parent;
42 }
43 return mnt == root->mnt && is_subdir(dentry, root->dentry);
44}
45
46static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
47 struct mnt_namespace *ns,
48 const struct path *root)
49{
50 struct vfsmount *m = mnt;
51
52 do {
53 /* Check the namespace first for optimization */
54 if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root))
55 return m;
56
57 m = next_peer(m);
58 } while (m != mnt);
59
60 return NULL;
61}
62
63/*
64 * Get ID of closest dominating peer group having a representative
65 * under the given root.
66 *
67 * Caller must hold namespace_sem
68 */
69int get_dominating_id(struct vfsmount *mnt, const struct path *root)
70{
71 struct vfsmount *m;
72
73 for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
74 struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root);
75 if (d)
76 return d->mnt_group_id;
77 }
78
79 return 0;
80}
81
31static int do_make_slave(struct vfsmount *mnt) 82static int do_make_slave(struct vfsmount *mnt)
32{ 83{
33 struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; 84 struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master;
@@ -46,7 +97,11 @@ static int do_make_slave(struct vfsmount *mnt)
46 if (peer_mnt == mnt) 97 if (peer_mnt == mnt)
47 peer_mnt = NULL; 98 peer_mnt = NULL;
48 } 99 }
100 if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share))
101 mnt_release_group_id(mnt);
102
49 list_del_init(&mnt->mnt_share); 103 list_del_init(&mnt->mnt_share);
104 mnt->mnt_group_id = 0;
50 105
51 if (peer_mnt) 106 if (peer_mnt)
52 master = peer_mnt; 107 master = peer_mnt;
@@ -68,7 +123,6 @@ static int do_make_slave(struct vfsmount *mnt)
68 } 123 }
69 mnt->mnt_master = master; 124 mnt->mnt_master = master;
70 CLEAR_MNT_SHARED(mnt); 125 CLEAR_MNT_SHARED(mnt);
71 INIT_LIST_HEAD(&mnt->mnt_slave_list);
72 return 0; 126 return 0;
73} 127}
74 128
diff --git a/fs/pnode.h b/fs/pnode.h
index 973c3f825e7d..958665d662af 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -36,4 +36,5 @@ int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
36int propagate_umount(struct list_head *); 36int propagate_umount(struct list_head *);
37int propagate_mount_busy(struct vfsmount *, int); 37int propagate_mount_busy(struct vfsmount *, int);
38void mnt_release_group_id(struct vfsmount *); 38void mnt_release_group_id(struct vfsmount *);
39int get_dominating_id(struct vfsmount *mnt, const struct path *root);
39#endif /* _LINUX_PNODE_H */ 40#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe8..dca997a93bff 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
73#include <linux/signal.h> 73#include <linux/signal.h>
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/file.h> 75#include <linux/file.h>
76#include <linux/fdtable.h>
76#include <linux/times.h> 77#include <linux/times.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
@@ -425,12 +426,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
425 cutime = cstime = utime = stime = cputime_zero; 426 cutime = cstime = utime = stime = cputime_zero;
426 cgtime = gtime = cputime_zero; 427 cgtime = gtime = cputime_zero;
427 428
428 rcu_read_lock();
429 if (lock_task_sighand(task, &flags)) { 429 if (lock_task_sighand(task, &flags)) {
430 struct signal_struct *sig = task->signal; 430 struct signal_struct *sig = task->signal;
431 431
432 if (sig->tty) { 432 if (sig->tty) {
433 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); 433 struct pid *pgrp = tty_get_pgrp(sig->tty);
434 tty_pgrp = pid_nr_ns(pgrp, ns);
435 put_pid(pgrp);
434 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 436 tty_nr = new_encode_dev(tty_devnum(sig->tty));
435 } 437 }
436 438
@@ -469,7 +471,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
469 471
470 unlock_task_sighand(task, &flags); 472 unlock_task_sighand(task, &flags);
471 } 473 }
472 rcu_read_unlock();
473 474
474 if (!whole || num_threads < 2) 475 if (!whole || num_threads < 2)
475 wchan = get_wchan(task); 476 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7313c62e3e9d..808cbdc193d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/capability.h> 57#include <linux/capability.h>
58#include <linux/file.h> 58#include <linux/file.h>
59#include <linux/fdtable.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
61#include <linux/namei.h> 62#include <linux/namei.h>
@@ -195,12 +196,32 @@ static int proc_root_link(struct inode *inode, struct path *path)
195 return result; 196 return result;
196} 197}
197 198
198#define MAY_PTRACE(task) \ 199/*
199 (task == current || \ 200 * Return zero if current may access user memory in @task, -error if not.
200 (task->parent == current && \ 201 */
201 (task->ptrace & PT_PTRACED) && \ 202static int check_mem_permission(struct task_struct *task)
202 (task_is_stopped_or_traced(task)) && \ 203{
203 security_ptrace(current,task) == 0)) 204 /*
205 * A task can always look at itself, in case it chooses
206 * to use system calls instead of load instructions.
207 */
208 if (task == current)
209 return 0;
210
211 /*
212 * If current is actively ptrace'ing, and would also be
213 * permitted to freshly attach with ptrace now, permit it.
214 */
215 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
216 task_is_stopped_or_traced(task) &&
217 ptrace_may_attach(task))
218 return 0;
219
220 /*
221 * No one else is allowed.
222 */
223 return -EPERM;
224}
204 225
205struct mm_struct *mm_for_maps(struct task_struct *task) 226struct mm_struct *mm_for_maps(struct task_struct *task)
206{ 227{
@@ -502,17 +523,14 @@ static const struct inode_operations proc_def_inode_operations = {
502 .setattr = proc_setattr, 523 .setattr = proc_setattr,
503}; 524};
504 525
505extern const struct seq_operations mounts_op; 526static int mounts_open_common(struct inode *inode, struct file *file,
506struct proc_mounts { 527 const struct seq_operations *op)
507 struct seq_file m;
508 int event;
509};
510
511static int mounts_open(struct inode *inode, struct file *file)
512{ 528{
513 struct task_struct *task = get_proc_task(inode); 529 struct task_struct *task = get_proc_task(inode);
514 struct nsproxy *nsp; 530 struct nsproxy *nsp;
515 struct mnt_namespace *ns = NULL; 531 struct mnt_namespace *ns = NULL;
532 struct fs_struct *fs = NULL;
533 struct path root;
516 struct proc_mounts *p; 534 struct proc_mounts *p;
517 int ret = -EINVAL; 535 int ret = -EINVAL;
518 536
@@ -525,40 +543,61 @@ static int mounts_open(struct inode *inode, struct file *file)
525 get_mnt_ns(ns); 543 get_mnt_ns(ns);
526 } 544 }
527 rcu_read_unlock(); 545 rcu_read_unlock();
528 546 if (ns)
547 fs = get_fs_struct(task);
529 put_task_struct(task); 548 put_task_struct(task);
530 } 549 }
531 550
532 if (ns) { 551 if (!ns)
533 ret = -ENOMEM; 552 goto err;
534 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 553 if (!fs)
535 if (p) { 554 goto err_put_ns;
536 file->private_data = &p->m; 555
537 ret = seq_open(file, &mounts_op); 556 read_lock(&fs->lock);
538 if (!ret) { 557 root = fs->root;
539 p->m.private = ns; 558 path_get(&root);
540 p->event = ns->event; 559 read_unlock(&fs->lock);
541 return 0; 560 put_fs_struct(fs);
542 } 561
543 kfree(p); 562 ret = -ENOMEM;
544 } 563 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
545 put_mnt_ns(ns); 564 if (!p)
546 } 565 goto err_put_path;
566
567 file->private_data = &p->m;
568 ret = seq_open(file, op);
569 if (ret)
570 goto err_free;
571
572 p->m.private = p;
573 p->ns = ns;
574 p->root = root;
575 p->event = ns->event;
576
577 return 0;
578
579 err_free:
580 kfree(p);
581 err_put_path:
582 path_put(&root);
583 err_put_ns:
584 put_mnt_ns(ns);
585 err:
547 return ret; 586 return ret;
548} 587}
549 588
550static int mounts_release(struct inode *inode, struct file *file) 589static int mounts_release(struct inode *inode, struct file *file)
551{ 590{
552 struct seq_file *m = file->private_data; 591 struct proc_mounts *p = file->private_data;
553 struct mnt_namespace *ns = m->private; 592 path_put(&p->root);
554 put_mnt_ns(ns); 593 put_mnt_ns(p->ns);
555 return seq_release(inode, file); 594 return seq_release(inode, file);
556} 595}
557 596
558static unsigned mounts_poll(struct file *file, poll_table *wait) 597static unsigned mounts_poll(struct file *file, poll_table *wait)
559{ 598{
560 struct proc_mounts *p = file->private_data; 599 struct proc_mounts *p = file->private_data;
561 struct mnt_namespace *ns = p->m.private; 600 struct mnt_namespace *ns = p->ns;
562 unsigned res = 0; 601 unsigned res = 0;
563 602
564 poll_wait(file, &ns->poll, wait); 603 poll_wait(file, &ns->poll, wait);
@@ -573,6 +612,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
573 return res; 612 return res;
574} 613}
575 614
615static int mounts_open(struct inode *inode, struct file *file)
616{
617 return mounts_open_common(inode, file, &mounts_op);
618}
619
576static const struct file_operations proc_mounts_operations = { 620static const struct file_operations proc_mounts_operations = {
577 .open = mounts_open, 621 .open = mounts_open,
578 .read = seq_read, 622 .read = seq_read,
@@ -581,38 +625,22 @@ static const struct file_operations proc_mounts_operations = {
581 .poll = mounts_poll, 625 .poll = mounts_poll,
582}; 626};
583 627
584extern const struct seq_operations mountstats_op; 628static int mountinfo_open(struct inode *inode, struct file *file)
585static int mountstats_open(struct inode *inode, struct file *file)
586{ 629{
587 int ret = seq_open(file, &mountstats_op); 630 return mounts_open_common(inode, file, &mountinfo_op);
588 631}
589 if (!ret) {
590 struct seq_file *m = file->private_data;
591 struct nsproxy *nsp;
592 struct mnt_namespace *mnt_ns = NULL;
593 struct task_struct *task = get_proc_task(inode);
594
595 if (task) {
596 rcu_read_lock();
597 nsp = task_nsproxy(task);
598 if (nsp) {
599 mnt_ns = nsp->mnt_ns;
600 if (mnt_ns)
601 get_mnt_ns(mnt_ns);
602 }
603 rcu_read_unlock();
604 632
605 put_task_struct(task); 633static const struct file_operations proc_mountinfo_operations = {
606 } 634 .open = mountinfo_open,
635 .read = seq_read,
636 .llseek = seq_lseek,
637 .release = mounts_release,
638 .poll = mounts_poll,
639};
607 640
608 if (mnt_ns) 641static int mountstats_open(struct inode *inode, struct file *file)
609 m->private = mnt_ns; 642{
610 else { 643 return mounts_open_common(inode, file, &mountstats_op);
611 seq_release(inode, file);
612 ret = -EINVAL;
613 }
614 }
615 return ret;
616} 644}
617 645
618static const struct file_operations proc_mountstats_operations = { 646static const struct file_operations proc_mountstats_operations = {
@@ -715,7 +743,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
715 if (!task) 743 if (!task)
716 goto out_no_task; 744 goto out_no_task;
717 745
718 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 746 if (check_mem_permission(task))
719 goto out; 747 goto out;
720 748
721 ret = -ENOMEM; 749 ret = -ENOMEM;
@@ -741,7 +769,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
741 769
742 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 770 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
743 retval = access_process_vm(task, src, page, this_len, 0); 771 retval = access_process_vm(task, src, page, this_len, 0);
744 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 772 if (!retval || check_mem_permission(task)) {
745 if (!ret) 773 if (!ret)
746 ret = -EIO; 774 ret = -EIO;
747 break; 775 break;
@@ -785,7 +813,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
785 if (!task) 813 if (!task)
786 goto out_no_task; 814 goto out_no_task;
787 815
788 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 816 if (check_mem_permission(task))
789 goto out; 817 goto out;
790 818
791 copied = -ENOMEM; 819 copied = -ENOMEM;
@@ -1174,6 +1202,81 @@ static const struct file_operations proc_pid_sched_operations = {
1174 1202
1175#endif 1203#endif
1176 1204
1205/*
1206 * We added or removed a vma mapping the executable. The vmas are only mapped
1207 * during exec and are not mapped with the mmap system call.
1208 * Callers must hold down_write() on the mm's mmap_sem for these
1209 */
1210void added_exe_file_vma(struct mm_struct *mm)
1211{
1212 mm->num_exe_file_vmas++;
1213}
1214
1215void removed_exe_file_vma(struct mm_struct *mm)
1216{
1217 mm->num_exe_file_vmas--;
1218 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1219 fput(mm->exe_file);
1220 mm->exe_file = NULL;
1221 }
1222
1223}
1224
1225void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1226{
1227 if (new_exe_file)
1228 get_file(new_exe_file);
1229 if (mm->exe_file)
1230 fput(mm->exe_file);
1231 mm->exe_file = new_exe_file;
1232 mm->num_exe_file_vmas = 0;
1233}
1234
1235struct file *get_mm_exe_file(struct mm_struct *mm)
1236{
1237 struct file *exe_file;
1238
1239 /* We need mmap_sem to protect against races with removal of
1240 * VM_EXECUTABLE vmas */
1241 down_read(&mm->mmap_sem);
1242 exe_file = mm->exe_file;
1243 if (exe_file)
1244 get_file(exe_file);
1245 up_read(&mm->mmap_sem);
1246 return exe_file;
1247}
1248
1249void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1250{
1251 /* It's safe to write the exe_file pointer without exe_file_lock because
1252 * this is called during fork when the task is not yet in /proc */
1253 newmm->exe_file = get_mm_exe_file(oldmm);
1254}
1255
1256static int proc_exe_link(struct inode *inode, struct path *exe_path)
1257{
1258 struct task_struct *task;
1259 struct mm_struct *mm;
1260 struct file *exe_file;
1261
1262 task = get_proc_task(inode);
1263 if (!task)
1264 return -ENOENT;
1265 mm = get_task_mm(task);
1266 put_task_struct(task);
1267 if (!mm)
1268 return -ENOENT;
1269 exe_file = get_mm_exe_file(mm);
1270 mmput(mm);
1271 if (exe_file) {
1272 *exe_path = exe_file->f_path;
1273 path_get(&exe_file->f_path);
1274 fput(exe_file);
1275 return 0;
1276 } else
1277 return -ENOENT;
1278}
1279
1177static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1280static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1178{ 1281{
1179 struct inode *inode = dentry->d_inode; 1282 struct inode *inode = dentry->d_inode;
@@ -2309,6 +2412,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2309 LNK("root", root), 2412 LNK("root", root),
2310 LNK("exe", exe), 2413 LNK("exe", exe),
2311 REG("mounts", S_IRUGO, mounts), 2414 REG("mounts", S_IRUGO, mounts),
2415 REG("mountinfo", S_IRUGO, mountinfo),
2312 REG("mountstats", S_IRUSR, mountstats), 2416 REG("mountstats", S_IRUSR, mountstats),
2313#ifdef CONFIG_PROC_PAGE_MONITOR 2417#ifdef CONFIG_PROC_PAGE_MONITOR
2314 REG("clear_refs", S_IWUSR, clear_refs), 2418 REG("clear_refs", S_IWUSR, clear_refs),
@@ -2641,6 +2745,7 @@ static const struct pid_entry tid_base_stuff[] = {
2641 LNK("root", root), 2745 LNK("root", root),
2642 LNK("exe", exe), 2746 LNK("exe", exe),
2643 REG("mounts", S_IRUGO, mounts), 2747 REG("mounts", S_IRUGO, mounts),
2748 REG("mountinfo", S_IRUGO, mountinfo),
2644#ifdef CONFIG_PROC_PAGE_MONITOR 2749#ifdef CONFIG_PROC_PAGE_MONITOR
2645 REG("clear_refs", S_IWUSR, clear_refs), 2750 REG("clear_refs", S_IWUSR, clear_refs),
2646 REG("smaps", S_IRUGO, smaps), 2751 REG("smaps", S_IRUGO, smaps),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a36ad3c75cf4..43e54e86cefd 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
70 70
71 start = NULL; 71 start = NULL;
72 if (dp->get_info) { 72 if (dp->read_proc) {
73 /* Handle old net routines */
74 n = dp->get_info(page, &start, *ppos, count);
75 if (n < count)
76 eof = 1;
77 } else if (dp->read_proc) {
78 /* 73 /*
79 * How to be a proc read function 74 * How to be a proc read function
80 * ------------------------------ 75 * ------------------------------
@@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name,
277 int len; 272 int len;
278 int rtn = 0; 273 int rtn = 0;
279 274
275 de = *ret;
276 if (!de)
277 de = &proc_root;
278
280 spin_lock(&proc_subdir_lock); 279 spin_lock(&proc_subdir_lock);
281 de = &proc_root;
282 while (1) { 280 while (1) {
283 next = strchr(cp, '/'); 281 next = strchr(cp, '/');
284 if (!next) 282 if (!next)
@@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
385 383
386 lock_kernel(); 384 lock_kernel();
387 spin_lock(&proc_subdir_lock); 385 spin_lock(&proc_subdir_lock);
388 if (de) { 386 for (de = de->subdir; de ; de = de->next) {
389 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len)
390 if (de->namelen != dentry->d_name.len) 388 continue;
391 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino;
393 unsigned int ino;
394 391
395 ino = de->low_ino; 392 ino = de->low_ino;
396 de_get(de); 393 de_get(de);
397 spin_unlock(&proc_subdir_lock); 394 spin_unlock(&proc_subdir_lock);
398 error = -EINVAL; 395 error = -EINVAL;
399 inode = proc_get_inode(dir->i_sb, ino, de); 396 inode = proc_get_inode(dir->i_sb, ino, de);
400 goto out_unlock; 397 goto out_unlock;
401 }
402 } 398 }
403 } 399 }
404 spin_unlock(&proc_subdir_lock); 400 spin_unlock(&proc_subdir_lock);
@@ -410,7 +406,8 @@ out_unlock:
410 d_add(dentry, inode); 406 d_add(dentry, inode);
411 return NULL; 407 return NULL;
412 } 408 }
413 de_put(de); 409 if (de)
410 de_put(de);
414 return ERR_PTR(error); 411 return ERR_PTR(error);
415} 412}
416 413
@@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
440 lock_kernel(); 437 lock_kernel();
441 438
442 ino = inode->i_ino; 439 ino = inode->i_ino;
443 if (!de) {
444 ret = -EINVAL;
445 goto out;
446 }
447 i = filp->f_pos; 440 i = filp->f_pos;
448 switch (i) { 441 switch (i) {
449 case 0: 442 case 0:
@@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
582 /* make sure name is valid */ 575 /* make sure name is valid */
583 if (!name || !strlen(name)) goto out; 576 if (!name || !strlen(name)) goto out;
584 577
585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 if (xlate_proc_name(name, parent, &fn) != 0)
586 goto out; 579 goto out;
587 580
588 /* At this point there must not be any '/' characters beyond *fn */ 581 /* At this point there must not be any '/' characters beyond *fn */
@@ -648,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
648 return ent; 641 return ent;
649} 642}
650 643
644struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
645 struct proc_dir_entry *parent)
646{
647 struct proc_dir_entry *ent;
648
649 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
650 if (ent) {
651 ent->data = net;
652 if (proc_register(parent, ent) < 0) {
653 kfree(ent);
654 ent = NULL;
655 }
656 }
657 return ent;
658}
659EXPORT_SYMBOL_GPL(proc_net_mkdir);
660
651struct proc_dir_entry *proc_mkdir(const char *name, 661struct proc_dir_entry *proc_mkdir(const char *name,
652 struct proc_dir_entry *parent) 662 struct proc_dir_entry *parent)
653{ 663{
@@ -682,9 +692,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
682 return ent; 692 return ent;
683} 693}
684 694
685struct proc_dir_entry *proc_create(const char *name, mode_t mode, 695struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
686 struct proc_dir_entry *parent, 696 struct proc_dir_entry *parent,
687 const struct file_operations *proc_fops) 697 const struct file_operations *proc_fops,
698 void *data)
688{ 699{
689 struct proc_dir_entry *pde; 700 struct proc_dir_entry *pde;
690 nlink_t nlink; 701 nlink_t nlink;
@@ -705,6 +716,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode,
705 if (!pde) 716 if (!pde)
706 goto out; 717 goto out;
707 pde->proc_fops = proc_fops; 718 pde->proc_fops = proc_fops;
719 pde->data = data;
708 if (proc_register(parent, pde) < 0) 720 if (proc_register(parent, pde) < 0)
709 goto out_free; 721 goto out_free;
710 return pde; 722 return pde;
@@ -734,55 +746,58 @@ void free_proc_entry(struct proc_dir_entry *de)
734void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 746void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
735{ 747{
736 struct proc_dir_entry **p; 748 struct proc_dir_entry **p;
737 struct proc_dir_entry *de; 749 struct proc_dir_entry *de = NULL;
738 const char *fn = name; 750 const char *fn = name;
739 int len; 751 int len;
740 752
741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 753 if (xlate_proc_name(name, &parent, &fn) != 0)
742 goto out; 754 return;
743 len = strlen(fn); 755 len = strlen(fn);
744 756
745 spin_lock(&proc_subdir_lock); 757 spin_lock(&proc_subdir_lock);
746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 758 for (p = &parent->subdir; *p; p=&(*p)->next ) {
747 if (!proc_match(len, fn, *p)) 759 if (proc_match(len, fn, *p)) {
748 continue; 760 de = *p;
749 de = *p; 761 *p = de->next;
750 *p = de->next; 762 de->next = NULL;
751 de->next = NULL; 763 break;
752 764 }
753 spin_lock(&de->pde_unload_lock); 765 }
754 /* 766 spin_unlock(&proc_subdir_lock);
755 * Stop accepting new callers into module. If you're 767 if (!de)
756 * dynamically allocating ->proc_fops, save a pointer somewhere. 768 return;
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
765
766 spin_unlock(&de->pde_unload_lock);
767 spin_unlock(&proc_subdir_lock);
768 769
769 wait_for_completion(de->pde_unload_completion); 770 spin_lock(&de->pde_unload_lock);
771 /*
772 * Stop accepting new callers into module. If you're
773 * dynamically allocating ->proc_fops, save a pointer somewhere.
774 */
775 de->proc_fops = NULL;
776 /* Wait until all existing callers into module are done. */
777 if (de->pde_users > 0) {
778 DECLARE_COMPLETION_ONSTACK(c);
779
780 if (!de->pde_unload_completion)
781 de->pde_unload_completion = &c;
770 782
771 spin_lock(&proc_subdir_lock);
772 goto continue_removing;
773 }
774 spin_unlock(&de->pde_unload_lock); 783 spin_unlock(&de->pde_unload_lock);
775 784
785 wait_for_completion(de->pde_unload_completion);
786
787 goto continue_removing;
788 }
789 spin_unlock(&de->pde_unload_lock);
790
776continue_removing: 791continue_removing:
777 if (S_ISDIR(de->mode)) 792 if (S_ISDIR(de->mode))
778 parent->nlink--; 793 parent->nlink--;
779 de->nlink = 0; 794 de->nlink = 0;
780 WARN_ON(de->subdir); 795 if (de->subdir) {
781 if (atomic_dec_and_test(&de->count)) 796 printk(KERN_WARNING "%s: removing non-empty directory "
782 free_proc_entry(de); 797 "'%s/%s', leaking at least '%s'\n", __func__,
783 break; 798 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
784 } 800 }
785 spin_unlock(&proc_subdir_lock); 801 if (atomic_dec_and_test(&de->count))
786out: 802 free_proc_entry(de);
787 return;
788} 803}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 82b3a1b5a70b..6f4e8dc97da1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,8 +25,7 @@
25 25
26struct proc_dir_entry *de_get(struct proc_dir_entry *de) 26struct proc_dir_entry *de_get(struct proc_dir_entry *de)
27{ 27{
28 if (de) 28 atomic_inc(&de->count);
29 atomic_inc(&de->count);
30 return de; 29 return de;
31} 30}
32 31
@@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de)
35 */ 34 */
36void de_put(struct proc_dir_entry *de) 35void de_put(struct proc_dir_entry *de)
37{ 36{
38 if (de) { 37 lock_kernel();
39 lock_kernel(); 38 if (!atomic_read(&de->count)) {
40 if (!atomic_read(&de->count)) { 39 printk("de_put: entry %s already free!\n", de->name);
41 printk("de_put: entry %s already free!\n", de->name);
42 unlock_kernel();
43 return;
44 }
45
46 if (atomic_dec_and_test(&de->count))
47 free_proc_entry(de);
48 unlock_kernel(); 40 unlock_kernel();
41 return;
49 } 42 }
43
44 if (atomic_dec_and_test(&de->count))
45 free_proc_entry(de);
46 unlock_kernel();
50} 47}
51 48
52/* 49/*
@@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
392{ 389{
393 struct inode * inode; 390 struct inode * inode;
394 391
395 if (de != NULL && !try_module_get(de->owner)) 392 if (!try_module_get(de->owner))
396 goto out_mod; 393 goto out_mod;
397 394
398 inode = iget_locked(sb, ino); 395 inode = iget_locked(sb, ino);
@@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
402 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 399 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
403 PROC_I(inode)->fd = 0; 400 PROC_I(inode)->fd = 0;
404 PROC_I(inode)->pde = de; 401 PROC_I(inode)->pde = de;
405 if (de) { 402
406 if (de->mode) { 403 if (de->mode) {
407 inode->i_mode = de->mode; 404 inode->i_mode = de->mode;
408 inode->i_uid = de->uid; 405 inode->i_uid = de->uid;
409 inode->i_gid = de->gid; 406 inode->i_gid = de->gid;
410 } 407 }
411 if (de->size) 408 if (de->size)
412 inode->i_size = de->size; 409 inode->i_size = de->size;
413 if (de->nlink) 410 if (de->nlink)
414 inode->i_nlink = de->nlink; 411 inode->i_nlink = de->nlink;
415 if (de->proc_iops) 412 if (de->proc_iops)
416 inode->i_op = de->proc_iops; 413 inode->i_op = de->proc_iops;
417 if (de->proc_fops) { 414 if (de->proc_fops) {
418 if (S_ISREG(inode->i_mode)) { 415 if (S_ISREG(inode->i_mode)) {
419#ifdef CONFIG_COMPAT 416#ifdef CONFIG_COMPAT
420 if (!de->proc_fops->compat_ioctl) 417 if (!de->proc_fops->compat_ioctl)
421 inode->i_fop = 418 inode->i_fop =
422 &proc_reg_file_ops_no_compat; 419 &proc_reg_file_ops_no_compat;
423 else 420 else
424#endif 421#endif
425 inode->i_fop = &proc_reg_file_ops; 422 inode->i_fop = &proc_reg_file_ops;
426 } else { 423 } else {
427 inode->i_fop = de->proc_fops; 424 inode->i_fop = de->proc_fops;
428 }
429 } 425 }
430 } 426 }
431 unlock_new_inode(inode); 427 unlock_new_inode(inode);
@@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
433 return inode; 429 return inode;
434 430
435out_ino: 431out_ino:
436 if (de != NULL) 432 module_put(de->owner);
437 module_put(de->owner);
438out_mod: 433out_mod:
439 return NULL; 434 return NULL;
440} 435}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index bc72f5c8c47d..28cbca805905 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern struct proc_dir_entry proc_root;
14#ifdef CONFIG_PROC_SYSCTL 15#ifdef CONFIG_PROC_SYSCTL
15extern int proc_sys_init(void); 16extern int proc_sys_init(void);
16#else 17#else
@@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 47
47extern int maps_protect; 48extern int maps_protect;
48 49
49extern void create_seq_entry(char *name, mode_t mode,
50 const struct file_operations *f);
51extern int proc_exe_link(struct inode *, struct path *);
52extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 51 struct pid *pid, struct task_struct *task);
54extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 941e95114b5a..79ecd281d2cb 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 137
138static int __init proc_nommu_init(void) 138static int __init proc_nommu_init(void)
139{ 139{
140 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); 140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
141 return 0; 141 return 0;
142} 142}
143 143
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 2d563979cb02..74a323d2b850 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
179 "PageTables: %8lu kB\n" 179 "PageTables: %8lu kB\n"
180 "NFS_Unstable: %8lu kB\n" 180 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 181 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n"
182 "CommitLimit: %8lu kB\n" 183 "CommitLimit: %8lu kB\n"
183 "Committed_AS: %8lu kB\n" 184 "Committed_AS: %8lu kB\n"
184 "VmallocTotal: %8lu kB\n" 185 "VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
210 K(global_page_state(NR_PAGETABLE)), 211 K(global_page_state(NR_PAGETABLE)),
211 K(global_page_state(NR_UNSTABLE_NFS)), 212 K(global_page_state(NR_UNSTABLE_NFS)),
212 K(global_page_state(NR_BOUNCE)), 213 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)),
213 K(allowed), 215 K(allowed),
214 K(committed), 216 K(committed),
215 (unsigned long)VMALLOC_TOTAL >> 10, 217 (unsigned long)VMALLOC_TOTAL >> 10,
@@ -456,6 +458,20 @@ static const struct file_operations proc_slabstats_operations = {
456#endif 458#endif
457#endif 459#endif
458 460
461#ifdef CONFIG_MMU
462static int vmalloc_open(struct inode *inode, struct file *file)
463{
464 return seq_open(file, &vmalloc_op);
465}
466
467static const struct file_operations proc_vmalloc_operations = {
468 .open = vmalloc_open,
469 .read = seq_read,
470 .llseek = seq_lseek,
471 .release = seq_release,
472};
473#endif
474
459static int show_stat(struct seq_file *p, void *v) 475static int show_stat(struct seq_file *p, void *v)
460{ 476{
461 int i; 477 int i;
@@ -812,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = {
812 828
813struct proc_dir_entry *proc_root_kcore; 829struct proc_dir_entry *proc_root_kcore;
814 830
815void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
816{
817 struct proc_dir_entry *entry;
818 entry = create_proc_entry(name, mode, NULL);
819 if (entry)
820 entry->proc_fops = f;
821}
822
823void __init proc_misc_init(void) 831void __init proc_misc_init(void)
824{ 832{
825 static struct { 833 static struct {
@@ -848,63 +856,52 @@ void __init proc_misc_init(void)
848 856
849 /* And now for trickier ones */ 857 /* And now for trickier ones */
850#ifdef CONFIG_PRINTK 858#ifdef CONFIG_PRINTK
851 { 859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
852 struct proc_dir_entry *entry;
853 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
854 if (entry)
855 entry->proc_fops = &proc_kmsg_operations;
856 }
857#endif 860#endif
858 create_seq_entry("locks", 0, &proc_locks_operations); 861 proc_create("locks", 0, NULL, &proc_locks_operations);
859 create_seq_entry("devices", 0, &proc_devinfo_operations); 862 proc_create("devices", 0, NULL, &proc_devinfo_operations);
860 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
861#ifdef CONFIG_BLOCK 864#ifdef CONFIG_BLOCK
862 create_seq_entry("partitions", 0, &proc_partitions_operations); 865 proc_create("partitions", 0, NULL, &proc_partitions_operations);
863#endif 866#endif
864 create_seq_entry("stat", 0, &proc_stat_operations); 867 proc_create("stat", 0, NULL, &proc_stat_operations);
865 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 868 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
866#ifdef CONFIG_SLABINFO 869#ifdef CONFIG_SLABINFO
867 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 870 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
868#ifdef CONFIG_DEBUG_SLAB_LEAK 871#ifdef CONFIG_DEBUG_SLAB_LEAK
869 create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); 872 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
873#endif
870#endif 874#endif
875#ifdef CONFIG_MMU
876 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
871#endif 877#endif
872 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 878 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
873 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); 879 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
874 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 880 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
875 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 881 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
876#ifdef CONFIG_BLOCK 882#ifdef CONFIG_BLOCK
877 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 883 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
878#endif 884#endif
879#ifdef CONFIG_MODULES 885#ifdef CONFIG_MODULES
880 create_seq_entry("modules", 0, &proc_modules_operations); 886 proc_create("modules", 0, NULL, &proc_modules_operations);
881#endif 887#endif
882#ifdef CONFIG_SCHEDSTATS 888#ifdef CONFIG_SCHEDSTATS
883 create_seq_entry("schedstat", 0, &proc_schedstat_operations); 889 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
884#endif 890#endif
885#ifdef CONFIG_PROC_KCORE 891#ifdef CONFIG_PROC_KCORE
886 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); 892 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
887 if (proc_root_kcore) { 893 if (proc_root_kcore)
888 proc_root_kcore->proc_fops = &proc_kcore_operations;
889 proc_root_kcore->size = 894 proc_root_kcore->size =
890 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 895 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
891 }
892#endif 896#endif
893#ifdef CONFIG_PROC_PAGE_MONITOR 897#ifdef CONFIG_PROC_PAGE_MONITOR
894 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); 898 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
895 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); 899 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
896#endif 900#endif
897#ifdef CONFIG_PROC_VMCORE 901#ifdef CONFIG_PROC_VMCORE
898 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 902 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
899 if (proc_vmcore)
900 proc_vmcore->proc_fops = &proc_vmcore_operations;
901#endif 903#endif
902#ifdef CONFIG_MAGIC_SYSRQ 904#ifdef CONFIG_MAGIC_SYSRQ
903 { 905 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
904 struct proc_dir_entry *entry;
905 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
906 if (entry)
907 entry->proc_fops = &proc_sysrq_trigger_operations;
908 }
909#endif 906#endif
910} 907}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 13cd7835d0df..83f357b30d71 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
159} 159}
160EXPORT_SYMBOL_GPL(get_proc_net); 160EXPORT_SYMBOL_GPL(get_proc_net);
161 161
162struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
163 struct proc_dir_entry *parent)
164{
165 struct proc_dir_entry *pde;
166 pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
167 if (pde != NULL)
168 pde->data = net;
169 return pde;
170}
171EXPORT_SYMBOL_GPL(proc_net_mkdir);
172
173static __net_init int proc_net_ns_init(struct net *net) 162static __net_init int proc_net_ns_init(struct net *net)
174{ 163{
175 struct proc_dir_entry *netd, *net_statd; 164 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b6d1c2..5acc001d49f6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -165,8 +165,8 @@ out:
165 return err; 165 return err;
166} 166}
167 167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf, 168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos) 169 size_t count, loff_t *ppos, int write)
170{ 170{
171 struct dentry *dentry = filp->f_dentry; 171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head; 172 struct ctl_table_header *head;
@@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf,
190 * and won't be until we finish. 190 * and won't be until we finish.
191 */ 191 */
192 error = -EPERM; 192 error = -EPERM;
193 if (sysctl_perm(table, MAY_READ)) 193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 194 goto out;
195 195
196 /* careful: calling conventions are nasty here */ 196 /* careful: calling conventions are nasty here */
197 res = count; 197 res = count;
198 error = table->proc_handler(table, 0, filp, buf, &res, ppos); 198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
199 if (!error) 199 if (!error)
200 error = res; 200 error = res;
201out: 201out:
@@ -204,44 +204,16 @@ out:
204 return error; 204 return error;
205} 205}
206 206
207static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
209{ 209{
210 struct dentry *dentry = filp->f_dentry; 210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211 struct ctl_table_header *head; 211}
212 struct ctl_table *table;
213 ssize_t error;
214 size_t res;
215
216 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
217 /* Has the sysctl entry disappeared on us? */
218 error = -ENOENT;
219 if (!table)
220 goto out;
221
222 /* Has the sysctl entry been replaced by a directory? */
223 error = -EISDIR;
224 if (!table->proc_handler)
225 goto out;
226
227 /*
228 * At this point we know that the sysctl was not unregistered
229 * and won't be until we finish.
230 */
231 error = -EPERM;
232 if (sysctl_perm(table, MAY_WRITE))
233 goto out;
234
235 /* careful: calling conventions are nasty here */
236 res = count;
237 error = table->proc_handler(table, 1, filp, (char __user *)buf,
238 &res, ppos);
239 if (!error)
240 error = res;
241out:
242 sysctl_head_finish(head);
243 212
244 return error; 213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
245} 217}
246 218
247 219
@@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
416 goto out; 388 goto out;
417 389
418 /* Use the permissions on the sysctl table entry */ 390 /* Use the permissions on the sysctl table entry */
419 error = sysctl_perm(table, mask); 391 error = sysctl_perm(head->root, table, mask);
420out: 392out:
421 sysctl_head_finish(head); 393 sysctl_head_finish(head);
422 return error; 394 return error;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 49816e00b51a..21f490f5d65c 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -5,7 +5,7 @@
5 */ 5 */
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8#include <linux/module.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/time.h> 11#include <linux/time.h>
@@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139/* 139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140 * This is the handler for /proc/tty/ldiscs
141 */
142static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
143 int count, int *eof, void *data)
144{ 140{
145 int i; 141 return (*pos < NR_LDISCS) ? pos : NULL;
146 int len = 0; 142}
147 off_t begin = 0; 143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
148 struct tty_ldisc *ld; 157 struct tty_ldisc *ld;
149 158
150 for (i=0; i < NR_LDISCS; i++) { 159 ld = tty_ldisc_get(i);
151 ld = tty_ldisc_get(i); 160 if (ld == NULL)
152 if (ld == NULL)
153 continue;
154 len += sprintf(page+len, "%-10s %2d\n",
155 ld->name ? ld->name : "???", i);
156 tty_ldisc_put(i);
157 if (len+begin > off+count)
158 break;
159 if (len+begin < off) {
160 begin += len;
161 len = 0;
162 }
163 }
164 if (i >= NR_LDISCS)
165 *eof = 1;
166 if (off >= len+begin)
167 return 0; 161 return 0;
168 *start = page + (off-begin); 162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
169 return ((count < begin+len-off) ? count : begin+len-off); 163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
170} 177}
171 178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
172/* 187/*
173 * This function is called by tty_register_driver() to handle 188 * This function is called by tty_register_driver() to handle
174 * registering the driver's /proc handler into /proc/tty/driver/<foo> 189 * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
177{ 192{
178 struct proc_dir_entry *ent; 193 struct proc_dir_entry *ent;
179 194
180 if ((!driver->read_proc && !driver->write_proc) || 195 if (!driver->ops->read_proc || !driver->driver_name ||
181 !driver->driver_name ||
182 driver->proc_entry) 196 driver->proc_entry)
183 return; 197 return;
184 198
185 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 199 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
186 if (!ent) 200 if (!ent)
187 return; 201 return;
188 ent->read_proc = driver->read_proc; 202 ent->read_proc = driver->ops->read_proc;
189 ent->write_proc = driver->write_proc;
190 ent->owner = driver->owner; 203 ent->owner = driver->owner;
191 ent->data = driver; 204 ent->data = driver;
192 205
@@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
214 */ 227 */
215void __init proc_tty_init(void) 228void __init proc_tty_init(void)
216{ 229{
217 struct proc_dir_entry *entry;
218 if (!proc_mkdir("tty", NULL)) 230 if (!proc_mkdir("tty", NULL))
219 return; 231 return;
220 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 232 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
@@ -224,10 +236,7 @@ void __init proc_tty_init(void)
224 * password lengths and inter-keystroke timings during password 236 * password lengths and inter-keystroke timings during password
225 * entry. 237 * entry.
226 */ 238 */
227 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); 239 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
228 240 proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
229 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); 241 proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
230 entry = create_proc_entry("tty/drivers", 0, NULL);
231 if (entry)
232 entry->proc_fops = &proc_tty_drivers_operations;
233} 242}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57fc9ef..95117538a4f6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -22,8 +22,6 @@
22 22
23#include "internal.h" 23#include "internal.h"
24 24
25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
26
27static int proc_test_super(struct super_block *sb, void *data) 25static int proc_test_super(struct super_block *sb, void *data)
28{ 26{
29 return sb->s_fs_info == data; 27 return sb->s_fs_info == data;
@@ -126,8 +124,8 @@ void __init proc_root_init(void)
126#ifdef CONFIG_SYSVIPC 124#ifdef CONFIG_SYSVIPC
127 proc_mkdir("sysvipc", NULL); 125 proc_mkdir("sysvipc", NULL);
128#endif 126#endif
129 proc_root_fs = proc_mkdir("fs", NULL); 127 proc_mkdir("fs", NULL);
130 proc_root_driver = proc_mkdir("driver", NULL); 128 proc_mkdir("driver", NULL);
131 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 129 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
132#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) 130#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
133 /* just give it a mountpoint */ 131 /* just give it a mountpoint */
@@ -137,7 +135,7 @@ void __init proc_root_init(void)
137#ifdef CONFIG_PROC_DEVICETREE 135#ifdef CONFIG_PROC_DEVICETREE
138 proc_device_tree_init(); 136 proc_device_tree_init();
139#endif 137#endif
140 proc_bus = proc_mkdir("bus", NULL); 138 proc_mkdir("bus", NULL);
141 proc_sys_init(); 139 proc_sys_init();
142} 140}
143 141
@@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns)
232EXPORT_SYMBOL(proc_symlink); 230EXPORT_SYMBOL(proc_symlink);
233EXPORT_SYMBOL(proc_mkdir); 231EXPORT_SYMBOL(proc_mkdir);
234EXPORT_SYMBOL(create_proc_entry); 232EXPORT_SYMBOL(create_proc_entry);
235EXPORT_SYMBOL(proc_create); 233EXPORT_SYMBOL(proc_create_data);
236EXPORT_SYMBOL(remove_proc_entry); 234EXPORT_SYMBOL(remove_proc_entry);
237EXPORT_SYMBOL(proc_root);
238EXPORT_SYMBOL(proc_root_fs);
239EXPORT_SYMBOL(proc_bus);
240EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9dfb5ff24209..e2b8e769f510 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
75 return mm->total_vm; 75 return mm->total_vm;
76} 76}
77 77
78int proc_exe_link(struct inode *inode, struct path *path)
79{
80 struct vm_area_struct * vma;
81 int result = -ENOENT;
82 struct task_struct *task = get_proc_task(inode);
83 struct mm_struct * mm = NULL;
84
85 if (task) {
86 mm = get_task_mm(task);
87 put_task_struct(task);
88 }
89 if (!mm)
90 goto out;
91 down_read(&mm->mmap_sem);
92
93 vma = mm->mmap;
94 while (vma) {
95 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
96 break;
97 vma = vma->vm_next;
98 }
99
100 if (vma) {
101 *path = vma->vm_file->f_path;
102 path_get(&vma->vm_file->f_path);
103 result = 0;
104 }
105
106 up_read(&mm->mmap_sem);
107 mmput(mm);
108out:
109 return result;
110}
111
112static void pad_len_spaces(struct seq_file *m, int len) 78static void pad_len_spaces(struct seq_file *m, int len)
113{ 79{
114 len = 25 + sizeof(void*) * 6 - len; 80 len = 25 + sizeof(void*) * 6 - len;
@@ -338,8 +304,7 @@ const struct file_operations proc_maps_operations = {
338#define PSS_SHIFT 12 304#define PSS_SHIFT 12
339 305
340#ifdef CONFIG_PROC_PAGE_MONITOR 306#ifdef CONFIG_PROC_PAGE_MONITOR
341struct mem_size_stats 307struct mem_size_stats {
342{
343 struct vm_area_struct *vma; 308 struct vm_area_struct *vma;
344 unsigned long resident; 309 unsigned long resident;
345 unsigned long shared_clean; 310 unsigned long shared_clean;
@@ -347,6 +312,7 @@ struct mem_size_stats
347 unsigned long private_clean; 312 unsigned long private_clean;
348 unsigned long private_dirty; 313 unsigned long private_dirty;
349 unsigned long referenced; 314 unsigned long referenced;
315 unsigned long swap;
350 u64 pss; 316 u64 pss;
351}; 317};
352 318
@@ -363,6 +329,12 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
363 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 329 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
364 for (; addr != end; pte++, addr += PAGE_SIZE) { 330 for (; addr != end; pte++, addr += PAGE_SIZE) {
365 ptent = *pte; 331 ptent = *pte;
332
333 if (is_swap_pte(ptent)) {
334 mss->swap += PAGE_SIZE;
335 continue;
336 }
337
366 if (!pte_present(ptent)) 338 if (!pte_present(ptent))
367 continue; 339 continue;
368 340
@@ -421,7 +393,8 @@ static int show_smap(struct seq_file *m, void *v)
421 "Shared_Dirty: %8lu kB\n" 393 "Shared_Dirty: %8lu kB\n"
422 "Private_Clean: %8lu kB\n" 394 "Private_Clean: %8lu kB\n"
423 "Private_Dirty: %8lu kB\n" 395 "Private_Dirty: %8lu kB\n"
424 "Referenced: %8lu kB\n", 396 "Referenced: %8lu kB\n"
397 "Swap: %8lu kB\n",
425 (vma->vm_end - vma->vm_start) >> 10, 398 (vma->vm_end - vma->vm_start) >> 10,
426 mss.resident >> 10, 399 mss.resident >> 10,
427 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 400 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -429,7 +402,8 @@ static int show_smap(struct seq_file *m, void *v)
429 mss.shared_dirty >> 10, 402 mss.shared_dirty >> 10,
430 mss.private_clean >> 10, 403 mss.private_clean >> 10,
431 mss.private_dirty >> 10, 404 mss.private_dirty >> 10,
432 mss.referenced >> 10); 405 mss.referenced >> 10,
406 mss.swap >> 10);
433 407
434 return ret; 408 return ret;
435} 409}
@@ -579,7 +553,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
579 return err; 553 return err;
580} 554}
581 555
582u64 swap_pte_to_pagemap_entry(pte_t pte) 556static u64 swap_pte_to_pagemap_entry(pte_t pte)
583{ 557{
584 swp_entry_t e = pte_to_swp_entry(pte); 558 swp_entry_t e = pte_to_swp_entry(pte);
585 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 559 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8011528518bd..4b4f9cc2f186 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
1 1
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/fdtable.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
5#include <linux/ptrace.h> 6#include <linux/ptrace.h>
6#include <linux/seq_file.h> 7#include <linux/seq_file.h>
@@ -103,40 +104,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
103 return size; 104 return size;
104} 105}
105 106
106int proc_exe_link(struct inode *inode, struct path *path)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = get_proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *path = vma->vm_file->f_path;
130 path_get(&vma->vm_file->f_path);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/* 107/*
141 * display mapping lines for a particular process's /proc/pid/maps 108 * display mapping lines for a particular process's /proc/pid/maps
142 */ 109 */
diff --git a/fs/quota.c b/fs/quota.c
index 84f28dd72116..db1cc9f3c7aa 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -69,7 +69,6 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
69 switch (cmd) { 69 switch (cmd) {
70 case Q_GETFMT: 70 case Q_GETFMT:
71 case Q_GETINFO: 71 case Q_GETINFO:
72 case Q_QUOTAOFF:
73 case Q_SETINFO: 72 case Q_SETINFO:
74 case Q_SETQUOTA: 73 case Q_SETQUOTA:
75 case Q_GETQUOTA: 74 case Q_GETQUOTA:
@@ -229,12 +228,12 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
229 228
230 if (IS_ERR(pathname = getname(addr))) 229 if (IS_ERR(pathname = getname(addr)))
231 return PTR_ERR(pathname); 230 return PTR_ERR(pathname);
232 ret = sb->s_qcop->quota_on(sb, type, id, pathname); 231 ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
233 putname(pathname); 232 putname(pathname);
234 return ret; 233 return ret;
235 } 234 }
236 case Q_QUOTAOFF: 235 case Q_QUOTAOFF:
237 return sb->s_qcop->quota_off(sb, type); 236 return sb->s_qcop->quota_off(sb, type, 0);
238 237
239 case Q_GETFMT: { 238 case Q_GETFMT: {
240 __u32 fmt; 239 __u32 fmt;
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index f3841f233069..a6cf9269105c 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -139,6 +139,9 @@ static int v1_read_file_info(struct super_block *sb, int type)
139 goto out; 139 goto out;
140 } 140 }
141 ret = 0; 141 ret = 0;
142 /* limits are stored as unsigned 32-bit data */
143 dqopt->info[type].dqi_maxblimit = 0xffffffff;
144 dqopt->info[type].dqi_maxilimit = 0xffffffff;
142 dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; 145 dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
143 dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME; 146 dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
144out: 147out:
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index c519a583e681..234ada903633 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -59,6 +59,9 @@ static int v2_read_file_info(struct super_block *sb, int type)
59 sb->s_id); 59 sb->s_id);
60 return -1; 60 return -1;
61 } 61 }
62 /* limits are stored as unsigned 32-bit data */
63 info->dqi_maxblimit = 0xffffffff;
64 info->dqi_maxilimit = 0xffffffff;
62 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 65 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
63 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 66 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
64 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); 67 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -303,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
303 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); 306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
304 goto out_buf; 307 goto out_buf;
305 } 308 }
306 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); 309 le16_add_cpu(&dh->dqdh_entries, 1);
307 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); 310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
308 /* Find free structure in block */ 311 /* Find free structure in block */
309 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); 312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -445,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
445 goto out_buf; 448 goto out_buf;
446 } 449 }
447 dh = (struct v2_disk_dqdbheader *)buf; 450 dh = (struct v2_disk_dqdbheader *)buf;
448 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); 451 le16_add_cpu(&dh->dqdh_entries, -1);
449 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ 452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
450 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || 453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
451 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { 454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b41a514b0976..9590b9024300 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,6 +26,9 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/ramfs.h>
30
31#include "internal.h"
29 32
30const struct address_space_operations ramfs_aops = { 33const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 34 .readpage = simple_readpage,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711d..b13123424e49 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
44 44
45static struct backing_dev_info ramfs_backing_dev_info = { 45static struct backing_dev_info ramfs_backing_dev_info = {
46 .ra_pages = 0, /* No readahead */ 46 .ra_pages = 0, /* No readahead */
47 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | 47 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
50}; 50};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index af7cc074a476..6b330639b51d 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,4 @@
11 11
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations;
15extern const struct inode_operations ramfs_file_inode_operations; 14extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index f491ceb5af02..4646caa60455 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -479,7 +479,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
479 if (ei->i_prealloc_count < 0) 479 if (ei->i_prealloc_count < 0)
480 reiserfs_warning(th->t_super, 480 reiserfs_warning(th->t_super,
481 "zam-4001:%s: inode has negative prealloc blocks count.", 481 "zam-4001:%s: inode has negative prealloc blocks count.",
482 __FUNCTION__); 482 __func__);
483#endif 483#endif
484 while (ei->i_prealloc_count > 0) { 484 while (ei->i_prealloc_count > 0) {
485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
@@ -517,7 +517,7 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
517 if (!ei->i_prealloc_count) { 517 if (!ei->i_prealloc_count) {
518 reiserfs_warning(th->t_super, 518 reiserfs_warning(th->t_super,
519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", 519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
520 __FUNCTION__); 520 __func__);
521 } 521 }
522#endif 522#endif
523 __discard_prealloc(th, ei); 523 __discard_prealloc(th, ei);
@@ -632,7 +632,7 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
632 } 632 }
633 633
634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s", 634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
635 __FUNCTION__, this_char); 635 __func__, this_char);
636 return 1; 636 return 1;
637 } 637 }
638 638
@@ -1254,7 +1254,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1254 bh = sb_bread(sb, block); 1254 bh = sb_bread(sb, block);
1255 if (bh == NULL) 1255 if (bh == NULL)
1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1257 "reading failed", __FUNCTION__, block); 1257 "reading failed", __func__, block);
1258 else { 1258 else {
1259 if (buffer_locked(bh)) { 1259 if (buffer_locked(bh)) {
1260 PROC_INFO_INC(sb, scan_bitmap.wait); 1260 PROC_INFO_INC(sb, scan_bitmap.wait);
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 7ee4208793b6..2f87f5b14630 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1464,29 +1464,29 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1464 } else 1464 } else
1465 /* item falls wholly into S_new[i] */ 1465 /* item falls wholly into S_new[i] */
1466 { 1466 {
1467 int ret_val; 1467 int leaf_mi;
1468 struct item_head *pasted; 1468 struct item_head *pasted;
1469 1469
1470#ifdef CONFIG_REISERFS_CHECK 1470#ifdef CONFIG_REISERFS_CHECK
1471 struct item_head *ih = 1471 struct item_head *ih_check =
1472 B_N_PITEM_HEAD(tbS0, item_pos); 1472 B_N_PITEM_HEAD(tbS0, item_pos);
1473 1473
1474 if (!is_direntry_le_ih(ih) 1474 if (!is_direntry_le_ih(ih_check)
1475 && (pos_in_item != ih_item_len(ih) 1475 && (pos_in_item != ih_item_len(ih_check)
1476 || tb->insert_size[0] <= 0)) 1476 || tb->insert_size[0] <= 0))
1477 reiserfs_panic(tb->tb_sb, 1477 reiserfs_panic(tb->tb_sb,
1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); 1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1479#endif /* CONFIG_REISERFS_CHECK */ 1479#endif /* CONFIG_REISERFS_CHECK */
1480 1480
1481 ret_val = 1481 leaf_mi =
1482 leaf_move_items(LEAF_FROM_S_TO_SNEW, 1482 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1483 tb, snum[i], 1483 tb, snum[i],
1484 sbytes[i], 1484 sbytes[i],
1485 S_new[i]); 1485 S_new[i]);
1486 1486
1487 RFALSE(ret_val, 1487 RFALSE(leaf_mi,
1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1489 ret_val); 1489 leaf_mi);
1490 1490
1491 /* paste into item */ 1491 /* paste into item */
1492 bi.tb = tb; 1492 bi.tb = tb;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 74363a7aacbc..830332021ed4 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -12,8 +12,6 @@
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/compat.h> 13#include <linux/compat.h>
14 14
15static int reiserfs_unpack(struct inode *inode, struct file *filp);
16
17/* 15/*
18** reiserfs_ioctl - handler for ioctl for inode 16** reiserfs_ioctl - handler for ioctl for inode
19** supported commands: 17** supported commands:
@@ -159,7 +157,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
159** Function try to convert tail from direct item into indirect. 157** Function try to convert tail from direct item into indirect.
160** It set up nopack attribute in the REISERFS_I(inode)->nopack 158** It set up nopack attribute in the REISERFS_I(inode)->nopack
161*/ 159*/
162static int reiserfs_unpack(struct inode *inode, struct file *filp) 160int reiserfs_unpack(struct inode *inode, struct file *filp)
163{ 161{
164 int retval = 0; 162 int retval = 0;
165 int index; 163 int index;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 060eb3f598e7..e396b2fa4743 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1187,7 +1187,7 @@ static int flush_commit_list(struct super_block *s,
1187 1187
1188 if (retval) 1188 if (retval)
1189 reiserfs_abort(s, retval, "Journal write error in %s", 1189 reiserfs_abort(s, retval, "Journal write error in %s",
1190 __FUNCTION__); 1190 __func__);
1191 put_fs_excl(); 1191 put_fs_excl();
1192 return retval; 1192 return retval;
1193} 1193}
@@ -1534,7 +1534,7 @@ static int flush_journal_list(struct super_block *s,
1534 reiserfs_warning(s, 1534 reiserfs_warning(s,
1535 "clm-2082: Unable to flush buffer %llu in %s", 1535 "clm-2082: Unable to flush buffer %llu in %s",
1536 (unsigned long long)saved_bh-> 1536 (unsigned long long)saved_bh->
1537 b_blocknr, __FUNCTION__); 1537 b_blocknr, __func__);
1538 } 1538 }
1539 free_cnode: 1539 free_cnode:
1540 last = cn; 1540 last = cn;
@@ -1586,7 +1586,7 @@ static int flush_journal_list(struct super_block *s,
1586 if (err) 1586 if (err)
1587 reiserfs_abort(s, -EIO, 1587 reiserfs_abort(s, -EIO,
1588 "Write error while pushing transaction to disk in %s", 1588 "Write error while pushing transaction to disk in %s",
1589 __FUNCTION__); 1589 __func__);
1590 flush_older_and_return: 1590 flush_older_and_return:
1591 1591
1592 /* before we can update the journal header block, we _must_ flush all 1592 /* before we can update the journal header block, we _must_ flush all
@@ -1616,7 +1616,7 @@ static int flush_journal_list(struct super_block *s,
1616 if (err) 1616 if (err)
1617 reiserfs_abort(s, -EIO, 1617 reiserfs_abort(s, -EIO,
1618 "Write error while updating journal header in %s", 1618 "Write error while updating journal header in %s",
1619 __FUNCTION__); 1619 __func__);
1620 } 1620 }
1621 remove_all_from_journal_list(s, jl, 0); 1621 remove_all_from_journal_list(s, jl, 0);
1622 list_del_init(&jl->j_list); 1622 list_del_init(&jl->j_list);
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
2574 2574
2575 result = 0; 2575 result = 0;
2576 2576
2577 if (journal->j_dev_file != NULL) { 2577 if (journal->j_dev_bd != NULL) {
2578 result = filp_close(journal->j_dev_file, NULL); 2578 if (journal->j_dev_bd->bd_dev != super->s_dev)
2579 journal->j_dev_file = NULL; 2579 bd_release(journal->j_dev_bd);
2580 journal->j_dev_bd = NULL;
2581 } else if (journal->j_dev_bd != NULL) {
2582 result = blkdev_put(journal->j_dev_bd); 2580 result = blkdev_put(journal->j_dev_bd);
2583 journal->j_dev_bd = NULL; 2581 journal->j_dev_bd = NULL;
2584 } 2582 }
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
2603 result = 0; 2601 result = 0;
2604 2602
2605 journal->j_dev_bd = NULL; 2603 journal->j_dev_bd = NULL;
2606 journal->j_dev_file = NULL;
2607 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2604 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2608 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2605 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2609 2606
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
2620 "cannot init journal device '%s': %i", 2617 "cannot init journal device '%s': %i",
2621 __bdevname(jdev, b), result); 2618 __bdevname(jdev, b), result);
2622 return result; 2619 return result;
2623 } else if (jdev != super->s_dev) 2620 } else if (jdev != super->s_dev) {
2621 result = bd_claim(journal->j_dev_bd, journal);
2622 if (result) {
2623 blkdev_put(journal->j_dev_bd);
2624 return result;
2625 }
2626
2624 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2627 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2628 }
2629
2625 return 0; 2630 return 0;
2626 } 2631 }
2627 2632
2628 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2633 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
2629 if (!IS_ERR(journal->j_dev_file)) { 2634 if (IS_ERR(journal->j_dev_bd)) {
2630 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2635 result = PTR_ERR(journal->j_dev_bd);
2631 if (!S_ISBLK(jdev_inode->i_mode)) { 2636 journal->j_dev_bd = NULL;
2632 reiserfs_warning(super, "journal_init_dev: '%s' is "
2633 "not a block device", jdev_name);
2634 result = -ENOTBLK;
2635 release_journal_dev(super, journal);
2636 } else {
2637 /* ok */
2638 journal->j_dev_bd = I_BDEV(jdev_inode);
2639 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640 reiserfs_info(super,
2641 "journal_init_dev: journal device: %s\n",
2642 bdevname(journal->j_dev_bd, b));
2643 }
2644 } else {
2645 result = PTR_ERR(journal->j_dev_file);
2646 journal->j_dev_file = NULL;
2647 reiserfs_warning(super, 2637 reiserfs_warning(super,
2648 "journal_init_dev: Cannot open '%s': %i", 2638 "journal_init_dev: Cannot open '%s': %i",
2649 jdev_name, result); 2639 jdev_name, result);
2640 return result;
2650 } 2641 }
2651 return result; 2642
2643 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2644 reiserfs_info(super,
2645 "journal_init_dev: journal device: %s\n",
2646 bdevname(journal->j_dev_bd, b));
2647 return 0;
2652} 2648}
2653 2649
2654/** 2650/**
@@ -4316,5 +4312,5 @@ static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
4316 4312
4317void reiserfs_journal_abort(struct super_block *sb, int errno) 4313void reiserfs_journal_abort(struct super_block *sb, int errno)
4318{ 4314{
4319 return __reiserfs_journal_abort_soft(sb, errno); 4315 __reiserfs_journal_abort_soft(sb, errno);
4320} 4316}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8867533cb727..c1add28dd45e 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -301,7 +301,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
301 path_to_entry, de); 301 path_to_entry, de);
302 if (retval == IO_ERROR) { 302 if (retval == IO_ERROR) {
303 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s", 303 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
304 __FUNCTION__); 304 __func__);
305 return IO_ERROR; 305 return IO_ERROR;
306 } 306 }
307 307
@@ -496,7 +496,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
496 reiserfs_warning(dir->i_sb, 496 reiserfs_warning(dir->i_sb,
497 "zam-7002:%s: \"reiserfs_find_entry\" " 497 "zam-7002:%s: \"reiserfs_find_entry\" "
498 "has returned unexpected value (%d)", 498 "has returned unexpected value (%d)",
499 __FUNCTION__, retval); 499 __func__, retval);
500 } 500 }
501 501
502 return -EEXIST; 502 return -EEXIST;
@@ -907,7 +907,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
907 907
908 if (inode->i_nlink != 2 && inode->i_nlink != 1) 908 if (inode->i_nlink != 2 && inode->i_nlink != 1)
909 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink " 909 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
910 "!= 2 (%d)", __FUNCTION__, inode->i_nlink); 910 "!= 2 (%d)", __func__, inode->i_nlink);
911 911
912 clear_nlink(inode); 912 clear_nlink(inode);
913 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 913 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -984,7 +984,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
984 984
985 if (!inode->i_nlink) { 985 if (!inode->i_nlink) {
986 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file " 986 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
987 "(%s:%lu), %d", __FUNCTION__, 987 "(%s:%lu), %d", __func__,
988 reiserfs_bdevname(inode->i_sb), inode->i_ino, 988 reiserfs_bdevname(inode->i_sb), inode->i_ino,
989 inode->i_nlink); 989 inode->i_nlink);
990 inode->i_nlink = 1; 990 inode->i_nlink = 1;
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 65feba4deb69..ea0cf8c28a99 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -61,7 +61,7 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
61 /* comment needed -Hans */ 61 /* comment needed -Hans */
62 unused_objectid = le32_to_cpu(map[1]); 62 unused_objectid = le32_to_cpu(map[1]);
63 if (unused_objectid == U32_MAX) { 63 if (unused_objectid == U32_MAX) {
64 reiserfs_warning(s, "%s: no more object ids", __FUNCTION__); 64 reiserfs_warning(s, "%s: no more object ids", __func__);
65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); 65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
66 return 0; 66 return 0;
67 } 67 }
@@ -114,7 +114,7 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
114 if (objectid_to_release == le32_to_cpu(map[i])) { 114 if (objectid_to_release == le32_to_cpu(map[i])) {
115 /* This incrementation unallocates the objectid. */ 115 /* This incrementation unallocates the objectid. */
116 //map[i]++; 116 //map[i]++;
117 map[i] = cpu_to_le32(le32_to_cpu(map[i]) + 1); 117 le32_add_cpu(&map[i], 1);
118 118
119 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 119 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */
120 if (map[i] == map[i + 1]) { 120 if (map[i] == map[i + 1]) {
@@ -138,8 +138,7 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
138 /* size of objectid map is not changed */ 138 /* size of objectid map is not changed */
139 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { 139 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
140 //objectid_map[i+1]--; 140 //objectid_map[i+1]--;
141 map[i + 1] = 141 le32_add_cpu(&map[i + 1], -1);
142 cpu_to_le32(le32_to_cpu(map[i + 1]) - 1);
143 return; 142 return;
144 } 143 }
145 144
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 8f86c52b30d8..b9dbeeca7049 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = {
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
469 .release = seq_release, 469 .release = seq_release,
470 .owner = THIS_MODULE,
470}; 471};
471 472
472static struct proc_dir_entry *proc_info_root = NULL; 473static struct proc_dir_entry *proc_info_root = NULL;
@@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs";
475static void add_file(struct super_block *sb, char *name, 476static void add_file(struct super_block *sb, char *name,
476 int (*func) (struct seq_file *, struct super_block *)) 477 int (*func) (struct seq_file *, struct super_block *))
477{ 478{
478 struct proc_dir_entry *de; 479 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
479 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 480 &r_file_operations, func);
480 if (de) {
481 de->data = func;
482 de->proc_fops = &r_file_operations;
483 }
484} 481}
485 482
486int reiserfs_proc_info_init(struct super_block *sb) 483int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d2db2417b2bd..abbc64dcc8d4 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1419,8 +1419,7 @@ int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1419 1419
1420 inode_generation = 1420 inode_generation =
1421 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; 1421 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1422 *inode_generation = 1422 le32_add_cpu(inode_generation, 1);
1423 cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1424 } 1423 }
1425/* USE_INODE_GENERATION_COUNTER */ 1424/* USE_INODE_GENERATION_COUNTER */
1426#endif 1425#endif
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 393cc22c1717..ed424d708e69 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -304,7 +304,7 @@ static int finish_unfinished(struct super_block *s)
304 /* Turn quotas off */ 304 /* Turn quotas off */
305 for (i = 0; i < MAXQUOTAS; i++) { 305 for (i = 0; i < MAXQUOTAS; i++) {
306 if (sb_dqopt(s)->files[i]) 306 if (sb_dqopt(s)->files[i])
307 vfs_quota_off_mount(s, i); 307 vfs_quota_off(s, i, 0);
308 } 308 }
309 if (ms_active_set) 309 if (ms_active_set)
310 /* Restore the flag back */ 310 /* Restore the flag back */
@@ -634,7 +634,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
634static int reiserfs_release_dquot(struct dquot *); 634static int reiserfs_release_dquot(struct dquot *);
635static int reiserfs_mark_dquot_dirty(struct dquot *); 635static int reiserfs_mark_dquot_dirty(struct dquot *);
636static int reiserfs_write_info(struct super_block *, int); 636static int reiserfs_write_info(struct super_block *, int);
637static int reiserfs_quota_on(struct super_block *, int, int, char *); 637static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
638 638
639static struct dquot_operations reiserfs_quota_operations = { 639static struct dquot_operations reiserfs_quota_operations = {
640 .initialize = reiserfs_dquot_initialize, 640 .initialize = reiserfs_dquot_initialize,
@@ -1890,8 +1890,14 @@ static int reiserfs_dquot_drop(struct inode *inode)
1890 ret = 1890 ret =
1891 journal_begin(&th, inode->i_sb, 1891 journal_begin(&th, inode->i_sb,
1892 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); 1892 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1893 if (ret) 1893 if (ret) {
1894 /*
1895 * We call dquot_drop() anyway to at least release references
1896 * to quota structures so that umount does not hang.
1897 */
1898 dquot_drop(inode);
1894 goto out; 1899 goto out;
1900 }
1895 ret = dquot_drop(inode); 1901 ret = dquot_drop(inode);
1896 err = 1902 err =
1897 journal_end(&th, inode->i_sb, 1903 journal_end(&th, inode->i_sb,
@@ -2015,13 +2021,17 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
2015 * Standard function to be called on quota_on 2021 * Standard function to be called on quota_on
2016 */ 2022 */
2017static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, 2023static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2018 char *path) 2024 char *path, int remount)
2019{ 2025{
2020 int err; 2026 int err;
2021 struct nameidata nd; 2027 struct nameidata nd;
2028 struct inode *inode;
2022 2029
2023 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2024 return -EINVAL; 2031 return -EINVAL;
2032 /* No more checks needed? Path and format_id are bogus anyway... */
2033 if (remount)
2034 return vfs_quota_on(sb, type, format_id, path, 1);
2025 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2035 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2026 if (err) 2036 if (err)
2027 return err; 2037 return err;
@@ -2030,18 +2040,24 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2030 path_put(&nd.path); 2040 path_put(&nd.path);
2031 return -EXDEV; 2041 return -EXDEV;
2032 } 2042 }
2043 inode = nd.path.dentry->d_inode;
2033 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2044 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2034 if (!(REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask)) { 2045 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2035 reiserfs_warning(sb, 2046 err = reiserfs_unpack(inode, NULL);
2036 "reiserfs: Quota file must have tail packing disabled."); 2047 if (err) {
2037 path_put(&nd.path); 2048 reiserfs_warning(sb,
2038 return -EINVAL; 2049 "reiserfs: Unpacking tail of quota file failed"
2050 " (%d). Cannot turn on quotas.", err);
2051 path_put(&nd.path);
2052 return -EINVAL;
2053 }
2054 mark_inode_dirty(inode);
2039 } 2055 }
2040 /* Not journalling quota? No more tests needed... */ 2056 /* Not journalling quota? No more tests needed... */
2041 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
2042 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
2043 path_put(&nd.path); 2059 path_put(&nd.path);
2044 return vfs_quota_on(sb, type, format_id, path); 2060 return vfs_quota_on(sb, type, format_id, path, 0);
2045 } 2061 }
2046 /* Quotafile not of fs root? */ 2062 /* Quotafile not of fs root? */
2047 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
@@ -2049,7 +2065,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2049 "reiserfs: Quota file not on filesystem root. " 2065 "reiserfs: Quota file not on filesystem root. "
2050 "Journalled quota will not work."); 2066 "Journalled quota will not work.");
2051 path_put(&nd.path); 2067 path_put(&nd.path);
2052 return vfs_quota_on(sb, type, format_id, path); 2068 return vfs_quota_on(sb, type, format_id, path, 0);
2053} 2069}
2054 2070
2055/* Read data from quotafile - avoid pagecache and such because we cannot afford 2071/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e05..8dda969614a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fdtable.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
298#define MAX_SELECT_SECONDS \ 299#define MAX_SELECT_SECONDS \
299 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
300 301
301static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
302 fd_set __user *exp, s64 *timeout) 303 fd_set __user *exp, s64 *timeout)
303{ 304{
304 fd_set_bits fds; 305 fd_set_bits fds;
@@ -425,7 +426,7 @@ sticky:
425 return ret; 426 return ret;
426} 427}
427 428
428#ifdef TIF_RESTORE_SIGMASK 429#ifdef HAVE_SET_RESTORE_SIGMASK
429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 430asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
430 fd_set __user *exp, struct timespec __user *tsp, 431 fd_set __user *exp, struct timespec __user *tsp,
431 const sigset_t __user *sigmask, size_t sigsetsize) 432 const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +499,7 @@ sticky:
498 if (sigmask) { 499 if (sigmask) {
499 memcpy(&current->saved_sigmask, &sigsaved, 500 memcpy(&current->saved_sigmask, &sigsaved,
500 sizeof(sigsaved)); 501 sizeof(sigsaved));
501 set_thread_flag(TIF_RESTORE_SIGMASK); 502 set_restore_sigmask();
502 } 503 }
503 } else if (sigmask) 504 } else if (sigmask)
504 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 505 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
528 529
529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 530 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
530} 531}
531#endif /* TIF_RESTORE_SIGMASK */ 532#endif /* HAVE_SET_RESTORE_SIGMASK */
532 533
533struct poll_list { 534struct poll_list {
534 struct poll_list *next; 535 struct poll_list *next;
@@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
759 return ret; 760 return ret;
760} 761}
761 762
762#ifdef TIF_RESTORE_SIGMASK 763#ifdef HAVE_SET_RESTORE_SIGMASK
763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 764asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
764 struct timespec __user *tsp, const sigset_t __user *sigmask, 765 struct timespec __user *tsp, const sigset_t __user *sigmask,
765 size_t sigsetsize) 766 size_t sigsetsize)
@@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
805 if (sigmask) { 806 if (sigmask) {
806 memcpy(&current->saved_sigmask, &sigsaved, 807 memcpy(&current->saved_sigmask, &sigsaved,
807 sizeof(sigsaved)); 808 sizeof(sigsaved));
808 set_thread_flag(TIF_RESTORE_SIGMASK); 809 set_restore_sigmask();
809 } 810 }
810 ret = -ERESTARTNOHAND; 811 ret = -ERESTARTNOHAND;
811 } else if (sigmask) 812 } else if (sigmask)
@@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
839 840
840 return ret; 841 return ret;
841} 842}
842#endif /* TIF_RESTORE_SIGMASK */ 843#endif /* HAVE_SET_RESTORE_SIGMASK */
diff --git a/fs/seq_file.c b/fs/seq_file.c
index cdfd996ca6ef..3f54dbd6c49b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -350,28 +350,40 @@ int seq_printf(struct seq_file *m, const char *f, ...)
350} 350}
351EXPORT_SYMBOL(seq_printf); 351EXPORT_SYMBOL(seq_printf);
352 352
353static char *mangle_path(char *s, char *p, char *esc)
354{
355 while (s <= p) {
356 char c = *p++;
357 if (!c) {
358 return s;
359 } else if (!strchr(esc, c)) {
360 *s++ = c;
361 } else if (s + 4 > p) {
362 break;
363 } else {
364 *s++ = '\\';
365 *s++ = '0' + ((c & 0300) >> 6);
366 *s++ = '0' + ((c & 070) >> 3);
367 *s++ = '0' + (c & 07);
368 }
369 }
370 return NULL;
371}
372
373/*
374 * return the absolute path of 'dentry' residing in mount 'mnt'.
375 */
353int seq_path(struct seq_file *m, struct path *path, char *esc) 376int seq_path(struct seq_file *m, struct path *path, char *esc)
354{ 377{
355 if (m->count < m->size) { 378 if (m->count < m->size) {
356 char *s = m->buf + m->count; 379 char *s = m->buf + m->count;
357 char *p = d_path(path, s, m->size - m->count); 380 char *p = d_path(path, s, m->size - m->count);
358 if (!IS_ERR(p)) { 381 if (!IS_ERR(p)) {
359 while (s <= p) { 382 s = mangle_path(s, p, esc);
360 char c = *p++; 383 if (s) {
361 if (!c) { 384 p = m->buf + m->count;
362 p = m->buf + m->count; 385 m->count = s - m->buf;
363 m->count = s - m->buf; 386 return s - p;
364 return s - p;
365 } else if (!strchr(esc, c)) {
366 *s++ = c;
367 } else if (s + 4 > p) {
368 break;
369 } else {
370 *s++ = '\\';
371 *s++ = '0' + ((c & 0300) >> 6);
372 *s++ = '0' + ((c & 070) >> 3);
373 *s++ = '0' + (c & 07);
374 }
375 } 387 }
376 } 388 }
377 } 389 }
@@ -380,6 +392,57 @@ int seq_path(struct seq_file *m, struct path *path, char *esc)
380} 392}
381EXPORT_SYMBOL(seq_path); 393EXPORT_SYMBOL(seq_path);
382 394
395/*
396 * Same as seq_path, but relative to supplied root.
397 *
398 * root may be changed, see __d_path().
399 */
400int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
401 char *esc)
402{
403 int err = -ENAMETOOLONG;
404 if (m->count < m->size) {
405 char *s = m->buf + m->count;
406 char *p;
407
408 spin_lock(&dcache_lock);
409 p = __d_path(path, root, s, m->size - m->count);
410 spin_unlock(&dcache_lock);
411 err = PTR_ERR(p);
412 if (!IS_ERR(p)) {
413 s = mangle_path(s, p, esc);
414 if (s) {
415 p = m->buf + m->count;
416 m->count = s - m->buf;
417 return 0;
418 }
419 }
420 }
421 m->count = m->size;
422 return err;
423}
424
425/*
426 * returns the path of the 'dentry' from the root of its filesystem.
427 */
428int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
429{
430 if (m->count < m->size) {
431 char *s = m->buf + m->count;
432 char *p = dentry_path(dentry, s, m->size - m->count);
433 if (!IS_ERR(p)) {
434 s = mangle_path(s, p, esc);
435 if (s) {
436 p = m->buf + m->count;
437 m->count = s - m->buf;
438 return s - p;
439 }
440 }
441 }
442 m->count = m->size;
443 return -1;
444}
445
383static void *single_start(struct seq_file *p, loff_t *pos) 446static void *single_start(struct seq_file *p, loff_t *pos)
384{ 447{
385 return NULL + (*pos == 0); 448 return NULL + (*pos == 0);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db35933..619725644c75 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
209{ 209{
210 int error;
211 sigset_t sigmask; 210 sigset_t sigmask;
212 struct signalfd_ctx *ctx; 211 struct signalfd_ctx *ctx;
213 struct file *file;
214 struct inode *inode;
215 212
216 if (sizemask != sizeof(sigset_t) || 213 if (sizemask != sizeof(sigset_t) ||
217 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 214 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
230 * When we call this, the initialization must be complete, since 227 * When we call this, the initialization must be complete, since
231 * anon_inode_getfd() will install the fd. 228 * anon_inode_getfd() will install the fd.
232 */ 229 */
233 error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", 230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
234 &signalfd_fops, ctx); 231 if (ufd < 0)
235 if (error) 232 kfree(ctx);
236 goto err_fdalloc;
237 } else { 233 } else {
238 file = fget(ufd); 234 struct file *file = fget(ufd);
239 if (!file) 235 if (!file)
240 return -EBADF; 236 return -EBADF;
241 ctx = file->private_data; 237 ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
252 } 248 }
253 249
254 return ufd; 250 return ufd;
255
256err_fdalloc:
257 kfree(ctx);
258 return error;
259} 251}
260
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b92694..fc4b1a5dd755 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
11 * these are normally enabled. 11 * these are normally enabled.
12 */ 12 */
13#ifdef SMBFS_PARANOIA 13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) 14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else 15#else
16# define PARANOIA(f, a...) do { ; } while(0) 16# define PARANOIA(f, a...) do { ; } while(0)
17#endif 17#endif
18 18
19/* lots of debug messages */ 19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE 20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else 22#else
23# define VERBOSE(f, a...) do { ; } while(0) 23# define VERBOSE(f, a...) do { ; } while(0)
24#endif 24#endif
@@ -28,7 +28,7 @@
28 * too common name. 28 * too common name.
29 */ 29 */
30#ifdef SMBFS_DEBUG 30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else 32#else
33#define DEBUG1(f, a...) do { ; } while(0) 33#define DEBUG1(f, a...) do { ; } while(0)
34#endif 34#endif
diff --git a/fs/splice.c b/fs/splice.c
index eeb1a86a7014..633f58ebfb72 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1075 1075
1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1077 if (ret > 0) 1077 if (ret > 0)
1078 *ppos += ret; 1078 *ppos = sd.pos;
1079 1079
1080 return ret; 1080 return ret;
1081} 1081}
diff --git a/fs/super.c b/fs/super.c
index 4798350b2bc9..453877c5697b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s)
117 * Drop a superblock's refcount. Returns non-zero if the superblock was 117 * Drop a superblock's refcount. Returns non-zero if the superblock was
118 * destroyed. The caller must hold sb_lock. 118 * destroyed. The caller must hold sb_lock.
119 */ 119 */
120int __put_super(struct super_block *sb) 120static int __put_super(struct super_block *sb)
121{ 121{
122 int ret = 0; 122 int ret = 0;
123 123
@@ -179,7 +179,7 @@ void deactivate_super(struct super_block *s)
179 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 179 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
180 s->s_count -= S_BIAS-1; 180 s->s_count -= S_BIAS-1;
181 spin_unlock(&sb_lock); 181 spin_unlock(&sb_lock);
182 DQUOT_OFF(s); 182 DQUOT_OFF(s, 0);
183 down_write(&s->s_umount); 183 down_write(&s->s_umount);
184 fs->kill_sb(s); 184 fs->kill_sb(s);
185 put_filesystem(fs); 185 put_filesystem(fs);
@@ -608,6 +608,7 @@ retry:
608int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 608int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
609{ 609{
610 int retval; 610 int retval;
611 int remount_rw;
611 612
612#ifdef CONFIG_BLOCK 613#ifdef CONFIG_BLOCK
613 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 614 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
@@ -625,8 +626,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
625 mark_files_ro(sb); 626 mark_files_ro(sb);
626 else if (!fs_may_remount_ro(sb)) 627 else if (!fs_may_remount_ro(sb))
627 return -EBUSY; 628 return -EBUSY;
628 DQUOT_OFF(sb); 629 retval = DQUOT_OFF(sb, 1);
630 if (retval < 0 && retval != -ENOSYS)
631 return -EBUSY;
629 } 632 }
633 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
630 634
631 if (sb->s_op->remount_fs) { 635 if (sb->s_op->remount_fs) {
632 lock_super(sb); 636 lock_super(sb);
@@ -636,6 +640,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
636 return retval; 640 return retval;
637 } 641 }
638 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 642 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
643 if (remount_rw)
644 DQUOT_ON_REMOUNT(sb);
639 return 0; 645 return 0;
640} 646}
641 647
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005ea7639..228e17b5e9ee 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
64 /* sync the superblock to buffers */ 64 /* sync the superblock to buffers */
65 sb = inode->i_sb; 65 sb = inode->i_sb;
66 lock_super(sb); 66 lock_super(sb);
67 if (sb->s_op->write_super) 67 if (sb->s_dirt && sb->s_op->write_super)
68 sb->s_op->write_super(sb); 68 sb->s_op->write_super(sb);
69 unlock_super(sb); 69 unlock_super(sb);
70 70
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index ade9a7e6a757..e7735f643cd1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
135 goto out; 135 goto out;
136 } 136 }
137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
138 __FUNCTION__, count, *ppos, buffer->page); 138 __func__, count, *ppos, buffer->page);
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 140 buffer->count);
141out: 141out:
@@ -477,11 +477,10 @@ const struct file_operations sysfs_file_operations = {
477 .poll = sysfs_poll, 477 .poll = sysfs_poll,
478}; 478};
479 479
480 480int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
481int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 481 const struct attribute *attr, int type, mode_t amode)
482 int type)
483{ 482{
484 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 483 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
485 struct sysfs_addrm_cxt acxt; 484 struct sysfs_addrm_cxt acxt;
486 struct sysfs_dirent *sd; 485 struct sysfs_dirent *sd;
487 int rc; 486 int rc;
@@ -502,6 +501,13 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
502} 501}
503 502
504 503
504int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
505 int type)
506{
507 return sysfs_add_file_mode(dir_sd, attr, type, attr->mode);
508}
509
510
505/** 511/**
506 * sysfs_create_file - create an attribute file for an object. 512 * sysfs_create_file - create an attribute file for an object.
507 * @kobj: object we're creating for. 513 * @kobj: object we're creating for.
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 477904915032..eeba38417b1d 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -23,35 +23,50 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
23 int i; 23 int i;
24 24
25 for (i = 0, attr = grp->attrs; *attr; i++, attr++) 25 for (i = 0, attr = grp->attrs; *attr; i++, attr++)
26 if (!grp->is_visible || 26 sysfs_hash_and_remove(dir_sd, (*attr)->name);
27 grp->is_visible(kobj, *attr, i))
28 sysfs_hash_and_remove(dir_sd, (*attr)->name);
29} 27}
30 28
31static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
32 const struct attribute_group *grp) 30 const struct attribute_group *grp, int update)
33{ 31{
34 struct attribute *const* attr; 32 struct attribute *const* attr;
35 int error = 0, i; 33 int error = 0, i;
36 34
37 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) 35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
38 if (!grp->is_visible || 36 mode_t mode = 0;
39 grp->is_visible(kobj, *attr, i)) 37
40 error |= 38 /* in update mode, we're changing the permissions or
41 sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); 39 * visibility. Do this by first removing then
40 * re-adding (if required) the file */
41 if (update)
42 sysfs_hash_and_remove(dir_sd, (*attr)->name);
43 if (grp->is_visible) {
44 mode = grp->is_visible(kobj, *attr, i);
45 if (!mode)
46 continue;
47 }
48 error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR,
49 (*attr)->mode | mode);
50 if (unlikely(error))
51 break;
52 }
42 if (error) 53 if (error)
43 remove_files(dir_sd, kobj, grp); 54 remove_files(dir_sd, kobj, grp);
44 return error; 55 return error;
45} 56}
46 57
47 58
48int sysfs_create_group(struct kobject * kobj, 59static int internal_create_group(struct kobject *kobj, int update,
49 const struct attribute_group * grp) 60 const struct attribute_group *grp)
50{ 61{
51 struct sysfs_dirent *sd; 62 struct sysfs_dirent *sd;
52 int error; 63 int error;
53 64
54 BUG_ON(!kobj || !kobj->sd); 65 BUG_ON(!kobj || (!update && !kobj->sd));
66
67 /* Updates may happen before the object has been instantiated */
68 if (unlikely(update && !kobj->sd))
69 return -EINVAL;
55 70
56 if (grp->name) { 71 if (grp->name) {
57 error = sysfs_create_subdir(kobj, grp->name, &sd); 72 error = sysfs_create_subdir(kobj, grp->name, &sd);
@@ -60,7 +75,7 @@ int sysfs_create_group(struct kobject * kobj,
60 } else 75 } else
61 sd = kobj->sd; 76 sd = kobj->sd;
62 sysfs_get(sd); 77 sysfs_get(sd);
63 error = create_files(sd, kobj, grp); 78 error = create_files(sd, kobj, grp, update);
64 if (error) { 79 if (error) {
65 if (grp->name) 80 if (grp->name)
66 sysfs_remove_subdir(sd); 81 sysfs_remove_subdir(sd);
@@ -69,6 +84,47 @@ int sysfs_create_group(struct kobject * kobj,
69 return error; 84 return error;
70} 85}
71 86
87/**
88 * sysfs_create_group - given a directory kobject, create an attribute group
89 * @kobj: The kobject to create the group on
90 * @grp: The attribute group to create
91 *
92 * This function creates a group for the first time. It will explicitly
93 * warn and error if any of the attribute files being created already exist.
94 *
95 * Returns 0 on success or error.
96 */
97int sysfs_create_group(struct kobject *kobj,
98 const struct attribute_group *grp)
99{
100 return internal_create_group(kobj, 0, grp);
101}
102
103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group
105 * @kobj: The kobject to create the group on
106 * @grp: The attribute group to create
107 *
108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any
110 * of the attribute files being created already exist. Furthermore,
111 * if the visibility of the files has changed through the is_visible()
112 * callback, it will update the permissions and add or remove the
113 * relevant files.
114 *
115 * The primary use for this function is to call it after making a change
116 * that affects group visibility.
117 *
118 * Returns 0 on success or error.
119 */
120int sysfs_update_group(struct kobject *kobj,
121 const struct attribute_group *grp)
122{
123 return internal_create_group(kobj, 1, grp);
124}
125
126
127
72void sysfs_remove_group(struct kobject * kobj, 128void sysfs_remove_group(struct kobject * kobj,
73 const struct attribute_group * grp) 129 const struct attribute_group * grp)
74{ 130{
@@ -95,4 +151,5 @@ void sysfs_remove_group(struct kobject * kobj,
95 151
96 152
97EXPORT_SYMBOL_GPL(sysfs_create_group); 153EXPORT_SYMBOL_GPL(sysfs_create_group);
154EXPORT_SYMBOL_GPL(sysfs_update_group);
98EXPORT_SYMBOL_GPL(sysfs_remove_group); 155EXPORT_SYMBOL_GPL(sysfs_remove_group);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94e..eb53c632f856 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 34};
35 35
36static const struct inode_operations sysfs_inode_operations ={ 36static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
59 if (error) 59 if (error)
60 return error; 60 return error;
61 61
62 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
63
62 error = inode_setattr(inode, iattr); 64 error = inode_setattr(inode, iattr);
63 if (error) 65 if (error)
64 return error; 66 return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..14f0023984d7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
62 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
63 if (!root) { 63 if (!root) {
64 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 64 pr_debug("%s: could not get root dentry!\n",__func__);
65 iput(inode); 65 iput(inode);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ff17f8da9b43..ce4e15f8aaeb 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -154,6 +154,8 @@ extern const struct file_operations sysfs_file_operations;
154int sysfs_add_file(struct sysfs_dirent *dir_sd, 154int sysfs_add_file(struct sysfs_dirent *dir_sd,
155 const struct attribute *attr, int type); 155 const struct attribute *attr, int type);
156 156
157int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
158 const struct attribute *attr, int type, mode_t amode);
157/* 159/*
158 * bin.c 160 * bin.c
159 */ 161 */
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05cd..38ebe3f85b3d 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
217 if (sbi->s_bytesex == BYTESEX_PDP) 217 if (sbi->s_bytesex == BYTESEX_PDP)
218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); 218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
219 else if (sbi->s_bytesex == BYTESEX_LE) 219 else if (sbi->s_bytesex == BYTESEX_LE)
220 *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); 220 le32_add_cpu((__le32 *)n, d);
221 else 221 else
222 *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); 222 be32_add_cpu((__be32 *)n, d);
223 return *n; 223 return *n;
224} 224}
225 225
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) 242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
243{ 243{
244 if (sbi->s_bytesex != BYTESEX_BE) 244 if (sbi->s_bytesex != BYTESEX_BE)
245 *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 245 le16_add_cpu((__le16 *)n, d);
246 else 246 else
247 *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 247 be16_add_cpu((__be16 *)n, d);
248 return *n; 248 return *n;
249} 249}
250 250
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 10c80b59ec4b..d87d354ec424 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/anon_inodes.h> 21#include <linux/anon_inodes.h>
22#include <linux/timerfd.h> 22#include <linux/timerfd.h>
23#include <linux/syscalls.h>
23 24
24struct timerfd_ctx { 25struct timerfd_ctx {
25 struct hrtimer tmr; 26 struct hrtimer tmr;
@@ -180,10 +181,8 @@ static struct file *timerfd_fget(int fd)
180 181
181asmlinkage long sys_timerfd_create(int clockid, int flags) 182asmlinkage long sys_timerfd_create(int clockid, int flags)
182{ 183{
183 int error, ufd; 184 int ufd;
184 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
185 struct file *file;
186 struct inode *inode;
187 186
188 if (flags) 187 if (flags)
189 return -EINVAL; 188 return -EINVAL;
@@ -199,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
199 ctx->clockid = clockid; 198 ctx->clockid = clockid;
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 200
202 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", 201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
203 &timerfd_fops, ctx); 202 if (ufd < 0)
204 if (error) {
205 kfree(ctx); 203 kfree(ctx);
206 return error;
207 }
208 204
209 return ufd; 205 return ufd;
210} 206}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index ba5537d4bc15..2b34c8ca6c83 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -890,7 +890,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
890 890
891 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 891 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
892 kernel_lb_addr eloc; 892 kernel_lb_addr eloc;
893 uint32_t elen; 893 uint32_t bsize;
894 894
895 block = udf_new_block(inode->i_sb, inode, 895 block = udf_new_block(inode->i_sb, inode,
896 iinfo->i_location.partitionReferenceNum, 896 iinfo->i_location.partitionReferenceNum,
@@ -903,9 +903,9 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
903 eloc.logicalBlockNum = block; 903 eloc.logicalBlockNum = block;
904 eloc.partitionReferenceNum = 904 eloc.partitionReferenceNum =
905 iinfo->i_location.partitionReferenceNum; 905 iinfo->i_location.partitionReferenceNum;
906 elen = inode->i_sb->s_blocksize; 906 bsize = inode->i_sb->s_blocksize;
907 iinfo->i_lenExtents = elen; 907 iinfo->i_lenExtents = bsize;
908 udf_add_aext(inode, &epos, eloc, elen, 0); 908 udf_add_aext(inode, &epos, eloc, bsize, 0);
909 brelse(epos.bh); 909 brelse(epos.bh);
910 910
911 block = udf_get_pblock(inode->i_sb, block, 911 block = udf_get_pblock(inode->i_sb, block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..9fb18a340fc1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
243 udf_error(sb, __FUNCTION__, 243 udf_error(sb, __func__,
244 "Unable to allocate space for %d partition maps", 244 "Unable to allocate space for %d partition maps",
245 count); 245 count);
246 sbi->s_partitions = 0; 246 sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ 1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
1087 1087
1088 if (bitmap == NULL) { 1088 if (bitmap == NULL) {
1089 udf_error(sb, __FUNCTION__, 1089 udf_error(sb, __func__,
1090 "Unable to allocate space for bitmap " 1090 "Unable to allocate space for bitmap "
1091 "and %d buffer_head pointers", nr_groups); 1091 "and %d buffer_head pointers", nr_groups);
1092 return NULL; 1092 return NULL;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 1e7598fb9787..0d9ada173739 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -277,7 +277,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
277 if (!page)/* it was truncated */ 277 if (!page)/* it was truncated */
278 continue; 278 continue;
279 if (IS_ERR(page)) {/* or EIO */ 279 if (IS_ERR(page)) {/* or EIO */
280 ufs_error(inode->i_sb, __FUNCTION__, 280 ufs_error(inode->i_sb, __func__,
281 "read of page %llu failed\n", 281 "read of page %llu failed\n",
282 (unsigned long long)index); 282 (unsigned long long)index);
283 continue; 283 continue;
@@ -308,7 +308,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
308 ll_rw_block(READ, 1, &bh); 308 ll_rw_block(READ, 1, &bh);
309 wait_on_buffer(bh); 309 wait_on_buffer(bh);
310 if (!buffer_uptodate(bh)) { 310 if (!buffer_uptodate(bh)) {
311 ufs_error(inode->i_sb, __FUNCTION__, 311 ufs_error(inode->i_sb, __func__,
312 "read of block failed\n"); 312 "read of block failed\n");
313 break; 313 break;
314 } 314 }
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ef563fc8d72c..df0bef18742d 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -179,7 +179,7 @@ bad_entry:
179 goto fail; 179 goto fail;
180Eend: 180Eend:
181 p = (struct ufs_dir_entry *)(kaddr + offs); 181 p = (struct ufs_dir_entry *)(kaddr + offs);
182 ufs_error(sb, __FUNCTION__, 182 ufs_error(sb, __func__,
183 "entry in directory #%lu spans the page boundary" 183 "entry in directory #%lu spans the page boundary"
184 "offset=%lu", 184 "offset=%lu",
185 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs); 185 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs);
@@ -284,7 +284,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry,
284 kaddr += ufs_last_byte(dir, n) - reclen; 284 kaddr += ufs_last_byte(dir, n) - reclen;
285 while ((char *) de <= kaddr) { 285 while ((char *) de <= kaddr) {
286 if (de->d_reclen == 0) { 286 if (de->d_reclen == 0) {
287 ufs_error(dir->i_sb, __FUNCTION__, 287 ufs_error(dir->i_sb, __func__,
288 "zero-length directory entry"); 288 "zero-length directory entry");
289 ufs_put_page(page); 289 ufs_put_page(page);
290 goto out; 290 goto out;
@@ -356,7 +356,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
356 goto got_it; 356 goto got_it;
357 } 357 }
358 if (de->d_reclen == 0) { 358 if (de->d_reclen == 0) {
359 ufs_error(dir->i_sb, __FUNCTION__, 359 ufs_error(dir->i_sb, __func__,
360 "zero-length directory entry"); 360 "zero-length directory entry");
361 err = -EIO; 361 err = -EIO;
362 goto out_unlock; 362 goto out_unlock;
@@ -456,7 +456,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
456 struct page *page = ufs_get_page(inode, n); 456 struct page *page = ufs_get_page(inode, n);
457 457
458 if (IS_ERR(page)) { 458 if (IS_ERR(page)) {
459 ufs_error(sb, __FUNCTION__, 459 ufs_error(sb, __func__,
460 "bad page in #%lu", 460 "bad page in #%lu",
461 inode->i_ino); 461 inode->i_ino);
462 filp->f_pos += PAGE_CACHE_SIZE - offset; 462 filp->f_pos += PAGE_CACHE_SIZE - offset;
@@ -475,7 +475,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
475 limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); 475 limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1);
476 for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) { 476 for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) {
477 if (de->d_reclen == 0) { 477 if (de->d_reclen == 0) {
478 ufs_error(sb, __FUNCTION__, 478 ufs_error(sb, __func__,
479 "zero-length directory entry"); 479 "zero-length directory entry");
480 ufs_put_page(page); 480 ufs_put_page(page);
481 return -EIO; 481 return -EIO;
@@ -536,7 +536,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
536 536
537 while ((char*)de < (char*)dir) { 537 while ((char*)de < (char*)dir) {
538 if (de->d_reclen == 0) { 538 if (de->d_reclen == 0) {
539 ufs_error(inode->i_sb, __FUNCTION__, 539 ufs_error(inode->i_sb, __func__,
540 "zero-length directory entry"); 540 "zero-length directory entry");
541 err = -EIO; 541 err = -EIO;
542 goto out; 542 goto out;
@@ -633,7 +633,7 @@ int ufs_empty_dir(struct inode * inode)
633 633
634 while ((char *)de <= kaddr) { 634 while ((char *)de <= kaddr) {
635 if (de->d_reclen == 0) { 635 if (de->d_reclen == 0) {
636 ufs_error(inode->i_sb, __FUNCTION__, 636 ufs_error(inode->i_sb, __func__,
637 "zero-length directory entry: " 637 "zero-length directory entry: "
638 "kaddr=%p, de=%p\n", kaddr, de); 638 "kaddr=%p, de=%p\n", kaddr, de);
639 goto not_empty; 639 goto not_empty;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 5446b888fc8e..39f877898565 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -929,7 +929,7 @@ void ufs_delete_inode (struct inode * inode)
929 old_i_size = inode->i_size; 929 old_i_size = inode->i_size;
930 inode->i_size = 0; 930 inode->i_size = 0;
931 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 931 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
932 ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n"); 932 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
933 ufs_free_inode (inode); 933 ufs_free_inode (inode);
934 unlock_kernel(); 934 unlock_kernel();
935 return; 935 return;
diff --git a/fs/ufs/swab.h b/fs/ufs/swab.h
index 1683d2bee614..8d974c4fd18b 100644
--- a/fs/ufs/swab.h
+++ b/fs/ufs/swab.h
@@ -40,25 +40,7 @@ cpu_to_fs64(struct super_block *sbp, u64 n)
40 return (__force __fs64)cpu_to_be64(n); 40 return (__force __fs64)cpu_to_be64(n);
41} 41}
42 42
43static __inline u32 43static inline u32
44fs64_add(struct super_block *sbp, u32 *n, int d)
45{
46 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
47 return *n = cpu_to_le64(le64_to_cpu(*n)+d);
48 else
49 return *n = cpu_to_be64(be64_to_cpu(*n)+d);
50}
51
52static __inline u32
53fs64_sub(struct super_block *sbp, u32 *n, int d)
54{
55 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
56 return *n = cpu_to_le64(le64_to_cpu(*n)-d);
57 else
58 return *n = cpu_to_be64(be64_to_cpu(*n)-d);
59}
60
61static __inline u32
62fs32_to_cpu(struct super_block *sbp, __fs32 n) 44fs32_to_cpu(struct super_block *sbp, __fs32 n)
63{ 45{
64 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 46 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
@@ -80,18 +62,18 @@ static inline void
80fs32_add(struct super_block *sbp, __fs32 *n, int d) 62fs32_add(struct super_block *sbp, __fs32 *n, int d)
81{ 63{
82 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 64 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
83 *(__le32 *)n = cpu_to_le32(le32_to_cpu(*(__le32 *)n)+d); 65 le32_add_cpu((__le32 *)n, d);
84 else 66 else
85 *(__be32 *)n = cpu_to_be32(be32_to_cpu(*(__be32 *)n)+d); 67 be32_add_cpu((__be32 *)n, d);
86} 68}
87 69
88static inline void 70static inline void
89fs32_sub(struct super_block *sbp, __fs32 *n, int d) 71fs32_sub(struct super_block *sbp, __fs32 *n, int d)
90{ 72{
91 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 73 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
92 *(__le32 *)n = cpu_to_le32(le32_to_cpu(*(__le32 *)n)-d); 74 le32_add_cpu((__le32 *)n, -d);
93 else 75 else
94 *(__be32 *)n = cpu_to_be32(be32_to_cpu(*(__be32 *)n)-d); 76 be32_add_cpu((__be32 *)n, -d);
95} 77}
96 78
97static inline u16 79static inline u16
@@ -116,18 +98,18 @@ static inline void
116fs16_add(struct super_block *sbp, __fs16 *n, int d) 98fs16_add(struct super_block *sbp, __fs16 *n, int d)
117{ 99{
118 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 100 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
119 *(__le16 *)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 101 le16_add_cpu((__le16 *)n, d);
120 else 102 else
121 *(__be16 *)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 103 be16_add_cpu((__be16 *)n, d);
122} 104}
123 105
124static inline void 106static inline void
125fs16_sub(struct super_block *sbp, __fs16 *n, int d) 107fs16_sub(struct super_block *sbp, __fs16 *n, int d)
126{ 108{
127 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 109 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
128 *(__le16 *)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)-d); 110 le16_add_cpu((__le16 *)n, -d);
129 else 111 else
130 *(__be16 *)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)-d); 112 be16_add_cpu((__be16 *)n, -d);
131} 113}
132 114
133#endif /* _UFS_SWAB_H */ 115#endif /* _UFS_SWAB_H */
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index fcb9231bb9ed..244a1aaa940e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -66,7 +66,7 @@ struct ufs_inode_info {
66#ifdef CONFIG_UFS_DEBUG 66#ifdef CONFIG_UFS_DEBUG
67# define UFSD(f, a...) { \ 67# define UFSD(f, a...) { \
68 printk ("UFSD (%s, %d): %s:", \ 68 printk ("UFSD (%s, %d): %s:", \
69 __FILE__, __LINE__, __FUNCTION__); \ 69 __FILE__, __LINE__, __func__); \
70 printk (f, ## a); \ 70 printk (f, ## a); \
71 } 71 }
72#else 72#else
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c9..af059d5cb485 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
40 40
41#endif 41#endif
42 42
43static bool nsec_special(long nsec)
44{
45 return nsec == UTIME_OMIT || nsec == UTIME_NOW;
46}
47
43static bool nsec_valid(long nsec) 48static bool nsec_valid(long nsec)
44{ 49{
45 if (nsec == UTIME_OMIT || nsec == UTIME_NOW) 50 if (nsec_special(nsec))
46 return true; 51 return true;
47 52
48 return nsec >= 0 && nsec <= 999999999; 53 return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
119 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 124 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
120 newattrs.ia_valid |= ATTR_MTIME_SET; 125 newattrs.ia_valid |= ATTR_MTIME_SET;
121 } 126 }
122 } else { 127 }
128
129 /*
130 * If times is NULL or both times are either UTIME_OMIT or
131 * UTIME_NOW, then need to check permissions, because
132 * inode_change_ok() won't do it.
133 */
134 if (!times || (nsec_special(times[0].tv_nsec) &&
135 nsec_special(times[1].tv_nsec))) {
123 error = -EACCES; 136 error = -EACCES;
124 if (IS_IMMUTABLE(inode)) 137 if (IS_IMMUTABLE(inode))
125 goto mnt_drop_write_and_out; 138 goto mnt_drop_write_and_out;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index cd450bea9f1a..a3522727ea5b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -176,15 +176,10 @@ static inline int vfat_is_used_badchars(const wchar_t *s, int len)
176 for (i = 0; i < len; i++) 176 for (i = 0; i < len; i++)
177 if (vfat_bad_char(s[i])) 177 if (vfat_bad_char(s[i]))
178 return -EINVAL; 178 return -EINVAL;
179 return 0;
180}
181 179
182static int vfat_valid_longname(const unsigned char *name, unsigned int len) 180 if (s[i - 1] == ' ') /* last character cannot be space */
183{
184 if (name[len - 1] == ' ')
185 return -EINVAL; 181 return -EINVAL;
186 if (len >= 256) 182
187 return -ENAMETOOLONG;
188 return 0; 183 return 0;
189} 184}
190 185
@@ -477,7 +472,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
477 if (utf8) { 472 if (utf8) {
478 int name_len = strlen(name); 473 int name_len = strlen(name);
479 474
480 *outlen = utf8_mbstowcs((wchar_t *)outname, name, PAGE_SIZE); 475 *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
481 476
482 /* 477 /*
483 * We stripped '.'s before and set len appropriately, 478 * We stripped '.'s before and set len appropriately,
@@ -485,11 +480,14 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
485 */ 480 */
486 *outlen -= (name_len - len); 481 *outlen -= (name_len - len);
487 482
483 if (*outlen > 255)
484 return -ENAMETOOLONG;
485
488 op = &outname[*outlen * sizeof(wchar_t)]; 486 op = &outname[*outlen * sizeof(wchar_t)];
489 } else { 487 } else {
490 if (nls) { 488 if (nls) {
491 for (i = 0, ip = name, op = outname, *outlen = 0; 489 for (i = 0, ip = name, op = outname, *outlen = 0;
492 i < len && *outlen <= 260; 490 i < len && *outlen <= 255;
493 *outlen += 1) 491 *outlen += 1)
494 { 492 {
495 if (escape && (*ip == ':')) { 493 if (escape && (*ip == ':')) {
@@ -525,18 +523,20 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
525 op += 2; 523 op += 2;
526 } 524 }
527 } 525 }
526 if (i < len)
527 return -ENAMETOOLONG;
528 } else { 528 } else {
529 for (i = 0, ip = name, op = outname, *outlen = 0; 529 for (i = 0, ip = name, op = outname, *outlen = 0;
530 i < len && *outlen <= 260; 530 i < len && *outlen <= 255;
531 i++, *outlen += 1) 531 i++, *outlen += 1)
532 { 532 {
533 *op++ = *ip++; 533 *op++ = *ip++;
534 *op++ = 0; 534 *op++ = 0;
535 } 535 }
536 if (i < len)
537 return -ENAMETOOLONG;
536 } 538 }
537 } 539 }
538 if (*outlen > 260)
539 return -ENAMETOOLONG;
540 540
541 *longlen = *outlen; 541 *longlen = *outlen;
542 if (*outlen % 13) { 542 if (*outlen % 13) {
@@ -565,7 +565,6 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
565 struct fat_mount_options *opts = &sbi->options; 565 struct fat_mount_options *opts = &sbi->options;
566 struct msdos_dir_slot *ps; 566 struct msdos_dir_slot *ps;
567 struct msdos_dir_entry *de; 567 struct msdos_dir_entry *de;
568 unsigned long page;
569 unsigned char cksum, lcase; 568 unsigned char cksum, lcase;
570 unsigned char msdos_name[MSDOS_NAME]; 569 unsigned char msdos_name[MSDOS_NAME];
571 wchar_t *uname; 570 wchar_t *uname;
@@ -574,15 +573,11 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
574 loff_t offset; 573 loff_t offset;
575 574
576 *nr_slots = 0; 575 *nr_slots = 0;
577 err = vfat_valid_longname(name, len);
578 if (err)
579 return err;
580 576
581 page = __get_free_page(GFP_KERNEL); 577 uname = __getname();
582 if (!page) 578 if (!uname)
583 return -ENOMEM; 579 return -ENOMEM;
584 580
585 uname = (wchar_t *)page;
586 err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize, 581 err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize,
587 opts->unicode_xlate, opts->utf8, sbi->nls_io); 582 opts->unicode_xlate, opts->utf8, sbi->nls_io);
588 if (err) 583 if (err)
@@ -634,7 +629,7 @@ shortname:
634 de->starthi = cpu_to_le16(cluster >> 16); 629 de->starthi = cpu_to_le16(cluster >> 16);
635 de->size = 0; 630 de->size = 0;
636out_free: 631out_free:
637 free_page(page); 632 __putname(uname);
638 return err; 633 return err;
639} 634}
640 635
@@ -991,7 +986,7 @@ error_inode:
991 if (corrupt < 0) { 986 if (corrupt < 0) {
992 fat_fs_panic(new_dir->i_sb, 987 fat_fs_panic(new_dir->i_sb,
993 "%s: Filesystem corrupted (i_pos %lld)", 988 "%s: Filesystem corrupted (i_pos %lld)",
994 __FUNCTION__, sinfo.i_pos); 989 __func__, sinfo.i_pos);
995 } 990 }
996 goto out; 991 goto out;
997} 992}
@@ -1003,7 +998,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1003 .mkdir = vfat_mkdir, 998 .mkdir = vfat_mkdir,
1004 .rmdir = vfat_rmdir, 999 .rmdir = vfat_rmdir,
1005 .rename = vfat_rename, 1000 .rename = vfat_rename,
1006 .setattr = fat_notify_change, 1001 .setattr = fat_setattr,
1007 .getattr = fat_getattr, 1002 .getattr = fat_getattr,
1008}; 1003};
1009 1004
diff --git a/fs/xattr.c b/fs/xattr.c
index f7062da505d4..4706a8b1f495 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
67} 67}
68 68
69int 69int
70vfs_setxattr(struct dentry *dentry, char *name, void *value, 70vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
71 size_t size, int flags) 71 size_t size, int flags)
72{ 72{
73 struct inode *inode = dentry->d_inode; 73 struct inode *inode = dentry->d_inode;
@@ -131,7 +131,7 @@ out_noalloc:
131EXPORT_SYMBOL_GPL(xattr_getsecurity); 131EXPORT_SYMBOL_GPL(xattr_getsecurity);
132 132
133ssize_t 133ssize_t
134vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) 134vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
135{ 135{
136 struct inode *inode = dentry->d_inode; 136 struct inode *inode = dentry->d_inode;
137 int error; 137 int error;
@@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
187EXPORT_SYMBOL_GPL(vfs_listxattr); 187EXPORT_SYMBOL_GPL(vfs_listxattr);
188 188
189int 189int
190vfs_removexattr(struct dentry *dentry, char *name) 190vfs_removexattr(struct dentry *dentry, const char *name)
191{ 191{
192 struct inode *inode = dentry->d_inode; 192 struct inode *inode = dentry->d_inode;
193 int error; 193 int error;
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
218 * Extended attribute SET operations 218 * Extended attribute SET operations
219 */ 219 */
220static long 220static long
221setxattr(struct dentry *d, char __user *name, void __user *value, 221setxattr(struct dentry *d, const char __user *name, const void __user *value,
222 size_t size, int flags) 222 size_t size, int flags)
223{ 223{
224 int error; 224 int error;
@@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
252} 252}
253 253
254asmlinkage long 254asmlinkage long
255sys_setxattr(char __user *path, char __user *name, void __user *value, 255sys_setxattr(const char __user *path, const char __user *name,
256 size_t size, int flags) 256 const void __user *value, size_t size, int flags)
257{ 257{
258 struct nameidata nd; 258 struct nameidata nd;
259 int error; 259 int error;
@@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
271} 271}
272 272
273asmlinkage long 273asmlinkage long
274sys_lsetxattr(char __user *path, char __user *name, void __user *value, 274sys_lsetxattr(const char __user *path, const char __user *name,
275 size_t size, int flags) 275 const void __user *value, size_t size, int flags)
276{ 276{
277 struct nameidata nd; 277 struct nameidata nd;
278 int error; 278 int error;
@@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
290} 290}
291 291
292asmlinkage long 292asmlinkage long
293sys_fsetxattr(int fd, char __user *name, void __user *value, 293sys_fsetxattr(int fd, const char __user *name, const void __user *value,
294 size_t size, int flags) 294 size_t size, int flags)
295{ 295{
296 struct file *f; 296 struct file *f;
@@ -307,7 +307,6 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
307 error = setxattr(dentry, name, value, size, flags); 307 error = setxattr(dentry, name, value, size, flags);
308 mnt_drop_write(f->f_path.mnt); 308 mnt_drop_write(f->f_path.mnt);
309 } 309 }
310out_fput:
311 fput(f); 310 fput(f);
312 return error; 311 return error;
313} 312}
@@ -316,7 +315,8 @@ out_fput:
316 * Extended attribute GET operations 315 * Extended attribute GET operations
317 */ 316 */
318static ssize_t 317static ssize_t
319getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) 318getxattr(struct dentry *d, const char __user *name, void __user *value,
319 size_t size)
320{ 320{
321 ssize_t error; 321 ssize_t error;
322 void *kvalue = NULL; 322 void *kvalue = NULL;
@@ -350,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
350} 350}
351 351
352asmlinkage ssize_t 352asmlinkage ssize_t
353sys_getxattr(char __user *path, char __user *name, void __user *value, 353sys_getxattr(const char __user *path, const char __user *name,
354 size_t size) 354 void __user *value, size_t size)
355{ 355{
356 struct nameidata nd; 356 struct nameidata nd;
357 ssize_t error; 357 ssize_t error;
@@ -365,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
365} 365}
366 366
367asmlinkage ssize_t 367asmlinkage ssize_t
368sys_lgetxattr(char __user *path, char __user *name, void __user *value, 368sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
369 size_t size) 369 size_t size)
370{ 370{
371 struct nameidata nd; 371 struct nameidata nd;
@@ -380,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
380} 380}
381 381
382asmlinkage ssize_t 382asmlinkage ssize_t
383sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) 383sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size)
384{ 384{
385 struct file *f; 385 struct file *f;
386 ssize_t error = -EBADF; 386 ssize_t error = -EBADF;
@@ -425,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size)
425} 425}
426 426
427asmlinkage ssize_t 427asmlinkage ssize_t
428sys_listxattr(char __user *path, char __user *list, size_t size) 428sys_listxattr(const char __user *path, char __user *list, size_t size)
429{ 429{
430 struct nameidata nd; 430 struct nameidata nd;
431 ssize_t error; 431 ssize_t error;
@@ -439,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
439} 439}
440 440
441asmlinkage ssize_t 441asmlinkage ssize_t
442sys_llistxattr(char __user *path, char __user *list, size_t size) 442sys_llistxattr(const char __user *path, char __user *list, size_t size)
443{ 443{
444 struct nameidata nd; 444 struct nameidata nd;
445 ssize_t error; 445 ssize_t error;
@@ -471,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
471 * Extended attribute REMOVE operations 471 * Extended attribute REMOVE operations
472 */ 472 */
473static long 473static long
474removexattr(struct dentry *d, char __user *name) 474removexattr(struct dentry *d, const char __user *name)
475{ 475{
476 int error; 476 int error;
477 char kname[XATTR_NAME_MAX + 1]; 477 char kname[XATTR_NAME_MAX + 1];
@@ -486,7 +486,7 @@ removexattr(struct dentry *d, char __user *name)
486} 486}
487 487
488asmlinkage long 488asmlinkage long
489sys_removexattr(char __user *path, char __user *name) 489sys_removexattr(const char __user *path, const char __user *name)
490{ 490{
491 struct nameidata nd; 491 struct nameidata nd;
492 int error; 492 int error;
@@ -504,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name)
504} 504}
505 505
506asmlinkage long 506asmlinkage long
507sys_lremovexattr(char __user *path, char __user *name) 507sys_lremovexattr(const char __user *path, const char __user *name)
508{ 508{
509 struct nameidata nd; 509 struct nameidata nd;
510 int error; 510 int error;
@@ -522,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name)
522} 522}
523 523
524asmlinkage long 524asmlinkage long
525sys_fremovexattr(int fd, char __user *name) 525sys_fremovexattr(int fd, const char __user *name)
526{ 526{
527 struct file *f; 527 struct file *f;
528 struct dentry *dentry; 528 struct dentry *dentry;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff5436..3f53dd101f99 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
64 See the xfs man page in section 5 for additional information. 64 See the xfs man page in section 5 for additional information.
65 65
66 If unsure, say N. 66 If unsure, say N.
67
68config XFS_DEBUG
69 bool "XFS Debugging support (EXPERIMENTAL)"
70 depends on XFS_FS && EXPERIMENTAL
71 help
72 Say Y here to get an XFS build with many debugging features,
73 including ASSERT checks, function wrappers around macros,
74 and extra sanity-checking functions in various code paths.
75
76 Note that the resulting code will be HUGE and SLOW, and probably
77 not useful unless you are debugging a particular problem.
78
79 Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb002665..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
20 20
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22 22
23enum { MR_NONE, MR_ACCESS, MR_UPDATE };
24
25typedef struct { 23typedef struct {
26 struct rw_semaphore mr_lock; 24 struct rw_semaphore mr_lock;
25#ifdef DEBUG
27 int mr_writer; 26 int mr_writer;
27#endif
28} mrlock_t; 28} mrlock_t;
29 29
30#ifdef DEBUG
30#define mrinit(mrp, name) \ 31#define mrinit(mrp, name) \
31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) 32 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
33#else
34#define mrinit(mrp, name) \
35 do { init_rwsem(&(mrp)->mr_lock); } while (0)
36#endif
37
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 38#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 39#define mrfree(mrp) do { } while (0)
34 40
35static inline void mraccess(mrlock_t *mrp)
36{
37 down_read(&mrp->mr_lock);
38}
39
40static inline void mrupdate(mrlock_t *mrp)
41{
42 down_write(&mrp->mr_lock);
43 mrp->mr_writer = 1;
44}
45
46static inline void mraccess_nested(mrlock_t *mrp, int subclass) 41static inline void mraccess_nested(mrlock_t *mrp, int subclass)
47{ 42{
48 down_read_nested(&mrp->mr_lock, subclass); 43 down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
51static inline void mrupdate_nested(mrlock_t *mrp, int subclass) 46static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
52{ 47{
53 down_write_nested(&mrp->mr_lock, subclass); 48 down_write_nested(&mrp->mr_lock, subclass);
49#ifdef DEBUG
54 mrp->mr_writer = 1; 50 mrp->mr_writer = 1;
51#endif
55} 52}
56 53
57
58static inline int mrtryaccess(mrlock_t *mrp) 54static inline int mrtryaccess(mrlock_t *mrp)
59{ 55{
60 return down_read_trylock(&mrp->mr_lock); 56 return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
64{ 60{
65 if (!down_write_trylock(&mrp->mr_lock)) 61 if (!down_write_trylock(&mrp->mr_lock))
66 return 0; 62 return 0;
63#ifdef DEBUG
67 mrp->mr_writer = 1; 64 mrp->mr_writer = 1;
65#endif
68 return 1; 66 return 1;
69} 67}
70 68
71static inline void mrunlock(mrlock_t *mrp) 69static inline void mrunlock_excl(mrlock_t *mrp)
72{ 70{
73 if (mrp->mr_writer) { 71#ifdef DEBUG
74 mrp->mr_writer = 0; 72 mrp->mr_writer = 0;
75 up_write(&mrp->mr_lock); 73#endif
76 } else { 74 up_write(&mrp->mr_lock);
77 up_read(&mrp->mr_lock);
78 }
79} 75}
80 76
81static inline void mrdemote(mrlock_t *mrp) 77static inline void mrunlock_shared(mrlock_t *mrp)
82{ 78{
83 mrp->mr_writer = 0; 79 up_read(&mrp->mr_lock);
84 downgrade_write(&mrp->mr_lock);
85} 80}
86 81
87#ifdef DEBUG 82static inline void mrdemote(mrlock_t *mrp)
88/*
89 * Debug-only routine, without some platform-specific asm code, we can
90 * now only answer requests regarding whether we hold the lock for write
91 * (reader state is outside our visibility, we only track writer state).
92 * Note: means !ismrlocked would give false positives, so don't do that.
93 */
94static inline int ismrlocked(mrlock_t *mrp, int type)
95{ 83{
96 if (mrp && type == MR_UPDATE) 84#ifdef DEBUG
97 return mrp->mr_writer; 85 mrp->mr_writer = 0;
98 return 1;
99}
100#endif 86#endif
87 downgrade_write(&mrp->mr_lock);
88}
101 89
102#endif /* __XFS_SUPPORT_MRLOCK_H__ */ 90#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d5..5105015a75ad 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
886xfs_buf_lock_value( 886xfs_buf_lock_value(
887 xfs_buf_t *bp) 887 xfs_buf_t *bp)
888{ 888{
889 return atomic_read(&bp->b_sema.count); 889 return bp->b_sema.count;
890} 890}
891#endif 891#endif
892 892
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab76..c672b3238b14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
133 if (!ip) 133 if (!ip)
134 return ERR_PTR(-EIO); 134 return ERR_PTR(-EIO);
135 135
136 if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { 136 if (ip->i_d.di_gen != generation) {
137 xfs_iput_new(ip, XFS_ILOCK_SHARED); 137 xfs_iput_new(ip, XFS_ILOCK_SHARED);
138 return ERR_PTR(-ENOENT); 138 return ERR_PTR(-ENOENT);
139 } 139 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434d..65e78c13d4ae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
43#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
44 44
45static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
46#ifdef CONFIG_XFS_DMAPI
47static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48#endif
49 46
50STATIC_INLINE ssize_t 47STATIC_INLINE ssize_t
51__xfs_file_read( 48__xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
202 (xfs_off_t)0, (xfs_off_t)-1); 199 (xfs_off_t)0, (xfs_off_t)-1);
203} 200}
204 201
205#ifdef CONFIG_XFS_DMAPI
206STATIC int
207xfs_vm_fault(
208 struct vm_area_struct *vma,
209 struct vm_fault *vmf)
210{
211 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
212 bhv_vnode_t *vp = vn_from_inode(inode);
213
214 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
215 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
216 return VM_FAULT_SIGBUS;
217 return filemap_fault(vma, vmf);
218}
219#endif /* CONFIG_XFS_DMAPI */
220
221/* 202/*
222 * Unfortunately we can't just use the clean and simple readdir implementation 203 * Unfortunately we can't just use the clean and simple readdir implementation
223 * below, because nfs might call back into ->lookup from the filldir callback 204 * below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
386 vma->vm_ops = &xfs_file_vm_ops; 367 vma->vm_ops = &xfs_file_vm_ops;
387 vma->vm_flags |= VM_CAN_NONLINEAR; 368 vma->vm_flags |= VM_CAN_NONLINEAR;
388 369
389#ifdef CONFIG_XFS_DMAPI
390 if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
391 vma->vm_ops = &xfs_dmapi_file_vm_ops;
392#endif /* CONFIG_XFS_DMAPI */
393
394 file_accessed(filp); 370 file_accessed(filp);
395 return 0; 371 return 0;
396} 372}
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
437 return error; 413 return error;
438} 414}
439 415
440#ifdef CONFIG_XFS_DMAPI
441#ifdef HAVE_VMOP_MPROTECT
442STATIC int
443xfs_vm_mprotect(
444 struct vm_area_struct *vma,
445 unsigned int newflags)
446{
447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
448 struct xfs_mount *mp = XFS_M(inode->i_sb);
449 int error = 0;
450
451 if (mp->m_flags & XFS_MOUNT_DMAPI) {
452 if ((vma->vm_flags & VM_MAYSHARE) &&
453 (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
454 error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
455 }
456 return error;
457}
458#endif /* HAVE_VMOP_MPROTECT */
459#endif /* CONFIG_XFS_DMAPI */
460
461#ifdef HAVE_FOP_OPEN_EXEC
462/* If the user is attempting to execute a file that is offline then
463 * we have to trigger a DMAPI READ event before the file is marked as busy
464 * otherwise the invisible I/O will not be able to write to the file to bring
465 * it back online.
466 */
467STATIC int
468xfs_file_open_exec(
469 struct inode *inode)
470{
471 struct xfs_mount *mp = XFS_M(inode->i_sb);
472 struct xfs_inode *ip = XFS_I(inode);
473
474 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
475 DM_EVENT_ENABLED(ip, DM_EVENT_READ))
476 return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
477 return 0;
478}
479#endif /* HAVE_FOP_OPEN_EXEC */
480
481/* 416/*
482 * mmap()d file has taken write protection fault and is being made 417 * mmap()d file has taken write protection fault and is being made
483 * writable. We can set the page state up correctly for a writable 418 * writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
546 .fault = filemap_fault, 481 .fault = filemap_fault,
547 .page_mkwrite = xfs_vm_page_mkwrite, 482 .page_mkwrite = xfs_vm_page_mkwrite,
548}; 483};
549
550#ifdef CONFIG_XFS_DMAPI
551static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
552 .fault = xfs_vm_fault,
553 .page_mkwrite = xfs_vm_page_mkwrite,
554#ifdef HAVE_VMOP_MPROTECT
555 .mprotect = xfs_vm_mprotect,
556#endif
557};
558#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6b..a42ba9d71156 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
238 return error; 238 return error;
239 if (ip == NULL) 239 if (ip == NULL)
240 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
241 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { 241 if (ip->i_d.di_gen != igen) {
242 xfs_iput_new(ip, XFS_ILOCK_SHARED); 242 xfs_iput_new(ip, XFS_ILOCK_SHARED);
243 return XFS_ERROR(ENOENT); 243 return XFS_ERROR(ENOENT);
244 } 244 }
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
505{ 505{
506 char *kbuf; 506 char *kbuf;
507 int error = EFAULT; 507 int error = EFAULT;
508 508
509 if (*len > XATTR_SIZE_MAX) 509 if (*len > XATTR_SIZE_MAX)
510 return EINVAL; 510 return EINVAL;
511 kbuf = kmalloc(*len, GFP_KERNEL); 511 kbuf = kmalloc(*len, GFP_KERNEL);
512 if (!kbuf) 512 if (!kbuf)
513 return ENOMEM; 513 return ENOMEM;
514 514
515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); 515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
516 if (error) 516 if (error)
517 goto out_kfree; 517 goto out_kfree;
518 518
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
546 546
547 if (copy_from_user(kbuf, ubuf, len)) 547 if (copy_from_user(kbuf, ubuf, len))
548 goto out_kfree; 548 goto out_kfree;
549 549
550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
551 551
552 out_kfree: 552 out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad6430..2bf287ef5489 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
511 xfs_dentry_to_name(&nname, ndentry); 511 xfs_dentry_to_name(&nname, ndentry);
512 512
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname); 514 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL);
515 if (likely(!error)) { 516 if (likely(!error)) {
516 if (new_inode) 517 if (new_inode)
517 xfs_validate_fields(new_inode); 518 xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71f..4edc46915b57 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
75#include <linux/delay.h> 75#include <linux/delay.h>
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h>
78 79
79#include <asm/page.h> 80#include <asm/page.h>
80#include <asm/div64.h> 81#include <asm/div64.h>
@@ -99,7 +100,6 @@
99/* 100/*
100 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
101 */ 102 */
102#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
103#ifdef CONFIG_SMP 103#ifdef CONFIG_SMP
104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
105#else 105#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469c..5e3b57516ec7 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
394 int error = 0; 394 int error = 0;
395 xfs_bmbt_irec_t imap; 395 xfs_bmbt_irec_t imap;
396 396
397 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 397 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
398 398
399 zero_offset = XFS_B_FSB_OFFSET(mp, isize); 399 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
400 if (zero_offset == 0) { 400 if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
425 * out sync. We need to drop the ilock while we do this so we 425 * out sync. We need to drop the ilock while we do this so we
426 * don't deadlock when the buffer cache calls back to us. 426 * don't deadlock when the buffer cache calls back to us.
427 */ 427 */
428 xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 428 xfs_iunlock(ip, XFS_ILOCK_EXCL);
429 429
430 zero_len = mp->m_sb.sb_blocksize - zero_offset; 430 zero_len = mp->m_sb.sb_blocksize - zero_offset;
431 if (isize + zero_len > offset) 431 if (isize + zero_len > offset)
432 zero_len = offset - isize; 432 zero_len = offset - isize;
433 error = xfs_iozero(ip, isize, zero_len); 433 error = xfs_iozero(ip, isize, zero_len);
434 434
435 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 435 xfs_ilock(ip, XFS_ILOCK_EXCL);
436 ASSERT(error >= 0); 436 ASSERT(error >= 0);
437 return error; 437 return error;
438} 438}
@@ -465,8 +465,7 @@ xfs_zero_eof(
465 int error = 0; 465 int error = 0;
466 xfs_bmbt_irec_t imap; 466 xfs_bmbt_irec_t imap;
467 467
468 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
469 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
470 ASSERT(offset > isize); 469 ASSERT(offset > isize);
471 470
472 /* 471 /*
@@ -475,8 +474,7 @@ xfs_zero_eof(
475 */ 474 */
476 error = xfs_zero_last_block(ip, offset, isize); 475 error = xfs_zero_last_block(ip, offset, isize);
477 if (error) { 476 if (error) {
478 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 477 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
479 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
480 return error; 478 return error;
481 } 479 }
482 480
@@ -507,8 +505,7 @@ xfs_zero_eof(
507 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 505 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
508 0, NULL, 0, &imap, &nimaps, NULL, NULL); 506 0, NULL, 0, &imap, &nimaps, NULL, NULL);
509 if (error) { 507 if (error) {
510 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 508 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
511 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
512 return error; 509 return error;
513 } 510 }
514 ASSERT(nimaps > 0); 511 ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
532 * Drop the inode lock while we're doing the I/O. 529 * Drop the inode lock while we're doing the I/O.
533 * We'll still have the iolock to protect us. 530 * We'll still have the iolock to protect us.
534 */ 531 */
535 xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 532 xfs_iunlock(ip, XFS_ILOCK_EXCL);
536 533
537 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 534 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
538 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 535 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
548 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 545 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
549 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 546 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
550 547
551 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 548 xfs_ilock(ip, XFS_ILOCK_EXCL);
552 } 549 }
553 550
554 return 0; 551 return 0;
555 552
556out_lock: 553out_lock:
557 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 554 xfs_ilock(ip, XFS_ILOCK_EXCL);
558 ASSERT(error >= 0); 555 ASSERT(error >= 0);
559 return error; 556 return error;
560} 557}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7a..e6be37dbd0e9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
50#define XFS_INVAL_CACHED 18 50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19 51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20 52#define XFS_DIOWR_ENTER 20
53#define XFS_SENDFILE_ENTER 21
54#define XFS_WRITEPAGE_ENTER 22 53#define XFS_WRITEPAGE_ENTER 22
55#define XFS_RELEASEPAGE_ENTER 23 54#define XFS_RELEASEPAGE_ENTER 23
56#define XFS_INVALIDPAGE_ENTER 24 55#define XFS_INVALIDPAGE_ENTER 24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa95..742b2c7852c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
1181 statp->f_fsid.val[0] = (u32)id; 1181 statp->f_fsid.val[0] = (u32)id;
1182 statp->f_fsid.val[1] = (u32)(id >> 32); 1182 statp->f_fsid.val[1] = (u32)(id >> 32);
1183 1183
1184 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 1184 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
1185 1185
1186 spin_lock(&mp->m_sb_lock); 1186 spin_lock(&mp->m_sb_lock);
1187 statp->f_bsize = sbp->sb_blocksize; 1187 statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce8694..9d73cb5c0fc7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
25 25
26typedef struct inode bhv_vnode_t; 26typedef struct inode bhv_vnode_t;
27 27
28#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
29#define VN_ISREG(vp) S_ISREG((vp)->i_mode)
30#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode)
31#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode)
32#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode)
33
34/* 28/*
35 * Vnode to Linux inode mapping. 29 * Vnode to Linux inode mapping.
36 */ 30 */
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
151 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ 145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
152 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) 146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
153 147
154/*
155 * Modes.
156 */
157#define VSUID S_ISUID /* set user id on execution */
158#define VSGID S_ISGID /* set group id on execution */
159#define VSVTX S_ISVTX /* save swapped text even after use */
160#define VREAD S_IRUSR /* read, write, execute permissions */
161#define VWRITE S_IWUSR
162#define VEXEC S_IXUSR
163
164#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
165
166/*
167 * Check whether mandatory file locking is enabled.
168 */
169#define MANDLOCK(vp, mode) \
170 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
171
172extern void vn_init(void); 148extern void vn_init(void);
173extern int vn_revalidate(bhv_vnode_t *); 149extern int vn_revalidate(bhv_vnode_t *);
174 150
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b295..85df3288efd5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
933 type == XFS_DQ_PROJ || 933 type == XFS_DQ_PROJ ||
934 type == XFS_DQ_GROUP); 934 type == XFS_DQ_GROUP);
935 if (ip) { 935 if (ip) {
936 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 936 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
937 if (type == XFS_DQ_USER) 937 if (type == XFS_DQ_USER)
938 ASSERT(ip->i_udquot == NULL); 938 ASSERT(ip->i_udquot == NULL);
939 else 939 else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
1088 xfs_qm_mplist_unlock(mp); 1088 xfs_qm_mplist_unlock(mp);
1089 XFS_DQ_HASH_UNLOCK(h); 1089 XFS_DQ_HASH_UNLOCK(h);
1090 dqret: 1090 dqret:
1091 ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip)); 1091 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1092 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1092 xfs_dqtrace_entry(dqp, "DQGET DONE");
1093 *O_dqpp = dqp; 1093 *O_dqpp = dqp;
1094 return (0); 1094 return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea56409561..d31cce1165c5 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
670 xfs_dquot_t *dqp; 670 xfs_dquot_t *dqp;
671 int error; 671 int error;
672 672
673 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 673 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
674 error = 0; 674 error = 0;
675 /* 675 /*
676 * See if we already have it in the inode itself. IO_idqpp is 676 * See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
874 return 0; 874 return 0;
875 875
876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
877 XFS_ISLOCKED_INODE_EXCL(ip)); 877 xfs_isilocked(ip, XFS_ILOCK_EXCL));
878 878
879 if (! (flags & XFS_QMOPT_ILOCKED)) 879 if (! (flags & XFS_QMOPT_ILOCKED))
880 xfs_ilock(ip, XFS_ILOCK_EXCL); 880 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
888 goto done; 888 goto done;
889 nquotas++; 889 nquotas++;
890 } 890 }
891 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 891
892 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
892 if (XFS_IS_OQUOTA_ON(mp)) { 893 if (XFS_IS_OQUOTA_ON(mp)) {
893 error = XFS_IS_GQUOTA_ON(mp) ? 894 error = XFS_IS_GQUOTA_ON(mp) ?
894 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 895 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
913 * This WON'T, in general, result in a thrash. 914 * This WON'T, in general, result in a thrash.
914 */ 915 */
915 if (nquotas == 2) { 916 if (nquotas == 2) {
916 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 917 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
917 ASSERT(ip->i_udquot); 918 ASSERT(ip->i_udquot);
918 ASSERT(ip->i_gdquot); 919 ASSERT(ip->i_gdquot);
919 920
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
956 957
957#ifdef QUOTADEBUG 958#ifdef QUOTADEBUG
958 else 959 else
959 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 960 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
960#endif 961#endif
961 return error; 962 return error;
962} 963}
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
1291 xfs_mount_t *mp; 1292 xfs_mount_t *mp;
1292 xfs_dquot_t *udqp, *gdqp; 1293 xfs_dquot_t *udqp, *gdqp;
1293 1294
1294 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 1295 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1295 mp = ip->i_mount; 1296 mp = ip->i_mount;
1296 udqp = NULL; 1297 udqp = NULL;
1297 gdqp = NULL; 1298 gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
1392 * Keep an extra reference to this quota inode. This inode is 1393 * Keep an extra reference to this quota inode. This inode is
1393 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1394 */ 1395 */
1395 ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1396 VN_HOLD(XFS_ITOV((*ip))); 1397 VN_HOLD(XFS_ITOV((*ip)));
1397 1398
1398 /* 1399 /*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
1737 return error; 1738 return error;
1738 } 1739 }
1739 1740
1740 if (ip->i_d.di_mode == 0) {
1741 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1742 *res = BULKSTAT_RV_NOTHING;
1743 return XFS_ERROR(ENOENT);
1744 }
1745
1746 /* 1741 /*
1747 * Obtain the locked dquots. In case of an error (eg. allocation 1742 * Obtain the locked dquots. In case of an error (eg. allocation
1748 * fails for ENOSPC), we return the negative of the error number 1743 * fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
2563 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 2558 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2564 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 2559 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2565 2560
2566 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2567 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 2562 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2568 2563
2569 /* old dquot */ 2564 /* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
2607 uint delblks, blkflags, prjflags = 0; 2602 uint delblks, blkflags, prjflags = 0;
2608 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2603 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2609 2604
2610 ASSERT(XFS_ISLOCKED_INODE(ip)); 2605 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2611 mp = ip->i_mount; 2606 mp = ip->i_mount;
2612 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2607 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2613 2608
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2717 if (!XFS_IS_QUOTA_ON(tp->t_mountp)) 2712 if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2718 return; 2713 return;
2719 2714
2720 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2715 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2721 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); 2716 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2722 2717
2723 if (udqp) { 2718 if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc3..768a3b27d2b6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
1366 return (error); 1366 return (error);
1367 } 1367 }
1368 1368
1369 if (ip->i_d.di_mode == 0) {
1370 xfs_iput_new(ip, lock_flags);
1371 *res = BULKSTAT_RV_NOTHING;
1372 return XFS_ERROR(ENOENT);
1373 }
1374
1375 /* 1369 /*
1376 * This inode can have blocks after eof which can get released 1370 * This inode can have blocks after eof which can get released
1377 * when we send it to inactive. Since we don't check the dquot 1371 * when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d5..5e4a40b1c565 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
27/* Number of dquots that fit in to a dquot block */ 27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) 28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29 29
30#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
31 MR_UPDATE | MR_ACCESS) != 0)
32#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
33 MR_UPDATE) != 0)
34
35#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) 30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
36 31
37#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) 32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8b..99611381e740 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); 834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
836 836
837 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 837 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
840 XFS_TRANS_DQ_RES_RTBLKS || 840 XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 855da0408647..75845f950814 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -49,8 +49,6 @@ extern void assfail(char *expr, char *f, int l);
49 49
50#else /* DEBUG */ 50#else /* DEBUG */
51 51
52#include <linux/random.h>
53
54#define ASSERT(expr) \ 52#define ASSERT(expr) \
55 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 53 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
56 54
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d3..540e4c989825 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25#define QUOTADEBUG 1 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE 28#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720ae..ebee3a4f703a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
72{ 72{
73 int error; 73 int error;
74 74
75 if (!VN_ISDIR(vp)) 75 if (!S_ISDIR(vp->i_mode))
76 return 0; 76 return 0;
77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
78 return (error == 0); 78 return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
238 error = EINVAL; 238 error = EINVAL;
239 goto out; 239 goto out;
240 } 240 }
241 if (kind == _ACL_TYPE_ACCESS) { 241 if (kind == _ACL_TYPE_ACCESS)
242 bhv_vattr_t va; 242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
243
244 va.va_mask = XFS_AT_MODE;
245 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
246 if (error)
247 goto out;
248 xfs_acl_sync_mode(va.va_mode, xfs_acl);
249 }
250 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
251 } 244 }
252out: 245out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
341{ 334{
342 xfs_acl_t *acl; 335 xfs_acl_t *acl;
343 int rval; 336 int rval;
337 struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
344 338
345 if (!(_ACL_ALLOC(acl))) 339 if (!(_ACL_ALLOC(acl)))
346 return -1; 340 return -1;
347 341
348 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
349 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
350 if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
351 (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) { 345 ATTR_ROOT | ATTR_KERNACCESS)) {
352 _ACL_FREE(acl); 346 _ACL_FREE(acl);
353 return -1; 347 return -1;
354 } 348 }
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
373 bhv_vnode_t *vp, 367 bhv_vnode_t *vp,
374 int kind) 368 int kind)
375{ 369{
376 xfs_inode_t *ip = xfs_vtoi(vp);
377 bhv_vattr_t va;
378 int error;
379
380 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 370 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
381 return EPERM; 371 return EPERM;
382 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) 372 if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
383 return ENOTDIR; 373 return ENOTDIR;
384 if (vp->i_sb->s_flags & MS_RDONLY) 374 if (vp->i_sb->s_flags & MS_RDONLY)
385 return EROFS; 375 return EROFS;
386 va.va_mask = XFS_AT_UID; 376 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
387 error = xfs_getattr(ip, &va, 0);
388 if (error)
389 return error;
390 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
391 return EPERM; 377 return EPERM;
392 return error; 378 return 0;
393} 379}
394 380
395/* 381/*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
594 *error = xfs_attr_get(xfs_vtoi(vp), 580 *error = xfs_attr_get(xfs_vtoi(vp),
595 kind == _ACL_TYPE_ACCESS ? 581 kind == _ACL_TYPE_ACCESS ?
596 SGI_ACL_FILE : SGI_ACL_DEFAULT, 582 SGI_ACL_FILE : SGI_ACL_DEFAULT,
597 (char *)aclp, &len, flags, sys_cred); 583 (char *)aclp, &len, flags);
598 if (*error || (flags & ATTR_KERNOVAL)) 584 if (*error || (flags & ATTR_KERNOVAL))
599 return; 585 return;
600 xfs_acl_get_endian(aclp); 586 xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
643 xfs_acl_t *access_acl, 629 xfs_acl_t *access_acl,
644 xfs_acl_t *default_acl) 630 xfs_acl_t *default_acl)
645{ 631{
646 bhv_vattr_t va;
647 int error = 0; 632 int error = 0;
648 633
649 if (access_acl) { 634 if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
652 * be obtained for some reason, invalidate the access ACL. 637 * be obtained for some reason, invalidate the access ACL.
653 */ 638 */
654 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); 639 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
655 if (!error) {
656 /* Got the ACL, need the mode... */
657 va.va_mask = XFS_AT_MODE;
658 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
659 }
660
661 if (error) 640 if (error)
662 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 641 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
663 else /* We have a good ACL and the file mode, synchronize. */ 642 else /* We have a good ACL and the file mode, synchronize. */
664 xfs_acl_sync_mode(va.va_mode, access_acl); 643 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
665 } 644 }
666 645
667 if (default_acl) { 646 if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
719 * If the new file is a directory, its default ACL is a copy of 698 * If the new file is a directory, its default ACL is a copy of
720 * the containing directory's default ACL. 699 * the containing directory's default ACL.
721 */ 700 */
722 if (VN_ISDIR(vp)) 701 if (S_ISDIR(vp->i_mode))
723 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 702 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
724 if (!error && !basicperms) 703 if (!error && !basicperms)
725 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 704 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
744 bhv_vattr_t va; 723 bhv_vattr_t va;
745 xfs_acl_entry_t *ap; 724 xfs_acl_entry_t *ap;
746 xfs_acl_entry_t *gap = NULL; 725 xfs_acl_entry_t *gap = NULL;
747 int i, error, nomask = 1; 726 int i, nomask = 1;
748 727
749 *basicperms = 1; 728 *basicperms = 1;
750 729
@@ -756,11 +735,7 @@ xfs_acl_setmode(
756 * mode. The m:: bits take precedence over the g:: bits. 735 * mode. The m:: bits take precedence over the g:: bits.
757 */ 736 */
758 va.va_mask = XFS_AT_MODE; 737 va.va_mask = XFS_AT_MODE;
759 error = xfs_getattr(xfs_vtoi(vp), &va, 0); 738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
760 if (error)
761 return error;
762
763 va.va_mask = XFS_AT_MODE;
764 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
765 ap = acl->acl_entry; 740 ap = acl->acl_entry;
766 for (i = 0; i < acl->acl_cnt; ++i) { 741 for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fcc..df151a859186 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
101ktrace_t *xfs_attr_trace_buf; 101ktrace_t *xfs_attr_trace_buf;
102#endif 102#endif
103 103
104STATIC int
105xfs_attr_name_to_xname(
106 struct xfs_name *xname,
107 const char *aname)
108{
109 if (!aname)
110 return EINVAL;
111 xname->name = aname;
112 xname->len = strlen(aname);
113 if (xname->len >= MAXNAMELEN)
114 return EFAULT; /* match IRIX behaviour */
115
116 return 0;
117}
104 118
105/*======================================================================== 119/*========================================================================
106 * Overall external interface routines. 120 * Overall external interface routines.
107 *========================================================================*/ 121 *========================================================================*/
108 122
109int 123int
110xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, 124xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
111 char *value, int *valuelenp, int flags, struct cred *cred) 125 char *value, int *valuelenp, int flags)
112{ 126{
113 xfs_da_args_t args; 127 xfs_da_args_t args;
114 int error; 128 int error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
122 * Fill in the arg structure for this request. 136 * Fill in the arg structure for this request.
123 */ 137 */
124 memset((char *)&args, 0, sizeof(args)); 138 memset((char *)&args, 0, sizeof(args));
125 args.name = name; 139 args.name = name->name;
126 args.namelen = namelen; 140 args.namelen = name->len;
127 args.value = value; 141 args.value = value;
128 args.valuelen = *valuelenp; 142 args.valuelen = *valuelenp;
129 args.flags = flags; 143 args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
162 const char *name, 176 const char *name,
163 char *value, 177 char *value,
164 int *valuelenp, 178 int *valuelenp,
165 int flags, 179 int flags)
166 cred_t *cred)
167{ 180{
168 int error, namelen; 181 int error;
182 struct xfs_name xname;
169 183
170 XFS_STATS_INC(xs_attr_get); 184 XFS_STATS_INC(xs_attr_get);
171 185
172 if (!name)
173 return(EINVAL);
174 namelen = strlen(name);
175 if (namelen >= MAXNAMELEN)
176 return(EFAULT); /* match IRIX behaviour */
177
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 return(EIO); 187 return(EIO);
180 188
189 error = xfs_attr_name_to_xname(&xname, name);
190 if (error)
191 return error;
192
181 xfs_ilock(ip, XFS_ILOCK_SHARED); 193 xfs_ilock(ip, XFS_ILOCK_SHARED);
182 error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred); 194 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
183 xfs_iunlock(ip, XFS_ILOCK_SHARED); 195 xfs_iunlock(ip, XFS_ILOCK_SHARED);
184 return(error); 196 return(error);
185} 197}
186 198
187int 199STATIC int
188xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, 200xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
189 char *value, int valuelen, int flags) 201 char *value, int valuelen, int flags)
190{ 202{
191 xfs_da_args_t args; 203 xfs_da_args_t args;
192 xfs_fsblock_t firstblock; 204 xfs_fsblock_t firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
209 */ 221 */
210 if (XFS_IFORK_Q(dp) == 0) { 222 if (XFS_IFORK_Q(dp) == 0) {
211 int sf_size = sizeof(xfs_attr_sf_hdr_t) + 223 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
212 XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen); 224 XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
213 225
214 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) 226 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
215 return(error); 227 return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
219 * Fill in the arg structure for this request. 231 * Fill in the arg structure for this request.
220 */ 232 */
221 memset((char *)&args, 0, sizeof(args)); 233 memset((char *)&args, 0, sizeof(args));
222 args.name = name; 234 args.name = name->name;
223 args.namelen = namelen; 235 args.namelen = name->len;
224 args.value = value; 236 args.value = value;
225 args.valuelen = valuelen; 237 args.valuelen = valuelen;
226 args.flags = flags; 238 args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
236 * Determine space new attribute will use, and if it would be 248 * Determine space new attribute will use, and if it would be
237 * "local" or "remote" (note: local != inline). 249 * "local" or "remote" (note: local != inline).
238 */ 250 */
239 size = xfs_attr_leaf_newentsize(namelen, valuelen, 251 size = xfs_attr_leaf_newentsize(name->len, valuelen,
240 mp->m_sb.sb_blocksize, &local); 252 mp->m_sb.sb_blocksize, &local);
241 253
242 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); 254 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
429 int valuelen, 441 int valuelen,
430 int flags) 442 int flags)
431{ 443{
432 int namelen; 444 int error;
433 445 struct xfs_name xname;
434 namelen = strlen(name);
435 if (namelen >= MAXNAMELEN)
436 return EFAULT; /* match IRIX behaviour */
437 446
438 XFS_STATS_INC(xs_attr_set); 447 XFS_STATS_INC(xs_attr_set);
439 448
440 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 449 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
441 return (EIO); 450 return (EIO);
442 451
443 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); 452 error = xfs_attr_name_to_xname(&xname, name);
453 if (error)
454 return error;
455
456 return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
444} 457}
445 458
446/* 459/*
447 * Generic handler routine to remove a name from an attribute list. 460 * Generic handler routine to remove a name from an attribute list.
448 * Transitions attribute list from Btree to shortform as necessary. 461 * Transitions attribute list from Btree to shortform as necessary.
449 */ 462 */
450int 463STATIC int
451xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) 464xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
452{ 465{
453 xfs_da_args_t args; 466 xfs_da_args_t args;
454 xfs_fsblock_t firstblock; 467 xfs_fsblock_t firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
460 * Fill in the arg structure for this request. 473 * Fill in the arg structure for this request.
461 */ 474 */
462 memset((char *)&args, 0, sizeof(args)); 475 memset((char *)&args, 0, sizeof(args));
463 args.name = name; 476 args.name = name->name;
464 args.namelen = namelen; 477 args.namelen = name->len;
465 args.flags = flags; 478 args.flags = flags;
466 args.hashval = xfs_da_hashname(args.name, args.namelen); 479 args.hashval = xfs_da_hashname(args.name, args.namelen);
467 args.dp = dp; 480 args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
575 const char *name, 588 const char *name,
576 int flags) 589 int flags)
577{ 590{
578 int namelen; 591 int error;
579 592 struct xfs_name xname;
580 namelen = strlen(name);
581 if (namelen >= MAXNAMELEN)
582 return EFAULT; /* match IRIX behaviour */
583 593
584 XFS_STATS_INC(xs_attr_remove); 594 XFS_STATS_INC(xs_attr_remove);
585 595
586 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 596 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
587 return (EIO); 597 return (EIO);
588 598
599 error = xfs_attr_name_to_xname(&xname, name);
600 if (error)
601 return error;
602
589 xfs_ilock(dp, XFS_ILOCK_SHARED); 603 xfs_ilock(dp, XFS_ILOCK_SHARED);
590 if (XFS_IFORK_Q(dp) == 0 || 604 if (XFS_IFORK_Q(dp) == 0 ||
591 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
595 } 609 }
596 xfs_iunlock(dp, XFS_ILOCK_SHARED); 610 xfs_iunlock(dp, XFS_ILOCK_SHARED);
597 611
598 return xfs_attr_remove_int(dp, name, namelen, flags); 612 return xfs_attr_remove_int(dp, &xname, flags);
599} 613}
600 614
601int /* error */ 615STATIC int
602xfs_attr_list_int(xfs_attr_list_context_t *context) 616xfs_attr_list_int(xfs_attr_list_context_t *context)
603{ 617{
604 int error; 618 int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
2522{ 2536{
2523 int error, asize = size; 2537 int error, asize = size;
2524 2538
2525 error = xfs_attr_get(xfs_vtoi(vp), name, data, 2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2526 &asize, xflags, NULL);
2527 if (!error) 2540 if (!error)
2528 return asize; 2541 return asize;
2529 return -error; 2542 return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c4..6cfc9384fe35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
158/* 158/*
159 * Overall external interface routines. 159 * Overall external interface routines.
160 */ 160 */
161int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
162int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
163int xfs_attr_list_int(struct xfs_attr_list_context *);
164int xfs_attr_inactive(struct xfs_inode *dp); 161int xfs_attr_inactive(struct xfs_inode *dp);
165 162
166int xfs_attr_shortform_getvalue(struct xfs_da_args *); 163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
167int xfs_attr_fetch(struct xfs_inode *, const char *, int, 164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
168 char *, int *, int, struct cred *);
169int xfs_attr_rmtval_get(struct xfs_da_args *args); 165int xfs_attr_rmtval_get(struct xfs_da_args *args);
170 166
171#endif /* __XFS_ATTR_H__ */ 167#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35d..53c259f5a5af 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
4074error2: 4074error2:
4075 xfs_bmap_cancel(&flist); 4075 xfs_bmap_cancel(&flist);
4076error1: 4076error1:
4077 ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
4078 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4077 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4079error0: 4078error0:
4080 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 4079 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a3..5f3647cb9885 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
162 ips[1] = ip; 162 ips[1] = ip;
163 } 163 }
164 164
165 xfs_lock_inodes(ips, 2, 0, lock_flags); 165 xfs_lock_inodes(ips, 2, lock_flags);
166 locked = 1; 166 locked = 1;
167 167
168 /* Verify that both files have the same format */ 168 /* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
265 locked = 0; 265 locked = 0;
266 goto error0; 266 goto error0;
267 } 267 }
268 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
269 269
270 /* 270 /*
271 * Count the number of extended attribute blocks 271 * Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a6..381ebda4f7bc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 xfs_mount_t *mp, 462 xfs_mount_t *mp,
463 xfs_fsop_counts_t *cnt) 463 xfs_fsop_counts_t *cnt)
464{ 464{
465 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 465 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
466 spin_lock(&mp->m_sb_lock); 466 spin_lock(&mp->m_sb_lock);
467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
468 cnt->freertx = mp->m_sb.sb_frextents; 468 cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
524 */ 524 */
525retry: 525retry:
526 spin_lock(&mp->m_sb_lock); 526 spin_lock(&mp->m_sb_lock);
527 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); 527 xfs_icsb_sync_counters_locked(mp, 0);
528 528
529 /* 529 /*
530 * If our previous reservation was larger than the current value, 530 * If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
552 mp->m_resblks += free; 552 mp->m_resblks += free;
553 mp->m_resblks_avail += free; 553 mp->m_resblks_avail += free;
554 fdblks_delta = -free; 554 fdblks_delta = -free;
555 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
556 } else { 555 } else {
557 fdblks_delta = -delta; 556 fdblks_delta = -delta;
558 mp->m_sb.sb_fdblocks =
559 lcounter + XFS_ALLOC_SET_ASIDE(mp);
560 mp->m_resblks = request; 557 mp->m_resblks = request;
561 mp->m_resblks_avail += delta; 558 mp->m_resblks_avail += delta;
562 } 559 }
@@ -587,7 +584,6 @@ out:
587 if (error == ENOSPC) 584 if (error == ENOSPC)
588 goto retry; 585 goto retry;
589 } 586 }
590
591 return 0; 587 return 0;
592} 588}
593 589
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a5..aad8c5da38af 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
147 int version; /* inode version number to use */ 147 int version; /* inode version number to use */
148 int isaligned = 0; /* inode allocation at stripe unit */ 148 int isaligned = 0; /* inode allocation at stripe unit */
149 /* boundary */ 149 /* boundary */
150 unsigned int gen;
150 151
151 args.tp = tp; 152 args.tp = tp;
152 args.mp = tp->t_mountp; 153 args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
290 else 291 else
291 version = XFS_DINODE_VERSION_1; 292 version = XFS_DINODE_VERSION_1;
292 293
294 /*
295 * Seed the new inode cluster with a random generation number. This
296 * prevents short-term reuse of generation numbers if a chunk is
297 * freed and then immediately reallocated. We use random numbers
298 * rather than a linear progression to prevent the next generation
299 * number from being easily guessable.
300 */
301 gen = random32();
293 for (j = 0; j < nbufs; j++) { 302 for (j = 0; j < nbufs; j++) {
294 /* 303 /*
295 * Get the block. 304 * Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
309 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 318 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
310 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
311 free->di_core.di_version = version; 320 free->di_core.di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen);
312 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 322 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
313 xfs_ialloc_log_di(tp, fbuf, i, 323 xfs_ialloc_log_di(tp, fbuf, i,
314 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); 324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c5128460..b07604b94d9f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
594 */ 594 */
595void 595void
596xfs_ilock(xfs_inode_t *ip, 596xfs_ilock(
597 uint lock_flags) 597 xfs_inode_t *ip,
598 uint lock_flags)
598{ 599{
599 /* 600 /*
600 * You can't set both SHARED and EXCL for the same lock, 601 * You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t *ip,
607 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 608 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
608 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 609 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
609 610
610 if (lock_flags & XFS_IOLOCK_EXCL) { 611 if (lock_flags & XFS_IOLOCK_EXCL)
611 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 612 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
612 } else if (lock_flags & XFS_IOLOCK_SHARED) { 613 else if (lock_flags & XFS_IOLOCK_SHARED)
613 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 614 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
614 } 615
615 if (lock_flags & XFS_ILOCK_EXCL) { 616 if (lock_flags & XFS_ILOCK_EXCL)
616 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 617 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
617 } else if (lock_flags & XFS_ILOCK_SHARED) { 618 else if (lock_flags & XFS_ILOCK_SHARED)
618 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 619 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
619 } 620
620 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 621 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
621} 622}
622 623
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t *ip,
631 * lock_flags -- this parameter indicates the inode's locks to be 632 * lock_flags -- this parameter indicates the inode's locks to be
632 * to be locked. See the comment for xfs_ilock() for a list 633 * to be locked. See the comment for xfs_ilock() for a list
633 * of valid values. 634 * of valid values.
634 *
635 */ 635 */
636int 636int
637xfs_ilock_nowait(xfs_inode_t *ip, 637xfs_ilock_nowait(
638 uint lock_flags) 638 xfs_inode_t *ip,
639 uint lock_flags)
639{ 640{
640 int iolocked;
641 int ilocked;
642
643 /* 641 /*
644 * You can't set both SHARED and EXCL for the same lock, 642 * You can't set both SHARED and EXCL for the same lock,
645 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 643 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t *ip,
651 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 649 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
652 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 650 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
653 651
654 iolocked = 0;
655 if (lock_flags & XFS_IOLOCK_EXCL) { 652 if (lock_flags & XFS_IOLOCK_EXCL) {
656 iolocked = mrtryupdate(&ip->i_iolock); 653 if (!mrtryupdate(&ip->i_iolock))
657 if (!iolocked) { 654 goto out;
658 return 0;
659 }
660 } else if (lock_flags & XFS_IOLOCK_SHARED) { 655 } else if (lock_flags & XFS_IOLOCK_SHARED) {
661 iolocked = mrtryaccess(&ip->i_iolock); 656 if (!mrtryaccess(&ip->i_iolock))
662 if (!iolocked) { 657 goto out;
663 return 0;
664 }
665 } 658 }
666 if (lock_flags & XFS_ILOCK_EXCL) { 659 if (lock_flags & XFS_ILOCK_EXCL) {
667 ilocked = mrtryupdate(&ip->i_lock); 660 if (!mrtryupdate(&ip->i_lock))
668 if (!ilocked) { 661 goto out_undo_iolock;
669 if (iolocked) {
670 mrunlock(&ip->i_iolock);
671 }
672 return 0;
673 }
674 } else if (lock_flags & XFS_ILOCK_SHARED) { 662 } else if (lock_flags & XFS_ILOCK_SHARED) {
675 ilocked = mrtryaccess(&ip->i_lock); 663 if (!mrtryaccess(&ip->i_lock))
676 if (!ilocked) { 664 goto out_undo_iolock;
677 if (iolocked) {
678 mrunlock(&ip->i_iolock);
679 }
680 return 0;
681 }
682 } 665 }
683 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 666 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
684 return 1; 667 return 1;
668
669 out_undo_iolock:
670 if (lock_flags & XFS_IOLOCK_EXCL)
671 mrunlock_excl(&ip->i_iolock);
672 else if (lock_flags & XFS_IOLOCK_SHARED)
673 mrunlock_shared(&ip->i_iolock);
674 out:
675 return 0;
685} 676}
686 677
687/* 678/*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t *ip,
697 * 688 *
698 */ 689 */
699void 690void
700xfs_iunlock(xfs_inode_t *ip, 691xfs_iunlock(
701 uint lock_flags) 692 xfs_inode_t *ip,
693 uint lock_flags)
702{ 694{
703 /* 695 /*
704 * You can't set both SHARED and EXCL for the same lock, 696 * You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t *ip,
713 XFS_LOCK_DEP_MASK)) == 0); 705 XFS_LOCK_DEP_MASK)) == 0);
714 ASSERT(lock_flags != 0); 706 ASSERT(lock_flags != 0);
715 707
716 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 708 if (lock_flags & XFS_IOLOCK_EXCL)
717 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || 709 mrunlock_excl(&ip->i_iolock);
718 (ismrlocked(&ip->i_iolock, MR_ACCESS))); 710 else if (lock_flags & XFS_IOLOCK_SHARED)
719 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || 711 mrunlock_shared(&ip->i_iolock);
720 (ismrlocked(&ip->i_iolock, MR_UPDATE)));
721 mrunlock(&ip->i_iolock);
722 }
723 712
724 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { 713 if (lock_flags & XFS_ILOCK_EXCL)
725 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || 714 mrunlock_excl(&ip->i_lock);
726 (ismrlocked(&ip->i_lock, MR_ACCESS))); 715 else if (lock_flags & XFS_ILOCK_SHARED)
727 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || 716 mrunlock_shared(&ip->i_lock);
728 (ismrlocked(&ip->i_lock, MR_UPDATE)));
729 mrunlock(&ip->i_lock);
730 717
718 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
719 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
731 /* 720 /*
732 * Let the AIL know that this item has been unlocked in case 721 * Let the AIL know that this item has been unlocked in case
733 * it is in the AIL and anyone is waiting on it. Don't do 722 * it is in the AIL and anyone is waiting on it. Don't do
734 * this if the caller has asked us not to. 723 * this if the caller has asked us not to.
735 */ 724 */
736 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && 725 xfs_trans_unlocked_item(ip->i_mount,
737 ip->i_itemp != NULL) { 726 (xfs_log_item_t*)(ip->i_itemp));
738 xfs_trans_unlocked_item(ip->i_mount,
739 (xfs_log_item_t*)(ip->i_itemp));
740 }
741 } 727 }
742 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 728 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
743} 729}
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t *ip,
747 * if it is being demoted. 733 * if it is being demoted.
748 */ 734 */
749void 735void
750xfs_ilock_demote(xfs_inode_t *ip, 736xfs_ilock_demote(
751 uint lock_flags) 737 xfs_inode_t *ip,
738 uint lock_flags)
752{ 739{
753 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 740 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
754 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 741 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
755 742
756 if (lock_flags & XFS_ILOCK_EXCL) { 743 if (lock_flags & XFS_ILOCK_EXCL)
757 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
758 mrdemote(&ip->i_lock); 744 mrdemote(&ip->i_lock);
759 } 745 if (lock_flags & XFS_IOLOCK_EXCL)
760 if (lock_flags & XFS_IOLOCK_EXCL) {
761 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
762 mrdemote(&ip->i_iolock); 746 mrdemote(&ip->i_iolock);
747}
748
749#ifdef DEBUG
750/*
751 * Debug-only routine, without additional rw_semaphore APIs, we can
752 * now only answer requests regarding whether we hold the lock for write
753 * (reader state is outside our visibility, we only track writer state).
754 *
755 * Note: this means !xfs_isilocked would give false positives, so don't do that.
756 */
757int
758xfs_isilocked(
759 xfs_inode_t *ip,
760 uint lock_flags)
761{
762 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
763 XFS_ILOCK_EXCL) {
764 if (!ip->i_lock.mr_writer)
765 return 0;
763 } 766 }
767
768 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
769 XFS_IOLOCK_EXCL) {
770 if (!ip->i_iolock.mr_writer)
771 return 0;
772 }
773
774 return 1;
764} 775}
776#endif
765 777
766/* 778/*
767 * The following three routines simply manage the i_flock 779 * The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb90394..cf0bb9c1d621 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
1291 xfs_fileoff_t size_last_block; 1291 xfs_fileoff_t size_last_block;
1292 int error; 1292 int error;
1293 1293
1294 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1294 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1295 1295
1296 mp = ip->i_mount; 1296 mp = ip->i_mount;
1297 /* 1297 /*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
1402 bhv_vnode_t *vp; 1402 bhv_vnode_t *vp;
1403 int error = 0; 1403 int error = 0;
1404 1404
1405 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1406 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1406 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1408 (flags == XFS_ITRUNC_MAYBE)); 1408 (flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
1528 xfs_bmap_free_t free_list; 1528 xfs_bmap_free_t free_list;
1529 int error; 1529 int error;
1530 1530
1531 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1532 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1533 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1532 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1534 ASSERT(*tp != NULL); 1533 ASSERT(*tp != NULL);
1535 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1534 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
1780 xfs_fsize_t new_size, 1779 xfs_fsize_t new_size,
1781 cred_t *credp) 1780 cred_t *credp)
1782{ 1781{
1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1785 ASSERT(new_size > ip->i_size); 1783 ASSERT(new_size > ip->i_size);
1786 1784
1787 /* 1785 /*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
1809 xfs_fsize_t new_size, 1807 xfs_fsize_t new_size,
1810 int change_flag) 1808 int change_flag)
1811{ 1809{
1812 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1813 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1814 ASSERT(ip->i_transp == tp); 1811 ASSERT(ip->i_transp == tp);
1815 ASSERT(new_size > ip->i_size); 1812 ASSERT(new_size > ip->i_size);
1816 1813
@@ -2287,7 +2284,7 @@ xfs_ifree(
2287 xfs_dinode_t *dip; 2284 xfs_dinode_t *dip;
2288 xfs_buf_t *ibp; 2285 xfs_buf_t *ibp;
2289 2286
2290 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2287 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2291 ASSERT(ip->i_transp == tp); 2288 ASSERT(ip->i_transp == tp);
2292 ASSERT(ip->i_d.di_nlink == 0); 2289 ASSERT(ip->i_d.di_nlink == 0);
2293 ASSERT(ip->i_d.di_nextents == 0); 2290 ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
2746xfs_ipin( 2743xfs_ipin(
2747 xfs_inode_t *ip) 2744 xfs_inode_t *ip)
2748{ 2745{
2749 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2750 2747
2751 atomic_inc(&ip->i_pincount); 2748 atomic_inc(&ip->i_pincount);
2752} 2749}
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
2779{ 2776{
2780 xfs_inode_log_item_t *iip = ip->i_itemp; 2777 xfs_inode_log_item_t *iip = ip->i_itemp;
2781 2778
2782 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2783 if (atomic_read(&ip->i_pincount) == 0) 2780 if (atomic_read(&ip->i_pincount) == 0)
2784 return; 2781 return;
2785 2782
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
2829 xfs_fsblock_t start_block; 2826 xfs_fsblock_t start_block;
2830 2827
2831 ifp = XFS_IFORK_PTR(ip, whichfork); 2828 ifp = XFS_IFORK_PTR(ip, whichfork);
2832 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 2829 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2833 ASSERT(ifp->if_bytes > 0); 2830 ASSERT(ifp->if_bytes > 0);
2834 2831
2835 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2832 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
3132 3129
3133 XFS_STATS_INC(xs_iflush_count); 3130 XFS_STATS_INC(xs_iflush_count);
3134 3131
3135 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3132 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3136 ASSERT(issemalocked(&(ip->i_flock))); 3133 ASSERT(issemalocked(&(ip->i_flock)));
3137 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3134 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3138 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3135 ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
3297 int first; 3294 int first;
3298#endif 3295#endif
3299 3296
3300 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3297 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3301 ASSERT(issemalocked(&(ip->i_flock))); 3298 ASSERT(issemalocked(&(ip->i_flock)));
3302 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3299 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3303 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3300 ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72c..0a999fee4f03 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
386#define XFS_ILOCK_EXCL (1<<2) 386#define XFS_ILOCK_EXCL (1<<2)
387#define XFS_ILOCK_SHARED (1<<3) 387#define XFS_ILOCK_SHARED (1<<3)
388#define XFS_IUNLOCK_NONOTIFY (1<<4) 388#define XFS_IUNLOCK_NONOTIFY (1<<4)
389/* #define XFS_IOLOCK_NESTED (1<<5) */
390#define XFS_EXTENT_TOKEN_RD (1<<6)
391#define XFS_SIZE_TOKEN_RD (1<<7)
392#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
393#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
394#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
395#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
396#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
397/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
398 389
399#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 390#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
400 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ 391 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
401 | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
402 | XFS_WILLLEND)
403 392
404/* 393/*
405 * Flags for lockdep annotations. 394 * Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void xfs_ilock(xfs_inode_t *, uint);
483int xfs_ilock_nowait(xfs_inode_t *, uint); 472int xfs_ilock_nowait(xfs_inode_t *, uint);
484void xfs_iunlock(xfs_inode_t *, uint); 473void xfs_iunlock(xfs_inode_t *, uint);
485void xfs_ilock_demote(xfs_inode_t *, uint); 474void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint);
486void xfs_iflock(xfs_inode_t *); 476void xfs_iflock(xfs_inode_t *);
487int xfs_iflock_nowait(xfs_inode_t *); 477int xfs_iflock_nowait(xfs_inode_t *);
488uint xfs_ilock_map_shared(xfs_inode_t *); 478uint xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int xfs_iflush(xfs_inode_t *, uint);
534void xfs_iflush_all(struct xfs_mount *); 524void xfs_iflush_all(struct xfs_mount *);
535void xfs_ichgtime(xfs_inode_t *, int); 525void xfs_ichgtime(xfs_inode_t *, int);
536xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 526xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
537void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 527void xfs_lock_inodes(xfs_inode_t **, int, uint);
538 528
539void xfs_synchronize_atime(xfs_inode_t *); 529void xfs_synchronize_atime(xfs_inode_t *);
540void xfs_mark_inode_dirty_sync(xfs_inode_t *); 530void xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea2..167b33f15772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
547xfs_inode_item_pin( 547xfs_inode_item_pin(
548 xfs_inode_log_item_t *iip) 548 xfs_inode_log_item_t *iip)
549{ 549{
550 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 550 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
551 xfs_ipin(iip->ili_inode); 551 xfs_ipin(iip->ili_inode);
552} 552}
553 553
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
664 664
665 ASSERT(iip != NULL); 665 ASSERT(iip != NULL);
666 ASSERT(iip->ili_inode->i_itemp != NULL); 666 ASSERT(iip->ili_inode->i_itemp != NULL);
667 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 667 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
669 XFS_ILI_IOLOCKED_EXCL)) || 669 XFS_ILI_IOLOCKED_EXCL)) ||
670 ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE)); 670 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
672 XFS_ILI_IOLOCKED_SHARED)) || 672 XFS_ILI_IOLOCKED_SHARED)) ||
673 ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS)); 673 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
674 /* 674 /*
675 * Clear the transaction pointer in the inode. 675 * Clear the transaction pointer in the inode.
676 */ 676 */
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
769 769
770 ip = iip->ili_inode; 770 ip = iip->ili_inode;
771 771
772 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 772 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
773 773
774 /* 774 /*
775 * The ili_pushbuf_flag keeps others from 775 * The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
857 857
858 ip = iip->ili_inode; 858 ip = iip->ili_inode;
859 859
860 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 861 ASSERT(issemalocked(&(ip->i_flock)));
862 /* 862 /*
863 * Since we were able to lock the inode's flush lock and 863 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf1191419..7edcde691d1a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
196 break; 196 break;
197 case BMAPI_WRITE: 197 case BMAPI_WRITE:
198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); 198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
199 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 199 lockmode = XFS_ILOCK_EXCL;
200 if (flags & BMAPI_IGNSTATE) 200 if (flags & BMAPI_IGNSTATE)
201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
202 xfs_ilock(ip, lockmode); 202 xfs_ilock(ip, lockmode);
203 break; 203 break;
204 case BMAPI_ALLOCATE: 204 case BMAPI_ALLOCATE:
205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); 205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
206 lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; 206 lockmode = XFS_ILOCK_SHARED;
207 bmapi_flags = XFS_BMAPI_ENTIRE; 207 bmapi_flags = XFS_BMAPI_ENTIRE;
208 208
209 /* Attempt non-blocking lock */ 209 /* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
523 goto error_out; 523 goto error_out;
524 } 524 }
525 525
526 if (unlikely(!imap.br_startblock && 526 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
527 !(XFS_IS_REALTIME_INODE(ip)))) {
528 error = xfs_cmn_err_fsblock_zero(ip, &imap); 527 error = xfs_cmn_err_fsblock_zero(ip, &imap);
529 goto error_out; 528 goto error_out;
530 } 529 }
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
624 int prealloc, fsynced = 0; 623 int prealloc, fsynced = 0;
625 int error; 624 int error;
626 625
627 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 626 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
628 627
629 /* 628 /*
630 * Make sure that the dquots are there. This doesn't hold 629 * Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
686 goto retry; 685 goto retry;
687 } 686 }
688 687
689 if (unlikely(!imap[0].br_startblock && 688 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
690 !(XFS_IS_REALTIME_INODE(ip))))
691 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 689 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
692 690
693 *ret_imap = imap[0]; 691 *ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
838 * See if we were able to allocate an extent that 836 * See if we were able to allocate an extent that
839 * covers at least part of the callers request 837 * covers at least part of the callers request
840 */ 838 */
841 if (unlikely(!imap.br_startblock && 839 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
842 XFS_IS_REALTIME_INODE(ip)))
843 return xfs_cmn_err_fsblock_zero(ip, &imap); 840 return xfs_cmn_err_fsblock_zero(ip, &imap);
841
844 if ((offset_fsb >= imap.br_startoff) && 842 if ((offset_fsb >= imap.br_startoff) &&
845 (offset_fsb < (imap.br_startoff + 843 (offset_fsb < (imap.br_startoff +
846 imap.br_blockcount))) { 844 imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
934 if (error) 932 if (error)
935 return XFS_ERROR(error); 933 return XFS_ERROR(error);
936 934
937 if (unlikely(!imap.br_startblock && 935 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
938 !(XFS_IS_REALTIME_INODE(ip))))
939 return xfs_cmn_err_fsblock_zero(ip, &imap); 936 return xfs_cmn_err_fsblock_zero(ip, &imap);
940 937
941 if ((numblks_fsb = imap.br_blockcount) == 0) { 938 if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0c..419de15aeb43 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
71 71
72 ASSERT(ip != NULL); 72 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 73 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 if (ip->i_d.di_mode == 0) {
75 *stat = BULKSTAT_RV_NOTHING;
76 error = XFS_ERROR(ENOENT);
77 goto out_iput;
78 }
79 74
80 vp = XFS_ITOV(ip); 75 vp = XFS_ITOV(ip);
81 dic = &ip->i_d; 76 dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
124 break; 119 break;
125 } 120 }
126 121
127 out_iput:
128 xfs_iput(ip, XFS_ILOCK_SHARED); 122 xfs_iput(ip, XFS_ILOCK_SHARED);
129 return error; 123 return error;
130} 124}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbcc..da3988453b71 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
54#ifdef HAVE_PERCPU_SB 54#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int, int); 57 int);
58STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
59 int);
59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
60 int64_t, int); 61 int64_t, int);
61STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 62STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63#else 64#else
64 65
65#define xfs_icsb_destroy_counters(mp) do { } while (0) 66#define xfs_icsb_destroy_counters(mp) do { } while (0)
66#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) 67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
67#define xfs_icsb_sync_counters(mp) do { } while (0) 68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
68#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
69 70
70#endif 71#endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
1400 if (!xfs_fs_writable(mp)) 1401 if (!xfs_fs_writable(mp))
1401 return 0; 1402 return 0;
1402 1403
1403 xfs_icsb_sync_counters(mp); 1404 xfs_icsb_sync_counters(mp, 0);
1404 1405
1405 /* 1406 /*
1406 * we don't need to do this if we are updating the superblock 1407 * we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
2026 case CPU_ONLINE: 2027 case CPU_ONLINE:
2027 case CPU_ONLINE_FROZEN: 2028 case CPU_ONLINE_FROZEN:
2028 xfs_icsb_lock(mp); 2029 xfs_icsb_lock(mp);
2029 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2030 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2030 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2031 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2031 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2032 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2032 xfs_icsb_unlock(mp); 2033 xfs_icsb_unlock(mp);
2033 break; 2034 break;
2034 case CPU_DEAD: 2035 case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
2048 2049
2049 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 2050 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
2050 2051
2051 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 2052 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
2052 XFS_ICSB_SB_LOCKED, 0); 2053 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
2053 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 2054 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
2054 XFS_ICSB_SB_LOCKED, 0);
2055 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
2056 XFS_ICSB_SB_LOCKED, 0);
2057 spin_unlock(&mp->m_sb_lock); 2055 spin_unlock(&mp->m_sb_lock);
2058 xfs_icsb_unlock(mp); 2056 xfs_icsb_unlock(mp);
2059 break; 2057 break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
2105 * initial balance kicks us off correctly 2103 * initial balance kicks us off correctly
2106 */ 2104 */
2107 mp->m_icsb_counters = -1; 2105 mp->m_icsb_counters = -1;
2108 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2106 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2109 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2107 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2110 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2108 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2111 xfs_icsb_unlock(mp); 2109 xfs_icsb_unlock(mp);
2112} 2110}
2113 2111
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
2223 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2221 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
2224 /* drain back to superblock */ 2222 /* drain back to superblock */
2225 2223
2226 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 2224 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
2227 switch(field) { 2225 switch(field) {
2228 case XFS_SBS_ICOUNT: 2226 case XFS_SBS_ICOUNT:
2229 mp->m_sb.sb_icount = cnt.icsb_icount; 2227 mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
2278} 2276}
2279 2277
2280void 2278void
2281xfs_icsb_sync_counters_flags( 2279xfs_icsb_sync_counters_locked(
2282 xfs_mount_t *mp, 2280 xfs_mount_t *mp,
2283 int flags) 2281 int flags)
2284{ 2282{
2285 xfs_icsb_cnts_t cnt; 2283 xfs_icsb_cnts_t cnt;
2286 2284
2287 /* Pass 1: lock all counters */
2288 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2289 spin_lock(&mp->m_sb_lock);
2290
2291 xfs_icsb_count(mp, &cnt, flags); 2285 xfs_icsb_count(mp, &cnt, flags);
2292 2286
2293 /* Step 3: update mp->m_sb fields */
2294 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 2287 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
2295 mp->m_sb.sb_icount = cnt.icsb_icount; 2288 mp->m_sb.sb_icount = cnt.icsb_icount;
2296 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2289 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
2297 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2290 mp->m_sb.sb_ifree = cnt.icsb_ifree;
2298 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2291 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
2299 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2292 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
2300
2301 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2302 spin_unlock(&mp->m_sb_lock);
2303} 2293}
2304 2294
2305/* 2295/*
2306 * Accurate update of per-cpu counters to incore superblock 2296 * Accurate update of per-cpu counters to incore superblock
2307 */ 2297 */
2308STATIC void 2298void
2309xfs_icsb_sync_counters( 2299xfs_icsb_sync_counters(
2310 xfs_mount_t *mp) 2300 xfs_mount_t *mp,
2301 int flags)
2311{ 2302{
2312 xfs_icsb_sync_counters_flags(mp, 0); 2303 spin_lock(&mp->m_sb_lock);
2304 xfs_icsb_sync_counters_locked(mp, flags);
2305 spin_unlock(&mp->m_sb_lock);
2313} 2306}
2314 2307
2315/* 2308/*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
2332#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2325#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2333 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) 2326 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
2334STATIC void 2327STATIC void
2335xfs_icsb_balance_counter( 2328xfs_icsb_balance_counter_locked(
2336 xfs_mount_t *mp, 2329 xfs_mount_t *mp,
2337 xfs_sb_field_t field, 2330 xfs_sb_field_t field,
2338 int flags,
2339 int min_per_cpu) 2331 int min_per_cpu)
2340{ 2332{
2341 uint64_t count, resid; 2333 uint64_t count, resid;
2342 int weight = num_online_cpus(); 2334 int weight = num_online_cpus();
2343 uint64_t min = (uint64_t)min_per_cpu; 2335 uint64_t min = (uint64_t)min_per_cpu;
2344 2336
2345 if (!(flags & XFS_ICSB_SB_LOCKED))
2346 spin_lock(&mp->m_sb_lock);
2347
2348 /* disable counter and sync counter */ 2337 /* disable counter and sync counter */
2349 xfs_icsb_disable_counter(mp, field); 2338 xfs_icsb_disable_counter(mp, field);
2350 2339
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
2354 count = mp->m_sb.sb_icount; 2343 count = mp->m_sb.sb_icount;
2355 resid = do_div(count, weight); 2344 resid = do_div(count, weight);
2356 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2345 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2357 goto out; 2346 return;
2358 break; 2347 break;
2359 case XFS_SBS_IFREE: 2348 case XFS_SBS_IFREE:
2360 count = mp->m_sb.sb_ifree; 2349 count = mp->m_sb.sb_ifree;
2361 resid = do_div(count, weight); 2350 resid = do_div(count, weight);
2362 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2351 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2363 goto out; 2352 return;
2364 break; 2353 break;
2365 case XFS_SBS_FDBLOCKS: 2354 case XFS_SBS_FDBLOCKS:
2366 count = mp->m_sb.sb_fdblocks; 2355 count = mp->m_sb.sb_fdblocks;
2367 resid = do_div(count, weight); 2356 resid = do_div(count, weight);
2368 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) 2357 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
2369 goto out; 2358 return;
2370 break; 2359 break;
2371 default: 2360 default:
2372 BUG(); 2361 BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
2375 } 2364 }
2376 2365
2377 xfs_icsb_enable_counter(mp, field, count, resid); 2366 xfs_icsb_enable_counter(mp, field, count, resid);
2378out: 2367}
2379 if (!(flags & XFS_ICSB_SB_LOCKED)) 2368
2380 spin_unlock(&mp->m_sb_lock); 2369STATIC void
2370xfs_icsb_balance_counter(
2371 xfs_mount_t *mp,
2372 xfs_sb_field_t fields,
2373 int min_per_cpu)
2374{
2375 spin_lock(&mp->m_sb_lock);
2376 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
2377 spin_unlock(&mp->m_sb_lock);
2381} 2378}
2382 2379
2383STATIC int 2380STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
2484 * we are done. 2481 * we are done.
2485 */ 2482 */
2486 if (ret != ENOSPC) 2483 if (ret != ENOSPC)
2487 xfs_icsb_balance_counter(mp, field, 0, 0); 2484 xfs_icsb_balance_counter(mp, field, 0);
2488 xfs_icsb_unlock(mp); 2485 xfs_icsb_unlock(mp);
2489 return ret; 2486 return ret;
2490 2487
@@ -2508,7 +2505,7 @@ balance_counter:
2508 * will either succeed through the fast path or slow path without 2505 * will either succeed through the fast path or slow path without
2509 * another balance operation being required. 2506 * another balance operation being required.
2510 */ 2507 */
2511 xfs_icsb_balance_counter(mp, field, 0, delta); 2508 xfs_icsb_balance_counter(mp, field, delta);
2512 xfs_icsb_unlock(mp); 2509 xfs_icsb_unlock(mp);
2513 goto again; 2510 goto again;
2514} 2511}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff0..63e0693a358a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
206 206
207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ 207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
208 208
209#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
210#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ 209#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
211 210
212extern int xfs_icsb_init_counters(struct xfs_mount *); 211extern int xfs_icsb_init_counters(struct xfs_mount *);
213extern void xfs_icsb_reinit_counters(struct xfs_mount *); 212extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); 213extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 215
216#else 216#else
217#define xfs_icsb_init_counters(mp) (0) 217#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 218#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) 219#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
220#endif 221#endif
221 222
222typedef struct xfs_ail { 223typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85d..d8063e1ad298 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
55 55
56 xfs_iunlock(i_tab[0], lock_mode); 56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) { 57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL) { 58 if (i_tab[i] == NULL)
59 break; 59 break;
60 } 60
61 /* 61 /*
62 * Watch out for duplicate entries in the table. 62 * Watch out for duplicate entries in the table.
63 */ 63 */
64 if (i_tab[i] != i_tab[i-1]) { 64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode); 65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67 } 66 }
68} 67}
69 68
70#ifdef DEBUG
71int xfs_rename_skip, xfs_rename_nskip;
72#endif
73
74/* 69/*
75 * The following routine will acquire the locks required for a rename 70 * Enter all inodes for a rename transaction into a sorted array.
76 * operation. The code understands the semantics of renames and will
77 * validate that name1 exists under dp1 & that name2 may or may not
78 * exist under dp2.
79 *
80 * We are renaming dp1/name1 to dp2/name2.
81 *
82 * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
83 */ 71 */
84STATIC int 72STATIC void
85xfs_lock_for_rename( 73xfs_sort_for_rename(
86 xfs_inode_t *dp1, /* in: old (source) directory inode */ 74 xfs_inode_t *dp1, /* in: old (source) directory inode */
87 xfs_inode_t *dp2, /* in: new (target) directory inode */ 75 xfs_inode_t *dp2, /* in: new (target) directory inode */
88 xfs_inode_t *ip1, /* in: inode of old entry */ 76 xfs_inode_t *ip1, /* in: inode of old entry */
89 struct xfs_name *name2, /* in: new entry name */ 77 xfs_inode_t *ip2, /* in: inode of new entry, if it
90 xfs_inode_t **ipp2, /* out: inode of new entry, if it
91 already exists, NULL otherwise. */ 78 already exists, NULL otherwise. */
92 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 79 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
93 int *num_inodes) /* out: number of inodes in array */ 80 int *num_inodes) /* out: number of inodes in array */
94{ 81{
95 xfs_inode_t *ip2 = NULL;
96 xfs_inode_t *temp; 82 xfs_inode_t *temp;
97 xfs_ino_t inum1, inum2;
98 int error;
99 int i, j; 83 int i, j;
100 uint lock_mode;
101 int diff_dirs = (dp1 != dp2);
102
103 /*
104 * First, find out the current inums of the entries so that we
105 * can determine the initial locking order. We'll have to
106 * sanity check stuff after all the locks have been acquired
107 * to see if we still have the right inodes, directories, etc.
108 */
109 lock_mode = xfs_ilock_map_shared(dp1);
110 IHOLD(ip1);
111 xfs_itrace_ref(ip1);
112
113 inum1 = ip1->i_ino;
114
115 /*
116 * Unlock dp1 and lock dp2 if they are different.
117 */
118 if (diff_dirs) {
119 xfs_iunlock_map_shared(dp1, lock_mode);
120 lock_mode = xfs_ilock_map_shared(dp2);
121 }
122
123 error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
124 if (error == ENOENT) { /* target does not need to exist. */
125 inum2 = 0;
126 } else if (error) {
127 /*
128 * If dp2 and dp1 are the same, the next line unlocks dp1.
129 * Got it?
130 */
131 xfs_iunlock_map_shared(dp2, lock_mode);
132 IRELE (ip1);
133 return error;
134 } else {
135 xfs_itrace_ref(ip2);
136 }
137 84
138 /* 85 /*
139 * i_tab contains a list of pointers to inodes. We initialize 86 * i_tab contains a list of pointers to inodes. We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
145 i_tab[0] = dp1; 92 i_tab[0] = dp1;
146 i_tab[1] = dp2; 93 i_tab[1] = dp2;
147 i_tab[2] = ip1; 94 i_tab[2] = ip1;
148 if (inum2 == 0) { 95 if (ip2) {
149 *num_inodes = 3;
150 i_tab[3] = NULL;
151 } else {
152 *num_inodes = 4; 96 *num_inodes = 4;
153 i_tab[3] = ip2; 97 i_tab[3] = ip2;
98 } else {
99 *num_inodes = 3;
100 i_tab[3] = NULL;
154 } 101 }
155 *ipp2 = i_tab[3];
156 102
157 /* 103 /*
158 * Sort the elements via bubble sort. (Remember, there are at 104 * Sort the elements via bubble sort. (Remember, there are at
159 * most 4 elements to sort, so this is adequate.) 105 * most 4 elements to sort, so this is adequate.)
160 */ 106 */
161 for (i=0; i < *num_inodes; i++) { 107 for (i = 0; i < *num_inodes; i++) {
162 for (j=1; j < *num_inodes; j++) { 108 for (j = 1; j < *num_inodes; j++) {
163 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 109 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
164 temp = i_tab[j]; 110 temp = i_tab[j];
165 i_tab[j] = i_tab[j-1]; 111 i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
167 } 113 }
168 } 114 }
169 } 115 }
170
171 /*
172 * We have dp2 locked. If it isn't first, unlock it.
173 * If it is first, tell xfs_lock_inodes so it can skip it
174 * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
175 * since they are equal. xfs_lock_inodes needs all these inodes
176 * so that it can unlock and retry if there might be a dead-lock
177 * potential with the log.
178 */
179
180 if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
181#ifdef DEBUG
182 xfs_rename_skip++;
183#endif
184 xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
185 } else {
186#ifdef DEBUG
187 xfs_rename_nskip++;
188#endif
189 xfs_iunlock_map_shared(dp2, lock_mode);
190 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
191 }
192
193 return 0;
194} 116}
195 117
196/* 118/*
@@ -202,10 +124,10 @@ xfs_rename(
202 struct xfs_name *src_name, 124 struct xfs_name *src_name,
203 xfs_inode_t *src_ip, 125 xfs_inode_t *src_ip,
204 xfs_inode_t *target_dp, 126 xfs_inode_t *target_dp,
205 struct xfs_name *target_name) 127 struct xfs_name *target_name,
128 xfs_inode_t *target_ip)
206{ 129{
207 xfs_trans_t *tp; 130 xfs_trans_t *tp = NULL;
208 xfs_inode_t *target_ip;
209 xfs_mount_t *mp = src_dp->i_mount; 131 xfs_mount_t *mp = src_dp->i_mount;
210 int new_parent; /* moving to a new dir */ 132 int new_parent; /* moving to a new dir */
211 int src_is_directory; /* src_name is a directory */ 133 int src_is_directory; /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
215 int cancel_flags; 137 int cancel_flags;
216 int committed; 138 int committed;
217 xfs_inode_t *inodes[4]; 139 xfs_inode_t *inodes[4];
218 int target_ip_dropped = 0; /* dropped target_ip link? */
219 int spaceres; 140 int spaceres;
220 int target_link_zero = 0;
221 int num_inodes; 141 int num_inodes;
222 142
223 xfs_itrace_entry(src_dp); 143 xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
230 target_dp, DM_RIGHT_NULL, 150 target_dp, DM_RIGHT_NULL,
231 src_name->name, target_name->name, 151 src_name->name, target_name->name,
232 0, 0, 0); 152 0, 0, 0);
233 if (error) { 153 if (error)
234 return error; 154 return error;
235 }
236 } 155 }
237 /* Return through std_return after this point. */ 156 /* Return through std_return after this point. */
238 157
239 /* 158 new_parent = (src_dp != target_dp);
240 * Lock all the participating inodes. Depending upon whether 159 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
241 * the target_name exists in the target directory, and
242 * whether the target directory is the same as the source
243 * directory, we can lock from 2 to 4 inodes.
244 * xfs_lock_for_rename() will return ENOENT if src_name
245 * does not exist in the source directory.
246 */
247 tp = NULL;
248 error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
249 &target_ip, inodes, &num_inodes);
250 if (error) {
251 /*
252 * We have nothing locked, no inode references, and
253 * no transaction, so just get out.
254 */
255 goto std_return;
256 }
257
258 ASSERT(src_ip != NULL);
259 160
260 if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 161 if (src_is_directory) {
261 /* 162 /*
262 * Check for link count overflow on target_dp 163 * Check for link count overflow on target_dp
263 */ 164 */
264 if (target_ip == NULL && (src_dp != target_dp) && 165 if (target_ip == NULL && new_parent &&
265 target_dp->i_d.di_nlink >= XFS_MAXLINK) { 166 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
266 error = XFS_ERROR(EMLINK); 167 error = XFS_ERROR(EMLINK);
267 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); 168 goto std_return;
268 goto rele_return;
269 } 169 }
270 } 170 }
271 171
272 /* 172 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
273 * If we are using project inheritance, we only allow renames 173 inodes, &num_inodes);
274 * into our tree when the project IDs are the same; else the
275 * tree quota mechanism would be circumvented.
276 */
277 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
278 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
279 error = XFS_ERROR(EXDEV);
280 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
281 goto rele_return;
282 }
283
284 new_parent = (src_dp != target_dp);
285 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
286
287 /*
288 * Drop the locks on our inodes so that we can start the transaction.
289 */
290 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
291 174
292 XFS_BMAP_INIT(&free_list, &first_block); 175 XFS_BMAP_INIT(&free_list, &first_block);
293 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 176 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
302 } 185 }
303 if (error) { 186 if (error) {
304 xfs_trans_cancel(tp, 0); 187 xfs_trans_cancel(tp, 0);
305 goto rele_return; 188 goto std_return;
306 } 189 }
307 190
308 /* 191 /*
@@ -310,13 +193,29 @@ xfs_rename(
310 */ 193 */
311 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { 194 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
312 xfs_trans_cancel(tp, cancel_flags); 195 xfs_trans_cancel(tp, cancel_flags);
313 goto rele_return; 196 goto std_return;
314 } 197 }
315 198
316 /* 199 /*
317 * Reacquire the inode locks we dropped above. 200 * Lock all the participating inodes. Depending upon whether
201 * the target_name exists in the target directory, and
202 * whether the target directory is the same as the source
203 * directory, we can lock from 2 to 4 inodes.
204 */
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206
207 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
318 */ 211 */
319 xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); 212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
320 219
321 /* 220 /*
322 * Join all the inodes to the transaction. From this point on, 221 * Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
328 */ 227 */
329 IHOLD(src_dp); 228 IHOLD(src_dp);
330 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 229 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
230
331 if (new_parent) { 231 if (new_parent) {
332 IHOLD(target_dp); 232 IHOLD(target_dp);
333 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 233 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
334 } 234 }
335 if ((src_ip != src_dp) && (src_ip != target_dp)) { 235
336 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 236 IHOLD(src_ip);
337 } 237 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
338 if ((target_ip != NULL) && 238
339 (target_ip != src_ip) && 239 if (target_ip) {
340 (target_ip != src_dp) && 240 IHOLD(target_ip);
341 (target_ip != target_dp)) {
342 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 241 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
343 } 242 }
344 243
@@ -412,7 +311,6 @@ xfs_rename(
412 error = xfs_droplink(tp, target_ip); 311 error = xfs_droplink(tp, target_ip);
413 if (error) 312 if (error)
414 goto abort_return; 313 goto abort_return;
415 target_ip_dropped = 1;
416 314
417 if (src_is_directory) { 315 if (src_is_directory) {
418 /* 316 /*
@@ -422,10 +320,6 @@ xfs_rename(
422 if (error) 320 if (error)
423 goto abort_return; 321 goto abort_return;
424 } 322 }
425
426 /* Do this test while we still hold the locks */
427 target_link_zero = (target_ip)->i_d.di_nlink==0;
428
429 } /* target_ip != NULL */ 323 } /* target_ip != NULL */
430 324
431 /* 325 /*
@@ -492,15 +386,6 @@ xfs_rename(
492 } 386 }
493 387
494 /* 388 /*
495 * If there was a target inode, take an extra reference on
496 * it here so that it doesn't go to xfs_inactive() from
497 * within the commit.
498 */
499 if (target_ip != NULL) {
500 IHOLD(target_ip);
501 }
502
503 /*
504 * If this is a synchronous mount, make sure that the 389 * If this is a synchronous mount, make sure that the
505 * rename transaction goes to disk before returning to 390 * rename transaction goes to disk before returning to
506 * the user. 391 * the user.
@@ -509,30 +394,11 @@ xfs_rename(
509 xfs_trans_set_sync(tp); 394 xfs_trans_set_sync(tp);
510 } 395 }
511 396
512 /*
513 * Take refs. for vop_link_removed calls below. No need to worry
514 * about directory refs. because the caller holds them.
515 *
516 * Do holds before the xfs_bmap_finish since it might rele them down
517 * to zero.
518 */
519
520 if (target_ip_dropped)
521 IHOLD(target_ip);
522 IHOLD(src_ip);
523
524 error = xfs_bmap_finish(&tp, &free_list, &committed); 397 error = xfs_bmap_finish(&tp, &free_list, &committed);
525 if (error) { 398 if (error) {
526 xfs_bmap_cancel(&free_list); 399 xfs_bmap_cancel(&free_list);
527 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 400 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
528 XFS_TRANS_ABORT)); 401 XFS_TRANS_ABORT));
529 if (target_ip != NULL) {
530 IRELE(target_ip);
531 }
532 if (target_ip_dropped) {
533 IRELE(target_ip);
534 }
535 IRELE(src_ip);
536 goto std_return; 402 goto std_return;
537 } 403 }
538 404
@@ -541,15 +407,6 @@ xfs_rename(
541 * the vnode references. 407 * the vnode references.
542 */ 408 */
543 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 409 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
544 if (target_ip != NULL)
545 IRELE(target_ip);
546 /*
547 * Let interposed file systems know about removed links.
548 */
549 if (target_ip_dropped)
550 IRELE(target_ip);
551
552 IRELE(src_ip);
553 410
554 /* Fall through to std_return with error = 0 or errno from 411 /* Fall through to std_return with error = 0 or errno from
555 * xfs_trans_commit */ 412 * xfs_trans_commit */
@@ -571,11 +428,4 @@ std_return:
571 xfs_bmap_cancel(&free_list); 428 xfs_bmap_cancel(&free_list);
572 xfs_trans_cancel(tp, cancel_flags); 429 xfs_trans_cancel(tp, cancel_flags);
573 goto std_return; 430 goto std_return;
574
575 rele_return:
576 IRELE(src_ip);
577 if (target_ip != NULL) {
578 IRELE(target_ip);
579 }
580 goto std_return;
581} 431}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5a..4c70bf5e9985 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
111 */ 111 */
112 ASSERT(ip->i_itemp != NULL); 112 ASSERT(ip->i_itemp != NULL);
113 ASSERT(lock_flags & XFS_ILOCK_EXCL); 113 ASSERT(lock_flags & XFS_ILOCK_EXCL);
114 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 114 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
116 ismrlocked(&ip->i_iolock, MR_UPDATE)); 116 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); 118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
120 ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS))); 120 xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); 122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
123 123
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
185 xfs_inode_log_item_t *iip; 185 xfs_inode_log_item_t *iip;
186 186
187 ASSERT(ip->i_transp == NULL); 187 ASSERT(ip->i_transp == NULL);
188 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 188 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
189 ASSERT(lock_flags & XFS_ILOCK_EXCL); 189 ASSERT(lock_flags & XFS_ILOCK_EXCL);
190 if (ip->i_itemp == NULL) 190 if (ip->i_itemp == NULL)
191 xfs_inode_item_init(ip, ip->i_mount); 191 xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
232{ 232{
233 ASSERT(ip->i_transp == tp); 233 ASSERT(ip->i_transp == tp);
234 ASSERT(ip->i_itemp != NULL); 234 ASSERT(ip->i_itemp != NULL);
235 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
236 236
237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
238} 238}
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
257 257
258 ASSERT(ip->i_transp == tp); 258 ASSERT(ip->i_transp == tp);
259 ASSERT(ip->i_itemp != NULL); 259 ASSERT(ip->i_itemp != NULL);
260 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
261 261
262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); 262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
263 ASSERT(lidp != NULL); 263 ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e40772..98e5f110ba5f 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
41#include "xfs_utils.h" 41#include "xfs_utils.h"
42 42
43 43
44int
45xfs_dir_lookup_int(
46 xfs_inode_t *dp,
47 uint lock_mode,
48 struct xfs_name *name,
49 xfs_ino_t *inum,
50 xfs_inode_t **ipp)
51{
52 int error;
53
54 xfs_itrace_entry(dp);
55
56 error = xfs_dir_lookup(NULL, dp, name, inum);
57 if (!error) {
58 /*
59 * Unlock the directory. We do this because we can't
60 * hold the directory lock while doing the vn_get()
61 * in xfs_iget(). Doing so could cause us to hold
62 * a lock while waiting for the inode to finish
63 * being inactive while it's waiting for a log
64 * reservation in the inactive routine.
65 */
66 xfs_iunlock(dp, lock_mode);
67 error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
68 xfs_ilock(dp, lock_mode);
69
70 if (error) {
71 *ipp = NULL;
72 } else if ((*ipp)->i_d.di_mode == 0) {
73 /*
74 * The inode has been freed. Something is
75 * wrong so just get out of here.
76 */
77 xfs_iunlock(dp, lock_mode);
78 xfs_iput_new(*ipp, 0);
79 *ipp = NULL;
80 xfs_ilock(dp, lock_mode);
81 error = XFS_ERROR(ENOENT);
82 }
83 }
84 return error;
85}
86
87/* 44/*
88 * Allocates a new inode from disk and return a pointer to the 45 * Allocates a new inode from disk and return a pointer to the
89 * incore copy. This routine will internally commit the current 46 * incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
310{ 267{
311 xfs_mount_t *mp; 268 xfs_mount_t *mp;
312 269
313 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
314 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
315 272
316 ip->i_d.di_version = XFS_DINODE_VERSION_2; 273 ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2cab..f316cb85d8e2 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23 23
24extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
25 xfs_ino_t *, xfs_inode_t **);
26extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
27extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
28 xfs_dev_t, cred_t *, prid_t, int, 26 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe479..30bacd8bb0e5 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
186 kmem_zone_destroy(xfs_efi_zone); 186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone); 187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone); 188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
189} 190}
190 191
191/* 192/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f7..70702a60b4bb 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
76} 76}
77 77
78/* 78/*
79 * xfs_getattr
80 */
81int
82xfs_getattr(
83 xfs_inode_t *ip,
84 bhv_vattr_t *vap,
85 int flags)
86{
87 bhv_vnode_t *vp = XFS_ITOV(ip);
88 xfs_mount_t *mp = ip->i_mount;
89
90 xfs_itrace_entry(ip);
91
92 if (XFS_FORCED_SHUTDOWN(mp))
93 return XFS_ERROR(EIO);
94
95 if (!(flags & ATTR_LAZY))
96 xfs_ilock(ip, XFS_ILOCK_SHARED);
97
98 vap->va_size = XFS_ISIZE(ip);
99 if (vap->va_mask == XFS_AT_SIZE)
100 goto all_done;
101
102 vap->va_nblocks =
103 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
104 vap->va_nodeid = ip->i_ino;
105#if XFS_BIG_INUMS
106 vap->va_nodeid += mp->m_inoadd;
107#endif
108 vap->va_nlink = ip->i_d.di_nlink;
109
110 /*
111 * Quick exit for non-stat callers
112 */
113 if ((vap->va_mask &
114 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
115 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
116 goto all_done;
117
118 /*
119 * Copy from in-core inode.
120 */
121 vap->va_mode = ip->i_d.di_mode;
122 vap->va_uid = ip->i_d.di_uid;
123 vap->va_gid = ip->i_d.di_gid;
124 vap->va_projid = ip->i_d.di_projid;
125
126 /*
127 * Check vnode type block/char vs. everything else.
128 */
129 switch (ip->i_d.di_mode & S_IFMT) {
130 case S_IFBLK:
131 case S_IFCHR:
132 vap->va_rdev = ip->i_df.if_u2.if_rdev;
133 vap->va_blocksize = BLKDEV_IOSIZE;
134 break;
135 default:
136 vap->va_rdev = 0;
137
138 if (!(XFS_IS_REALTIME_INODE(ip))) {
139 vap->va_blocksize = xfs_preferred_iosize(mp);
140 } else {
141
142 /*
143 * If the file blocks are being allocated from a
144 * realtime partition, then return the inode's
145 * realtime extent size or the realtime volume's
146 * extent size.
147 */
148 vap->va_blocksize =
149 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
150 }
151 break;
152 }
153
154 vn_atime_to_timespec(vp, &vap->va_atime);
155 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
156 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
157 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
158 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
159
160 /*
161 * Exit for stat callers. See if any of the rest of the fields
162 * to be filled in are needed.
163 */
164 if ((vap->va_mask &
165 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
166 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
167 goto all_done;
168
169 /*
170 * Convert di_flags to xflags.
171 */
172 vap->va_xflags = xfs_ip2xflags(ip);
173
174 /*
175 * Exit for inode revalidate. See if any of the rest of
176 * the fields to be filled in are needed.
177 */
178 if ((vap->va_mask &
179 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
180 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
181 goto all_done;
182
183 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
184 vap->va_nextents =
185 (ip->i_df.if_flags & XFS_IFEXTENTS) ?
186 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
187 ip->i_d.di_nextents;
188 if (ip->i_afp)
189 vap->va_anextents =
190 (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
191 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
192 ip->i_d.di_anextents;
193 else
194 vap->va_anextents = 0;
195 vap->va_gen = ip->i_d.di_gen;
196
197 all_done:
198 if (!(flags & ATTR_LAZY))
199 xfs_iunlock(ip, XFS_ILOCK_SHARED);
200 return 0;
201}
202
203
204/*
205 * xfs_setattr 79 * xfs_setattr
206 */ 80 */
207int 81int
@@ -211,7 +85,6 @@ xfs_setattr(
211 int flags, 85 int flags,
212 cred_t *credp) 86 cred_t *credp)
213{ 87{
214 bhv_vnode_t *vp = XFS_ITOV(ip);
215 xfs_mount_t *mp = ip->i_mount; 88 xfs_mount_t *mp = ip->i_mount;
216 xfs_trans_t *tp; 89 xfs_trans_t *tp;
217 int mask; 90 int mask;
@@ -222,7 +95,6 @@ xfs_setattr(
222 gid_t gid=0, igid=0; 95 gid_t gid=0, igid=0;
223 int timeflags = 0; 96 int timeflags = 0;
224 xfs_prid_t projid=0, iprojid=0; 97 xfs_prid_t projid=0, iprojid=0;
225 int mandlock_before, mandlock_after;
226 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
227 int file_owner; 99 int file_owner;
228 int need_iolock = 1; 100 int need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
383 m |= S_ISGID; 255 m |= S_ISGID;
384#if 0 256#if 0
385 /* Linux allows this, Irix doesn't. */ 257 /* Linux allows this, Irix doesn't. */
386 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
387 m |= S_ISVTX; 259 m |= S_ISVTX;
388#endif 260#endif
389 if (m && !capable(CAP_FSETID)) 261 if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
461 goto error_return; 333 goto error_return;
462 } 334 }
463 335
464 if (VN_ISDIR(vp)) { 336 if (S_ISDIR(ip->i_d.di_mode)) {
465 code = XFS_ERROR(EISDIR); 337 code = XFS_ERROR(EISDIR);
466 goto error_return; 338 goto error_return;
467 } else if (!VN_ISREG(vp)) { 339 } else if (!S_ISREG(ip->i_d.di_mode)) {
468 code = XFS_ERROR(EINVAL); 340 code = XFS_ERROR(EINVAL);
469 goto error_return; 341 goto error_return;
470 } 342 }
@@ -626,9 +498,6 @@ xfs_setattr(
626 xfs_trans_ihold(tp, ip); 498 xfs_trans_ihold(tp, ip);
627 } 499 }
628 500
629 /* determine whether mandatory locking mode changes */
630 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
631
632 /* 501 /*
633 * Truncate file. Must have write permission and not be a directory. 502 * Truncate file. Must have write permission and not be a directory.
634 */ 503 */
@@ -858,13 +727,6 @@ xfs_setattr(
858 code = xfs_trans_commit(tp, commit_flags); 727 code = xfs_trans_commit(tp, commit_flags);
859 } 728 }
860 729
861 /*
862 * If the (regular) file's mandatory locking mode changed, then
863 * notify the vnode. We do this under the inode lock to prevent
864 * racing calls to vop_vnode_change.
865 */
866 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
867
868 xfs_iunlock(ip, lock_flags); 730 xfs_iunlock(ip, lock_flags);
869 731
870 /* 732 /*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
1443 int error; 1305 int error;
1444 xfs_mount_t *mp; 1306 xfs_mount_t *mp;
1445 1307
1446 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1308 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1447 tp = *tpp; 1309 tp = *tpp;
1448 mp = ip->i_mount; 1310 mp = ip->i_mount;
1449 ASSERT(ip->i_d.di_forkoff != 0); 1311 ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
1491 xfs_mount_t *mp = ip->i_mount; 1353 xfs_mount_t *mp = ip->i_mount;
1492 int error; 1354 int error;
1493 1355
1494 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1356 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
1495 return 0; 1357 return 0;
1496 1358
1497 /* If this is a read-only mount, don't do this (would generate I/O) */ 1359 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
1774 struct xfs_name *name, 1636 struct xfs_name *name,
1775 xfs_inode_t **ipp) 1637 xfs_inode_t **ipp)
1776{ 1638{
1777 xfs_inode_t *ip; 1639 xfs_ino_t inum;
1778 xfs_ino_t e_inum;
1779 int error; 1640 int error;
1780 uint lock_mode; 1641 uint lock_mode;
1781 1642
@@ -1785,12 +1646,21 @@ xfs_lookup(
1785 return XFS_ERROR(EIO); 1646 return XFS_ERROR(EIO);
1786 1647
1787 lock_mode = xfs_ilock_map_shared(dp); 1648 lock_mode = xfs_ilock_map_shared(dp);
1788 error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); 1649 error = xfs_dir_lookup(NULL, dp, name, &inum);
1789 if (!error) {
1790 *ipp = ip;
1791 xfs_itrace_ref(ip);
1792 }
1793 xfs_iunlock_map_shared(dp, lock_mode); 1650 xfs_iunlock_map_shared(dp, lock_mode);
1651
1652 if (error)
1653 goto out;
1654
1655 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1656 if (error)
1657 goto out;
1658
1659 xfs_itrace_ref(*ipp);
1660 return 0;
1661
1662 out:
1663 *ipp = NULL;
1794 return error; 1664 return error;
1795} 1665}
1796 1666
@@ -1906,7 +1776,7 @@ xfs_create(
1906 * It is locked (and joined to the transaction). 1776 * It is locked (and joined to the transaction).
1907 */ 1777 */
1908 1778
1909 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1910 1780
1911 /* 1781 /*
1912 * Now we join the directory inode to the transaction. We do not do it 1782 * Now we join the directory inode to the transaction. We do not do it
@@ -2112,7 +1982,7 @@ again:
2112 1982
2113 ips[0] = ip; 1983 ips[0] = ip;
2114 ips[1] = dp; 1984 ips[1] = dp;
2115 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 1985 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2116 } 1986 }
2117 /* else e_inum == dp->i_ino */ 1987 /* else e_inum == dp->i_ino */
2118 /* This can happen if we're asked to lock /x/.. 1988 /* This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
2160xfs_lock_inodes( 2030xfs_lock_inodes(
2161 xfs_inode_t **ips, 2031 xfs_inode_t **ips,
2162 int inodes, 2032 int inodes,
2163 int first_locked,
2164 uint lock_mode) 2033 uint lock_mode)
2165{ 2034{
2166 int attempts = 0, i, j, try_lock; 2035 int attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
2168 2037
2169 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2038 ASSERT(ips && (inodes >= 2)); /* we need at least two */
2170 2039
2171 if (first_locked) { 2040 try_lock = 0;
2172 try_lock = 1; 2041 i = 0;
2173 i = 1;
2174 } else {
2175 try_lock = 0;
2176 i = 0;
2177 }
2178 2042
2179again: 2043again:
2180 for (; i < inodes; i++) { 2044 for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
2298 return error; 2162 return error;
2299 } 2163 }
2300 2164
2301 /*
2302 * We need to get a reference to ip before we get our log
2303 * reservation. The reason for this is that we cannot call
2304 * xfs_iget for an inode for which we do not have a reference
2305 * once we've acquired a log reservation. This is because the
2306 * inode we are trying to get might be in xfs_inactive going
2307 * for a log reservation. Since we'll have to wait for the
2308 * inactive code to complete before returning from xfs_iget,
2309 * we need to make sure that we don't have log space reserved
2310 * when we call xfs_iget. Instead we get an unlocked reference
2311 * to the inode before getting our log reservation.
2312 */
2313 IHOLD(ip);
2314
2315 xfs_itrace_entry(ip); 2165 xfs_itrace_entry(ip);
2316 xfs_itrace_ref(ip); 2166 xfs_itrace_ref(ip);
2317 2167
2318 error = XFS_QM_DQATTACH(mp, dp, 0); 2168 error = XFS_QM_DQATTACH(mp, dp, 0);
2319 if (!error && dp != ip) 2169 if (!error)
2320 error = XFS_QM_DQATTACH(mp, ip, 0); 2170 error = XFS_QM_DQATTACH(mp, ip, 0);
2321 if (error) { 2171 if (error) {
2322 REMOVE_DEBUG_TRACE(__LINE__); 2172 REMOVE_DEBUG_TRACE(__LINE__);
2323 IRELE(ip);
2324 goto std_return; 2173 goto std_return;
2325 } 2174 }
2326 2175
@@ -2347,7 +2196,6 @@ xfs_remove(
2347 ASSERT(error != ENOSPC); 2196 ASSERT(error != ENOSPC);
2348 REMOVE_DEBUG_TRACE(__LINE__); 2197 REMOVE_DEBUG_TRACE(__LINE__);
2349 xfs_trans_cancel(tp, 0); 2198 xfs_trans_cancel(tp, 0);
2350 IRELE(ip);
2351 return error; 2199 return error;
2352 } 2200 }
2353 2201
@@ -2355,7 +2203,6 @@ xfs_remove(
2355 if (error) { 2203 if (error) {
2356 REMOVE_DEBUG_TRACE(__LINE__); 2204 REMOVE_DEBUG_TRACE(__LINE__);
2357 xfs_trans_cancel(tp, cancel_flags); 2205 xfs_trans_cancel(tp, cancel_flags);
2358 IRELE(ip);
2359 goto std_return; 2206 goto std_return;
2360 } 2207 }
2361 2208
@@ -2363,23 +2210,18 @@ xfs_remove(
2363 * At this point, we've gotten both the directory and the entry 2210 * At this point, we've gotten both the directory and the entry
2364 * inodes locked. 2211 * inodes locked.
2365 */ 2212 */
2213 IHOLD(ip);
2366 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2214 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2367 if (dp != ip) { 2215
2368 /* 2216 IHOLD(dp);
2369 * Increment vnode ref count only in this case since 2217 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2370 * there's an extra vnode reference in the case where
2371 * dp == ip.
2372 */
2373 IHOLD(dp);
2374 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2375 }
2376 2218
2377 /* 2219 /*
2378 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2220 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2379 */ 2221 */
2380 XFS_BMAP_INIT(&free_list, &first_block); 2222 XFS_BMAP_INIT(&free_list, &first_block);
2381 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2223 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2382 &first_block, &free_list, 0); 2224 &first_block, &free_list, resblks);
2383 if (error) { 2225 if (error) {
2384 ASSERT(error != ENOENT); 2226 ASSERT(error != ENOENT);
2385 REMOVE_DEBUG_TRACE(__LINE__); 2227 REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
2402 link_zero = (ip)->i_d.di_nlink==0; 2244 link_zero = (ip)->i_d.di_nlink==0;
2403 2245
2404 /* 2246 /*
2405 * Take an extra ref on the inode so that it doesn't
2406 * go to xfs_inactive() from within the commit.
2407 */
2408 IHOLD(ip);
2409
2410 /*
2411 * If this is a synchronous mount, make sure that the 2247 * If this is a synchronous mount, make sure that the
2412 * remove transaction goes to disk before returning to 2248 * remove transaction goes to disk before returning to
2413 * the user. 2249 * the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
2423 } 2259 }
2424 2260
2425 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2261 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2426 if (error) { 2262 if (error)
2427 IRELE(ip);
2428 goto std_return; 2263 goto std_return;
2429 }
2430 2264
2431 /* 2265 /*
2432 * If we are using filestreams, kill the stream association. 2266 * If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
2438 xfs_filestream_deassociate(ip); 2272 xfs_filestream_deassociate(ip);
2439 2273
2440 xfs_itrace_exit(ip); 2274 xfs_itrace_exit(ip);
2441 IRELE(ip);
2442 2275
2443/* Fall through to std_return with error = 0 */ 2276/* Fall through to std_return with error = 0 */
2444 std_return: 2277 std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
2467 cancel_flags |= XFS_TRANS_ABORT; 2300 cancel_flags |= XFS_TRANS_ABORT;
2468 xfs_trans_cancel(tp, cancel_flags); 2301 xfs_trans_cancel(tp, cancel_flags);
2469 2302
2470 IRELE(ip);
2471
2472 goto std_return; 2303 goto std_return;
2473} 2304}
2474 2305
@@ -2536,7 +2367,7 @@ xfs_link(
2536 ips[1] = sip; 2367 ips[1] = sip;
2537 } 2368 }
2538 2369
2539 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2370 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2540 2371
2541 /* 2372 /*
2542 * Increment vnode ref counts since xfs_trans_commit & 2373 * Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
2840 struct xfs_name *name, 2671 struct xfs_name *name,
2841 xfs_inode_t *cdp) 2672 xfs_inode_t *cdp)
2842{ 2673{
2843 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2844 xfs_mount_t *mp = dp->i_mount; 2674 xfs_mount_t *mp = dp->i_mount;
2845 xfs_trans_t *tp; 2675 xfs_trans_t *tp;
2846 int error; 2676 int error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
2866 } 2696 }
2867 2697
2868 /* 2698 /*
2869 * We need to get a reference to cdp before we get our log
2870 * reservation. The reason for this is that we cannot call
2871 * xfs_iget for an inode for which we do not have a reference
2872 * once we've acquired a log reservation. This is because the
2873 * inode we are trying to get might be in xfs_inactive going
2874 * for a log reservation. Since we'll have to wait for the
2875 * inactive code to complete before returning from xfs_iget,
2876 * we need to make sure that we don't have log space reserved
2877 * when we call xfs_iget. Instead we get an unlocked reference
2878 * to the inode before getting our log reservation.
2879 */
2880 IHOLD(cdp);
2881
2882 /*
2883 * Get the dquots for the inodes. 2699 * Get the dquots for the inodes.
2884 */ 2700 */
2885 error = XFS_QM_DQATTACH(mp, dp, 0); 2701 error = XFS_QM_DQATTACH(mp, dp, 0);
2886 if (!error && dp != cdp) 2702 if (!error)
2887 error = XFS_QM_DQATTACH(mp, cdp, 0); 2703 error = XFS_QM_DQATTACH(mp, cdp, 0);
2888 if (error) { 2704 if (error) {
2889 IRELE(cdp);
2890 REMOVE_DEBUG_TRACE(__LINE__); 2705 REMOVE_DEBUG_TRACE(__LINE__);
2891 goto std_return; 2706 goto std_return;
2892 } 2707 }
@@ -2913,7 +2728,6 @@ xfs_rmdir(
2913 if (error) { 2728 if (error) {
2914 ASSERT(error != ENOSPC); 2729 ASSERT(error != ENOSPC);
2915 cancel_flags = 0; 2730 cancel_flags = 0;
2916 IRELE(cdp);
2917 goto error_return; 2731 goto error_return;
2918 } 2732 }
2919 XFS_BMAP_INIT(&free_list, &first_block); 2733 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
2927 error = xfs_lock_dir_and_entry(dp, cdp); 2741 error = xfs_lock_dir_and_entry(dp, cdp);
2928 if (error) { 2742 if (error) {
2929 xfs_trans_cancel(tp, cancel_flags); 2743 xfs_trans_cancel(tp, cancel_flags);
2930 IRELE(cdp);
2931 goto std_return; 2744 goto std_return;
2932 } 2745 }
2933 2746
2747 IHOLD(dp);
2934 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2748 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2935 if (dp != cdp) {
2936 /*
2937 * Only increment the parent directory vnode count if
2938 * we didn't bump it in looking up cdp. The only time
2939 * we don't bump it is when we're looking up ".".
2940 */
2941 VN_HOLD(dir_vp);
2942 }
2943 2749
2944 xfs_itrace_ref(cdp); 2750 IHOLD(cdp);
2945 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 2751 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2946 2752
2947 ASSERT(cdp->i_d.di_nlink >= 2); 2753 ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
2995 last_cdp_link = (cdp)->i_d.di_nlink==0; 2801 last_cdp_link = (cdp)->i_d.di_nlink==0;
2996 2802
2997 /* 2803 /*
2998 * Take an extra ref on the child vnode so that it
2999 * does not go to xfs_inactive() from within the commit.
3000 */
3001 IHOLD(cdp);
3002
3003 /*
3004 * If this is a synchronous mount, make sure that the 2804 * If this is a synchronous mount, make sure that the
3005 * rmdir transaction goes to disk before returning to 2805 * rmdir transaction goes to disk before returning to
3006 * the user. 2806 * the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
3014 xfs_bmap_cancel(&free_list); 2814 xfs_bmap_cancel(&free_list);
3015 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2815 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3016 XFS_TRANS_ABORT)); 2816 XFS_TRANS_ABORT));
3017 IRELE(cdp);
3018 goto std_return; 2817 goto std_return;
3019 } 2818 }
3020 2819
3021 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2820 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3022 if (error) { 2821 if (error) {
3023 IRELE(cdp);
3024 goto std_return; 2822 goto std_return;
3025 } 2823 }
3026 2824
3027 2825
3028 IRELE(cdp);
3029
3030 /* Fall through to std_return with error = 0 or the errno 2826 /* Fall through to std_return with error = 0 or the errno
3031 * from xfs_trans_commit. */ 2827 * from xfs_trans_commit. */
3032 std_return: 2828 std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2c..8abe8f186e20 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
19int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
20 struct cred *credp); 19 struct cred *credp);
21int xfs_readlink(struct xfs_inode *ip, char *link); 20int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
48 struct cred *credp, int attr_flags); 47 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 48int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 49 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name); 50 struct xfs_name *target_name, struct xfs_inode *target_ip);
52int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 51int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
53 int *valuelenp, int flags, cred_t *cred); 52 int *valuelenp, int flags);
54int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 53int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
55 int valuelen, int flags); 54 int valuelen, int flags);
56int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 55int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
61ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 60ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
62 const struct iovec *iovp, unsigned int segs, 61 const struct iovec *iovp, unsigned int segs,
63 loff_t *offset, int ioflags); 62 loff_t *offset, int ioflags);
64ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
65 loff_t *offset, int ioflags, size_t count,
66 read_actor_t actor, void *target);
67ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, 63ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
68 loff_t *ppos, struct pipe_inode_info *pipe, size_t count, 64 loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
69 int flags, int ioflags); 65 int flags, int ioflags);